From cc8b6801b240dad881d8400793b63e94ab7369bb Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 16 Jan 2024 17:11:35 +0000 Subject: [PATCH] fixing extension count bug, tidying up router --- backend/app/plan/router.py | 47 +++--- backend/app/utils.py | 1 - etl/epc/Record.py | 311 ++++++++++++++++++++----------------- 3 files changed, 184 insertions(+), 175 deletions(-) diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index d35ea98b..a3732856 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -2,7 +2,6 @@ from datetime import datetime import numpy as np import pandas as pd -from epc_api.client import EpcClient from etl.epc.Record import EPCRecord from backend.SearchEpc import SearchEpc from fastapi import APIRouter, Depends @@ -25,7 +24,7 @@ from backend.app.db.models.portfolio import rating_lookup from backend.app.dependencies import validate_token from backend.app.plan.schemas import PlanTriggerRequest from backend.app.plan.utils import create_recommendation_scoring_data, get_cleaned -from backend.app.utils import epc_to_sap_lower_bound, read_csv_from_s3, read_parquet_from_s3, sap_to_epc +from backend.app.utils import epc_to_sap_lower_bound, read_csv_from_s3, sap_to_epc from backend.ml_models.api import ModelApi from backend.Property import Property @@ -53,7 +52,6 @@ router = APIRouter( responses={404: {"description": "Not found"}} ) -# TODO: Need to install base.txt requirements into new env @router.post("/trigger") async def trigger_plan(body: PlanTriggerRequest): @@ -64,8 +62,10 @@ async def trigger_plan(body: PlanTriggerRequest): try: session.begin() logger.info("Getting the inputs") - epc_client = EpcClient(auth_token=get_settings().EPC_AUTH_TOKEN) plan_input = read_csv_from_s3(bucket_name=get_settings().PLAN_TRIGGER_BUCKET, filepath=body.trigger_file_path) + cleaning_data = read_dataframe_from_s3_parquet( + bucket_name=get_settings().DATA_BUCKET, file_key="sap_change_model/cleaning_dataset.parquet", + ) input_properties = [] @@ -95,26 +95,25 @@ async def trigger_plan(body: PlanTriggerRequest): heat_demand_target=None ) - epc_records ={ + epc_records = { 'original_epc': epc_searcher.newest_epc, 'full_sap_epc': epc_searcher.full_sap_epc, - 'old_data': epc_searcher.old_data, + 'old_data': epc_searcher.older_epcs, } - prepared_epc = EPCRecord(epc_records=epc_records, run_mode="newdata", cleaning_data=cleaning_data) # This uses all the epc records to clean the data - + prepared_epc = EPCRecord(epc_records=epc_records, run_mode="newdata", + cleaning_data=cleaning_data) # This uses all the epc records to clean the data + input_properties.append( Property( id=property_id, - address1=config['address'], - postcode=config['postcode'], + address=epc_searcher.address_clean, + postcode=epc_searcher.postcode_clean, epc_record=prepared_epc, ) ) - - if not input_properties: - + if not input_properties: return Response(status_code=204) # The materials data could be cached or local so we don't need to make @@ -127,9 +126,6 @@ async def trigger_plan(body: PlanTriggerRequest): uprn_filenames = read_dataframe_from_s3_parquet( bucket_name=get_settings().DATA_BUCKET, file_key="spatial/filename_meta.parquet" ) - cleaning_data = read_dataframe_from_s3_parquet( - bucket_name=get_settings().DATA_BUCKET, file_key="sap_change_model/cleaning_dataset.parquet", - ) photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket=get_settings().DATA_BUCKET) logger.info("Getting spatial data") @@ -160,12 +156,12 @@ async def trigger_plan(body: PlanTriggerRequest): recommendations_scoring_data.extend(p.recommendations_scoring_data) - logger.info("Preparing data for scoring in sap change api") recommendations_scoring_data = pd.DataFrame(recommendations_scoring_data) recommendations_scoring_data = recommendations_scoring_data.drop( - columns=["rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending", "carbon_ending"] - ) + columns=["rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending", + "carbon_ending"] + ) model_api = ModelApi(portfolio_id=body.portfolio_id, timestamp=created_at) all_predictions = model_api.predict_all( @@ -308,20 +304,20 @@ async def trigger_plan(body: PlanTriggerRequest): combined_recommendations_scoring_data = pd.DataFrame(combined_recommendations_scoring_data) # Perform the same cleaning as in the model - first clean number of room variables though - combined_recommendations_scoring_data = DataProcessor.apply_averages_cleaning( + combined_recommendations_scoring_data = EPCDataProcessor.apply_averages_cleaning( data_to_clean=combined_recommendations_scoring_data, cleaning_data=cleaning_data, cols_to_merge_on=['PROPERTY_TYPE', 'BUILT_FORM', 'CONSTRUCTION_AGE_BAND', 'LOCAL_AUTHORITY'], colnames=["NUMBER_HABITABLE_ROOMS", "NUMBER_HEATED_ROOMS"], ) - combined_recommendations_scoring_data = DataProcessor.apply_averages_cleaning( + combined_recommendations_scoring_data = EPCDataProcessor.apply_averages_cleaning( data_to_clean=combined_recommendations_scoring_data, cleaning_data=cleaning_data, cols_to_merge_on=COLUMNS_TO_MERGE_ON + ["LOCAL_AUTHORITY"], ).drop(columns=["LOCAL_AUTHORITY"]) - combined_recommendations_scoring_data = DataProcessor.clean_missings_after_description_process( + combined_recommendations_scoring_data = EPCDataProcessor.clean_missings_after_description_process( combined_recommendations_scoring_data, ignore_cols=[ c for c in combined_recommendations_scoring_data.columns if ("thermal_transmittance" in c) or ( @@ -329,7 +325,7 @@ async def trigger_plan(body: PlanTriggerRequest): ] ) - combined_recommendations_scoring_data = DataProcessor.clean_efficiency_variables( + combined_recommendations_scoring_data = EPCDataProcessor.clean_efficiency_variables( combined_recommendations_scoring_data ) @@ -472,11 +468,6 @@ async def trigger_plan(body: PlanTriggerRequest): update_or_create_property_spatial_details(session, p.uprn, p.spatial) - # TODO: TEMP - if p.data["uprn"] == "": - print("Get rid of me!") - p.data["uprn"] = 0 - property_data = p.get_full_property_data() update_property_data( session, property_id=p.id, portfolio_id=body.portfolio_id, property_data=property_data diff --git a/backend/app/utils.py b/backend/app/utils.py index 9a03ab21..6801da65 100644 --- a/backend/app/utils.py +++ b/backend/app/utils.py @@ -4,7 +4,6 @@ from io import StringIO import string import secrets import logging -import pandas as pd from io import BytesIO diff --git a/etl/epc/Record.py b/etl/epc/Record.py index 85a8f0f2..39bcf971 100644 --- a/etl/epc/Record.py +++ b/etl/epc/Record.py @@ -1,8 +1,8 @@ from datetime import datetime from dataclasses import dataclass from etl.epc.ValidationConfiguration import ( - EPCRecordValidationConfiguration, - EPCDifferenceRecordValidationConfiguration, + EPCRecordValidationConfiguration, + EPCDifferenceRecordValidationConfiguration, EPCDifferenceRecordFixedDataValidationConfiguration ) from etl.epc.DataProcessor import EPCDataProcessor @@ -33,6 +33,7 @@ EFFICIENCY_FEATURES = [x.lower() for x in EFFICIENCY_FEATURES] ENVIRONMENT = os.environ.get('ENVIRONMENT', 'dev') DATA_BUCKET = os.environ.get('DATA_BUCKET', 'retrofit-data-dev' if ENVIRONMENT == 'dev' else None) + @dataclass class EPCRecord: """ @@ -41,44 +42,44 @@ class EPCRecord: uprn: int = None walls_description: str = None - floor_description : str = None - lighting_description : str = None - roof_description : str = None - mainheat_description : str = None - hotwater_description : str = None - main_fuel : str = None - mechanical_ventilation : str = None - secondheat_description : str = None - windows_description : str = None - glazed_type : str = None - multi_glaze_proportion : float = None - low_energy_lighting : float = None - number_open_fireplaces : float = None - mainheatcont_description : str = None - solar_water_heating_flag : str = None - photo_supply : float = None - transaction_type : str = None - energy_tariff : str = None - extension_count : float = None - total_floor_area : float = None - floor_height : float = None - hot_water_energy_eff : str = None - floor_energy_eff : str = None - windows_energy_eff : str = None - walls_energy_eff : str = None - sheating_energy_eff : str = None - roof_energy_eff : str = None - mainheat_energy_eff : str = None - mainheatc_energy_eff : str = None - lighting_energy_eff : str = None - potential_energy_efficiency : float = None - environment_impact_potential : float = None - energy_consumption_potential : float = None - co2_emissions_potential : float = None - lodgement_date : str = None - current_energy_efficiency : int = None - energy_consumption_current : int = None - co2_emissions_current : float = None + floor_description: str = None + lighting_description: str = None + roof_description: str = None + mainheat_description: str = None + hotwater_description: str = None + main_fuel: str = None + mechanical_ventilation: str = None + secondheat_description: str = None + windows_description: str = None + glazed_type: str = None + multi_glaze_proportion: float = None + low_energy_lighting: float = None + number_open_fireplaces: float = None + mainheatcont_description: str = None + solar_water_heating_flag: str = None + photo_supply: float = None + transaction_type: str = None + energy_tariff: str = None + extension_count: float = None + total_floor_area: float = None + floor_height: float = None + hot_water_energy_eff: str = None + floor_energy_eff: str = None + windows_energy_eff: str = None + walls_energy_eff: str = None + sheating_energy_eff: str = None + roof_energy_eff: str = None + mainheat_energy_eff: str = None + mainheatc_energy_eff: str = None + lighting_energy_eff: str = None + potential_energy_efficiency: float = None + environment_impact_potential: float = None + energy_consumption_potential: float = None + co2_emissions_potential: float = None + lodgement_date: str = None + current_energy_efficiency: int = None + energy_consumption_current: int = None + co2_emissions_current: float = None # u_values_walls = None # u_values_roof = None @@ -107,7 +108,7 @@ class EPCRecord: # self.WALLS_DESCRIPTION = 'check' # Could also have cleaning of records if needed - if self.run_mode == "training": + if self.run_mode == "training": self.validation_configuration = EPCRecordValidationConfiguration # self._field_validation() return @@ -115,7 +116,7 @@ class EPCRecord: # We are running in newdata mode if self.epc_records is None: raise ValueError("Must provide epc records if running in newdata mode") - + self.prepared_epc = self.epc_records['original_epc'] self.original_epc = self.epc_records['original_epc'].copy() @@ -123,8 +124,8 @@ class EPCRecord: self.old_data = self.epc_records['old_data'] if self.cleaning_data is None: - raise ValueError("Must provide cleaning data if running in newdata mode") - + raise ValueError("Must provide cleaning data if running in newdata mode") + self._clean_records_using_epc_records() self._clean_with_data_processor() self._temp_uprn_catch() @@ -175,7 +176,6 @@ class EPCRecord: pd.to_datetime(lodgement_date) - pd.to_datetime(EARLIEST_EPC_DATE) ).dt.days - def _temp_uprn_catch(self): """ Catch the case we do now have uprn @@ -188,15 +188,14 @@ class EPCRecord: This method will clean the records using the data processor """ epc_data_processor = EPCDataProcessor( - data=self.epc_record_as_dataframe("prepared_epc"), - run_mode="newdata", + data=self.epc_record_as_dataframe("prepared_epc"), + run_mode="newdata", cleaning_averages=self.cleaning_data ) epc_data_processor.prepare_data() self.prepared_epc = epc_data_processor.data.to_dict(orient="records")[0] - def _expand_prepared_epc_to_attributes(self): """ This method will expand the prepared epc to attributes @@ -207,44 +206,44 @@ class EPCRecord: self.uprn: int = int(self.prepared_epc["uprn"]) self.walls_description: str = self.prepared_epc["walls_description"] - self.floor_description : str = self.prepared_epc["floor_description"] - self.lighting_description : str = self.prepared_epc["lighting_description"] - self.roof_description : str = self.prepared_epc["roof_description"] - self.mainheat_description : str = self.prepared_epc["mainheat_description"] - self.hotwater_description : str = self.prepared_epc["hotwater_description"] - self.main_fuel : str = self.prepared_epc["main_fuel"] - self.mechanical_ventilation : str = self.prepared_epc["mechanical_ventilation"] - self.secondheat_description : str = self.prepared_epc["secondheat_description"] - self.windows_description : str = self.prepared_epc["windows_description"] - self.glazed_type : str = self.prepared_epc["glazed_type"] - self.multi_glaze_proportion : float = float(self.prepared_epc["multi_glaze_proportion"]) - self.low_energy_lighting : float = float(self.prepared_epc["low_energy_lighting"]) - self.number_open_fireplaces : float = float(self.prepared_epc["number_open_fireplaces"]) - self.mainheatcont_description : str = self.prepared_epc["mainheatcont_description"] - self.solar_water_heating_flag : str = self.prepared_epc["solar_water_heating_flag"] - self.photo_supply : float = float(self.prepared_epc["photo_supply"]) - self.transaction_type : str = self.prepared_epc["transaction_type"] - self.energy_tariff : str = self.prepared_epc["energy_tariff"] - self.extension_count : float = float(self.prepared_epc["extension_count"]) - self.total_floor_area : float = float(self.prepared_epc["total_floor_area"]) - self.floor_height : float = float(self.prepared_epc["floor_height"]) - self.hot_water_energy_eff : str = self.prepared_epc["hot_water_energy_eff"] - self.floor_energy_eff : str = self.prepared_epc["floor_energy_eff"] - self.windows_energy_eff : str = self.prepared_epc["windows_energy_eff"] - self.walls_energy_eff : str = self.prepared_epc["walls_energy_eff"] - self.sheating_energy_eff : str = self.prepared_epc["sheating_energy_eff"] - self.roof_energy_eff : str = self.prepared_epc["roof_energy_eff"] - self.mainheat_energy_eff : str = self.prepared_epc["mainheat_energy_eff"] - self.mainheatc_energy_eff : str = self.prepared_epc["mainheatc_energy_eff"] - self.lighting_energy_eff : str = self.prepared_epc["lighting_energy_eff"] - self.potential_energy_efficiency : float = float(self.prepared_epc["potential_energy_efficiency"]) - self.environment_impact_potential : float = float(self.prepared_epc["environment_impact_potential"]) - self.energy_consumption_potential : float = float(self.prepared_epc["energy_consumption_potential"]) - self.co2_emissions_potential : float = float(self.prepared_epc["co2_emissions_potential"]) - self.lodgement_date : str = self.prepared_epc["lodgement_date"] - self.current_energy_efficiency : int = int(self.prepared_epc["current_energy_efficiency"]) - self.energy_consumption_current : int = int(self.prepared_epc["energy_consumption_current"]) - self.co2_emissions_current : float = float(self.prepared_epc["co2_emissions_current"]) + self.floor_description: str = self.prepared_epc["floor_description"] + self.lighting_description: str = self.prepared_epc["lighting_description"] + self.roof_description: str = self.prepared_epc["roof_description"] + self.mainheat_description: str = self.prepared_epc["mainheat_description"] + self.hotwater_description: str = self.prepared_epc["hotwater_description"] + self.main_fuel: str = self.prepared_epc["main_fuel"] + self.mechanical_ventilation: str = self.prepared_epc["mechanical_ventilation"] + self.secondheat_description: str = self.prepared_epc["secondheat_description"] + self.windows_description: str = self.prepared_epc["windows_description"] + self.glazed_type: str = self.prepared_epc["glazed_type"] + self.multi_glaze_proportion: float = float(self.prepared_epc["multi_glaze_proportion"]) + self.low_energy_lighting: float = float(self.prepared_epc["low_energy_lighting"]) + self.number_open_fireplaces: float = float(self.prepared_epc["number_open_fireplaces"]) + self.mainheatcont_description: str = self.prepared_epc["mainheatcont_description"] + self.solar_water_heating_flag: str = self.prepared_epc["solar_water_heating_flag"] + self.photo_supply: float = float(self.prepared_epc["photo_supply"]) + self.transaction_type: str = self.prepared_epc["transaction_type"] + self.energy_tariff: str = self.prepared_epc["energy_tariff"] + self.extension_count: float = float(self.prepared_epc["extension_count"]) + self.total_floor_area: float = float(self.prepared_epc["total_floor_area"]) + self.floor_height: float = float(self.prepared_epc["floor_height"]) + self.hot_water_energy_eff: str = self.prepared_epc["hot_water_energy_eff"] + self.floor_energy_eff: str = self.prepared_epc["floor_energy_eff"] + self.windows_energy_eff: str = self.prepared_epc["windows_energy_eff"] + self.walls_energy_eff: str = self.prepared_epc["walls_energy_eff"] + self.sheating_energy_eff: str = self.prepared_epc["sheating_energy_eff"] + self.roof_energy_eff: str = self.prepared_epc["roof_energy_eff"] + self.mainheat_energy_eff: str = self.prepared_epc["mainheat_energy_eff"] + self.mainheatc_energy_eff: str = self.prepared_epc["mainheatc_energy_eff"] + self.lighting_energy_eff: str = self.prepared_epc["lighting_energy_eff"] + self.potential_energy_efficiency: float = float(self.prepared_epc["potential_energy_efficiency"]) + self.environment_impact_potential: float = float(self.prepared_epc["environment_impact_potential"]) + self.energy_consumption_potential: float = float(self.prepared_epc["energy_consumption_potential"]) + self.co2_emissions_potential: float = float(self.prepared_epc["co2_emissions_potential"]) + self.lodgement_date: str = self.prepared_epc["lodgement_date"] + self.current_energy_efficiency: int = int(self.prepared_epc["current_energy_efficiency"]) + self.energy_consumption_current: int = int(self.prepared_epc["energy_consumption_current"]) + self.co2_emissions_current: float = float(self.prepared_epc["co2_emissions_current"]) def _identify_delta_between_prepared_and_original_records(self): """ @@ -258,14 +257,13 @@ class EPCRecord: same_index = df.apply(pd.Series.duplicated).any() self.prepared_epc_delta_metadata = df[same_index[~same_index].index] - def _expand_description_to_features(self): pass def _expand_description_to_uvalues(self): # TODO: can be loop over all the descriptions, or done in one pass - + # def _process_and_prune(self, cleaned_lookup: dict): # """ # This method will merge on the cleaned lookup table and ensure that the building fabric in the @@ -283,7 +281,7 @@ class EPCRecord: # left_on_ending = ( # f"{component}_ending" if component == "main-fuel" else f"{component}_description_ending" # ) - + # self.df2 = self.df.merge( # pd.DataFrame(cleaned_lookup[cleaned_key]), # how="left", @@ -296,7 +294,6 @@ class EPCRecord: # right_on="original_description", # suffixes=("", "_ending") # ) - def _clean_records_using_epc_records(self): """ @@ -328,20 +325,21 @@ class EPCRecord: # self._clean_energy_consumption_current() # self._clean_co2_emissions_current() - def epc_record_as_dataframe(self, epc_type: str = "prepared_epc", use_upper_columns: bool = True, replace_empty_string: bool = False): + def epc_record_as_dataframe(self, epc_type: str = "prepared_epc", use_upper_columns: bool = True, + replace_empty_string: bool = False): """ This method will return the dataframe representation of the epc record """ df = pd.DataFrame.from_dict(self.get(epc_type), orient="index").T if use_upper_columns: - df.columns = [x.upper().replace("-","_") for x in df.columns] + df.columns = [x.upper().replace("-", "_") for x in df.columns] if replace_empty_string: df = df.replace("", np.nan) return df - + def _clean_floor_level(self): """ This method will clean the floor level, if empty or invalid @@ -360,7 +358,7 @@ class EPCRecord: """ if not self.prepared_epc: raise ValueError("EPC Recrod doesn not contain epc data") - + if self.prepared_epc["fixed-lighting-outlets-count"] == "": # We check old EPCs and the full SAP EPC @@ -380,13 +378,15 @@ class EPCRecord: if lighting_data: self.prepared_epc["fixed-lighting-outlets-count"] = round(np.median(lighting_data)) else: - # Use averages from the cleaning dataset, based on the property type, built form, construction age band and local authority + # Use averages from the cleaning dataset, based on the property type, built form, construction age + # band and local authority cleaned_property_data = EPCDataProcessor.apply_averages_cleaning( data_to_clean=self.epc_record_as_dataframe("prepared_epc", replace_empty_string=True), cleaning_data=self.cleaning_data, cols_to_merge_on=['PROPERTY_TYPE', 'BUILT_FORM', 'CONSTRUCTION_AGE_BAND', 'LOCAL_AUTHORITY'], ) - self.prepared_epc["fixed-lighting-outlets-count"] = round(cleaned_property_data["FIXED_LIGHTING_OUTLETS_COUNT"].values[0]) + self.prepared_epc["fixed-lighting-outlets-count"] = round( + cleaned_property_data["FIXED_LIGHTING_OUTLETS_COUNT"].values[0]) else: self.prepared_epc["fixed-lighting-outlets-count"] = float(self.prepared_epc["fixed-lighting-outlets-count"]) @@ -402,7 +402,8 @@ class EPCRecord: if self.construction_age_band is not None and self.construction_age_band not in DATA_ANOMALY_MATCHES: result = result[(result["CONSTRUCTION_AGE_BAND"] == self.construction_age_band)] - if self.prepared_epc["built-form"] not in DATA_ANOMALY_MATCHES and self.prepared_epc["built-form"] in result["BUILT_FORM"]: + if self.prepared_epc["built-form"] not in DATA_ANOMALY_MATCHES and self.prepared_epc["built-form"] in result[ + "BUILT_FORM"]: result = result[(result["BUILT_FORM"] == self.prepared_epc["built-form"])] return result[["NUMBER_HABITABLE_ROOMS", "TOTAL_FLOOR_AREA", "FLOOR_HEIGHT"]].mean() @@ -424,7 +425,8 @@ class EPCRecord: self.property_dimensions = self._filter_property_dimensions(property_dimensions) if not self.prepared_epc["number-habitable-rooms"]: - self.prepared_epc["number-habitable-rooms"] = float(self.property_dimensions["NUMBER_HABITABLE_ROOMS"].round()) + self.prepared_epc["number-habitable-rooms"] = float( + self.property_dimensions["NUMBER_HABITABLE_ROOMS"].round()) else: self.prepared_epc["number-habitable-rooms"] = float(self.prepared_epc["number-habitable-rooms"]) @@ -451,7 +453,7 @@ class EPCRecord: self.prepared_epc["total-floor-area"] = float(self.prepared_epc["total-floor-area"]) - def _clean_mains_gas(self): + def _clean_mains_gas(self): """ This method will clean the mains gas, if empty or invalid """ @@ -465,7 +467,7 @@ class EPCRecord: self.prepared_epc["mains-gas-flag"] = None if ( self.prepared_epc["mains-gas-flag"] == "" or self.prepared_epc["mains-gas-flag"] in DATA_ANOMALY_MATCHES - ) else map[self.prepared_epc["mains-gas-flag"]] + ) else map[self.prepared_epc["mains-gas-flag"]] def _clean_heat_loss_corridor(self): """ @@ -480,10 +482,14 @@ class EPCRecord: "heated corridor": False } - self.prepared_epc["heat-loss-corridor"] = False if self.prepared_epc["heat-loss-corridor"] in DATA_ANOMALY_MATCHES else map[self.prepared_epc["heat-loss-corridor"]] - - self.prepared_epc["unheated-corridor-length"] = float(self.prepared_epc["unheated-corridor-length"]) if self.prepared_epc["unheated-corridor-length"] != "" else None + self.prepared_epc["heat-loss-corridor"] = False if self.prepared_epc[ + "heat-loss-corridor"] in DATA_ANOMALY_MATCHES else map[ + self.prepared_epc["heat-loss-corridor"]] + self.prepared_epc["unheated-corridor-length"] = ( + float(self.prepared_epc["unheated-corridor-length"]) if + self.prepared_epc["unheated-corridor-length"] != "" else None + ) def _clean_count_variables(self): """ @@ -502,8 +508,6 @@ class EPCRecord: null_attributes = ["number_of_storeys", "number_of_rooms"] for attribute, epc_field in fields.items(): - # TODO: check this - # value = self.data["extension-count"] value = self.prepared_epc[epc_field] if value == "" or value in DATA_ANOMALY_MATCHES: if attribute in null_attributes: @@ -522,7 +526,8 @@ class EPCRecord: if not self.prepared_epc: raise ValueError("EPC Recrod doesn not contain epc data") - self.prepared_epc['wind-turbine-count'] = int(self.prepared_epc['wind-turbine-count']) if self.prepared_epc['wind-turbine-count'] != "" else None + self.prepared_epc['wind-turbine-count'] = int(self.prepared_epc['wind-turbine-count']) if self.prepared_epc[ + 'wind-turbine-count'] != "" else None def _clean_solar_hot_water(self): """ @@ -530,7 +535,7 @@ class EPCRecord: """ if not self.prepared_epc: raise ValueError("EPC Recrod doesn not contain epc data") - + value_map = { "Y": True, "N": False, @@ -546,7 +551,9 @@ class EPCRecord: if not self.prepared_epc: raise ValueError("EPC Recrod doesn not contain epc data") - self.prepared_epc['photo-supply'] = float(self.prepared_epc['photo-supply']) if self.prepared_epc['photo-supply'] != "" else None + self.prepared_epc['photo-supply'] = float(self.prepared_epc['photo-supply']) if self.prepared_epc[ + 'photo-supply'] != "" \ + else None def _clean_energy(self): """ @@ -558,7 +565,6 @@ class EPCRecord: self.prepared_epc['energy-consumption-current'] = float(self.prepared_epc["energy-consumption-current"]) self.prepared_epc['co2-emissions-current'] = float(self.prepared_epc["co2-emissions-current"]) - def _clean_built_form(self): """ This method will clean the build form, if empty or invalid @@ -566,7 +572,8 @@ class EPCRecord: if not self.prepared_epc: raise ValueError("EPC Recrod doesn not contain epc data") - self.prepared_epc['built-form'] = BUILT_FORM_REMAP.get(self.prepared_epc["built-form"], self.prepared_epc["built-form"]) + self.prepared_epc['built-form'] = BUILT_FORM_REMAP.get(self.prepared_epc["built-form"], + self.prepared_epc["built-form"]) if self.prepared_epc["built-form"] in DATA_ANOMALY_MATCHES: if self.prepared_epc["property-type"] == "Flat": self.prepared_epc["built-form"] = "Semi-Detached" @@ -578,7 +585,8 @@ class EPCRecord: if not self.prepared_epc: raise ValueError("EPC Recrod doesn not contain epc data") - self.construction_age_band = EPCDataProcessor.clean_construction_age_band(self.prepared_epc["construction-age-band"]) + self.construction_age_band = EPCDataProcessor.clean_construction_age_band( + self.prepared_epc["construction-age-band"]) if self.construction_age_band in DATA_ANOMALY_MATCHES: if self.old_data: # Take the most recent @@ -586,7 +594,8 @@ class EPCRecord: [old_record["lodgement-datetime"] for old_record in self.old_data if old_record["construction-age-band"] not in DATA_ANOMALY_MATCHES] ) - most_recent = [old_record for old_record in self.old_data if old_record["lodgement-datetime"] == max_datetime] + most_recent = [old_record for old_record in self.old_data if + old_record["lodgement-datetime"] == max_datetime] self.construction_age_band = EPCDataProcessor.clean_construction_age_band( most_recent[0]["construction-age-band"] @@ -625,14 +634,15 @@ class EPCRecord: """ This method will clean the ventilation, if empty or invalid """ - self.prepared_epc['mechanical-ventilation'] = None if (self.mechanical_ventilation == "" or self.mechanical_ventilation in DATA_ANOMALY_MATCHES) else self.mechanical_ventilation - + self.prepared_epc['mechanical-ventilation'] = None if ( + self.mechanical_ventilation == "" or self.mechanical_ventilation in DATA_ANOMALY_MATCHES) else ( + self.mechanical_ventilation) def _field_validation(self): """ This method will validate each of the fields in the EPC record """ - + for record_key, validation_config in self.validation_configuration.items(): # Get the variable named record key from self field_value = self.__dict__[record_key] @@ -650,81 +660,89 @@ class EPCRecord: """ if not isinstance(field_value, str): raise ValueError(f"Field {record_key} has value {field_value} which is not a string") - + if 'function' in validation_config: try: validation_config['function'](field_value) except: - raise ValueError(f"Field {record_key} has value {field_value} which does not pass the validation function {validation_config['function']}") + raise ValueError( + f"Field {record_key} has value {field_value} which does not pass the validation function " + f"{validation_config['function']}") if validation_config['acceptable_values'] is not None: if field_value not in validation_config['acceptable_values']: - raise ValueError(f"Field {record_key} has value {field_value} which is not in the acceptable values of {validation_config['acceptable_values']}") - + raise ValueError( + f"Field {record_key} has value {field_value} which is not in the acceptable values of " + f"{validation_config['acceptable_values']}") + def _validate_float(self, record_key: str, field_value: Union[str, float], validation_config: dict): """ Validate a float field """ if not isinstance(field_value, float): raise ValueError(f"Field {record_key} has value {field_value} which is not a float") - + if 'function' in validation_config: try: validation_config['function'](field_value) except: - raise ValueError(f"Field {record_key} has value {field_value} which does not pass the validation function {validation_config['function']}") - + raise ValueError( + f"Field {record_key} has value {field_value} which does not pass the validation function " + f"{validation_config['function']}") + if validation_config['range'] is not None: if field_value < validation_config['range'][0] or field_value > validation_config['range'][1]: - raise ValueError(f"Field {record_key} has value {field_value} which is not in the acceptable range of {validation_config['range']}") - + raise ValueError( + f"Field {record_key} has value {field_value} which is not in the acceptable range of " + f"{validation_config['range']}") + def __sub__(self, other): """ This method will return the difference between two EPC records """ if not isinstance(other, EPCRecord): raise ValueError("Can only subtract EPCRecord from EPCRecord") - + difference_record = EPCDifferenceRecord(record1=self, record2=other, auto_sort=True) - + return difference_record - + def __gt__(self, other): """ This method will return True if the EPC record is greater than or equal to the other """ if not isinstance(other, EPCRecord): raise ValueError("Can only compare EPCRecord to EPCRecord") - + return self.__dict__[RDSAP_RESPONSE] > other.__dict__[RDSAP_RESPONSE] - + def __ge__(self, other): """ This method will return True if the EPC record is greater than or equal to the other """ if not isinstance(other, EPCRecord): raise ValueError("Can only compare EPCRecord to EPCRecord") - + return self.__dict__[RDSAP_RESPONSE] >= other.__dict__[RDSAP_RESPONSE] - + def __lt__(self, other): """ This method will return True if the EPC record is greater than or equal to the other """ if not isinstance(other, EPCRecord): raise ValueError("Can only compare EPCRecord to EPCRecord") - + return self.__dict__[RDSAP_RESPONSE] < other.__dict__[RDSAP_RESPONSE] - + def __le__(self, other): """ This method will return True if the EPC record is greater than or equal to the other """ if not isinstance(other, EPCRecord): raise ValueError("Can only compare EPCRecord to EPCRecord") - + return self.__dict__[RDSAP_RESPONSE] <= other.__dict__[RDSAP_RESPONSE] - + def get(self, key: Union[str, List[str]], return_asdict: bool = False, key_suffix: str | None = None) -> Any: """ This method will return the value of the key @@ -738,8 +756,8 @@ class EPCRecord: if isinstance(key, list): return [self.__dict__[x] if x in self.__dict__.keys() else None for x in key] elif isinstance(key, str): - return self.__dict__[key] if key in self.__dict__.keys() else None - + return self.__dict__[key] if key in self.__dict__.keys() else None + class EPCDifferenceRecord: """ @@ -767,7 +785,6 @@ class EPCDifferenceRecord: self._validate_difference_record() # self._detect_fabric_consistency() - def _construct_difference_record(self): """ This method will construct the difference record between the two records @@ -778,8 +795,10 @@ class EPCDifferenceRecord: carbon_change = self.record2.get(CARBON_RESPONSE) - self.record1.get(CARBON_RESPONSE) component_variables = COMPONENT_FEATURES + EFFICIENCY_FEATURES - ending_record = self.record2.get(component_variables + ["lodgement_date"], return_asdict=True, key_suffix="_ending") - starting_record = self.record1.get(component_variables + ["lodgement_date"], return_asdict=True, key_suffix="_starting") + ending_record = self.record2.get(component_variables + ["lodgement_date"], return_asdict=True, + key_suffix="_ending") + starting_record = self.record1.get(component_variables + ["lodgement_date"], return_asdict=True, + key_suffix="_starting") self.difference_record = { "uprn": self.record1.get("uprn"), @@ -812,30 +831,30 @@ class EPCDifferenceRecord: # if value < 0: # raise ValueError(f"Difference record has negative value for {key}") pass - + def compare_fields_in_records(self, fields: List[str]): """ This method will compare the records, for specific fields """ - + all_equal = True for field in fields: if self.record1.get(field) != self.record2.get(field): return False - + if all_equal: return True - + def get(self, key: str): """ This method will return the value of the key """ - return self.difference_record[key] if key in self.difference_record.keys() else None + return self.difference_record[key] if key in self.difference_record.keys() else None def append_fixed_data(self, fixed_data: dict): """ This method will append fixed data to the difference record - """ + """ self._validate_fixed_data(fixed_data) self.difference_record.update(fixed_data)