diff --git a/etl/epc/Pipeline.py b/etl/epc/Pipeline.py index 3a078703..bc3bfd91 100644 --- a/etl/epc/Pipeline.py +++ b/etl/epc/Pipeline.py @@ -22,6 +22,7 @@ from etl.epc.settings import ( EFFICIENCY_FEATURES, POTENTIAL_COLUMNS, ROOM_FEATURES, + COST_FEATURES, ) # TODO: change in setting file @@ -39,9 +40,10 @@ VARIABLE_DATA_FEATURES = ( COMPONENT_FEATURES + ROOM_FEATURES + EFFICIENCY_FEATURES - + POTENTIAL_COLUMNS + # + POTENTIAL_COLUMNS + ["lodgement_date", RDSAP_RESPONSE, HEAT_DEMAND_RESPONSE, CARBON_RESPONSE] ) +COST_FEATURES = [x.lower() for x in COST_FEATURES] def get_cleaned_description_mapping(): @@ -66,14 +68,10 @@ clean_lookup = get_cleaned_description_mapping() # TODO: THIS IS A TEMPORARY FIX new_walls_description_mapping = pd.DataFrame(clean_lookup["walls-description"]) - -import numpy as np - -new_walls_description_mapping["thermal_transmittance_unit"] = np.where( - ~pd.isnull(new_walls_description_mapping["thermal_transmittance_unit"]), - "w/m-¦k", - new_walls_description_mapping["thermal_transmittance_unit"], -) +new_walls_description_mapping.loc[ + ~new_walls_description_mapping["thermal_transmittance_unit"].isnull(), + "thermal_transmittance_unit", +] = "w/m-¦k" clean_lookup["walls-description"] = new_walls_description_mapping.to_dict( orient="records" @@ -282,7 +280,7 @@ class EPCPipeline: # We include the lodgement date here as we probably need to factor time into the # model, since EPC standards and rigour have changed over time - variable_data = property_data[VARIABLE_DATA_FEATURES] + variable_data = property_data[VARIABLE_DATA_FEATURES + COST_FEATURES] uprn = str(uprn) epc_records = [ diff --git a/etl/epc/Record.py b/etl/epc/Record.py index 9a965c6a..b8471ccf 100644 --- a/etl/epc/Record.py +++ b/etl/epc/Record.py @@ -76,10 +76,13 @@ class EPCRecord: mainheat_energy_eff: str = None mainheatc_energy_eff: str = None lighting_energy_eff: str = None - potential_energy_efficiency: float = None - environment_impact_potential: float = None - energy_consumption_potential: float = None - co2_emissions_potential: float = None + lighting_cost_current: float = None + heating_cost_current: float = None + hot_water_cost_current: float = None + # potential_energy_efficiency: float = None + # environment_impact_potential: float = None + # energy_consumption_potential: float = None + # co2_emissions_potential: float = None lodgement_date: str = None current_energy_efficiency: int = None energy_consumption_current: int = None @@ -249,18 +252,21 @@ class EPCRecord: self.mainheat_energy_eff: str = self.prepared_epc["mainheat_energy_eff"] self.mainheatc_energy_eff: str = self.prepared_epc["mainheatc_energy_eff"] self.lighting_energy_eff: str = self.prepared_epc["lighting_energy_eff"] - self.potential_energy_efficiency: float = float( - self.prepared_epc["potential_energy_efficiency"] - ) - self.environment_impact_potential: float = float( - self.prepared_epc["environment_impact_potential"] - ) - self.energy_consumption_potential: float = float( - self.prepared_epc["energy_consumption_potential"] - ) - self.co2_emissions_potential: float = float( - self.prepared_epc["co2_emissions_potential"] - ) + self.lighting_cost_current: float = self.prepared_epc["lighting_cost_current"] + self.heating_cost_current: float = self.prepared_epc["heating_cost_current"] + self.hot_water_cost_current: float = self.prepared_epc["hot_water_cost_current"] + # self.potential_energy_efficiency: float = float( + # self.prepared_epc["potential_energy_efficiency"] + # ) + # self.environment_impact_potential: float = float( + # self.prepared_epc["environment_impact_potential"] + # ) + # self.energy_consumption_potential: float = float( + # self.prepared_epc["energy_consumption_potential"] + # ) + # self.co2_emissions_potential: float = float( + # self.prepared_epc["co2_emissions_potential"] + # ) self.lodgement_date: str = self.prepared_epc["lodgement_date"] self.current_energy_efficiency: int = int( self.prepared_epc["current_energy_efficiency"] @@ -466,9 +472,7 @@ class EPCRecord: (property_dimensions["PROPERTY_TYPE"] == self.prepared_epc["property-type"]) ] - if ( - self.construction_age_band not in DATA_ANOMALY_MATCHES - ): + if self.construction_age_band not in DATA_ANOMALY_MATCHES: result = result[ (result["CONSTRUCTION_AGE_BAND"] == self.construction_age_band) ] @@ -480,7 +484,12 @@ class EPCRecord: result = result[(result["BUILT_FORM"] == self.prepared_epc["built-form"])] return result[ - ["NUMBER_HABITABLE_ROOMS", "NUMBER_HEATED_ROOMS", "TOTAL_FLOOR_AREA", "FLOOR_HEIGHT"] + [ + "NUMBER_HABITABLE_ROOMS", + "NUMBER_HEATED_ROOMS", + "TOTAL_FLOOR_AREA", + "FLOOR_HEIGHT", + ] ].mean() def _clean_property_dimensions(self): @@ -491,9 +500,11 @@ class EPCRecord: if not self.prepared_epc: raise ValueError("EPC Record doesn not contain epc data") - if (self.prepared_epc["number-habitable-rooms"] in DATA_ANOMALY_MATCHES) or ( - self.prepared_epc["floor-height"] in DATA_ANOMALY_MATCHES - ) or (self.prepared_epc["number-heated-rooms"] in DATA_ANOMALY_MATCHES): + if ( + (self.prepared_epc["number-habitable-rooms"] in DATA_ANOMALY_MATCHES) + or (self.prepared_epc["floor-height"] in DATA_ANOMALY_MATCHES) + or (self.prepared_epc["number-heated-rooms"] in DATA_ANOMALY_MATCHES) + ): property_dimensions = read_dataframe_from_s3_parquet( bucket_name=DATA_BUCKET, file_key=f"property_dimensions/{self.prepared_epc['local-authority']}.parquet", @@ -507,12 +518,18 @@ class EPCRecord: self.property_dimensions["NUMBER_HABITABLE_ROOMS"].round() ) else: - self.prepared_epc["number-habitable-rooms"] = float(self.prepared_epc["number-habitable-rooms"]) + self.prepared_epc["number-habitable-rooms"] = float( + self.prepared_epc["number-habitable-rooms"] + ) if self.prepared_epc["number-heated-rooms"] in DATA_ANOMALY_MATCHES: - self.prepared_epc["number-heated-rooms"] = float(self.property_dimensions["NUMBER_HEATED_ROOMS"].round()) + self.prepared_epc["number-heated-rooms"] = float( + self.property_dimensions["NUMBER_HEATED_ROOMS"].round() + ) else: - self.prepared_epc["number-heated-rooms"] = float(self.prepared_epc["number-heated-rooms"]) + self.prepared_epc["number-heated-rooms"] = float( + self.prepared_epc["number-heated-rooms"] + ) self.number_of_floors = estimate_number_of_floors( self.prepared_epc["property-type"] @@ -1033,18 +1050,24 @@ class EPCDifferenceRecord: "heat_demand_ending": self.record2.get(HEAT_DEMAND_RESPONSE), "carbon_starting": self.record1.get(CARBON_RESPONSE), "carbon_ending": self.record2.get(CARBON_RESPONSE), - "potential_energy_efficiency": self.earliest_record.get( - "potential_energy_efficiency" - ), - "environment_impact_potential": self.earliest_record.get( - "environment_impact_potential" - ), - "energy_consumption_potential": self.earliest_record.get( - "energy_consumption_potential" - ), - "co2_emissions_potential": self.earliest_record.get( - "co2_emissions_potential" - ), + "lighting_cost_starting": self.record1.get("lighting_cost_current"), + "lighting_cost_ending": self.record2.get("lighting_cost_current"), + "heating_cost_starting": self.record1.get("heating_cost_current"), + "heating_cost_ending": self.record2.get("heating_cost_current"), + "hot_water_cost_starting": self.record1.get("hot_water_cost_current"), + "hot_water_cost_ending": self.record2.get("hot_water_cost_current"), + # "potential_energy_efficiency": self.earliest_record.get( + # "potential_energy_efficiency" + # ), + # "environment_impact_potential": self.earliest_record.get( + # "environment_impact_potential" + # ), + # "energy_consumption_potential": self.earliest_record.get( + # "energy_consumption_potential" + # ), + # "co2_emissions_potential": self.earliest_record.get( + # "co2_emissions_potential" + # ), **ending_record, **starting_record, } diff --git a/etl/epc/settings.py b/etl/epc/settings.py index 18dbaa7c..a814750f 100644 --- a/etl/epc/settings.py +++ b/etl/epc/settings.py @@ -110,6 +110,12 @@ DEPLOYMENT_FOLDER = "deployment" TOTAL_FLOOR_AREA_NATIONAL_AVERAGE = 70 FLOOR_HEIGHT_NATIONAL_AVERAGE = 2.45 +COST_FEATURES = [ + "LIGHTING_COST_CURRENT", + "HEATING_COST_CURRENT", + "HOT_WATER_COST_CURRENT", +] + AVERAGE_FIXED_FEATURES = [ "TOTAL_FLOOR_AREA", "FLOOR_HEIGHT",