diff --git a/etl/epc/Pipeline.py b/etl/epc/Pipeline.py index 3a078703..47cddeb0 100644 --- a/etl/epc/Pipeline.py +++ b/etl/epc/Pipeline.py @@ -39,7 +39,7 @@ VARIABLE_DATA_FEATURES = ( COMPONENT_FEATURES + ROOM_FEATURES + EFFICIENCY_FEATURES - + POTENTIAL_COLUMNS + # + POTENTIAL_COLUMNS + ["lodgement_date", RDSAP_RESPONSE, HEAT_DEMAND_RESPONSE, CARBON_RESPONSE] ) @@ -66,14 +66,10 @@ clean_lookup = get_cleaned_description_mapping() # TODO: THIS IS A TEMPORARY FIX new_walls_description_mapping = pd.DataFrame(clean_lookup["walls-description"]) - -import numpy as np - -new_walls_description_mapping["thermal_transmittance_unit"] = np.where( - ~pd.isnull(new_walls_description_mapping["thermal_transmittance_unit"]), - "w/m-¦k", - new_walls_description_mapping["thermal_transmittance_unit"], -) +new_walls_description_mapping.loc[ + ~new_walls_description_mapping["thermal_transmittance_unit"].isnull(), + "thermal_transmittance_unit", +] = "w/m-¦k" clean_lookup["walls-description"] = new_walls_description_mapping.to_dict( orient="records" diff --git a/etl/epc/Record.py b/etl/epc/Record.py index 9a965c6a..9b69c33a 100644 --- a/etl/epc/Record.py +++ b/etl/epc/Record.py @@ -76,10 +76,10 @@ class EPCRecord: mainheat_energy_eff: str = None mainheatc_energy_eff: str = None lighting_energy_eff: str = None - potential_energy_efficiency: float = None - environment_impact_potential: float = None - energy_consumption_potential: float = None - co2_emissions_potential: float = None + # potential_energy_efficiency: float = None + # environment_impact_potential: float = None + # energy_consumption_potential: float = None + # co2_emissions_potential: float = None lodgement_date: str = None current_energy_efficiency: int = None energy_consumption_current: int = None @@ -249,18 +249,18 @@ class EPCRecord: self.mainheat_energy_eff: str = self.prepared_epc["mainheat_energy_eff"] self.mainheatc_energy_eff: str = self.prepared_epc["mainheatc_energy_eff"] self.lighting_energy_eff: str = self.prepared_epc["lighting_energy_eff"] - self.potential_energy_efficiency: float = float( - self.prepared_epc["potential_energy_efficiency"] - ) - self.environment_impact_potential: float = float( - self.prepared_epc["environment_impact_potential"] - ) - self.energy_consumption_potential: float = float( - self.prepared_epc["energy_consumption_potential"] - ) - self.co2_emissions_potential: float = float( - self.prepared_epc["co2_emissions_potential"] - ) + # self.potential_energy_efficiency: float = float( + # self.prepared_epc["potential_energy_efficiency"] + # ) + # self.environment_impact_potential: float = float( + # self.prepared_epc["environment_impact_potential"] + # ) + # self.energy_consumption_potential: float = float( + # self.prepared_epc["energy_consumption_potential"] + # ) + # self.co2_emissions_potential: float = float( + # self.prepared_epc["co2_emissions_potential"] + # ) self.lodgement_date: str = self.prepared_epc["lodgement_date"] self.current_energy_efficiency: int = int( self.prepared_epc["current_energy_efficiency"] @@ -466,9 +466,7 @@ class EPCRecord: (property_dimensions["PROPERTY_TYPE"] == self.prepared_epc["property-type"]) ] - if ( - self.construction_age_band not in DATA_ANOMALY_MATCHES - ): + if self.construction_age_band not in DATA_ANOMALY_MATCHES: result = result[ (result["CONSTRUCTION_AGE_BAND"] == self.construction_age_band) ] @@ -480,7 +478,12 @@ class EPCRecord: result = result[(result["BUILT_FORM"] == self.prepared_epc["built-form"])] return result[ - ["NUMBER_HABITABLE_ROOMS", "NUMBER_HEATED_ROOMS", "TOTAL_FLOOR_AREA", "FLOOR_HEIGHT"] + [ + "NUMBER_HABITABLE_ROOMS", + "NUMBER_HEATED_ROOMS", + "TOTAL_FLOOR_AREA", + "FLOOR_HEIGHT", + ] ].mean() def _clean_property_dimensions(self): @@ -491,9 +494,11 @@ class EPCRecord: if not self.prepared_epc: raise ValueError("EPC Record doesn not contain epc data") - if (self.prepared_epc["number-habitable-rooms"] in DATA_ANOMALY_MATCHES) or ( - self.prepared_epc["floor-height"] in DATA_ANOMALY_MATCHES - ) or (self.prepared_epc["number-heated-rooms"] in DATA_ANOMALY_MATCHES): + if ( + (self.prepared_epc["number-habitable-rooms"] in DATA_ANOMALY_MATCHES) + or (self.prepared_epc["floor-height"] in DATA_ANOMALY_MATCHES) + or (self.prepared_epc["number-heated-rooms"] in DATA_ANOMALY_MATCHES) + ): property_dimensions = read_dataframe_from_s3_parquet( bucket_name=DATA_BUCKET, file_key=f"property_dimensions/{self.prepared_epc['local-authority']}.parquet", @@ -507,12 +512,18 @@ class EPCRecord: self.property_dimensions["NUMBER_HABITABLE_ROOMS"].round() ) else: - self.prepared_epc["number-habitable-rooms"] = float(self.prepared_epc["number-habitable-rooms"]) + self.prepared_epc["number-habitable-rooms"] = float( + self.prepared_epc["number-habitable-rooms"] + ) if self.prepared_epc["number-heated-rooms"] in DATA_ANOMALY_MATCHES: - self.prepared_epc["number-heated-rooms"] = float(self.property_dimensions["NUMBER_HEATED_ROOMS"].round()) + self.prepared_epc["number-heated-rooms"] = float( + self.property_dimensions["NUMBER_HEATED_ROOMS"].round() + ) else: - self.prepared_epc["number-heated-rooms"] = float(self.prepared_epc["number-heated-rooms"]) + self.prepared_epc["number-heated-rooms"] = float( + self.prepared_epc["number-heated-rooms"] + ) self.number_of_floors = estimate_number_of_floors( self.prepared_epc["property-type"] @@ -1033,18 +1044,18 @@ class EPCDifferenceRecord: "heat_demand_ending": self.record2.get(HEAT_DEMAND_RESPONSE), "carbon_starting": self.record1.get(CARBON_RESPONSE), "carbon_ending": self.record2.get(CARBON_RESPONSE), - "potential_energy_efficiency": self.earliest_record.get( - "potential_energy_efficiency" - ), - "environment_impact_potential": self.earliest_record.get( - "environment_impact_potential" - ), - "energy_consumption_potential": self.earliest_record.get( - "energy_consumption_potential" - ), - "co2_emissions_potential": self.earliest_record.get( - "co2_emissions_potential" - ), + # "potential_energy_efficiency": self.earliest_record.get( + # "potential_energy_efficiency" + # ), + # "environment_impact_potential": self.earliest_record.get( + # "environment_impact_potential" + # ), + # "energy_consumption_potential": self.earliest_record.get( + # "energy_consumption_potential" + # ), + # "co2_emissions_potential": self.earliest_record.get( + # "co2_emissions_potential" + # ), **ending_record, **starting_record, }