Merge pull request #312 from Hestia-Homes/etl-michael-cost

Etl michael cost
This commit is contained in:
KhalimCK 2024-07-05 12:15:59 +01:00 committed by GitHub
commit 2208a3d74d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 75 additions and 48 deletions

View file

@ -22,6 +22,7 @@ from etl.epc.settings import (
EFFICIENCY_FEATURES,
POTENTIAL_COLUMNS,
ROOM_FEATURES,
COST_FEATURES,
)
# TODO: change in setting file
@ -39,9 +40,10 @@ VARIABLE_DATA_FEATURES = (
COMPONENT_FEATURES
+ ROOM_FEATURES
+ EFFICIENCY_FEATURES
+ POTENTIAL_COLUMNS
# + POTENTIAL_COLUMNS
+ ["lodgement_date", RDSAP_RESPONSE, HEAT_DEMAND_RESPONSE, CARBON_RESPONSE]
)
# Rebind the COST_FEATURES name imported from etl.epc.settings to a
# lower-cased copy. The settings module spells these column names in upper
# case; the lower-cased list is what gets appended to VARIABLE_DATA_FEATURES
# when selecting columns from property_data.
# NOTE(review): presumably property_data columns are lower-case by this
# point - confirm against the loader.
COST_FEATURES = [x.lower() for x in COST_FEATURES]
def get_cleaned_description_mapping():
@ -66,14 +68,10 @@ clean_lookup = get_cleaned_description_mapping()
# TODO: THIS IS A TEMPORARY FIX
new_walls_description_mapping = pd.DataFrame(clean_lookup["walls-description"])
import numpy as np
new_walls_description_mapping["thermal_transmittance_unit"] = np.where(
~pd.isnull(new_walls_description_mapping["thermal_transmittance_unit"]),
"w/m-¦k",
new_walls_description_mapping["thermal_transmittance_unit"],
)
new_walls_description_mapping.loc[
~new_walls_description_mapping["thermal_transmittance_unit"].isnull(),
"thermal_transmittance_unit",
] = "w/m-¦k"
clean_lookup["walls-description"] = new_walls_description_mapping.to_dict(
orient="records"
@ -282,7 +280,7 @@ class EPCPipeline:
# We include the lodgement date here as we probably need to factor time into the
# model, since EPC standards and rigour have changed over time
variable_data = property_data[VARIABLE_DATA_FEATURES]
variable_data = property_data[VARIABLE_DATA_FEATURES + COST_FEATURES]
uprn = str(uprn)
epc_records = [

View file

@ -76,10 +76,13 @@ class EPCRecord:
mainheat_energy_eff: str = None
mainheatc_energy_eff: str = None
lighting_energy_eff: str = None
potential_energy_efficiency: float = None
environment_impact_potential: float = None
energy_consumption_potential: float = None
co2_emissions_potential: float = None
lighting_cost_current: float = None
heating_cost_current: float = None
hot_water_cost_current: float = None
# potential_energy_efficiency: float = None
# environment_impact_potential: float = None
# energy_consumption_potential: float = None
# co2_emissions_potential: float = None
lodgement_date: str = None
current_energy_efficiency: int = None
energy_consumption_current: int = None
@ -249,18 +252,21 @@ class EPCRecord:
self.mainheat_energy_eff: str = self.prepared_epc["mainheat_energy_eff"]
self.mainheatc_energy_eff: str = self.prepared_epc["mainheatc_energy_eff"]
self.lighting_energy_eff: str = self.prepared_epc["lighting_energy_eff"]
self.potential_energy_efficiency: float = float(
self.prepared_epc["potential_energy_efficiency"]
)
self.environment_impact_potential: float = float(
self.prepared_epc["environment_impact_potential"]
)
self.energy_consumption_potential: float = float(
self.prepared_epc["energy_consumption_potential"]
)
self.co2_emissions_potential: float = float(
self.prepared_epc["co2_emissions_potential"]
)
self.lighting_cost_current: float = self.prepared_epc["lighting_cost_current"]
self.heating_cost_current: float = self.prepared_epc["heating_cost_current"]
self.hot_water_cost_current: float = self.prepared_epc["hot_water_cost_current"]
# self.potential_energy_efficiency: float = float(
# self.prepared_epc["potential_energy_efficiency"]
# )
# self.environment_impact_potential: float = float(
# self.prepared_epc["environment_impact_potential"]
# )
# self.energy_consumption_potential: float = float(
# self.prepared_epc["energy_consumption_potential"]
# )
# self.co2_emissions_potential: float = float(
# self.prepared_epc["co2_emissions_potential"]
# )
self.lodgement_date: str = self.prepared_epc["lodgement_date"]
self.current_energy_efficiency: int = int(
self.prepared_epc["current_energy_efficiency"]
@ -466,9 +472,7 @@ class EPCRecord:
(property_dimensions["PROPERTY_TYPE"] == self.prepared_epc["property-type"])
]
if (
self.construction_age_band not in DATA_ANOMALY_MATCHES
):
if self.construction_age_band not in DATA_ANOMALY_MATCHES:
result = result[
(result["CONSTRUCTION_AGE_BAND"] == self.construction_age_band)
]
@ -480,7 +484,12 @@ class EPCRecord:
result = result[(result["BUILT_FORM"] == self.prepared_epc["built-form"])]
return result[
["NUMBER_HABITABLE_ROOMS", "NUMBER_HEATED_ROOMS", "TOTAL_FLOOR_AREA", "FLOOR_HEIGHT"]
[
"NUMBER_HABITABLE_ROOMS",
"NUMBER_HEATED_ROOMS",
"TOTAL_FLOOR_AREA",
"FLOOR_HEIGHT",
]
].mean()
def _clean_property_dimensions(self):
@ -491,9 +500,11 @@ class EPCRecord:
if not self.prepared_epc:
raise ValueError("EPC Record doesn not contain epc data")
if (self.prepared_epc["number-habitable-rooms"] in DATA_ANOMALY_MATCHES) or (
self.prepared_epc["floor-height"] in DATA_ANOMALY_MATCHES
) or (self.prepared_epc["number-heated-rooms"] in DATA_ANOMALY_MATCHES):
if (
(self.prepared_epc["number-habitable-rooms"] in DATA_ANOMALY_MATCHES)
or (self.prepared_epc["floor-height"] in DATA_ANOMALY_MATCHES)
or (self.prepared_epc["number-heated-rooms"] in DATA_ANOMALY_MATCHES)
):
property_dimensions = read_dataframe_from_s3_parquet(
bucket_name=DATA_BUCKET,
file_key=f"property_dimensions/{self.prepared_epc['local-authority']}.parquet",
@ -507,12 +518,18 @@ class EPCRecord:
self.property_dimensions["NUMBER_HABITABLE_ROOMS"].round()
)
else:
self.prepared_epc["number-habitable-rooms"] = float(self.prepared_epc["number-habitable-rooms"])
self.prepared_epc["number-habitable-rooms"] = float(
self.prepared_epc["number-habitable-rooms"]
)
if self.prepared_epc["number-heated-rooms"] in DATA_ANOMALY_MATCHES:
self.prepared_epc["number-heated-rooms"] = float(self.property_dimensions["NUMBER_HEATED_ROOMS"].round())
self.prepared_epc["number-heated-rooms"] = float(
self.property_dimensions["NUMBER_HEATED_ROOMS"].round()
)
else:
self.prepared_epc["number-heated-rooms"] = float(self.prepared_epc["number-heated-rooms"])
self.prepared_epc["number-heated-rooms"] = float(
self.prepared_epc["number-heated-rooms"]
)
self.number_of_floors = estimate_number_of_floors(
self.prepared_epc["property-type"]
@ -1033,18 +1050,24 @@ class EPCDifferenceRecord:
"heat_demand_ending": self.record2.get(HEAT_DEMAND_RESPONSE),
"carbon_starting": self.record1.get(CARBON_RESPONSE),
"carbon_ending": self.record2.get(CARBON_RESPONSE),
"potential_energy_efficiency": self.earliest_record.get(
"potential_energy_efficiency"
),
"environment_impact_potential": self.earliest_record.get(
"environment_impact_potential"
),
"energy_consumption_potential": self.earliest_record.get(
"energy_consumption_potential"
),
"co2_emissions_potential": self.earliest_record.get(
"co2_emissions_potential"
),
"lighting_cost_starting": self.record1.get("lighting_cost_current"),
"lighting_cost_ending": self.record2.get("lighting_cost_current"),
"heating_cost_starting": self.record1.get("heating_cost_current"),
"heating_cost_ending": self.record2.get("heating_cost_current"),
"hot_water_cost_starting": self.record1.get("hot_water_cost_current"),
"hot_water_cost_ending": self.record2.get("hot_water_cost_current"),
# "potential_energy_efficiency": self.earliest_record.get(
# "potential_energy_efficiency"
# ),
# "environment_impact_potential": self.earliest_record.get(
# "environment_impact_potential"
# ),
# "energy_consumption_potential": self.earliest_record.get(
# "energy_consumption_potential"
# ),
# "co2_emissions_potential": self.earliest_record.get(
# "co2_emissions_potential"
# ),
**ending_record,
**starting_record,
}

View file

@ -110,6 +110,12 @@ DEPLOYMENT_FOLDER = "deployment"
TOTAL_FLOOR_AREA_NATIONAL_AVERAGE = 70
FLOOR_HEIGHT_NATIONAL_AVERAGE = 2.45
# Upper-case names of the three "current cost" columns (lighting, heating,
# hot water). The EPC pipeline module imports this list and lower-cases it
# before using it to select columns.
COST_FEATURES = [
"LIGHTING_COST_CURRENT",
"HEATING_COST_CURRENT",
"HOT_WATER_COST_CURRENT",
]
AVERAGE_FIXED_FEATURES = [
"TOTAL_FLOOR_AREA",
"FLOOR_HEIGHT",