mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
removing data
This commit is contained in:
parent
f45260706e
commit
84d4263d9a
6 changed files with 84 additions and 81 deletions
|
|
@ -182,8 +182,8 @@ class GoogleSolarApi:
|
||||||
self.exclude_north_facing_segments(property_instance=property_instance)
|
self.exclude_north_facing_segments(property_instance=property_instance)
|
||||||
# If a property is semi-detached, it's possible for us to include segments from an attached unit
|
# If a property is semi-detached, it's possible for us to include segments from an attached unit
|
||||||
if property_instance is not None:
|
if property_instance is not None:
|
||||||
if (property_instance.data["built-form"] == "Semi-Detached") and (
|
if (property_instance.epc_record.built_form == "Semi-Detached") and (
|
||||||
property_instance.data["extension-count"] == 0
|
property_instance.epc_record.extension_count == 0
|
||||||
):
|
):
|
||||||
self.exclude_likely_duplicate_surfaces()
|
self.exclude_likely_duplicate_surfaces()
|
||||||
|
|
||||||
|
|
@ -708,7 +708,7 @@ class GoogleSolarApi:
|
||||||
# We set the target rating to EPC C, which is the typical EPC rating we would expect the
|
# We set the target rating to EPC C, which is the typical EPC rating we would expect the
|
||||||
# property to achieve post retrofit of just the fabric
|
# property to achieve post retrofit of just the fabric
|
||||||
"energy_consumption": cls.estimate_new_consumption(
|
"energy_consumption": cls.estimate_new_consumption(
|
||||||
current_energy_efficiency=min(p.data["current-energy-efficiency"], 100),
|
current_energy_efficiency=min(p.epc_record.current_energy_efficiency, 100),
|
||||||
target_efficiency="69",
|
target_efficiency="69",
|
||||||
current_consumption=p.estimate_electrical_consumption(
|
current_consumption=p.estimate_electrical_consumption(
|
||||||
assumed_ashp_efficiency=assumptions.AVERAGE_ASHP_EFFICIENCY, exclusions=body.exclusions
|
assumed_ashp_efficiency=assumptions.AVERAGE_ASHP_EFFICIENCY, exclusions=body.exclusions
|
||||||
|
|
@ -727,7 +727,7 @@ class GoogleSolarApi:
|
||||||
# We set the target rating to EPC C, which is the typical EPC rating we would expect the
|
# We set the target rating to EPC C, which is the typical EPC rating we would expect the
|
||||||
# property to achieve post retrofit of just the fabric
|
# property to achieve post retrofit of just the fabric
|
||||||
"energy_consumption": cls.estimate_new_consumption(
|
"energy_consumption": cls.estimate_new_consumption(
|
||||||
current_energy_efficiency=min(int(p.data["current-energy-efficiency"]), 100),
|
current_energy_efficiency=min(p.epc_record.current_energy_efficiency, 100),
|
||||||
target_efficiency="69",
|
target_efficiency="69",
|
||||||
current_consumption=p.estimate_electrical_consumption(
|
current_consumption=p.estimate_electrical_consumption(
|
||||||
assumed_ashp_efficiency=assumptions.AVERAGE_ASHP_EFFICIENCY, exclusions=body.exclusions
|
assumed_ashp_efficiency=assumptions.AVERAGE_ASHP_EFFICIENCY, exclusions=body.exclusions
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,5 @@
|
||||||
|
from typing import Optional
|
||||||
from sqlalchemy.orm import Session
|
from sqlalchemy.orm import Session
|
||||||
from sqlalchemy.exc import SQLAlchemyError
|
|
||||||
from sqlalchemy import func
|
from sqlalchemy import func
|
||||||
from backend.app.db.models.addresses import PostcodeSearch
|
from backend.app.db.models.addresses import PostcodeSearch
|
||||||
from utils.logger import setup_logger
|
from utils.logger import setup_logger
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,4 @@
|
||||||
import re
|
import re
|
||||||
from dataclasses import fields
|
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
@ -15,24 +14,24 @@ logger = setup_logger()
|
||||||
|
|
||||||
|
|
||||||
class KwhData:
|
class KwhData:
|
||||||
COLS_TO_STRINGIFY = ["main_heating_controls", "floor_level"]
|
COLS_TO_STRINGIFY = ["main-heating-controls", "floor-level"]
|
||||||
|
|
||||||
CATEGORICAL_COLUMNS = [
|
CATEGORICAL_COLUMNS = [
|
||||||
"lodgement_year", "lodgement_month", "main_fuel", "mainheat_description", "number_heated_rooms",
|
"lodgement-year", "lodgement-month", "main-fuel", "mainheat-description", "number-heated-rooms",
|
||||||
"number_habitable_rooms", "mainheat_energy_eff", "mainheatcont_description", "property_type",
|
"number-habitable-rooms", "mainheat-energy-eff", "mainheatcont-description", "property-type",
|
||||||
"built_form",
|
"built-form",
|
||||||
"construction_age_band", "secondheat_description", "hotwater_description", "hot_water_energy_eff",
|
"construction-age-band", "secondheat-description", "hotwater-description", "hot-water-energy-eff",
|
||||||
"walls_description", "walls_energy_eff", "roof_description", "roof_energy_eff", "floor_description",
|
"walls-description", "walls-energy-eff", "roof-description", "roof-energy-eff", "floor-description",
|
||||||
"county",
|
"county",
|
||||||
"windows_description", "windows_energy_eff", "flat_top_storey",
|
"windows-description", "windows-energy-eff", "flat-top-storey",
|
||||||
"flat_storey_count", "unheated_corridor_length", "solar_water_heating_flag", "mechanical_ventilation",
|
"flat-storey-count", "unheated-corridor-length", "solar-water-heating-flag", "mechanical-ventilation",
|
||||||
"low_energy_lighting", "environment_impact_current", "energy_tariff", "current_energy_rating",
|
"low-energy-lighting", "environment-impact-current", "energy-tariff", "current-energy-rating",
|
||||||
"floor_level"
|
"floor-level"
|
||||||
]
|
]
|
||||||
|
|
||||||
NUMERICAL_COLUMNS = [
|
NUMERICAL_COLUMNS = [
|
||||||
'heating_cost_current', 'total_floor_area', 'co2_emissions_current', 'energy_consumption_current',
|
'heating-cost-current', 'total-floor-area', 'co2-emissions-current', 'energy-consumption-current',
|
||||||
'heating_cost_potential', 'hot_water_cost_current', 'current_energy_efficiency'
|
'heating-cost-potential', 'hot-water-cost-current', 'current-energy-efficiency'
|
||||||
]
|
]
|
||||||
|
|
||||||
def __init__(self, bucket=None, read_consumption_data=False):
|
def __init__(self, bucket=None, read_consumption_data=False):
|
||||||
|
|
@ -107,16 +106,6 @@ class KwhData:
|
||||||
# If no match is found, return None or raise an exception
|
# If no match is found, return None or raise an exception
|
||||||
return None
|
return None
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def _normalise_epc_keys(data):
|
|
||||||
if isinstance(data, dict):
|
|
||||||
return {key.replace("-", "_"): value for key, value in data.items()}
|
|
||||||
|
|
||||||
if isinstance(data, pd.DataFrame):
|
|
||||||
return data.rename(columns=lambda column: column.replace("-", "_"))
|
|
||||||
|
|
||||||
raise TypeError("Expected dict or DataFrame")
|
|
||||||
|
|
||||||
def combine(self):
|
def combine(self):
|
||||||
"""
|
"""
|
||||||
Given the data that is collected containing the kwh values for heating and hot water, this method will combine
|
Given the data that is collected containing the kwh values for heating and hot water, this method will combine
|
||||||
|
|
@ -139,9 +128,9 @@ class KwhData:
|
||||||
# We check that the retrieved energy consumption sufficiently matches the EPC data
|
# We check that the retrieved energy consumption sufficiently matches the EPC data
|
||||||
internal_dataset = []
|
internal_dataset = []
|
||||||
for x in data:
|
for x in data:
|
||||||
epc_data = self._normalise_epc_keys(x["epc"])
|
epc_data = x["epc"]
|
||||||
epc_sap = epc_data["current_energy_efficiency"]
|
epc_sap = epc_data["current-energy-efficiency"]
|
||||||
epc_potential_sap = epc_data["potential_energy_efficiency"]
|
epc_potential_sap = epc_data["potential-energy-efficiency"]
|
||||||
# Make sure this matches the extracted sap
|
# Make sure this matches the extracted sap
|
||||||
if int(epc_sap) != int(x["current_epc_efficiency"]) or int(epc_potential_sap) != int(
|
if int(epc_sap) != int(x["current_epc_efficiency"]) or int(epc_potential_sap) != int(
|
||||||
x["potential_epc_efficiency"]
|
x["potential_epc_efficiency"]
|
||||||
|
|
@ -182,7 +171,7 @@ class KwhData:
|
||||||
|
|
||||||
# We also estimate the energy consumption reduction from this data, by band
|
# We also estimate the energy consumption reduction from this data, by band
|
||||||
df["total_consumption"] = df["heating_kwh"] + df["hot_water_kwh"]
|
df["total_consumption"] = df["heating_kwh"] + df["hot_water_kwh"]
|
||||||
consumption_averages = df.groupby("current_energy_efficiency")["total_consumption"].mean().reset_index()
|
consumption_averages = df.groupby("current-energy-efficiency")["total_consumption"].mean().reset_index()
|
||||||
df = df.drop(columns=["total_consumption"])
|
df = df.drop(columns=["total_consumption"])
|
||||||
|
|
||||||
self.consumption_averages_filepath = f"energy_consumption/{self.run_date}/consumption_averages.parquet"
|
self.consumption_averages_filepath = f"energy_consumption/{self.run_date}/consumption_averages.parquet"
|
||||||
|
|
@ -214,11 +203,9 @@ class KwhData:
|
||||||
# TODO: New is a temporary parameter, which will transform the epc descriptions to their transformed features
|
# TODO: New is a temporary parameter, which will transform the epc descriptions to their transformed features
|
||||||
# in anticipation of the new model
|
# in anticipation of the new model
|
||||||
|
|
||||||
data = self._normalise_epc_keys(data.copy())
|
data["lodgement-date"] = pd.to_datetime(data["lodgement-date"])
|
||||||
|
data["lodgement-year"] = data["lodgement-date"].dt.year
|
||||||
data["lodgement_date"] = pd.to_datetime(data["lodgement_date"])
|
data["lodgement-month"] = data["lodgement-date"].dt.month
|
||||||
data["lodgement_year"] = data["lodgement_date"].dt.year
|
|
||||||
data["lodgement_month"] = data["lodgement_date"].dt.month
|
|
||||||
|
|
||||||
# For walls, roof, floor description where we have average thermal transmittance, to avoid too many
|
# For walls, roof, floor description where we have average thermal transmittance, to avoid too many
|
||||||
# categories
|
# categories
|
||||||
|
|
@ -244,10 +231,8 @@ class KwhData:
|
||||||
thermal_transmittance_lookup_table["from"] = thermal_transmittance_lookup_table["from"].astype(str)
|
thermal_transmittance_lookup_table["from"] = thermal_transmittance_lookup_table["from"].astype(str)
|
||||||
|
|
||||||
# Apply the lookup table to the data
|
# Apply the lookup table to the data
|
||||||
for feature in ["walls_description", "roof_description", "floor_description"]:
|
for feature in ["walls-description", "roof-description", "floor-description"]:
|
||||||
cleaned_df = pd.DataFrame(
|
cleaned_df = pd.DataFrame(cleaned[feature])[["original_description", "thermal_transmittance"]]
|
||||||
cleaned[feature.replace("_", "-")]
|
|
||||||
)[["original_description", "thermal_transmittance"]]
|
|
||||||
# Round to 2 decimal places and convert to string
|
# Round to 2 decimal places and convert to string
|
||||||
cleaned_df["thermal_transmittance"] = cleaned_df["thermal_transmittance"].round(2).astype(str)
|
cleaned_df["thermal_transmittance"] = cleaned_df["thermal_transmittance"].round(2).astype(str)
|
||||||
|
|
||||||
|
|
@ -276,10 +261,10 @@ class KwhData:
|
||||||
data[self.CATEGORICAL_COLUMNS] = data[self.CATEGORICAL_COLUMNS].astype(str)
|
data[self.CATEGORICAL_COLUMNS] = data[self.CATEGORICAL_COLUMNS].astype(str)
|
||||||
|
|
||||||
# Create new features:
|
# Create new features:
|
||||||
data['estimate_annual_kwh'] = data['energy_consumption_current'] * data['total_floor_area']
|
data['estimate_annual_kwh'] = data['energy-consumption-current'] * data['total-floor-area']
|
||||||
|
|
||||||
# Ensure this is string, because we could have mixed types
|
# Ensure this is string, because we could have mixed types
|
||||||
data["lodgement_datetime"] = data["lodgement_datetime"].astype(str)
|
data["lodgement-datetime"] = data["lodgement-datetime"].astype(str)
|
||||||
|
|
||||||
if save:
|
if save:
|
||||||
self.model_training_data_filepath = f"energy_consumption/{self.run_date}/training_data.parquet"
|
self.model_training_data_filepath = f"energy_consumption/{self.run_date}/training_data.parquet"
|
||||||
|
|
@ -301,39 +286,29 @@ class KwhData:
|
||||||
data is in the format required by the model
|
data is in the format required by the model
|
||||||
:return:
|
:return:
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
epc = p.epc_record.to_dict(case="kebab", source="prepared")
|
||||||
numeric_cols = [
|
numeric_cols = [
|
||||||
'current_energy_efficiency',
|
'current-energy-efficiency',
|
||||||
'potential_energy_efficiency', 'environment_impact_current',
|
'potential-energy-efficiency', 'environment-impact-current',
|
||||||
'environment_impact_potential', 'energy_consumption_current',
|
'environment-impact-potential', 'energy-consumption-current',
|
||||||
'energy_consumption_potential', 'co2_emissions_current',
|
'energy-consumption-potential', 'co2-emissions-current',
|
||||||
'co2_emiss_curr_per_floor_area', 'co2_emissions_potential',
|
'co2-emiss-curr-per-floor-area', 'co2-emissions-potential',
|
||||||
'lighting_cost_current', 'lighting_cost_potential',
|
'lighting-cost-current', 'lighting-cost-potential',
|
||||||
'heating_cost_current', 'heating_cost_potential',
|
'heating-cost-current', 'heating-cost-potential',
|
||||||
'hot_water_cost_current', 'hot_water_cost_potential',
|
'hot-water-cost-current', 'hot-water-cost-potential',
|
||||||
'total_floor_area', 'multi_glaze_proportion',
|
'total-floor-area', 'multi-glaze-proportion',
|
||||||
'extension_count', 'number_habitable_rooms', 'number_heated_rooms',
|
'extension-count', 'number-habitable-rooms', 'number-heated-rooms',
|
||||||
'low_energy_lighting', 'number_open_fireplaces',
|
'low-energy-lighting', 'number-open-fireplaces',
|
||||||
'wind_turbine_count', 'unheated_corridor_length',
|
'wind-turbine-count', 'unheated-corridor-length',
|
||||||
'floor_height', 'photo_supply', 'fixed_lighting_outlets_count',
|
'floor-height', 'photo-supply', 'fixed-lighting-outlets-count',
|
||||||
'low_energy_fixed_light_count',
|
'low-energy-fixed-light-count',
|
||||||
]
|
]
|
||||||
required_cols = set(numeric_cols + KwhData.CATEGORICAL_COLUMNS + [
|
|
||||||
"uprn", "lodgement_date", "lodgement_datetime", "floor_energy_eff"
|
|
||||||
])
|
|
||||||
|
|
||||||
epc_record = p.epc_record
|
|
||||||
available_fields = {field.name for field in fields(epc_record)}
|
|
||||||
missing_fields = required_cols - available_fields
|
|
||||||
if missing_fields:
|
|
||||||
raise ValueError(f"Missing EPCRecord fields required by KwhData: {sorted(missing_fields)}")
|
|
||||||
|
|
||||||
epc = {field_name: getattr(epc_record, field_name) for field_name in required_cols}
|
|
||||||
|
|
||||||
for v in numeric_cols:
|
for v in numeric_cols:
|
||||||
if epc[v] is not None:
|
if epc[v] is not None:
|
||||||
epc[v] = float(epc[v])
|
epc[v] = float(epc[v])
|
||||||
|
|
||||||
bools_to_remap = ['mains_gas_flag', 'flat_top_storey']
|
bools_to_remap = ['mains-gas-flag', 'flat-top-storey']
|
||||||
bool_map = {
|
bool_map = {
|
||||||
True: "Y",
|
True: "Y",
|
||||||
False: "N",
|
False: "N",
|
||||||
|
|
@ -345,8 +320,8 @@ class KwhData:
|
||||||
epc[v] = bool_map[epc[v]]
|
epc[v] = bool_map[epc[v]]
|
||||||
|
|
||||||
no_data = {
|
no_data = {
|
||||||
"floor_level": "NODATA!",
|
"floor-level": "NODATA!",
|
||||||
"floor_energy_eff": "NO DATA!"
|
"floor-energy-eff": "NO DATA!"
|
||||||
}
|
}
|
||||||
for v, fill_val in no_data.items():
|
for v, fill_val in no_data.items():
|
||||||
if pd.isnull(epc[v]):
|
if pd.isnull(epc[v]):
|
||||||
|
|
@ -356,8 +331,8 @@ class KwhData:
|
||||||
|
|
||||||
def prepare_epc(self, input_properties: list[Property]):
|
def prepare_epc(self, input_properties: list[Property]):
|
||||||
scoring_data = pd.DataFrame([self._prepare_epc(p) for p in input_properties])
|
scoring_data = pd.DataFrame([self._prepare_epc(p) for p in input_properties])
|
||||||
scoring_data["lodgement_year"] = pd.to_datetime(scoring_data["lodgement_date"]).dt.year
|
scoring_data["lodgement-year"] = pd.to_datetime(scoring_data["lodgement-date"]).dt.year
|
||||||
scoring_data["lodgement_month"] = pd.to_datetime(scoring_data["lodgement_date"]).dt.month
|
scoring_data["lodgement-month"] = pd.to_datetime(scoring_data["lodgement-date"]).dt.month
|
||||||
|
|
||||||
scoring_data["id"] = scoring_data["uprn"].copy()
|
scoring_data["id"] = scoring_data["uprn"].copy()
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1245,6 +1245,34 @@ class EPCRecord:
|
||||||
|
|
||||||
return self.__dict__[RDSAP_RESPONSE] <= other.__dict__[RDSAP_RESPONSE]
|
return self.__dict__[RDSAP_RESPONSE] <= other.__dict__[RDSAP_RESPONSE]
|
||||||
|
|
||||||
|
def to_dict(
    self,
    case: Literal["snake", "kebab"] = "kebab",
    source: Literal["prepared", "attributes"] = "prepared",
) -> dict[str, Any]:
    """
    Export this record as a plain dictionary with keys in the requested case.

    :param case: "snake" replaces every "-" in the keys with "_"; "kebab" replaces every "_" with "-".
        Any other value leaves the keys exactly as they were collected.
    :param source: "prepared" exports a copy of ``self._prepared_epc``; "attributes" exports the instance
        attributes whose names do not start with an underscore.
    :return: a new dictionary mapping the (re-cased) keys to their values
    :raises ValueError: if ``source`` is "prepared" but no prepared EPC is stored, or if ``source`` is not
        one of the two supported values
    """
    # Step 1: collect the raw key/value pairs from the requested source
    if source == "prepared":
        prepared = self._prepared_epc
        if prepared is None:
            raise ValueError("Prepared EPC not available")
        payload = prepared.copy()
    elif source == "attributes":
        payload = {}
        for name, value in vars(self).items():
            # Underscore-prefixed attributes are treated as private and are not exported
            if name.startswith("_"):
                continue
            payload[name] = value
    else:
        raise ValueError(f"Unknown source: {source}")

    # Step 2: work out which separator needs swapping for the requested key style
    if case == "snake":
        old_sep, new_sep = "-", "_"
    elif case == "kebab":
        old_sep, new_sep = "_", "-"
    else:
        # Unrecognised case: hand the collected data back with its keys untouched
        return payload

    return {key.replace(old_sep, new_sep): value for key, value in payload.items()}
|
||||||
|
|
||||||
def get(
|
def get(
|
||||||
self,
|
self,
|
||||||
key: str | list[str],
|
key: str | list[str],
|
||||||
|
|
|
||||||
|
|
@ -200,20 +200,20 @@ class Costs:
|
||||||
self.property = property_instance
|
self.property = property_instance
|
||||||
self.regional_labour_variations = regional_labour_variations
|
self.regional_labour_variations = regional_labour_variations
|
||||||
|
|
||||||
self.region = county_to_region_map.get(self.property.data["county"], None)
|
self.region = county_to_region_map.get(self.property.epc_record.county, None)
|
||||||
if self.region is None:
|
if self.region is None:
|
||||||
# Try and grab using the local-authority-label
|
# Try and grab using the local-authority-label
|
||||||
self.region = county_to_region_map.get(self.property.data["local-authority-label"], None)
|
self.region = county_to_region_map.get(self.property.epc_record.local_authority_label, None)
|
||||||
|
|
||||||
if self.region is None:
|
if self.region is None:
|
||||||
# Try and get the region after converting the keys to lower
|
# Try and get the region after converting the keys to lower
|
||||||
self.region = {
|
self.region = {
|
||||||
k.lower(): v for k, v in county_to_region_map.items()
|
k.lower(): v for k, v in county_to_region_map.items()
|
||||||
}.get(self.property.data["local-authority-label"].lower(), None)
|
}.get(self.property.epc_record.local_authority_label.lower(), None)
|
||||||
|
|
||||||
if self.region is None:
|
if self.region is None:
|
||||||
logger.warning("No region found for county %s, defaulting to South East England",
|
logger.warning("No region found for county %s, defaulting to South East England",
|
||||||
self.property.data["county"])
|
self.property.epc_record.county)
|
||||||
self.region = "South East England"
|
self.region = "South East England"
|
||||||
|
|
||||||
self.labour_adjustment_factor = [
|
self.labour_adjustment_factor = [
|
||||||
|
|
@ -858,8 +858,8 @@ class Costs:
|
||||||
n_radiators = self._estimate_n_radiators(
|
n_radiators = self._estimate_n_radiators(
|
||||||
number_habitable_rooms=n_rooms,
|
number_habitable_rooms=n_rooms,
|
||||||
total_floor_area=self.property.floor_area,
|
total_floor_area=self.property.floor_area,
|
||||||
property_type=self.property.data["property-type"],
|
property_type=self.property.epc_record.property_type,
|
||||||
built_form=self.property.data["built-form"]
|
built_form=self.property.epc_record.built_form
|
||||||
)
|
)
|
||||||
|
|
||||||
additionals_labour_cost = labour_rate * self.labour_adjustment_factor
|
additionals_labour_cost = labour_rate * self.labour_adjustment_factor
|
||||||
|
|
|
||||||
|
|
@ -76,7 +76,7 @@ class FloorRecommendations(Definitions):
|
||||||
return
|
return
|
||||||
|
|
||||||
u_value = self.property.floor["thermal_transmittance"]
|
u_value = self.property.floor["thermal_transmittance"]
|
||||||
property_type = self.property.data["property-type"]
|
property_type = self.property.epc_record.property_type
|
||||||
floor_area = self.property.insulation_floor_area
|
floor_area = self.property.insulation_floor_area
|
||||||
|
|
||||||
if self.property.floor["another_property_below"] | (self.property.floor["insulation_thickness"] in [
|
if self.property.floor["another_property_below"] | (self.property.floor["insulation_thickness"] in [
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue