mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
removing data
This commit is contained in:
parent
f45260706e
commit
84d4263d9a
6 changed files with 84 additions and 81 deletions
|
|
@ -182,8 +182,8 @@ class GoogleSolarApi:
|
|||
self.exclude_north_facing_segments(property_instance=property_instance)
|
||||
# If a property is semi-detached, it's possible for us to include segments from an attached unit
|
||||
if property_instance is not None:
|
||||
if (property_instance.data["built-form"] == "Semi-Detached") and (
|
||||
property_instance.data["extension-count"] == 0
|
||||
if (property_instance.epc_record.built_form == "Semi-Detached") and (
|
||||
property_instance.epc_record.extension_count == 0
|
||||
):
|
||||
self.exclude_likely_duplicate_surfaces()
|
||||
|
||||
|
|
@ -708,7 +708,7 @@ class GoogleSolarApi:
|
|||
# We set the target rating to EPC C, which is the typical EPC rating we would expect the
|
||||
# property to achieve post retrofit of just the fabric
|
||||
"energy_consumption": cls.estimate_new_consumption(
|
||||
current_energy_efficiency=min(p.data["current-energy-efficiency"], 100),
|
||||
current_energy_efficiency=min(p.epc_record.current_energy_efficiency, 100),
|
||||
target_efficiency="69",
|
||||
current_consumption=p.estimate_electrical_consumption(
|
||||
assumed_ashp_efficiency=assumptions.AVERAGE_ASHP_EFFICIENCY, exclusions=body.exclusions
|
||||
|
|
@ -727,7 +727,7 @@ class GoogleSolarApi:
|
|||
# We set the target rating to EPC C, which is the typical EPC rating we would expect the
|
||||
# property to achieve post retrofit of just the fabric
|
||||
"energy_consumption": cls.estimate_new_consumption(
|
||||
current_energy_efficiency=min(int(p.data["current-energy-efficiency"]), 100),
|
||||
current_energy_efficiency=min(p.epc_record.current_energy_efficiency, 100),
|
||||
target_efficiency="69",
|
||||
current_consumption=p.estimate_electrical_consumption(
|
||||
assumed_ashp_efficiency=assumptions.AVERAGE_ASHP_EFFICIENCY, exclusions=body.exclusions
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
from typing import Optional
|
||||
from sqlalchemy.orm import Session
|
||||
from sqlalchemy.exc import SQLAlchemyError
|
||||
from sqlalchemy import func
|
||||
from backend.app.db.models.addresses import PostcodeSearch
|
||||
from utils.logger import setup_logger
|
||||
|
|
|
|||
|
|
@ -1,5 +1,4 @@
|
|||
import re
|
||||
from dataclasses import fields
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from datetime import datetime
|
||||
|
|
@ -15,24 +14,24 @@ logger = setup_logger()
|
|||
|
||||
|
||||
class KwhData:
|
||||
COLS_TO_STRINGIFY = ["main_heating_controls", "floor_level"]
|
||||
COLS_TO_STRINGIFY = ["main-heating-controls", "floor-level"]
|
||||
|
||||
CATEGORICAL_COLUMNS = [
|
||||
"lodgement_year", "lodgement_month", "main_fuel", "mainheat_description", "number_heated_rooms",
|
||||
"number_habitable_rooms", "mainheat_energy_eff", "mainheatcont_description", "property_type",
|
||||
"built_form",
|
||||
"construction_age_band", "secondheat_description", "hotwater_description", "hot_water_energy_eff",
|
||||
"walls_description", "walls_energy_eff", "roof_description", "roof_energy_eff", "floor_description",
|
||||
"lodgement-year", "lodgement-month", "main-fuel", "mainheat-description", "number-heated-rooms",
|
||||
"number-habitable-rooms", "mainheat-energy-eff", "mainheatcont-description", "property-type",
|
||||
"built-form",
|
||||
"construction-age-band", "secondheat-description", "hotwater-description", "hot-water-energy-eff",
|
||||
"walls-description", "walls-energy-eff", "roof-description", "roof-energy-eff", "floor-description",
|
||||
"county",
|
||||
"windows_description", "windows_energy_eff", "flat_top_storey",
|
||||
"flat_storey_count", "unheated_corridor_length", "solar_water_heating_flag", "mechanical_ventilation",
|
||||
"low_energy_lighting", "environment_impact_current", "energy_tariff", "current_energy_rating",
|
||||
"floor_level"
|
||||
"windows-description", "windows-energy-eff", "flat-top-storey",
|
||||
"flat-storey-count", "unheated-corridor-length", "solar-water-heating-flag", "mechanical-ventilation",
|
||||
"low-energy-lighting", "environment-impact-current", "energy-tariff", "current-energy-rating",
|
||||
"floor-level"
|
||||
]
|
||||
|
||||
NUMERICAL_COLUMNS = [
|
||||
'heating_cost_current', 'total_floor_area', 'co2_emissions_current', 'energy_consumption_current',
|
||||
'heating_cost_potential', 'hot_water_cost_current', 'current_energy_efficiency'
|
||||
'heating-cost-current', 'total-floor-area', 'co2-emissions-current', 'energy-consumption-current',
|
||||
'heating-cost-potential', 'hot-water-cost-current', 'current-energy-efficiency'
|
||||
]
|
||||
|
||||
def __init__(self, bucket=None, read_consumption_data=False):
|
||||
|
|
@ -107,16 +106,6 @@ class KwhData:
|
|||
# If no match is found, return None or raise an exception
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
def _normalise_epc_keys(data):
|
||||
if isinstance(data, dict):
|
||||
return {key.replace("-", "_"): value for key, value in data.items()}
|
||||
|
||||
if isinstance(data, pd.DataFrame):
|
||||
return data.rename(columns=lambda column: column.replace("-", "_"))
|
||||
|
||||
raise TypeError("Expected dict or DataFrame")
|
||||
|
||||
def combine(self):
|
||||
"""
|
||||
Given the data that is collected containing the kwh values for heating and hot water, this method will combine
|
||||
|
|
@ -139,9 +128,9 @@ class KwhData:
|
|||
# We check that the retrieved energy consumption sufficiently matches the EPC data
|
||||
internal_dataset = []
|
||||
for x in data:
|
||||
epc_data = self._normalise_epc_keys(x["epc"])
|
||||
epc_sap = epc_data["current_energy_efficiency"]
|
||||
epc_potential_sap = epc_data["potential_energy_efficiency"]
|
||||
epc_data = x["epc"]
|
||||
epc_sap = epc_data["current-energy-efficiency"]
|
||||
epc_potential_sap = epc_data["potential-energy-efficiency"]
|
||||
# Make sure this matches the extracted sap
|
||||
if int(epc_sap) != int(x["current_epc_efficiency"]) or int(epc_potential_sap) != int(
|
||||
x["potential_epc_efficiency"]
|
||||
|
|
@ -182,7 +171,7 @@ class KwhData:
|
|||
|
||||
# We also estimate the energy consumption reduction from this data, by band
|
||||
df["total_consumption"] = df["heating_kwh"] + df["hot_water_kwh"]
|
||||
consumption_averages = df.groupby("current_energy_efficiency")["total_consumption"].mean().reset_index()
|
||||
consumption_averages = df.groupby("current-energy-efficiency")["total_consumption"].mean().reset_index()
|
||||
df = df.drop(columns=["total_consumption"])
|
||||
|
||||
self.consumption_averages_filepath = f"energy_consumption/{self.run_date}/consumption_averages.parquet"
|
||||
|
|
@ -214,11 +203,9 @@ class KwhData:
|
|||
# TODO: New is a temporary parameter, which will transform the epc descriptions to their transformed features
|
||||
# in anticipation of the new model
|
||||
|
||||
data = self._normalise_epc_keys(data.copy())
|
||||
|
||||
data["lodgement_date"] = pd.to_datetime(data["lodgement_date"])
|
||||
data["lodgement_year"] = data["lodgement_date"].dt.year
|
||||
data["lodgement_month"] = data["lodgement_date"].dt.month
|
||||
data["lodgement-date"] = pd.to_datetime(data["lodgement-date"])
|
||||
data["lodgement-year"] = data["lodgement-date"].dt.year
|
||||
data["lodgement-month"] = data["lodgement-date"].dt.month
|
||||
|
||||
# For walls, roof, floor description where we have average thermal transmittance, to avoid too many
|
||||
# categories
|
||||
|
|
@ -244,10 +231,8 @@ class KwhData:
|
|||
thermal_transmittance_lookup_table["from"] = thermal_transmittance_lookup_table["from"].astype(str)
|
||||
|
||||
# Apply the lookup table to the data
|
||||
for feature in ["walls_description", "roof_description", "floor_description"]:
|
||||
cleaned_df = pd.DataFrame(
|
||||
cleaned[feature.replace("_", "-")]
|
||||
)[["original_description", "thermal_transmittance"]]
|
||||
for feature in ["walls-description", "roof-description", "floor-description"]:
|
||||
cleaned_df = pd.DataFrame(cleaned[feature])[["original_description", "thermal_transmittance"]]
|
||||
# Round to 2 decimal places and convert to string
|
||||
cleaned_df["thermal_transmittance"] = cleaned_df["thermal_transmittance"].round(2).astype(str)
|
||||
|
||||
|
|
@ -276,10 +261,10 @@ class KwhData:
|
|||
data[self.CATEGORICAL_COLUMNS] = data[self.CATEGORICAL_COLUMNS].astype(str)
|
||||
|
||||
# Create new features:
|
||||
data['estimate_annual_kwh'] = data['energy_consumption_current'] * data['total_floor_area']
|
||||
data['estimate_annual_kwh'] = data['energy-consumption-current'] * data['total-floor-area']
|
||||
|
||||
# Ensure this is string, because we could have mixed types
|
||||
data["lodgement_datetime"] = data["lodgement_datetime"].astype(str)
|
||||
data["lodgement-datetime"] = data["lodgement-datetime"].astype(str)
|
||||
|
||||
if save:
|
||||
self.model_training_data_filepath = f"energy_consumption/{self.run_date}/training_data.parquet"
|
||||
|
|
@ -301,39 +286,29 @@ class KwhData:
|
|||
data is in the format required by the model
|
||||
:return:
|
||||
"""
|
||||
|
||||
epc = p.epc_record.to_dict(case="kebab", source="prepared")
|
||||
numeric_cols = [
|
||||
'current_energy_efficiency',
|
||||
'potential_energy_efficiency', 'environment_impact_current',
|
||||
'environment_impact_potential', 'energy_consumption_current',
|
||||
'energy_consumption_potential', 'co2_emissions_current',
|
||||
'co2_emiss_curr_per_floor_area', 'co2_emissions_potential',
|
||||
'lighting_cost_current', 'lighting_cost_potential',
|
||||
'heating_cost_current', 'heating_cost_potential',
|
||||
'hot_water_cost_current', 'hot_water_cost_potential',
|
||||
'total_floor_area', 'multi_glaze_proportion',
|
||||
'extension_count', 'number_habitable_rooms', 'number_heated_rooms',
|
||||
'low_energy_lighting', 'number_open_fireplaces',
|
||||
'wind_turbine_count', 'unheated_corridor_length',
|
||||
'floor_height', 'photo_supply', 'fixed_lighting_outlets_count',
|
||||
'low_energy_fixed_light_count',
|
||||
'current-energy-efficiency',
|
||||
'potential-energy-efficiency', 'environment-impact-current',
|
||||
'environment-impact-potential', 'energy-consumption-current',
|
||||
'energy-consumption-potential', 'co2-emissions-current',
|
||||
'co2-emiss-curr-per-floor-area', 'co2-emissions-potential',
|
||||
'lighting-cost-current', 'lighting-cost-potential',
|
||||
'heating-cost-current', 'heating-cost-potential',
|
||||
'hot-water-cost-current', 'hot-water-cost-potential',
|
||||
'total-floor-area', 'multi-glaze-proportion',
|
||||
'extension-count', 'number-habitable-rooms', 'number-heated-rooms',
|
||||
'low-energy-lighting', 'number-open-fireplaces',
|
||||
'wind-turbine-count', 'unheated-corridor-length',
|
||||
'floor-height', 'photo-supply', 'fixed-lighting-outlets-count',
|
||||
'low-energy-fixed-light-count',
|
||||
]
|
||||
required_cols = set(numeric_cols + KwhData.CATEGORICAL_COLUMNS + [
|
||||
"uprn", "lodgement_date", "lodgement_datetime", "floor_energy_eff"
|
||||
])
|
||||
|
||||
epc_record = p.epc_record
|
||||
available_fields = {field.name for field in fields(epc_record)}
|
||||
missing_fields = required_cols - available_fields
|
||||
if missing_fields:
|
||||
raise ValueError(f"Missing EPCRecord fields required by KwhData: {sorted(missing_fields)}")
|
||||
|
||||
epc = {field_name: getattr(epc_record, field_name) for field_name in required_cols}
|
||||
|
||||
for v in numeric_cols:
|
||||
if epc[v] is not None:
|
||||
epc[v] = float(epc[v])
|
||||
|
||||
bools_to_remap = ['mains_gas_flag', 'flat_top_storey']
|
||||
bools_to_remap = ['mains-gas-flag', 'flat-top-storey']
|
||||
bool_map = {
|
||||
True: "Y",
|
||||
False: "N",
|
||||
|
|
@ -345,8 +320,8 @@ class KwhData:
|
|||
epc[v] = bool_map[epc[v]]
|
||||
|
||||
no_data = {
|
||||
"floor_level": "NODATA!",
|
||||
"floor_energy_eff": "NO DATA!"
|
||||
"floor-level": "NODATA!",
|
||||
"floor-energy-eff": "NO DATA!"
|
||||
}
|
||||
for v, fill_val in no_data.items():
|
||||
if pd.isnull(epc[v]):
|
||||
|
|
@ -356,8 +331,8 @@ class KwhData:
|
|||
|
||||
def prepare_epc(self, input_properties: list[Property]):
|
||||
scoring_data = pd.DataFrame([self._prepare_epc(p) for p in input_properties])
|
||||
scoring_data["lodgement_year"] = pd.to_datetime(scoring_data["lodgement_date"]).dt.year
|
||||
scoring_data["lodgement_month"] = pd.to_datetime(scoring_data["lodgement_date"]).dt.month
|
||||
scoring_data["lodgement-year"] = pd.to_datetime(scoring_data["lodgement-date"]).dt.year
|
||||
scoring_data["lodgement-month"] = pd.to_datetime(scoring_data["lodgement-date"]).dt.month
|
||||
|
||||
scoring_data["id"] = scoring_data["uprn"].copy()
|
||||
|
||||
|
|
|
|||
|
|
@ -1245,6 +1245,34 @@ class EPCRecord:
|
|||
|
||||
return self.__dict__[RDSAP_RESPONSE] <= other.__dict__[RDSAP_RESPONSE]
|
||||
|
||||
def to_dict(
    self,
    case: Literal["snake", "kebab"] = "kebab",
    source: Literal["prepared", "attributes"] = "prepared",
) -> dict[str, Any]:
    """Return this record's data as a new dict with keys in the requested case.

    Args:
        case: ``"snake"`` rewrites every ``-`` in a key to ``_``;
            ``"kebab"`` rewrites every ``_`` in a key to ``-``.
        source: ``"prepared"`` reads a copy of ``self._prepared_epc``;
            ``"attributes"`` collects the instance attributes whose names
            do not start with an underscore.

    Returns:
        A freshly built dict of the selected data with converted keys.
        Values are passed through as-is (they are not copied).

    Raises:
        ValueError: if ``source`` or ``case`` is not one of the supported
            values, or if ``source`` is ``"prepared"`` while
            ``self._prepared_epc`` is ``None``.
    """
    if source == "prepared":
        if self._prepared_epc is None:
            raise ValueError("Prepared EPC not available")
        data = self._prepared_epc.copy()
    elif source == "attributes":
        # Underscore-prefixed attributes are treated as private and left out.
        data = {
            name: value
            for name, value in vars(self).items()
            if not name.startswith("_")
        }
    else:
        raise ValueError(f"Unknown source: {source}")

    if case == "snake":
        old, new = "-", "_"
    elif case == "kebab":
        old, new = "_", "-"
    else:
        # Reject unsupported values the same way ``source`` does, rather than
        # silently handing back keys in whatever case they happened to be in.
        raise ValueError(f"Unknown case: {case}")

    return {key.replace(old, new): value for key, value in data.items()}
|
||||
|
||||
def get(
|
||||
self,
|
||||
key: str | list[str],
|
||||
|
|
|
|||
|
|
@ -200,20 +200,20 @@ class Costs:
|
|||
self.property = property_instance
|
||||
self.regional_labour_variations = regional_labour_variations
|
||||
|
||||
self.region = county_to_region_map.get(self.property.data["county"], None)
|
||||
self.region = county_to_region_map.get(self.property.epc_record.county, None)
|
||||
if self.region is None:
|
||||
# Try and grab using the local-authority-label
|
||||
self.region = county_to_region_map.get(self.property.data["local-authority-label"], None)
|
||||
self.region = county_to_region_map.get(self.property.epc_record.local_authority_label, None)
|
||||
|
||||
if self.region is None:
|
||||
# Try and get the region after converting the keys to lower
|
||||
self.region = {
|
||||
k.lower(): v for k, v in county_to_region_map.items()
|
||||
}.get(self.property.data["local-authority-label"].lower(), None)
|
||||
}.get(self.property.property.epc_record.local_authority_label.lower(), None)
|
||||
|
||||
if self.region is None:
|
||||
logger.warning("No region found for county %s, defaulting to South East England",
|
||||
self.property.data["county"])
|
||||
self.property.epc_record.county)
|
||||
self.region = "South East England"
|
||||
|
||||
self.labour_adjustment_factor = [
|
||||
|
|
@ -858,8 +858,8 @@ class Costs:
|
|||
n_radiators = self._estimate_n_radiators(
|
||||
number_habitable_rooms=n_rooms,
|
||||
total_floor_area=self.property.floor_area,
|
||||
property_type=self.property.data["property-type"],
|
||||
built_form=self.property.data["built-form"]
|
||||
property_type=self.property.epc_record.property - type,
|
||||
built_form=self.property.epc_record.built_form
|
||||
)
|
||||
|
||||
additionals_labour_cost = labour_rate * self.labour_adjustment_factor
|
||||
|
|
|
|||
|
|
@ -76,7 +76,7 @@ class FloorRecommendations(Definitions):
|
|||
return
|
||||
|
||||
u_value = self.property.floor["thermal_transmittance"]
|
||||
property_type = self.property.data["property-type"]
|
||||
property_type = self.property.epc_record.property_type
|
||||
floor_area = self.property.insulation_floor_area
|
||||
|
||||
if self.property.floor["another_property_below"] | (self.property.floor["insulation_thickness"] in [
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue