fixing extension count bug, tidying up router

This commit is contained in:
Khalim Conn-Kowlessar 2024-01-16 17:11:35 +00:00
parent 6e8c83c228
commit cc8b6801b2
3 changed files with 184 additions and 175 deletions

View file

@ -2,7 +2,6 @@ from datetime import datetime
import numpy as np
import pandas as pd
from epc_api.client import EpcClient
from etl.epc.Record import EPCRecord
from backend.SearchEpc import SearchEpc
from fastapi import APIRouter, Depends
@ -25,7 +24,7 @@ from backend.app.db.models.portfolio import rating_lookup
from backend.app.dependencies import validate_token
from backend.app.plan.schemas import PlanTriggerRequest
from backend.app.plan.utils import create_recommendation_scoring_data, get_cleaned
from backend.app.utils import epc_to_sap_lower_bound, read_csv_from_s3, read_parquet_from_s3, sap_to_epc
from backend.app.utils import epc_to_sap_lower_bound, read_csv_from_s3, sap_to_epc
from backend.ml_models.api import ModelApi
from backend.Property import Property
@ -53,7 +52,6 @@ router = APIRouter(
responses={404: {"description": "Not found"}}
)
# TODO: Need to install base.txt requirements into new env
@router.post("/trigger")
async def trigger_plan(body: PlanTriggerRequest):
@ -64,8 +62,10 @@ async def trigger_plan(body: PlanTriggerRequest):
try:
session.begin()
logger.info("Getting the inputs")
epc_client = EpcClient(auth_token=get_settings().EPC_AUTH_TOKEN)
plan_input = read_csv_from_s3(bucket_name=get_settings().PLAN_TRIGGER_BUCKET, filepath=body.trigger_file_path)
cleaning_data = read_dataframe_from_s3_parquet(
bucket_name=get_settings().DATA_BUCKET, file_key="sap_change_model/cleaning_dataset.parquet",
)
input_properties = []
@ -95,26 +95,25 @@ async def trigger_plan(body: PlanTriggerRequest):
heat_demand_target=None
)
epc_records ={
epc_records = {
'original_epc': epc_searcher.newest_epc,
'full_sap_epc': epc_searcher.full_sap_epc,
'old_data': epc_searcher.old_data,
'old_data': epc_searcher.older_epcs,
}
prepared_epc = EPCRecord(epc_records=epc_records, run_mode="newdata", cleaning_data=cleaning_data) # This uses all the epc records to clean the data
prepared_epc = EPCRecord(epc_records=epc_records, run_mode="newdata",
cleaning_data=cleaning_data) # This uses all the epc records to clean the data
input_properties.append(
Property(
id=property_id,
address1=config['address'],
postcode=config['postcode'],
address=epc_searcher.address_clean,
postcode=epc_searcher.postcode_clean,
epc_record=prepared_epc,
)
)
if not input_properties:
if not input_properties:
return Response(status_code=204)
# The materials data could be cached or local so we don't need to make
@ -127,9 +126,6 @@ async def trigger_plan(body: PlanTriggerRequest):
uprn_filenames = read_dataframe_from_s3_parquet(
bucket_name=get_settings().DATA_BUCKET, file_key="spatial/filename_meta.parquet"
)
cleaning_data = read_dataframe_from_s3_parquet(
bucket_name=get_settings().DATA_BUCKET, file_key="sap_change_model/cleaning_dataset.parquet",
)
photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket=get_settings().DATA_BUCKET)
logger.info("Getting spatial data")
@ -160,12 +156,12 @@ async def trigger_plan(body: PlanTriggerRequest):
recommendations_scoring_data.extend(p.recommendations_scoring_data)
logger.info("Preparing data for scoring in sap change api")
recommendations_scoring_data = pd.DataFrame(recommendations_scoring_data)
recommendations_scoring_data = recommendations_scoring_data.drop(
columns=["rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending", "carbon_ending"]
)
columns=["rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending",
"carbon_ending"]
)
model_api = ModelApi(portfolio_id=body.portfolio_id, timestamp=created_at)
all_predictions = model_api.predict_all(
@ -308,20 +304,20 @@ async def trigger_plan(body: PlanTriggerRequest):
combined_recommendations_scoring_data = pd.DataFrame(combined_recommendations_scoring_data)
# Perform the same cleaning as in the model - first clean number of room variables though
combined_recommendations_scoring_data = DataProcessor.apply_averages_cleaning(
combined_recommendations_scoring_data = EPCDataProcessor.apply_averages_cleaning(
data_to_clean=combined_recommendations_scoring_data,
cleaning_data=cleaning_data,
cols_to_merge_on=['PROPERTY_TYPE', 'BUILT_FORM', 'CONSTRUCTION_AGE_BAND', 'LOCAL_AUTHORITY'],
colnames=["NUMBER_HABITABLE_ROOMS", "NUMBER_HEATED_ROOMS"],
)
combined_recommendations_scoring_data = DataProcessor.apply_averages_cleaning(
combined_recommendations_scoring_data = EPCDataProcessor.apply_averages_cleaning(
data_to_clean=combined_recommendations_scoring_data,
cleaning_data=cleaning_data,
cols_to_merge_on=COLUMNS_TO_MERGE_ON + ["LOCAL_AUTHORITY"],
).drop(columns=["LOCAL_AUTHORITY"])
combined_recommendations_scoring_data = DataProcessor.clean_missings_after_description_process(
combined_recommendations_scoring_data = EPCDataProcessor.clean_missings_after_description_process(
combined_recommendations_scoring_data,
ignore_cols=[
c for c in combined_recommendations_scoring_data.columns if ("thermal_transmittance" in c) or (
@ -329,7 +325,7 @@ async def trigger_plan(body: PlanTriggerRequest):
]
)
combined_recommendations_scoring_data = DataProcessor.clean_efficiency_variables(
combined_recommendations_scoring_data = EPCDataProcessor.clean_efficiency_variables(
combined_recommendations_scoring_data
)
@ -472,11 +468,6 @@ async def trigger_plan(body: PlanTriggerRequest):
update_or_create_property_spatial_details(session, p.uprn, p.spatial)
# TODO: TEMP
if p.data["uprn"] == "":
print("Get rid of me!")
p.data["uprn"] = 0
property_data = p.get_full_property_data()
update_property_data(
session, property_id=p.id, portfolio_id=body.portfolio_id, property_data=property_data

View file

@ -4,7 +4,6 @@ from io import StringIO
import string
import secrets
import logging
import pandas as pd
from io import BytesIO

View file

@ -1,8 +1,8 @@
from datetime import datetime
from dataclasses import dataclass
from etl.epc.ValidationConfiguration import (
EPCRecordValidationConfiguration,
EPCDifferenceRecordValidationConfiguration,
EPCRecordValidationConfiguration,
EPCDifferenceRecordValidationConfiguration,
EPCDifferenceRecordFixedDataValidationConfiguration
)
from etl.epc.DataProcessor import EPCDataProcessor
@ -33,6 +33,7 @@ EFFICIENCY_FEATURES = [x.lower() for x in EFFICIENCY_FEATURES]
ENVIRONMENT = os.environ.get('ENVIRONMENT', 'dev')
DATA_BUCKET = os.environ.get('DATA_BUCKET', 'retrofit-data-dev' if ENVIRONMENT == 'dev' else None)
@dataclass
class EPCRecord:
"""
@ -41,44 +42,44 @@ class EPCRecord:
uprn: int = None
walls_description: str = None
floor_description : str = None
lighting_description : str = None
roof_description : str = None
mainheat_description : str = None
hotwater_description : str = None
main_fuel : str = None
mechanical_ventilation : str = None
secondheat_description : str = None
windows_description : str = None
glazed_type : str = None
multi_glaze_proportion : float = None
low_energy_lighting : float = None
number_open_fireplaces : float = None
mainheatcont_description : str = None
solar_water_heating_flag : str = None
photo_supply : float = None
transaction_type : str = None
energy_tariff : str = None
extension_count : float = None
total_floor_area : float = None
floor_height : float = None
hot_water_energy_eff : str = None
floor_energy_eff : str = None
windows_energy_eff : str = None
walls_energy_eff : str = None
sheating_energy_eff : str = None
roof_energy_eff : str = None
mainheat_energy_eff : str = None
mainheatc_energy_eff : str = None
lighting_energy_eff : str = None
potential_energy_efficiency : float = None
environment_impact_potential : float = None
energy_consumption_potential : float = None
co2_emissions_potential : float = None
lodgement_date : str = None
current_energy_efficiency : int = None
energy_consumption_current : int = None
co2_emissions_current : float = None
floor_description: str = None
lighting_description: str = None
roof_description: str = None
mainheat_description: str = None
hotwater_description: str = None
main_fuel: str = None
mechanical_ventilation: str = None
secondheat_description: str = None
windows_description: str = None
glazed_type: str = None
multi_glaze_proportion: float = None
low_energy_lighting: float = None
number_open_fireplaces: float = None
mainheatcont_description: str = None
solar_water_heating_flag: str = None
photo_supply: float = None
transaction_type: str = None
energy_tariff: str = None
extension_count: float = None
total_floor_area: float = None
floor_height: float = None
hot_water_energy_eff: str = None
floor_energy_eff: str = None
windows_energy_eff: str = None
walls_energy_eff: str = None
sheating_energy_eff: str = None
roof_energy_eff: str = None
mainheat_energy_eff: str = None
mainheatc_energy_eff: str = None
lighting_energy_eff: str = None
potential_energy_efficiency: float = None
environment_impact_potential: float = None
energy_consumption_potential: float = None
co2_emissions_potential: float = None
lodgement_date: str = None
current_energy_efficiency: int = None
energy_consumption_current: int = None
co2_emissions_current: float = None
# u_values_walls = None
# u_values_roof = None
@ -107,7 +108,7 @@ class EPCRecord:
# self.WALLS_DESCRIPTION = 'check'
# Could also have cleaning of records if needed
if self.run_mode == "training":
if self.run_mode == "training":
self.validation_configuration = EPCRecordValidationConfiguration
# self._field_validation()
return
@ -115,7 +116,7 @@ class EPCRecord:
# We are running in newdata mode
if self.epc_records is None:
raise ValueError("Must provide epc records if running in newdata mode")
self.prepared_epc = self.epc_records['original_epc']
self.original_epc = self.epc_records['original_epc'].copy()
@ -123,8 +124,8 @@ class EPCRecord:
self.old_data = self.epc_records['old_data']
if self.cleaning_data is None:
raise ValueError("Must provide cleaning data if running in newdata mode")
raise ValueError("Must provide cleaning data if running in newdata mode")
self._clean_records_using_epc_records()
self._clean_with_data_processor()
self._temp_uprn_catch()
@ -175,7 +176,6 @@ class EPCRecord:
pd.to_datetime(lodgement_date) - pd.to_datetime(EARLIEST_EPC_DATE)
).dt.days
def _temp_uprn_catch(self):
"""
Catch the case we do now have uprn
@ -188,15 +188,14 @@ class EPCRecord:
This method will clean the records using the data processor
"""
epc_data_processor = EPCDataProcessor(
data=self.epc_record_as_dataframe("prepared_epc"),
run_mode="newdata",
data=self.epc_record_as_dataframe("prepared_epc"),
run_mode="newdata",
cleaning_averages=self.cleaning_data
)
epc_data_processor.prepare_data()
self.prepared_epc = epc_data_processor.data.to_dict(orient="records")[0]
def _expand_prepared_epc_to_attributes(self):
"""
This method will expand the prepared epc to attributes
@ -207,44 +206,44 @@ class EPCRecord:
self.uprn: int = int(self.prepared_epc["uprn"])
self.walls_description: str = self.prepared_epc["walls_description"]
self.floor_description : str = self.prepared_epc["floor_description"]
self.lighting_description : str = self.prepared_epc["lighting_description"]
self.roof_description : str = self.prepared_epc["roof_description"]
self.mainheat_description : str = self.prepared_epc["mainheat_description"]
self.hotwater_description : str = self.prepared_epc["hotwater_description"]
self.main_fuel : str = self.prepared_epc["main_fuel"]
self.mechanical_ventilation : str = self.prepared_epc["mechanical_ventilation"]
self.secondheat_description : str = self.prepared_epc["secondheat_description"]
self.windows_description : str = self.prepared_epc["windows_description"]
self.glazed_type : str = self.prepared_epc["glazed_type"]
self.multi_glaze_proportion : float = float(self.prepared_epc["multi_glaze_proportion"])
self.low_energy_lighting : float = float(self.prepared_epc["low_energy_lighting"])
self.number_open_fireplaces : float = float(self.prepared_epc["number_open_fireplaces"])
self.mainheatcont_description : str = self.prepared_epc["mainheatcont_description"]
self.solar_water_heating_flag : str = self.prepared_epc["solar_water_heating_flag"]
self.photo_supply : float = float(self.prepared_epc["photo_supply"])
self.transaction_type : str = self.prepared_epc["transaction_type"]
self.energy_tariff : str = self.prepared_epc["energy_tariff"]
self.extension_count : float = float(self.prepared_epc["extension_count"])
self.total_floor_area : float = float(self.prepared_epc["total_floor_area"])
self.floor_height : float = float(self.prepared_epc["floor_height"])
self.hot_water_energy_eff : str = self.prepared_epc["hot_water_energy_eff"]
self.floor_energy_eff : str = self.prepared_epc["floor_energy_eff"]
self.windows_energy_eff : str = self.prepared_epc["windows_energy_eff"]
self.walls_energy_eff : str = self.prepared_epc["walls_energy_eff"]
self.sheating_energy_eff : str = self.prepared_epc["sheating_energy_eff"]
self.roof_energy_eff : str = self.prepared_epc["roof_energy_eff"]
self.mainheat_energy_eff : str = self.prepared_epc["mainheat_energy_eff"]
self.mainheatc_energy_eff : str = self.prepared_epc["mainheatc_energy_eff"]
self.lighting_energy_eff : str = self.prepared_epc["lighting_energy_eff"]
self.potential_energy_efficiency : float = float(self.prepared_epc["potential_energy_efficiency"])
self.environment_impact_potential : float = float(self.prepared_epc["environment_impact_potential"])
self.energy_consumption_potential : float = float(self.prepared_epc["energy_consumption_potential"])
self.co2_emissions_potential : float = float(self.prepared_epc["co2_emissions_potential"])
self.lodgement_date : str = self.prepared_epc["lodgement_date"]
self.current_energy_efficiency : int = int(self.prepared_epc["current_energy_efficiency"])
self.energy_consumption_current : int = int(self.prepared_epc["energy_consumption_current"])
self.co2_emissions_current : float = float(self.prepared_epc["co2_emissions_current"])
self.floor_description: str = self.prepared_epc["floor_description"]
self.lighting_description: str = self.prepared_epc["lighting_description"]
self.roof_description: str = self.prepared_epc["roof_description"]
self.mainheat_description: str = self.prepared_epc["mainheat_description"]
self.hotwater_description: str = self.prepared_epc["hotwater_description"]
self.main_fuel: str = self.prepared_epc["main_fuel"]
self.mechanical_ventilation: str = self.prepared_epc["mechanical_ventilation"]
self.secondheat_description: str = self.prepared_epc["secondheat_description"]
self.windows_description: str = self.prepared_epc["windows_description"]
self.glazed_type: str = self.prepared_epc["glazed_type"]
self.multi_glaze_proportion: float = float(self.prepared_epc["multi_glaze_proportion"])
self.low_energy_lighting: float = float(self.prepared_epc["low_energy_lighting"])
self.number_open_fireplaces: float = float(self.prepared_epc["number_open_fireplaces"])
self.mainheatcont_description: str = self.prepared_epc["mainheatcont_description"]
self.solar_water_heating_flag: str = self.prepared_epc["solar_water_heating_flag"]
self.photo_supply: float = float(self.prepared_epc["photo_supply"])
self.transaction_type: str = self.prepared_epc["transaction_type"]
self.energy_tariff: str = self.prepared_epc["energy_tariff"]
self.extension_count: float = float(self.prepared_epc["extension_count"])
self.total_floor_area: float = float(self.prepared_epc["total_floor_area"])
self.floor_height: float = float(self.prepared_epc["floor_height"])
self.hot_water_energy_eff: str = self.prepared_epc["hot_water_energy_eff"]
self.floor_energy_eff: str = self.prepared_epc["floor_energy_eff"]
self.windows_energy_eff: str = self.prepared_epc["windows_energy_eff"]
self.walls_energy_eff: str = self.prepared_epc["walls_energy_eff"]
self.sheating_energy_eff: str = self.prepared_epc["sheating_energy_eff"]
self.roof_energy_eff: str = self.prepared_epc["roof_energy_eff"]
self.mainheat_energy_eff: str = self.prepared_epc["mainheat_energy_eff"]
self.mainheatc_energy_eff: str = self.prepared_epc["mainheatc_energy_eff"]
self.lighting_energy_eff: str = self.prepared_epc["lighting_energy_eff"]
self.potential_energy_efficiency: float = float(self.prepared_epc["potential_energy_efficiency"])
self.environment_impact_potential: float = float(self.prepared_epc["environment_impact_potential"])
self.energy_consumption_potential: float = float(self.prepared_epc["energy_consumption_potential"])
self.co2_emissions_potential: float = float(self.prepared_epc["co2_emissions_potential"])
self.lodgement_date: str = self.prepared_epc["lodgement_date"]
self.current_energy_efficiency: int = int(self.prepared_epc["current_energy_efficiency"])
self.energy_consumption_current: int = int(self.prepared_epc["energy_consumption_current"])
self.co2_emissions_current: float = float(self.prepared_epc["co2_emissions_current"])
def _identify_delta_between_prepared_and_original_records(self):
"""
@ -258,14 +257,13 @@ class EPCRecord:
same_index = df.apply(pd.Series.duplicated).any()
self.prepared_epc_delta_metadata = df[same_index[~same_index].index]
def _expand_description_to_features(self):
pass
def _expand_description_to_uvalues(self):
# TODO: can be loop over all the descriptions, or done in one
pass
# def _process_and_prune(self, cleaned_lookup: dict):
# """
# This method will merge on the cleaned lookup table and ensure that the building fabric in the
@ -283,7 +281,7 @@ class EPCRecord:
# left_on_ending = (
# f"{component}_ending" if component == "main-fuel" else f"{component}_description_ending"
# )
# self.df2 = self.df.merge(
# pd.DataFrame(cleaned_lookup[cleaned_key]),
# how="left",
@ -296,7 +294,6 @@ class EPCRecord:
# right_on="original_description",
# suffixes=("", "_ending")
# )
def _clean_records_using_epc_records(self):
"""
@ -328,20 +325,21 @@ class EPCRecord:
# self._clean_energy_consumption_current()
# self._clean_co2_emissions_current()
def epc_record_as_dataframe(self, epc_type: str = "prepared_epc", use_upper_columns: bool = True,
                            replace_empty_string: bool = False):
    """
    This method will return the dataframe representation of the epc record

    The record is looked up with ``self.get(epc_type)`` and its keys become the dataframe columns
    (for a flat dict of scalar values this yields a single row).

    :param epc_type: name of the attribute holding the record to convert
    :param use_upper_columns: if True, column names are upper-cased and "-" is replaced with "_"
    :param replace_empty_string: if True, empty-string cells are replaced with NaN
    :return: the dataframe built from the record
    """
    record = self.get(epc_type)
    # Keys arrive as the index with orient="index"; transposing turns them into columns
    frame = pd.DataFrame.from_dict(record, orient="index").T

    if use_upper_columns:
        frame.columns = [name.upper().replace("-", "_") for name in frame.columns]

    return frame.replace("", np.nan) if replace_empty_string else frame
def _clean_floor_level(self):
"""
This method will clean the floor level, if empty or invalid
@ -360,7 +358,7 @@ class EPCRecord:
"""
if not self.prepared_epc:
raise ValueError("EPC Recrod doesn not contain epc data")
if self.prepared_epc["fixed-lighting-outlets-count"] == "":
# We check old EPCs and the full SAP EPC
@ -380,13 +378,15 @@ class EPCRecord:
if lighting_data:
self.prepared_epc["fixed-lighting-outlets-count"] = round(np.median(lighting_data))
else:
# Use averages from the cleaning dataset, based on the property type, built form, construction age band and local authority
# Use averages from the cleaning dataset, based on the property type, built form, construction age
# band and local authority
cleaned_property_data = EPCDataProcessor.apply_averages_cleaning(
data_to_clean=self.epc_record_as_dataframe("prepared_epc", replace_empty_string=True),
cleaning_data=self.cleaning_data,
cols_to_merge_on=['PROPERTY_TYPE', 'BUILT_FORM', 'CONSTRUCTION_AGE_BAND', 'LOCAL_AUTHORITY'],
)
self.prepared_epc["fixed-lighting-outlets-count"] = round(cleaned_property_data["FIXED_LIGHTING_OUTLETS_COUNT"].values[0])
self.prepared_epc["fixed-lighting-outlets-count"] = round(
cleaned_property_data["FIXED_LIGHTING_OUTLETS_COUNT"].values[0])
else:
self.prepared_epc["fixed-lighting-outlets-count"] = float(self.prepared_epc["fixed-lighting-outlets-count"])
@ -402,7 +402,8 @@ class EPCRecord:
if self.construction_age_band is not None and self.construction_age_band not in DATA_ANOMALY_MATCHES:
result = result[(result["CONSTRUCTION_AGE_BAND"] == self.construction_age_band)]
if self.prepared_epc["built-form"] not in DATA_ANOMALY_MATCHES and self.prepared_epc["built-form"] in result["BUILT_FORM"]:
if self.prepared_epc["built-form"] not in DATA_ANOMALY_MATCHES and self.prepared_epc["built-form"] in result[
"BUILT_FORM"]:
result = result[(result["BUILT_FORM"] == self.prepared_epc["built-form"])]
return result[["NUMBER_HABITABLE_ROOMS", "TOTAL_FLOOR_AREA", "FLOOR_HEIGHT"]].mean()
@ -424,7 +425,8 @@ class EPCRecord:
self.property_dimensions = self._filter_property_dimensions(property_dimensions)
if not self.prepared_epc["number-habitable-rooms"]:
self.prepared_epc["number-habitable-rooms"] = float(self.property_dimensions["NUMBER_HABITABLE_ROOMS"].round())
self.prepared_epc["number-habitable-rooms"] = float(
self.property_dimensions["NUMBER_HABITABLE_ROOMS"].round())
else:
self.prepared_epc["number-habitable-rooms"] = float(self.prepared_epc["number-habitable-rooms"])
@ -451,7 +453,7 @@ class EPCRecord:
self.prepared_epc["total-floor-area"] = float(self.prepared_epc["total-floor-area"])
def _clean_mains_gas(self):
def _clean_mains_gas(self):
"""
This method will clean the mains gas, if empty or invalid
"""
@ -465,7 +467,7 @@ class EPCRecord:
self.prepared_epc["mains-gas-flag"] = None if (
self.prepared_epc["mains-gas-flag"] == "" or self.prepared_epc["mains-gas-flag"] in DATA_ANOMALY_MATCHES
) else map[self.prepared_epc["mains-gas-flag"]]
) else map[self.prepared_epc["mains-gas-flag"]]
def _clean_heat_loss_corridor(self):
"""
@ -480,10 +482,14 @@ class EPCRecord:
"heated corridor": False
}
self.prepared_epc["heat-loss-corridor"] = False if self.prepared_epc["heat-loss-corridor"] in DATA_ANOMALY_MATCHES else map[self.prepared_epc["heat-loss-corridor"]]
self.prepared_epc["unheated-corridor-length"] = float(self.prepared_epc["unheated-corridor-length"]) if self.prepared_epc["unheated-corridor-length"] != "" else None
self.prepared_epc["heat-loss-corridor"] = False if self.prepared_epc[
"heat-loss-corridor"] in DATA_ANOMALY_MATCHES else map[
self.prepared_epc["heat-loss-corridor"]]
self.prepared_epc["unheated-corridor-length"] = (
float(self.prepared_epc["unheated-corridor-length"]) if
self.prepared_epc["unheated-corridor-length"] != "" else None
)
def _clean_count_variables(self):
"""
@ -502,8 +508,6 @@ class EPCRecord:
null_attributes = ["number_of_storeys", "number_of_rooms"]
for attribute, epc_field in fields.items():
# TODO: check this
# value = self.data["extension-count"]
value = self.prepared_epc[epc_field]
if value == "" or value in DATA_ANOMALY_MATCHES:
if attribute in null_attributes:
@ -522,7 +526,8 @@ class EPCRecord:
if not self.prepared_epc:
raise ValueError("EPC Recrod doesn not contain epc data")
self.prepared_epc['wind-turbine-count'] = int(self.prepared_epc['wind-turbine-count']) if self.prepared_epc['wind-turbine-count'] != "" else None
self.prepared_epc['wind-turbine-count'] = int(self.prepared_epc['wind-turbine-count']) if self.prepared_epc[
'wind-turbine-count'] != "" else None
def _clean_solar_hot_water(self):
"""
@ -530,7 +535,7 @@ class EPCRecord:
"""
if not self.prepared_epc:
raise ValueError("EPC Recrod doesn not contain epc data")
value_map = {
"Y": True,
"N": False,
@ -546,7 +551,9 @@ class EPCRecord:
if not self.prepared_epc:
raise ValueError("EPC Recrod doesn not contain epc data")
self.prepared_epc['photo-supply'] = float(self.prepared_epc['photo-supply']) if self.prepared_epc['photo-supply'] != "" else None
self.prepared_epc['photo-supply'] = float(self.prepared_epc['photo-supply']) if self.prepared_epc[
'photo-supply'] != "" \
else None
def _clean_energy(self):
"""
@ -558,7 +565,6 @@ class EPCRecord:
self.prepared_epc['energy-consumption-current'] = float(self.prepared_epc["energy-consumption-current"])
self.prepared_epc['co2-emissions-current'] = float(self.prepared_epc["co2-emissions-current"])
def _clean_built_form(self):
"""
This method will clean the build form, if empty or invalid
@ -566,7 +572,8 @@ class EPCRecord:
if not self.prepared_epc:
raise ValueError("EPC Recrod doesn not contain epc data")
self.prepared_epc['built-form'] = BUILT_FORM_REMAP.get(self.prepared_epc["built-form"], self.prepared_epc["built-form"])
self.prepared_epc['built-form'] = BUILT_FORM_REMAP.get(self.prepared_epc["built-form"],
self.prepared_epc["built-form"])
if self.prepared_epc["built-form"] in DATA_ANOMALY_MATCHES:
if self.prepared_epc["property-type"] == "Flat":
self.prepared_epc["built-form"] = "Semi-Detached"
@ -578,7 +585,8 @@ class EPCRecord:
if not self.prepared_epc:
raise ValueError("EPC Recrod doesn not contain epc data")
self.construction_age_band = EPCDataProcessor.clean_construction_age_band(self.prepared_epc["construction-age-band"])
self.construction_age_band = EPCDataProcessor.clean_construction_age_band(
self.prepared_epc["construction-age-band"])
if self.construction_age_band in DATA_ANOMALY_MATCHES:
if self.old_data:
# Take the most recent
@ -586,7 +594,8 @@ class EPCRecord:
[old_record["lodgement-datetime"] for old_record in self.old_data if
old_record["construction-age-band"] not in DATA_ANOMALY_MATCHES]
)
most_recent = [old_record for old_record in self.old_data if old_record["lodgement-datetime"] == max_datetime]
most_recent = [old_record for old_record in self.old_data if
old_record["lodgement-datetime"] == max_datetime]
self.construction_age_band = EPCDataProcessor.clean_construction_age_band(
most_recent[0]["construction-age-band"]
@ -625,14 +634,15 @@ class EPCRecord:
"""
This method will clean the ventilation, if empty or invalid
"""
self.prepared_epc['mechanical-ventilation'] = None if (self.mechanical_ventilation == "" or self.mechanical_ventilation in DATA_ANOMALY_MATCHES) else self.mechanical_ventilation
self.prepared_epc['mechanical-ventilation'] = None if (
self.mechanical_ventilation == "" or self.mechanical_ventilation in DATA_ANOMALY_MATCHES) else (
self.mechanical_ventilation)
def _field_validation(self):
"""
This method will validate each of the fields in the EPC record
"""
for record_key, validation_config in self.validation_configuration.items():
# Get the variable named record key from self
field_value = self.__dict__[record_key]
@ -650,81 +660,89 @@ class EPCRecord:
"""
if not isinstance(field_value, str):
raise ValueError(f"Field {record_key} has value {field_value} which is not a string")
if 'function' in validation_config:
try:
validation_config['function'](field_value)
except:
raise ValueError(f"Field {record_key} has value {field_value} which does not pass the validation function {validation_config['function']}")
raise ValueError(
f"Field {record_key} has value {field_value} which does not pass the validation function "
f"{validation_config['function']}")
if validation_config['acceptable_values'] is not None:
if field_value not in validation_config['acceptable_values']:
raise ValueError(f"Field {record_key} has value {field_value} which is not in the acceptable values of {validation_config['acceptable_values']}")
raise ValueError(
f"Field {record_key} has value {field_value} which is not in the acceptable values of "
f"{validation_config['acceptable_values']}")
def _validate_float(self, record_key: str, field_value: Union[str, float], validation_config: dict):
"""
Validate a float field
"""
if not isinstance(field_value, float):
raise ValueError(f"Field {record_key} has value {field_value} which is not a float")
if 'function' in validation_config:
try:
validation_config['function'](field_value)
except:
raise ValueError(f"Field {record_key} has value {field_value} which does not pass the validation function {validation_config['function']}")
raise ValueError(
f"Field {record_key} has value {field_value} which does not pass the validation function "
f"{validation_config['function']}")
if validation_config['range'] is not None:
if field_value < validation_config['range'][0] or field_value > validation_config['range'][1]:
raise ValueError(f"Field {record_key} has value {field_value} which is not in the acceptable range of {validation_config['range']}")
raise ValueError(
f"Field {record_key} has value {field_value} which is not in the acceptable range of "
f"{validation_config['range']}")
def __sub__(self, other):
    """
    Subtract another EPC record from this one.

    :param other: the EPCRecord on the right-hand side of the subtraction
    :return: an EPCDifferenceRecord built with record1=self, record2=other and auto_sort=True
    :raises ValueError: if other is not an EPCRecord
    """
    if isinstance(other, EPCRecord):
        return EPCDifferenceRecord(record1=self, record2=other, auto_sort=True)

    raise ValueError("Can only subtract EPCRecord from EPCRecord")
def __gt__(self, other):
    """
    This method will return True if the EPC record is strictly greater than the other

    Records are compared on the attribute named by RDSAP_RESPONSE, read from each instance's __dict__.

    :param other: the EPCRecord to compare against
    :raises ValueError: if other is not an EPCRecord
    """
    if not isinstance(other, EPCRecord):
        raise ValueError("Can only compare EPCRecord to EPCRecord")

    return self.__dict__[RDSAP_RESPONSE] > other.__dict__[RDSAP_RESPONSE]
def __ge__(self, other):
    """
    Tell whether this EPC record is greater than or equal to the other.

    Records are compared on the attribute named by RDSAP_RESPONSE, read from each instance's __dict__.

    :param other: the EPCRecord to compare against
    :raises ValueError: if other is not an EPCRecord
    """
    if isinstance(other, EPCRecord):
        return self.__dict__[RDSAP_RESPONSE] >= other.__dict__[RDSAP_RESPONSE]

    raise ValueError("Can only compare EPCRecord to EPCRecord")
def __lt__(self, other):
    """
    This method will return True if the EPC record is strictly less than the other

    Records are compared on the attribute named by RDSAP_RESPONSE, read from each instance's __dict__.

    :param other: the EPCRecord to compare against
    :raises ValueError: if other is not an EPCRecord
    """
    if not isinstance(other, EPCRecord):
        raise ValueError("Can only compare EPCRecord to EPCRecord")

    return self.__dict__[RDSAP_RESPONSE] < other.__dict__[RDSAP_RESPONSE]
def __le__(self, other):
    """
    This method will return True if the EPC record is less than or equal to the other

    Records are compared on the attribute named by RDSAP_RESPONSE, read from each instance's __dict__.

    :param other: the EPCRecord to compare against
    :raises ValueError: if other is not an EPCRecord
    """
    if not isinstance(other, EPCRecord):
        raise ValueError("Can only compare EPCRecord to EPCRecord")

    return self.__dict__[RDSAP_RESPONSE] <= other.__dict__[RDSAP_RESPONSE]
def get(self, key: Union[str, List[str]], return_asdict: bool = False, key_suffix: str | None = None) -> Any:
"""
This method will return the value of the key
@ -738,8 +756,8 @@ class EPCRecord:
if isinstance(key, list):
return [self.__dict__[x] if x in self.__dict__.keys() else None for x in key]
elif isinstance(key, str):
return self.__dict__[key] if key in self.__dict__.keys() else None
return self.__dict__[key] if key in self.__dict__.keys() else None
class EPCDifferenceRecord:
"""
@ -767,7 +785,6 @@ class EPCDifferenceRecord:
self._validate_difference_record()
# self._detect_fabric_consistency()
def _construct_difference_record(self):
"""
This method will construct the difference record between the two records
@ -778,8 +795,10 @@ class EPCDifferenceRecord:
carbon_change = self.record2.get(CARBON_RESPONSE) - self.record1.get(CARBON_RESPONSE)
component_variables = COMPONENT_FEATURES + EFFICIENCY_FEATURES
ending_record = self.record2.get(component_variables + ["lodgement_date"], return_asdict=True, key_suffix="_ending")
starting_record = self.record1.get(component_variables + ["lodgement_date"], return_asdict=True, key_suffix="_starting")
ending_record = self.record2.get(component_variables + ["lodgement_date"], return_asdict=True,
key_suffix="_ending")
starting_record = self.record1.get(component_variables + ["lodgement_date"], return_asdict=True,
key_suffix="_starting")
self.difference_record = {
"uprn": self.record1.get("uprn"),
@ -812,30 +831,30 @@ class EPCDifferenceRecord:
# if value < 0:
# raise ValueError(f"Difference record has negative value for {key}")
pass
def compare_fields_in_records(self, fields: List[str]):
    """
    This method will compare the records, for specific fields

    :param fields: names of the fields to look up on both records with their ``get`` method
    :return: False as soon as one field differs between record1 and record2, True otherwise
        (including when ``fields`` is empty)
    """
    for field in fields:
        if self.record1.get(field) != self.record2.get(field):
            return False

    # No mismatch found; the former ``all_equal`` flag was never changed, so this is its only outcome
    return True
def get(self, key: str):
    """
    This method will return the value of the key

    :param key: key to look up in the difference record
    :return: the stored value, or None when the key is not present
    """
    # dict.get does the membership test and the lookup in a single step
    return self.difference_record.get(key)
def append_fixed_data(self, fixed_data: dict):
"""
This method will append fixed data to the difference record
"""
"""
self._validate_fixed_data(fixed_data)
self.difference_record.update(fixed_data)