mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
fixed bug in epc record cleaning
This commit is contained in:
parent
5e8847d028
commit
f45260706e
8 changed files with 163 additions and 110 deletions
1
.idea/Model.iml
generated
1
.idea/Model.iml
generated
|
|
@ -6,6 +6,7 @@
|
|||
<sourceFolder url="file://$MODULE_DIR$/model_data" isTestSource="false" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
|
||||
<excludeFolder url="file://$MODULE_DIR$/infrastructure/terraform/.terraform" />
|
||||
</content>
|
||||
<orderEntry type="jdk" jdkName="Fastapi-backend" jdkType="Python SDK" />
|
||||
<orderEntry type="sourceFolder" forTests="false" />
|
||||
|
|
|
|||
|
|
@ -73,25 +73,24 @@ def app():
|
|||
Property UPRN
|
||||
"""
|
||||
|
||||
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Lifespace Rentals/Missed"
|
||||
# data_filename = "For Modelling - Final - reviewed.xlsx"
|
||||
data_filename = "Missed Properties - with address.xlsx"
|
||||
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Livewest/March 2026 SAL"
|
||||
data_filename = "Domna System Review - Livewest.xlsx"
|
||||
sheet_name = "Sheet1"
|
||||
postcode_column = "Postcode"
|
||||
address1_column = "address1"
|
||||
address1_method = None
|
||||
fulladdress_column = "address1"
|
||||
address1_column = None
|
||||
address1_method = "house_number_extraction"
|
||||
fulladdress_column = "Address"
|
||||
address_cols_to_concat = []
|
||||
missing_postcodes_method = None
|
||||
landlord_year_built = None
|
||||
landlord_os_uprn = "UPRN"
|
||||
landlord_property_type = "Type"
|
||||
landlord_built_form = None
|
||||
landlord_os_uprn = "gov UPRN"
|
||||
landlord_property_type = "AssetType"
|
||||
landlord_built_form = "AssetType"
|
||||
landlord_wall_construction = None
|
||||
landlord_roof_construction = None
|
||||
landlord_heating_system = None
|
||||
landlord_existing_pv = None
|
||||
landlord_property_id = "Reference"
|
||||
landlord_property_id = "landlord_uprn"
|
||||
landlord_sap = None
|
||||
outcomes_filename = None
|
||||
outcomes_sheetname = None
|
||||
|
|
|
|||
|
|
@ -173,6 +173,7 @@ def get_data(
|
|||
errors = []
|
||||
no_epc = []
|
||||
for _, home in tqdm(df.iterrows(), total=len(df)):
|
||||
|
||||
try:
|
||||
|
||||
# If we have a block of flats, we cannot retrieve this data
|
||||
|
|
|
|||
|
|
@ -20,7 +20,7 @@ def _get_associated_records(results, uprn, uprn_key="UPRN"):
|
|||
return matched_record
|
||||
|
||||
|
||||
def get_associated_uprns(postcode_search: PostcodeSearch, uprn: str | int):
|
||||
def get_associated_uprns(postcode_search: Optional[PostcodeSearch], uprn: str | int):
|
||||
"""
|
||||
Given a postcode and UPRN, for a remote assessment, fetch all associated UPRNs, based
|
||||
on parent UPRN. This will be properties in the same building
|
||||
|
|
|
|||
|
|
@ -147,6 +147,10 @@ class PropertyModel(Base):
|
|||
is_sap_points_adjusted_for_installed_measures = Column(Boolean, default=False)
|
||||
original_sap_points = Column(Float)
|
||||
|
||||
# New for re-scoring - we will need to delete some of the redundant fields but there is a ticket for this
|
||||
lodged_sap_points = Column(Float)
|
||||
lodged_epc_rating = Column(Enum(Epc))
|
||||
|
||||
|
||||
class FeatureRating(enum.Enum):
|
||||
VERY_GOOD = 5
|
||||
|
|
@ -253,6 +257,12 @@ class PropertyDetailsEpcModel(Base):
|
|||
installed_measures_heat_demand_adjustment = Column(Float)
|
||||
is_epc_adjusted_for_installed_measures = Column(Boolean, default=False)
|
||||
|
||||
# New columns - we'll need to delete some of the redundant fields, associated to "already installed" but
|
||||
# we have a ticket for this piece of work
|
||||
lodged_co2_emissions = Column(Float)
|
||||
lodged_heat_demand = Column(Float)
|
||||
has_been_remodelled = Column(Boolean, default=False)
|
||||
|
||||
|
||||
class PropertyDetailsSpatial(Base):
|
||||
__tablename__ = "property_details_spatial"
|
||||
|
|
|
|||
|
|
@ -837,41 +837,41 @@ async def model_engine(body: PlanTriggerRequest):
|
|||
extract_uprn=True
|
||||
)
|
||||
|
||||
for idx, rebaselined_prediction in rebaselining_response["retrofit-sap-baseline-predictions"].iterrows():
|
||||
property_instance = next(p for p in input_properties if p.uprn == int(rebaselined_prediction["uprn"]))
|
||||
new_rating = rebaselined_prediction["predictions"]
|
||||
new_epc_rating = sap_to_epc(new_rating)
|
||||
# Insert
|
||||
# TODO: TEMP: Compare values
|
||||
compare_scores = []
|
||||
for x in rebaselining_scoring_data["uprn"].unique():
|
||||
record = [p for p in input_properties if p.uprn == x][0].epc_record
|
||||
original_sap = record.current_energy_efficiency
|
||||
new_sap = rebaselining_response["retrofit-sap-baseline-predictions"][
|
||||
rebaselining_response["retrofit-sap-baseline-predictions"]["uprn"] == x
|
||||
]["predictions"].values[0]
|
||||
lodgement_date = record.lodgement_date
|
||||
compare_scores.append({
|
||||
"uprn": x,
|
||||
"original_sap": original_sap,
|
||||
"new_sap": new_sap,
|
||||
"lodgement_date": lodgement_date
|
||||
})
|
||||
compare_scores = pd.DataFrame(compare_scores)
|
||||
|
||||
# property_instance.data["current-energy-efficiency"] = sap_to_epc(new_rating)
|
||||
for uprn in rebaselining_scoring_data["uprn"].unique():
|
||||
# Get the predictions
|
||||
sap_prediction = rebaselining_response["retrofit-sap-baseline-predictions"][
|
||||
rebaselining_response["retrofit-sap-baseline-predictions"]["uprn"] == uprn
|
||||
]["predictions"].values[0]
|
||||
|
||||
addr = [a for a in addresses if a.uprn == property_instance.uprn][0]
|
||||
landlord_remapping = {
|
||||
"total-floor-area": addr.landlord_total_floor_area_m2, # 1m tolerance on floor area to perform remap
|
||||
"property-type": addr.landlord_property_type,
|
||||
"built-form": addr.landlord_built_form,
|
||||
# Components
|
||||
"walls-description": addr.landlord_wall_construction,
|
||||
"roof-description": addr.landlord_roof_construction,
|
||||
"floor-description": addr.landlord_floor_construction,
|
||||
"windows-description": addr.landlord_windows_type,
|
||||
"main-fuel": addr.landlord_fuel_type,
|
||||
"mainheatcont-description": addr.landlord_heating_controls,
|
||||
"hotwater-description": addr.landlord_hot_water_system,
|
||||
# Efficiency
|
||||
"walls-energy-eff": addr.landlord_wall_efficiency,
|
||||
"roof-energy-eff": addr.landlord_roof_efficiency,
|
||||
"windows-energy-eff": addr.landlord_windows_efficiency,
|
||||
"mainheat-energy-eff": addr.landlord_heating_efficiency,
|
||||
"mainheatc-energy-eff": addr.landlord_heating_controls_efficiency,
|
||||
"hot-water-energy-eff": addr.landlord_hot_water_efficiency,
|
||||
"multi-glaze-proportion": addr.landlord_multi_glaze_proportion * 100, # TODO: Fix this!
|
||||
"construction-age-band": addr.landlord_construction_age_band,
|
||||
}
|
||||
carbon_prediction = 1337
|
||||
heat_demand_prediction = 1337
|
||||
|
||||
# Insert the re-baselined scores into the property data
|
||||
for p in input_properties:
|
||||
property_rebaselined_sap = rebaselining_response["retrofit-sap-baseline-predictions"]
|
||||
epc_prediction = sap_to_epc(sap_prediction)
|
||||
# We now need to insert the new values into the epc_record
|
||||
property_instance = next(p for p in input_properties if p.uprn == int(uprn))
|
||||
property_instance.epc_record.insert_new_performance_values(
|
||||
new_sap=sap_prediction,
|
||||
new_epc=epc_prediction,
|
||||
new_carbon=carbon_prediction,
|
||||
new_heat_demand=heat_demand_prediction,
|
||||
)
|
||||
|
||||
kwh_client = KwhData(bucket=get_settings().DATA_BUCKET, read_consumption_data=True)
|
||||
|
||||
|
|
@ -924,26 +924,6 @@ async def model_engine(body: PlanTriggerRequest):
|
|||
# We also make a tweak - if the property has been flagged for solar but doesn't contain
|
||||
# any panel performance, we ensure that we have a 3kWp and 4kWp option for the property
|
||||
|
||||
# TODO: Temp - test re-baselining
|
||||
p = input_properties[0]
|
||||
p.create_base_difference_epc_record(cleaned_lookup=cleaned)
|
||||
scoring_data = p.base_difference_record.df
|
||||
# We just need a recent date to trigger the right models,
|
||||
# as we are only interested in the deltas
|
||||
scoring_data["is_post_sap10_starting"] = True
|
||||
# Score model - SAP re-baselining model
|
||||
model_api.MODEL_URLS["retrofit-sap-baseline-predictions"] = "sapbaselinemodel"
|
||||
model_api.prediction_buckets["retrofit-sap-baseline-predictions"] = "retrofit-sap-baseline-predictions-dev"
|
||||
example_response = model_api.predict_all(
|
||||
df=scoring_data,
|
||||
bucket=get_settings().DATA_BUCKET,
|
||||
model_prefixes=["retrofit-sap-baseline-predictions"],
|
||||
extract_ids=False
|
||||
)
|
||||
|
||||
input_properties[0].data["current-energy-efficiency"] = 58.8
|
||||
input_properties[0].data["current-energy-rating"] = "D"
|
||||
|
||||
logger.info("Identifying property recommendations")
|
||||
recommendations, recommendations_scoring_data, representative_recommendations = {}, [], {}
|
||||
for p in tqdm(input_properties):
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
import re
|
||||
from dataclasses import fields
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from datetime import datetime
|
||||
|
|
@ -14,24 +15,24 @@ logger = setup_logger()
|
|||
|
||||
|
||||
class KwhData:
|
||||
COLS_TO_STRINGIFY = ["main-heating-controls", "floor-level"]
|
||||
COLS_TO_STRINGIFY = ["main_heating_controls", "floor_level"]
|
||||
|
||||
CATEGORICAL_COLUMNS = [
|
||||
"lodgement-year", "lodgement-month", "main-fuel", "mainheat-description", "number-heated-rooms",
|
||||
"number-habitable-rooms", "mainheat-energy-eff", "mainheatcont-description", "property-type",
|
||||
"built-form",
|
||||
"construction-age-band", "secondheat-description", "hotwater-description", "hot-water-energy-eff",
|
||||
"walls-description", "walls-energy-eff", "roof-description", "roof-energy-eff", "floor-description",
|
||||
"lodgement_year", "lodgement_month", "main_fuel", "mainheat_description", "number_heated_rooms",
|
||||
"number_habitable_rooms", "mainheat_energy_eff", "mainheatcont_description", "property_type",
|
||||
"built_form",
|
||||
"construction_age_band", "secondheat_description", "hotwater_description", "hot_water_energy_eff",
|
||||
"walls_description", "walls_energy_eff", "roof_description", "roof_energy_eff", "floor_description",
|
||||
"county",
|
||||
"windows-description", "windows-energy-eff", "flat-top-storey",
|
||||
"flat-storey-count", "unheated-corridor-length", "solar-water-heating-flag", "mechanical-ventilation",
|
||||
"low-energy-lighting", "environment-impact-current", "energy-tariff", "current-energy-rating",
|
||||
"floor-level"
|
||||
"windows_description", "windows_energy_eff", "flat_top_storey",
|
||||
"flat_storey_count", "unheated_corridor_length", "solar_water_heating_flag", "mechanical_ventilation",
|
||||
"low_energy_lighting", "environment_impact_current", "energy_tariff", "current_energy_rating",
|
||||
"floor_level"
|
||||
]
|
||||
|
||||
NUMERICAL_COLUMNS = [
|
||||
'heating-cost-current', 'total-floor-area', 'co2-emissions-current', 'energy-consumption-current',
|
||||
'heating-cost-potential', 'hot-water-cost-current', 'current-energy-efficiency'
|
||||
'heating_cost_current', 'total_floor_area', 'co2_emissions_current', 'energy_consumption_current',
|
||||
'heating_cost_potential', 'hot_water_cost_current', 'current_energy_efficiency'
|
||||
]
|
||||
|
||||
def __init__(self, bucket=None, read_consumption_data=False):
|
||||
|
|
@ -106,6 +107,16 @@ class KwhData:
|
|||
# If no match is found, return None or raise an exception
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
def _normalise_epc_keys(data):
|
||||
if isinstance(data, dict):
|
||||
return {key.replace("-", "_"): value for key, value in data.items()}
|
||||
|
||||
if isinstance(data, pd.DataFrame):
|
||||
return data.rename(columns=lambda column: column.replace("-", "_"))
|
||||
|
||||
raise TypeError("Expected dict or DataFrame")
|
||||
|
||||
def combine(self):
|
||||
"""
|
||||
Given the data that is collected containing the kwh values for heating and hot water, this method will combine
|
||||
|
|
@ -128,9 +139,9 @@ class KwhData:
|
|||
# We check that the retrieved energy consumption sufficiently matches the EPC data
|
||||
internal_dataset = []
|
||||
for x in data:
|
||||
epc_data = x["epc"]
|
||||
epc_sap = epc_data["current-energy-efficiency"]
|
||||
epc_potential_sap = epc_data["potential-energy-efficiency"]
|
||||
epc_data = self._normalise_epc_keys(x["epc"])
|
||||
epc_sap = epc_data["current_energy_efficiency"]
|
||||
epc_potential_sap = epc_data["potential_energy_efficiency"]
|
||||
# Make sure this matches the extracted sap
|
||||
if int(epc_sap) != int(x["current_epc_efficiency"]) or int(epc_potential_sap) != int(
|
||||
x["potential_epc_efficiency"]
|
||||
|
|
@ -171,7 +182,7 @@ class KwhData:
|
|||
|
||||
# We also estimate the energy consumption reduction from this data, by band
|
||||
df["total_consumption"] = df["heating_kwh"] + df["hot_water_kwh"]
|
||||
consumption_averages = df.groupby("current-energy-efficiency")["total_consumption"].mean().reset_index()
|
||||
consumption_averages = df.groupby("current_energy_efficiency")["total_consumption"].mean().reset_index()
|
||||
df = df.drop(columns=["total_consumption"])
|
||||
|
||||
self.consumption_averages_filepath = f"energy_consumption/{self.run_date}/consumption_averages.parquet"
|
||||
|
|
@ -203,9 +214,11 @@ class KwhData:
|
|||
# TODO: New is a temporary parameter, which will transform the epc descriptions to their transformed features
|
||||
# in anticipation of the new model
|
||||
|
||||
data["lodgement-date"] = pd.to_datetime(data["lodgement-date"])
|
||||
data["lodgement-year"] = data["lodgement-date"].dt.year
|
||||
data["lodgement-month"] = data["lodgement-date"].dt.month
|
||||
data = self._normalise_epc_keys(data.copy())
|
||||
|
||||
data["lodgement_date"] = pd.to_datetime(data["lodgement_date"])
|
||||
data["lodgement_year"] = data["lodgement_date"].dt.year
|
||||
data["lodgement_month"] = data["lodgement_date"].dt.month
|
||||
|
||||
# For walls, roof, floor description where we have average thermal transmittance, to avoid too many
|
||||
# categories
|
||||
|
|
@ -231,8 +244,10 @@ class KwhData:
|
|||
thermal_transmittance_lookup_table["from"] = thermal_transmittance_lookup_table["from"].astype(str)
|
||||
|
||||
# Apply the lookup table to the data
|
||||
for feature in ["walls-description", "roof-description", "floor-description"]:
|
||||
cleaned_df = pd.DataFrame(cleaned[feature])[["original_description", "thermal_transmittance"]]
|
||||
for feature in ["walls_description", "roof_description", "floor_description"]:
|
||||
cleaned_df = pd.DataFrame(
|
||||
cleaned[feature.replace("_", "-")]
|
||||
)[["original_description", "thermal_transmittance"]]
|
||||
# Round to 2 decimal places and convert to string
|
||||
cleaned_df["thermal_transmittance"] = cleaned_df["thermal_transmittance"].round(2).astype(str)
|
||||
|
||||
|
|
@ -261,10 +276,10 @@ class KwhData:
|
|||
data[self.CATEGORICAL_COLUMNS] = data[self.CATEGORICAL_COLUMNS].astype(str)
|
||||
|
||||
# Create new features:
|
||||
data['estimate_annual_kwh'] = data['energy-consumption-current'] * data['total-floor-area']
|
||||
data['estimate_annual_kwh'] = data['energy_consumption_current'] * data['total_floor_area']
|
||||
|
||||
# Ensure this is string, because we could have mixed types
|
||||
data["lodgement-datetime"] = data["lodgement-datetime"].astype(str)
|
||||
data["lodgement_datetime"] = data["lodgement_datetime"].astype(str)
|
||||
|
||||
if save:
|
||||
self.model_training_data_filepath = f"energy_consumption/{self.run_date}/training_data.parquet"
|
||||
|
|
@ -286,29 +301,39 @@ class KwhData:
|
|||
data is in the format required by the model
|
||||
:return:
|
||||
"""
|
||||
|
||||
epc = p.data.copy()
|
||||
numeric_cols = [
|
||||
'current-energy-efficiency',
|
||||
'potential-energy-efficiency', 'environment-impact-current',
|
||||
'environment-impact-potential', 'energy-consumption-current',
|
||||
'energy-consumption-potential', 'co2-emissions-current',
|
||||
'co2-emiss-curr-per-floor-area', 'co2-emissions-potential',
|
||||
'lighting-cost-current', 'lighting-cost-potential',
|
||||
'heating-cost-current', 'heating-cost-potential',
|
||||
'hot-water-cost-current', 'hot-water-cost-potential',
|
||||
'total-floor-area', 'multi-glaze-proportion',
|
||||
'extension-count', 'number-habitable-rooms', 'number-heated-rooms',
|
||||
'low-energy-lighting', 'number-open-fireplaces',
|
||||
'wind-turbine-count', 'unheated-corridor-length',
|
||||
'floor-height', 'photo-supply', 'fixed-lighting-outlets-count',
|
||||
'low-energy-fixed-light-count',
|
||||
'current_energy_efficiency',
|
||||
'potential_energy_efficiency', 'environment_impact_current',
|
||||
'environment_impact_potential', 'energy_consumption_current',
|
||||
'energy_consumption_potential', 'co2_emissions_current',
|
||||
'co2_emiss_curr_per_floor_area', 'co2_emissions_potential',
|
||||
'lighting_cost_current', 'lighting_cost_potential',
|
||||
'heating_cost_current', 'heating_cost_potential',
|
||||
'hot_water_cost_current', 'hot_water_cost_potential',
|
||||
'total_floor_area', 'multi_glaze_proportion',
|
||||
'extension_count', 'number_habitable_rooms', 'number_heated_rooms',
|
||||
'low_energy_lighting', 'number_open_fireplaces',
|
||||
'wind_turbine_count', 'unheated_corridor_length',
|
||||
'floor_height', 'photo_supply', 'fixed_lighting_outlets_count',
|
||||
'low_energy_fixed_light_count',
|
||||
]
|
||||
required_cols = set(numeric_cols + KwhData.CATEGORICAL_COLUMNS + [
|
||||
"uprn", "lodgement_date", "lodgement_datetime", "floor_energy_eff"
|
||||
])
|
||||
|
||||
epc_record = p.epc_record
|
||||
available_fields = {field.name for field in fields(epc_record)}
|
||||
missing_fields = required_cols - available_fields
|
||||
if missing_fields:
|
||||
raise ValueError(f"Missing EPCRecord fields required by KwhData: {sorted(missing_fields)}")
|
||||
|
||||
epc = {field_name: getattr(epc_record, field_name) for field_name in required_cols}
|
||||
|
||||
for v in numeric_cols:
|
||||
if epc[v] is not None:
|
||||
epc[v] = float(epc[v])
|
||||
|
||||
bools_to_remap = ['mains-gas-flag', 'flat-top-storey']
|
||||
bools_to_remap = ['mains_gas_flag', 'flat_top_storey']
|
||||
bool_map = {
|
||||
True: "Y",
|
||||
False: "N",
|
||||
|
|
@ -320,8 +345,8 @@ class KwhData:
|
|||
epc[v] = bool_map[epc[v]]
|
||||
|
||||
no_data = {
|
||||
"floor-level": "NODATA!",
|
||||
"floor-energy-eff": "NO DATA!"
|
||||
"floor_level": "NODATA!",
|
||||
"floor_energy_eff": "NO DATA!"
|
||||
}
|
||||
for v, fill_val in no_data.items():
|
||||
if pd.isnull(epc[v]):
|
||||
|
|
@ -331,8 +356,8 @@ class KwhData:
|
|||
|
||||
def prepare_epc(self, input_properties: list[Property]):
|
||||
scoring_data = pd.DataFrame([self._prepare_epc(p) for p in input_properties])
|
||||
scoring_data["lodgement-year"] = pd.to_datetime(scoring_data["lodgement-date"]).dt.year
|
||||
scoring_data["lodgement-month"] = pd.to_datetime(scoring_data["lodgement-date"]).dt.month
|
||||
scoring_data["lodgement_year"] = pd.to_datetime(scoring_data["lodgement_date"]).dt.year
|
||||
scoring_data["lodgement_month"] = pd.to_datetime(scoring_data["lodgement_date"]).dt.month
|
||||
|
||||
scoring_data["id"] = scoring_data["uprn"].copy()
|
||||
|
||||
|
|
|
|||
|
|
@ -309,6 +309,7 @@ class EPCRecord:
|
|||
# Indicates if the EPC record has been predicted. By default, false
|
||||
estimated: Optional[bool] = False
|
||||
sap_05_overwritten: Optional[bool] = False
|
||||
has_been_remodelled: Optional[bool] = False
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# MODEL FLAGS
|
||||
|
|
@ -386,6 +387,35 @@ class EPCRecord:
|
|||
|
||||
return
|
||||
|
||||
def insert_new_performance_values(
|
||||
self, new_sap: float, new_epc: float, new_carbon: float, new_heat_demand: float,
|
||||
):
|
||||
"""
|
||||
Given re-modelling for this property, is used to insert the new values and also keep a record of the
|
||||
fact that re-modelling has taken place
|
||||
:param new_sap:
|
||||
:param new_epc:
|
||||
:param new_carbon:
|
||||
:param new_heat_demand:
|
||||
:return:
|
||||
"""
|
||||
|
||||
self.has_been_remodelled = True
|
||||
# Update prepared epc
|
||||
update_data = {
|
||||
"current_energy_efficiency": new_sap,
|
||||
"current_energy_rating": new_epc,
|
||||
"co2_emissions_current": new_carbon,
|
||||
"energy_consumption_current": new_heat_demand,
|
||||
}
|
||||
# Validate we're updating correct fields
|
||||
for k in update_data:
|
||||
if k not in self._prepared_epc:
|
||||
raise ValueError(f"Attempting to update unknown field '{k}' in prepared EPC")
|
||||
self._prepared_epc.update(update_data)
|
||||
# Update dataclass attributes
|
||||
self._expand_prepared_epc_to_attributes()
|
||||
|
||||
def _apply_averages_cleaning(self) -> None:
|
||||
"""
|
||||
Fills missing property dimension values using medians from cleaning_data.
|
||||
|
|
@ -626,6 +656,10 @@ class EPCRecord:
|
|||
# Ignore keys that are not part of the dataclass schema
|
||||
continue
|
||||
|
||||
if value is None:
|
||||
setattr(self, key, None)
|
||||
continue
|
||||
|
||||
try:
|
||||
cast_value = self._cast_value(value, field_map[key].type)
|
||||
setattr(self, key, cast_value)
|
||||
|
|
@ -812,14 +846,17 @@ class EPCRecord:
|
|||
(property_dimensions["PROPERTY_TYPE"] == self._prepared_epc["property-type"])
|
||||
]
|
||||
|
||||
if self.construction_age_band not in DATA_ANOMALY_MATCHES:
|
||||
if (
|
||||
(self.construction_age_band not in DATA_ANOMALY_MATCHES) and
|
||||
(self.construction_age_band in result["CONSTRUCTION_AGE_BAND"].values)
|
||||
):
|
||||
result = result[
|
||||
(result["CONSTRUCTION_AGE_BAND"] == self.construction_age_band)
|
||||
]
|
||||
|
||||
if (
|
||||
self._prepared_epc["built-form"] not in DATA_ANOMALY_MATCHES
|
||||
and self._prepared_epc["built-form"] in result["BUILT_FORM"]
|
||||
and self._prepared_epc["built-form"] in result["BUILT_FORM"].values
|
||||
):
|
||||
result = result[(result["BUILT_FORM"] == self._prepared_epc["built-form"])]
|
||||
|
||||
|
|
@ -935,7 +972,7 @@ class EPCRecord:
|
|||
|
||||
self._prepared_epc["unheated-corridor-length"] = (
|
||||
float(self._prepared_epc["unheated-corridor-length"])
|
||||
if self._prepared_epc["unheated-corridor-length"] not in ["", None]
|
||||
if self._prepared_epc["unheated-corridor-length"] not in DATA_ANOMALY_MATCHES
|
||||
else None
|
||||
)
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue