Model/backend/onboarders/parity.py
2026-01-29 19:21:14 +00:00

256 lines
9.5 KiB
Python

from tqdm import tqdm
import pandas as pd
from backend.onboarders.mappings.property_type import parity_map as property_map
from backend.onboarders.mappings.age_band import parity_map as age_band_map
from backend.onboarders.mappings.built_form import parity_map as built_form_map
from backend.onboarders.epc_descriptions import EpcWallDescriptions, EpcConstructionAgeBand
# Register tqdm's pandas integration. This is what makes the `progress_apply`
# method, called on `data` further down this script, available.
tqdm.pandas()
def check_nulls(data: pd.DataFrame, original_column: str, mapped_column: str) -> bool:
    """Check that a mapped column is null only where its source column is null.

    Args:
        data: DataFrame holding both columns.
        original_column: Name of the column the mapping was applied to.
        mapped_column: Name of the column holding the mapped values.

    Returns:
        True when the check passes.

    Raises:
        AssertionError: If any row has a null mapped value although its
            original value was not null.
    """
    # We only allow nulls if the original value was null.
    unmapped = data[mapped_column].isnull() & data[original_column].notnull()
    if unmapped.any():
        # Raised explicitly rather than through `assert`, so the check is not
        # stripped when Python runs with -O.
        raise AssertionError(
            f"Some values in {mapped_column} were not mapped, but original values were not null"
        )
    return True
# Sample input data: read the "Sustainability" sheet of the extract workbook.
# NOTE(review): the workbook path is an absolute path inside one user's home
# directory, so this only runs where the file exists at exactly that location.
data = pd.read_excel(
    io=(
        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody "
        "- Data Extracts for Domna.xlsx"
    ),
    sheet_name="Sustainability",
)
# We want to map the parity fields to standard EPC references. This will allow us to
# 1) Estimate EPCs more accurately
# 2) Patch incorrect EPCs with ease
# 3) Indicate already installed measures

# The two inline checks below raise explicitly instead of using `assert`, so
# they still run when Python is started with -O (which strips asserts), and
# they report which source values had no mapping.

# ------------ construction_age_band ------------
data["construction_age_band"] = data["Construction Years"].map(age_band_map)
# Nulls are tolerated here, but only where "Construction Years" was itself null.
check_nulls(data, "Construction Years", "construction_age_band")

# ------------ property_type ------------
data["property_type"] = data["Type"].map(property_map)
if data["property_type"].isnull().any():
    raise AssertionError(
        "Some property types were not mapped: "
        f"{data.loc[data['property_type'].isnull(), 'Type'].unique().tolist()}"
    )

# ------------ built_form ------------
data["built_form"] = data["Attachment"].map(built_form_map)
if data["built_form"].isnull().any():
    raise AssertionError(
        "Some built forms were not mapped: "
        f"{data.loc[data['built_form'].isnull(), 'Attachment'].unique().tolist()}"
    )
# ------------ Wall Construction ------------
# Unique combinations of "Wall Construction" and "Wall Insulation", grouped by
# construction. A value of None means the combination cannot be mapped
# directly and is still to be classified (see fill_as_built).
_WALL_DESCRIPTION_BY_CONSTRUCTION = {
    # Cavity walls
    "Cavity": {
        "FilledCavity": EpcWallDescriptions.cavity_filled_cavity,
        "Internal": EpcWallDescriptions.cavity_internal_insulation,
        "External": EpcWallDescriptions.cavity_external_insulation,
        "FilledCavityPlusInternal": EpcWallDescriptions.cavity_filled_plus_internal,
        "FilledCavityPlusExternal": EpcWallDescriptions.cavity_filled_plus_external,
        "AsBuilt": None,  # To be classified
        "Unknown": None,  # To be classified
    },
    # System built walls
    "System": {
        "External": EpcWallDescriptions.system_external_insulation,
        "Internal": EpcWallDescriptions.system_internal_insulation,
        "AsBuilt": None,  # To be classified
        "Unknown": None,
    },
    # Timber Frame walls
    "Timber Frame": {
        "Internal": EpcWallDescriptions.timber_frame_internal_insulation,
        "External": EpcWallDescriptions.timber_frame_external_insulation,
        "AsBuilt": None,  # To be classified
        "Unknown": None,
    },
    # Solid Brick walls
    "Solid Brick": {
        "External": EpcWallDescriptions.solid_brick_external_insulation,
        "Internal": EpcWallDescriptions.solid_brick_internal_insulation,
        "AsBuilt": None,  # To be classified
        "Unknown": None,
    },
    # Granite walls
    "Granite": {
        "External": EpcWallDescriptions.granite_whinstone_external_insulation,
        "Internal": EpcWallDescriptions.granite_whinstone_internal_insulation,
        "AsBuilt": None,
        "Unknown": None,
    },
    # Sandstone walls
    "Sandstone": {
        "Internal": EpcWallDescriptions.sandstone_limestone_internal_insulation,
        "External": EpcWallDescriptions.sandstone_limestone_external_insulation,
        "Unknown": None,
        "AsBuilt": None,
    },
    # Cob walls
    "Cob": {
        "AsBuilt": None,
    },
}

# Flatten to {(construction, insulation): description}; iteration follows the
# insertion order of the nested table above.
wall_mapping = {
    (construction, insulation): description
    for construction, by_insulation in _WALL_DESCRIPTION_BY_CONSTRUCTION.items()
    for insulation, description in by_insulation.items()
}
def map_cavity_wall_insulation(age_band: EpcConstructionAgeBand):
    """Pick the assumed insulation description for a cavity wall from its age band.

    Args:
        age_band: Construction age band of the property.

    Returns:
        The "no insulation assumed" description when ``age_band.start_year()``
        is before 1976, the "partial insulated assumed" description for the
        1976-1982 band, and the "insulated assumed" description for bands in
        ``EpcConstructionAgeBand.from_year_onwards(1983)``.

    Raises:
        NotImplementedError: If the age band matches none of the cases above.
    """
    if age_band.start_year() < 1976:
        return EpcWallDescriptions.cavity_no_insulation_assumed
    if age_band == EpcConstructionAgeBand.from_1976_to_1982:
        return EpcWallDescriptions.cavity_partial_insulated_assumed
    if age_band in EpcConstructionAgeBand.from_year_onwards(1983):
        return EpcWallDescriptions.cavity_insulated_assumed
    # Report the band's value, as the other wall-type classifiers in this file do.
    raise NotImplementedError(
        f"Age band {age_band.value} not handled for cavity wall as built insulation mapping"
    )
def map_solid_wall_insulation(age_band: EpcConstructionAgeBand):
    """Pick the assumed insulation description for a solid brick wall.

    Bands whose ``start_year()`` is before 1976 map to "no insulation assumed",
    the 1976-1982 band to "partial insulated assumed", and bands in
    ``EpcConstructionAgeBand.from_year_onwards(1983)`` to "insulated assumed".

    Raises:
        NotImplementedError: If the age band matches none of those cases.
    """
    if age_band.start_year() < 1976:
        description = EpcWallDescriptions.solid_brick_no_insulation_assumed
    elif age_band == EpcConstructionAgeBand.from_1976_to_1982:
        description = EpcWallDescriptions.solid_brick_partial_insulated_assumed
    elif age_band in EpcConstructionAgeBand.from_year_onwards(1983):
        description = EpcWallDescriptions.solid_brick_insulated_assumed
    else:
        raise NotImplementedError(
            f"Age band {age_band.value} not handled for solid wall insulation mapping"
        )
    return description
def map_timber_frame_wall_insulation(age_band: EpcConstructionAgeBand):
    """Pick the assumed insulation description for a timber frame wall.

    Bands starting before 1950 map to "no insulation assumed", bands starting
    before 1976 to "partial insulated assumed", and bands in
    ``EpcConstructionAgeBand.from_year_onwards(1976)`` to "insulated assumed".

    Raises:
        NotImplementedError: If the age band matches none of those cases.
    """
    first_year = age_band.start_year()
    if first_year < 1950:
        return EpcWallDescriptions.timber_frame_no_insulation_assumed
    if first_year < 1976:
        return EpcWallDescriptions.timber_frame_partial_insulated_assumed
    if age_band in EpcConstructionAgeBand.from_year_onwards(1976):
        return EpcWallDescriptions.timber_frame_insulated_assumed
    raise NotImplementedError(
        f"Age band {age_band.value} not handled for timber frame wall insulation mapping"
    )
def map_system_build_wall_insulation(age_band: EpcConstructionAgeBand):
    """Pick the assumed insulation description for a system built wall.

    Bands whose ``start_year()`` is before 1976 map to "no insulation assumed",
    the 1976-1982 band to "partial insulated assumed", and bands in
    ``EpcConstructionAgeBand.from_year_onwards(1983)`` to "insulated assumed".

    Raises:
        NotImplementedError: If the age band matches none of those cases.
    """
    if age_band.start_year() < 1976:
        return EpcWallDescriptions.system_no_insulation_assumed
    elif age_band == EpcConstructionAgeBand.from_1976_to_1982:
        return EpcWallDescriptions.system_partial_insulated_assumed
    elif age_band in EpcConstructionAgeBand.from_year_onwards(1983):
        return EpcWallDescriptions.system_insulated_assumed
    else:
        raise NotImplementedError(
            f"Age band {age_band.value} not handled for system build wall insulation mapping"
        )
def map_granite_wall_insulation(age_band: EpcConstructionAgeBand):
    """Pick the assumed insulation description for a granite wall.

    Bands whose ``start_year()`` is before 1976 map to "no insulation assumed",
    the 1976-1982 band to "partial insulated assumed", and bands in
    ``EpcConstructionAgeBand.from_year_onwards(1983)`` to "insulated assumed".

    Raises:
        NotImplementedError: If the age band matches none of those cases.
    """
    if age_band.start_year() < 1976:
        description = EpcWallDescriptions.granite_whinstone_no_insulation_assumed
    elif age_band == EpcConstructionAgeBand.from_1976_to_1982:
        description = EpcWallDescriptions.granite_whinstone_partial_insulated_assumed
    elif age_band in EpcConstructionAgeBand.from_year_onwards(1983):
        # NOTE(review): this member is spelled "whinestone", whereas every other
        # granite member referenced in this file is spelled "whinstone" —
        # confirm that this attribute exists on EpcWallDescriptions.
        description = EpcWallDescriptions.granite_whinestone_insulated_assumed
    else:
        raise NotImplementedError(
            f"Age band {age_band.value} not handled for granite wall insulation mapping"
        )
    return description
def map_sandstone_wall_insulation(age_band: EpcConstructionAgeBand):
    """Pick the assumed insulation description for a sandstone wall.

    Bands whose ``start_year()`` is before 1976 map to "no insulation assumed",
    the 1976-1982 band to "partial insulated assumed", and bands in
    ``EpcConstructionAgeBand.from_year_onwards(1983)`` to "insulated assumed".

    Raises:
        NotImplementedError: If the age band matches none of those cases.
    """
    if age_band.start_year() < 1976:
        description = EpcWallDescriptions.sandstone_limestone_no_insulation_assumed
    elif age_band == EpcConstructionAgeBand.from_1976_to_1982:
        description = EpcWallDescriptions.sandstone_limestone_partial_insulated_assumed
    elif age_band in EpcConstructionAgeBand.from_year_onwards(1983):
        description = EpcWallDescriptions.sandstone_limestone_insulated_assumed
    else:
        raise NotImplementedError(
            f"Age band {age_band.value} not handled for sandstone wall insulation mapping"
        )
    return description
def map_cob_wall_insulation(age_band: EpcConstructionAgeBand):
    """Pick the as-built description for a cob wall from its age band.

    Bands whose ``start_year()`` is before 1983 map to ``cob_as_built_average``;
    bands in ``EpcConstructionAgeBand.from_year_onwards(1983)`` map to
    ``cob_as_built_good``.

    Raises:
        NotImplementedError: If the age band matches neither case.
    """
    if age_band.start_year() < 1983:
        return EpcWallDescriptions.cob_as_built_average
    if age_band not in EpcConstructionAgeBand.from_year_onwards(1983):
        raise NotImplementedError(
            f"Age band {age_band.value} not handled for cob wall insulation mapping"
        )
    return EpcWallDescriptions.cob_as_built_good
# Dispatch table keyed by the "Wall Construction" value. Each entry is the
# function that picks a wall description from a construction age band.
# fill_as_built looks the classifier up here for rows whose direct
# wall_mapping lookup produced None.
AS_BUILT_WALL_CLASSIFIERS = {
    "Cavity": map_cavity_wall_insulation,
    "Solid Brick": map_solid_wall_insulation,
    "Timber Frame": map_timber_frame_wall_insulation,
    "System": map_system_build_wall_insulation,
    "Granite": map_granite_wall_insulation,
    "Sandstone": map_sandstone_wall_insulation,
    "Cob": map_cob_wall_insulation,
}
# Wall description to use, keyed by the "Wall Construction" value, when a row
# has no construction_age_band and so cannot go through an age-based
# classifier (see fill_as_built).
UNKNOWN_AGE_FALLBACK = {
    "Cavity": EpcWallDescriptions.cavity_as_built_unknown,
    "Solid Brick": EpcWallDescriptions.solid_brick_as_built_unknown,
    "Timber Frame": EpcWallDescriptions.timber_frame_as_built_unknown,
    "System": EpcWallDescriptions.system_as_built_unknown,
    "Granite": EpcWallDescriptions.granite_as_built_unknown,
    "Sandstone": EpcWallDescriptions.sandstone_as_built_unknown,
    "Cob": EpcWallDescriptions.cob_as_built_unknown,
}
# First pass: build a (Wall Construction, Wall Insulation) tuple per row and
# look it up directly in wall_mapping. Combinations that wall_mapping maps to
# None are resolved afterwards by fill_as_built.
_wall_keys = data[["Wall Construction", "Wall Insulation"]].apply(tuple, axis=1)
data["landlord_wall_description"] = _wall_keys.map(wall_mapping)
def fill_as_built(row):
    """Resolve the wall description for a single row of ``data``.

    A row whose ``landlord_wall_description`` is anything other than None keeps
    that value. Otherwise the description is derived from the row's
    "Wall Construction": via ``UNKNOWN_AGE_FALLBACK`` when
    ``construction_age_band`` is null, else via the matching function in
    ``AS_BUILT_WALL_CLASSIFIERS``.

    NOTE(review): the first check is an identity test against None, so a NaN
    description (presumably what an unmapped combination yields) is returned
    unchanged rather than classified — confirm this is intended.

    Returns:
        The resolved description, or None when the wall construction has no
        entry in the relevant lookup table.
    """
    # Already resolved via direct mapping
    existing = row.landlord_wall_description
    if existing is not None:
        return existing

    construction = row["Wall Construction"]
    age_band = row.construction_age_band

    # Missing construction age → conservative fallback
    if pd.isnull(age_band):
        return UNKNOWN_AGE_FALLBACK.get(construction)

    classify = AS_BUILT_WALL_CLASSIFIERS.get(construction)
    return None if classify is None else classify(age_band)
# Second pass: run fill_as_built over every row (with a tqdm progress bar) to
# resolve the descriptions the direct lookup could not provide.
data["landlord_wall_description"] = data.progress_apply(fill_as_built, axis=1)

# Raise explicitly instead of using `assert`, so the check is not stripped
# under `python -O`, and list the distinct combinations left unresolved.
_unresolved_walls = data.loc[
    data["landlord_wall_description"].isnull(),
    ["Wall Construction", "Wall Insulation"],
].drop_duplicates()
if not _unresolved_walls.empty:
    raise AssertionError(
        "Some wall descriptions could not be resolved: "
        f"{_unresolved_walls.values.tolist()}"
    )
# Variables we want to map
# 'Org Ref', 'Address 1', 'Address 2', 'Address 3', 'Postcode', 'Type',
# 'Attachment', 'Construction Years', 'Wall Construction',
# 'Wall Insulation', 'Roof Construction', 'Roof Insulation',
# 'Floor Construction', 'Floor Insulation', 'Glazing', 'Heating',
# 'Boiler Efficiency', 'Main Fuel', 'Controls Adequacy', 'UPRN',
# 'Total Floor Area (m2)'