Model/backend/onboarders/parity.py
2026-02-02 14:53:25 +00:00

490 lines
20 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

from numpy import nan
from tqdm import tqdm
import pandas as pd
from backend.onboarders.mappings.property_type import parity_map as property_map
from backend.onboarders.mappings.age_band import parity_map as age_band_map
from backend.onboarders.mappings.built_form import parity_map as built_form_map
from backend.onboarders.epc_descriptions import EpcWallDescriptions, EpcConstructionAgeBand, EpcEfficiency, \
WALL_DESCRIPTION_EFFICIENCIES
from onboarders.epc_descriptions import EpcRoofDescriptions
tqdm.pandas()
def check_nulls(data, original_column, mapped_column):
    """Confirm that a mapped column is null only where its source column was null.

    Args:
        data: DataFrame containing both columns.
        original_column: Name of the raw column the mapping was applied to.
        mapped_column: Name of the column produced by the mapping.

    Returns:
        True when every null in ``mapped_column`` sits on a row whose
        ``original_column`` value is also null (this includes the case
        where ``mapped_column`` has no nulls at all).

    Raises:
        AssertionError: If at least one non-null original value ended up
            null after mapping.
    """
    # Original values on the rows where the mapping produced a null.
    unmapped_originals = data.loc[data[mapped_column].isnull(), original_column]
    # ``all()`` over an empty selection is True, so "no nulls" passes too.
    if unmapped_originals.isnull().all():
        return True
    # Raised explicitly (instead of via ``assert``) so the check still runs
    # under ``python -O``; the exception type is unchanged.
    raise AssertionError(
        f"Some values in {mapped_column} were not mapped, but original values were not null"
    )
# Sample input data
# Loads the "Sustainability" sheet of the landlord's extract workbook into `data`;
# every later section of this script adds columns to this DataFrame.
# NOTE(review): the path below is an absolute path on one developer's machine, so the
# script only runs there as written - consider making it configurable.
data = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody "
"- Data Extracts for Domna.xlsx",
sheet_name="Sustainability"
)
# Map the parity fields onto standard EPC references. This lets us:
#   1) estimate EPCs more accurately
#   2) patch incorrect EPCs with ease
#   3) indicate measures that are already installed

# ------------ construction_age_band ------------
# Nulls are tolerated here, but only where "Construction Years" itself was null.
data["construction_age_band"] = data["Construction Years"].map(age_band_map)
check_nulls(data, "Construction Years", "construction_age_band")

# ------------ property_type ------------
# Every row must end up with a property type.
data["property_type"] = data["Type"].map(property_map)
assert not data["property_type"].isnull().any(), "Some property types were not mapped"

# ------------ built_form ------------
# Every row must end up with a built form.
data["built_form"] = data["Attachment"].map(built_form_map)
assert not data["built_form"].isnull().any(), "Some built forms were not mapped"
# ------------ Wall Construction ------------
# Lookup keyed by (Wall Construction, Wall Insulation). The table is written
# grouped by construction and flattened into tuple keys below. A value of None
# marks a combination whose description is worked out afterwards by
# fill_as_built (from the construction age band).
wall_mapping = {
    (construction, insulation): description
    for construction, by_insulation in {
        "Cavity": {
            "FilledCavity": EpcWallDescriptions.cavity_filled_cavity,
            "Internal": EpcWallDescriptions.cavity_internal_insulation,
            "External": EpcWallDescriptions.cavity_external_insulation,
            "FilledCavityPlusInternal": EpcWallDescriptions.cavity_filled_plus_internal,
            "FilledCavityPlusExternal": EpcWallDescriptions.cavity_filled_plus_external,
            "AsBuilt": None,  # To be classified
            "Unknown": None,  # To be classified
        },
        "System": {
            "External": EpcWallDescriptions.system_external_insulation,
            "Internal": EpcWallDescriptions.system_internal_insulation,
            "AsBuilt": None,  # To be classified
            "Unknown": None,
        },
        "Timber Frame": {
            "Internal": EpcWallDescriptions.timber_frame_internal_insulation,
            "External": EpcWallDescriptions.timber_frame_external_insulation,
            "AsBuilt": None,  # To be classified
            "Unknown": None,
        },
        "Solid Brick": {
            "External": EpcWallDescriptions.solid_brick_external_insulation,
            "Internal": EpcWallDescriptions.solid_brick_internal_insulation,
            "AsBuilt": None,  # To be classified
            "Unknown": None,
        },
        "Granite": {
            "External": EpcWallDescriptions.granite_whinstone_external_insulation,
            "Internal": EpcWallDescriptions.granite_whinstone_internal_insulation,
            "AsBuilt": None,
            "Unknown": None,
        },
        "Sandstone": {
            "Internal": EpcWallDescriptions.sandstone_limestone_internal_insulation,
            "External": EpcWallDescriptions.sandstone_limestone_external_insulation,
            "Unknown": None,
            "AsBuilt": None,
        },
        "Cob": {
            "AsBuilt": None,
        },
    }.items()
    for insulation, description in by_insulation.items()
}
def map_cavity_wall_insulation(age_band: EpcConstructionAgeBand):
    """Choose the assumed as-built description for a cavity wall.

    Bands whose ``start_year()`` is before 1976 get "no insulation assumed",
    the 1976-1982 band gets "partial insulated assumed", and bands returned
    by ``from_year_onwards(1983)`` get "insulated assumed".

    :param age_band: Construction age band of the property
    :return: EpcWallDescriptions member for the band
    :raises NotImplementedError: if the band matches none of the rules
    """
    if age_band.start_year() < 1976:
        description = EpcWallDescriptions.cavity_no_insulation_assumed
    elif age_band == EpcConstructionAgeBand.from_1976_to_1982:
        description = EpcWallDescriptions.cavity_partial_insulated_assumed
    elif age_band in EpcConstructionAgeBand.from_year_onwards(1983):
        description = EpcWallDescriptions.cavity_insulated_assumed
    else:
        raise NotImplementedError(f"Age band {age_band} not handled for cavity wall as built insulation mapping")
    return description
def map_solid_wall_insulation(age_band: EpcConstructionAgeBand):
    """Choose the assumed as-built description for a solid brick wall.

    Bands whose ``start_year()`` is before 1976 get "no insulation assumed",
    the 1976-1982 band gets "partial insulated assumed", and bands returned
    by ``from_year_onwards(1983)`` get "insulated assumed".

    :param age_band: Construction age band of the property
    :return: EpcWallDescriptions member for the band
    :raises NotImplementedError: if the band matches none of the rules
    """
    if age_band.start_year() < 1976:
        description = EpcWallDescriptions.solid_brick_no_insulation_assumed
    elif age_band == EpcConstructionAgeBand.from_1976_to_1982:
        description = EpcWallDescriptions.solid_brick_partial_insulated_assumed
    elif age_band in EpcConstructionAgeBand.from_year_onwards(1983):
        description = EpcWallDescriptions.solid_brick_insulated_assumed
    else:
        raise NotImplementedError(
            f"Age band {age_band.value} not handled for solid wall insulation mapping"
        )
    return description
def map_timber_frame_wall_insulation(age_band: EpcConstructionAgeBand):
    """Choose the assumed as-built description for a timber frame wall.

    Unlike the other wall types, the cut-offs here are 1950 and 1976:
    bands starting before 1950 get "no insulation assumed", bands starting
    before 1976 get "partial insulated assumed", and bands returned by
    ``from_year_onwards(1976)`` get "insulated assumed".

    :param age_band: Construction age band of the property
    :return: EpcWallDescriptions member for the band
    :raises NotImplementedError: if the band matches none of the rules
    """
    if age_band.start_year() < 1950:
        description = EpcWallDescriptions.timber_frame_no_insulation_assumed
    elif age_band.start_year() < 1976:
        description = EpcWallDescriptions.timber_frame_partial_insulated_assumed
    elif age_band in EpcConstructionAgeBand.from_year_onwards(1976):
        description = EpcWallDescriptions.timber_frame_insulated_assumed
    else:
        raise NotImplementedError(
            f"Age band {age_band.value} not handled for timber frame wall insulation mapping"
        )
    return description
def map_system_build_wall_insulation(age_band: EpcConstructionAgeBand):
    """Choose the assumed as-built description for a system built wall.

    Bands whose ``start_year()`` is before 1976 get "no insulation assumed",
    the 1976-1982 band gets "partial insulated assumed", and bands returned
    by ``from_year_onwards(1983)`` get "insulated assumed".

    :param age_band: Construction age band of the property
    :return: EpcWallDescriptions member for the band
    :raises NotImplementedError: if the band matches none of the rules
    """
    if age_band.start_year() < 1976:
        description = EpcWallDescriptions.system_no_insulation_assumed
    elif age_band == EpcConstructionAgeBand.from_1976_to_1982:
        description = EpcWallDescriptions.system_partial_insulated_assumed
    elif age_band in EpcConstructionAgeBand.from_year_onwards(1983):
        description = EpcWallDescriptions.system_insulated_assumed
    else:
        raise NotImplementedError(
            f"Age band {age_band.value} not handled for system build wall insulation mapping"
        )
    return description
def map_granite_wall_insulation(age_band: EpcConstructionAgeBand):
    """Choose the assumed as-built description for a granite wall.

    Bands whose ``start_year()`` is before 1976 get "no insulation assumed",
    the 1976-1982 band gets "partial insulated assumed", and bands returned
    by ``from_year_onwards(1983)`` get "insulated assumed".

    :param age_band: Construction age band of the property
    :return: EpcWallDescriptions member for the band
    :raises NotImplementedError: if the band matches none of the rules
    """
    if age_band.start_year() < 1976:
        description = EpcWallDescriptions.granite_whinstone_no_insulation_assumed
    elif age_band == EpcConstructionAgeBand.from_1976_to_1982:
        description = EpcWallDescriptions.granite_whinstone_partial_insulated_assumed
    elif age_band in EpcConstructionAgeBand.from_year_onwards(1983):
        # NOTE(review): "whinestone" here vs "whinstone" in the two members
        # above - confirm this matches the member name declared on
        # EpcWallDescriptions, otherwise this branch raises AttributeError.
        description = EpcWallDescriptions.granite_whinestone_insulated_assumed
    else:
        raise NotImplementedError(
            f"Age band {age_band.value} not handled for granite wall insulation mapping"
        )
    return description
def map_sandstone_wall_insulation(age_band: EpcConstructionAgeBand):
    """Choose the assumed as-built description for a sandstone wall.

    Bands whose ``start_year()`` is before 1976 get "no insulation assumed",
    the 1976-1982 band gets "partial insulated assumed", and bands returned
    by ``from_year_onwards(1983)`` get "insulated assumed".

    :param age_band: Construction age band of the property
    :return: EpcWallDescriptions member for the band
    :raises NotImplementedError: if the band matches none of the rules
    """
    if age_band.start_year() < 1976:
        description = EpcWallDescriptions.sandstone_limestone_no_insulation_assumed
    elif age_band == EpcConstructionAgeBand.from_1976_to_1982:
        description = EpcWallDescriptions.sandstone_limestone_partial_insulated_assumed
    elif age_band in EpcConstructionAgeBand.from_year_onwards(1983):
        description = EpcWallDescriptions.sandstone_limestone_insulated_assumed
    else:
        raise NotImplementedError(
            f"Age band {age_band.value} not handled for sandstone wall insulation mapping"
        )
    return description
def map_cob_wall_insulation(age_band: EpcConstructionAgeBand):
    """Choose the as-built description for a cob wall.

    Bands whose ``start_year()`` is before 1983 map to the "average"
    as-built description; bands returned by ``from_year_onwards(1983)``
    map to the "good" one.

    :param age_band: Construction age band of the property
    :return: EpcWallDescriptions member for the band
    :raises NotImplementedError: if the band matches neither rule
    """
    if age_band.start_year() < 1983:
        description = EpcWallDescriptions.cob_as_built_average
    elif age_band in EpcConstructionAgeBand.from_year_onwards(1983):
        description = EpcWallDescriptions.cob_as_built_good
    else:
        raise NotImplementedError(
            f"Age band {age_band.value} not handled for cob wall insulation mapping"
        )
    return description
# Dispatch table: "Wall Construction" value -> function that picks the as-built
# wall description from a construction age band. fill_as_built looks the
# classifier up here for rows whose direct mapping produced None.
AS_BUILT_WALL_CLASSIFIERS = {
"Cavity": map_cavity_wall_insulation,
"Solid Brick": map_solid_wall_insulation,
"Timber Frame": map_timber_frame_wall_insulation,
"System": map_system_build_wall_insulation,
"Granite": map_granite_wall_insulation,
"Sandstone": map_sandstone_wall_insulation,
"Cob": map_cob_wall_insulation,
}
# "Wall Construction" value -> holding description that fill_as_built returns
# when a row still needs classifying but its construction_age_band is null,
# so no age-based classifier can be applied.
WALL_UNKNOWN_AGE_FALLBACK = {
"Cavity": EpcWallDescriptions.cavity_as_built_unknown,
"Solid Brick": EpcWallDescriptions.solid_brick_as_built_unknown,
"Timber Frame": EpcWallDescriptions.timber_frame_as_built_unknown,
"System": EpcWallDescriptions.system_as_built_unknown,
"Granite": EpcWallDescriptions.granite_as_built_unknown,
"Sandstone": EpcWallDescriptions.sandstone_as_built_unknown,
"Cob": EpcWallDescriptions.cob_as_built_unknown,
}
# First pass: pair each row's "Wall Construction" and "Wall Insulation" into a
# tuple and look that tuple up in wall_mapping.
# NOTE(review): pairs that are not keys of wall_mapping presumably come back as
# NaN rather than None - confirm; fill_as_built only treats None as
# "still to classify", so such rows would pass through it untouched.
data["landlord_wall_description"] = (
data[["Wall Construction", "Wall Insulation"]]
.apply(tuple, axis=1)
.map(wall_mapping)
)
def fill_as_built(row):
    """Resolve the wall description for one row of ``data``.

    A row whose ``landlord_wall_description`` is anything other than None is
    returned unchanged. Otherwise the description is chosen from the row's
    "Wall Construction": the unknown-age fallback when the age band is null,
    or the age-based classifier for that construction.

    :param row: One DataFrame row (as passed by ``apply(..., axis=1)``)
    :return: The resolved description, or None when the wall construction has
        no fallback / classifier registered
    """
    existing = row.landlord_wall_description
    # NOTE(review): only None counts as unresolved here (the roof equivalent
    # uses pd.isnull) - confirm NaN descriptions are meant to pass through.
    if existing is not None:
        return existing

    construction = row["Wall Construction"]

    # No age band to classify with: use the conservative holding description.
    if pd.isnull(row.construction_age_band):
        return WALL_UNKNOWN_AGE_FALLBACK.get(construction)

    classify = AS_BUILT_WALL_CLASSIFIERS.get(construction)
    return None if classify is None else classify(row.construction_age_band)
def resolve_wall_efficiency(
    description: EpcWallDescriptions,
    age_band: EpcConstructionAgeBand | None,
) -> EpcEfficiency:
    """Work out the efficiency rating that goes with a wall description.

    ``EpcEfficiency.NA`` is returned when the description's text contains
    "unknown insulation", when WALL_DESCRIPTION_EFFICIENCIES has no entry for
    it, or when the entry needs an age band and none is available. An entry
    that is already an EpcEfficiency is returned directly; any other entry is
    called with the age band.

    :param description: Resolved wall description
    :param age_band: Construction age band, or None / null when unknown
    :return: EpcEfficiency for the wall
    """
    label = description.value.lower()
    # Unknown / holding descriptions -> efficiency unknown
    if "unknown insulation" in label:
        return EpcEfficiency.NA

    efficiency_rule = WALL_DESCRIPTION_EFFICIENCIES.get(description)
    if efficiency_rule is None:
        return EpcEfficiency.NA

    # A fixed rating needs no further input.
    if isinstance(efficiency_rule, EpcEfficiency):
        return efficiency_rule

    # Anything else is a callable keyed on the age band, which may be missing.
    age_missing = age_band is None or pd.isnull(age_band)
    return EpcEfficiency.NA if age_missing else efficiency_rule(age_band)
# Second pass: resolve the rows the direct mapping left as None.
data["landlord_wall_description"] = data.progress_apply(fill_as_built, axis=1)
assert not data["landlord_wall_description"].isnull().any(), "Some wall descriptions could not be resolved"

# Derive the efficiency rating from the resolved description and the age band.
data["landlord_wall_efficiency"] = data.progress_apply(
    lambda record: resolve_wall_efficiency(
        description=record.landlord_wall_description,
        age_band=record.construction_age_band,
    ),
    axis=1,
)

# Sanity check
assert not data["landlord_wall_efficiency"].isnull().any()
# ------------ Roof Construction ------------
# Lookup keyed by (Roof Construction, Roof Insulation). A value of None marks a
# combination whose description is worked out afterwards by fill_roof_as_built.
# NOTE(review): keys containing `nan` presumably rely on pandas matching missing
# values during .map - confirm with a row that has a blank "Roof Insulation".
roof_mapping = {
    # Dwelling above
    ('AnotherDwellingAbove', 'Another Dwelling Above'): EpcRoofDescriptions.another_dwelling_above,
    # NOTE(review): "same dwelling above" is mapped to the "another dwelling
    # above" description - presumably deliberate; confirm.
    ('SameDwellingAbove', 'Same Dwelling Above'): EpcRoofDescriptions.another_dwelling_above,
    # Pitched, normal loft access, with a loft thickness
    ('PitchedNormalLoftAccess', 'mm25'): EpcRoofDescriptions.loft_25mm_insulation,
    ('PitchedNormalLoftAccess', 'mm50'): EpcRoofDescriptions.loft_50mm_insulation,
    ('PitchedNormalLoftAccess', 'mm75'): EpcRoofDescriptions.loft_75mm_insulation,
    ('PitchedNormalLoftAccess', 'mm100'): EpcRoofDescriptions.loft_100mm_insulation,
    ('PitchedNormalLoftAccess', 'mm150'): EpcRoofDescriptions.loft_150mm_insulation,
    ('PitchedNormalLoftAccess', 'mm200'): EpcRoofDescriptions.loft_200mm_insulation,
    ('PitchedNormalLoftAccess', 'mm250'): EpcRoofDescriptions.loft_250mm_insulation,
    ('PitchedNormalLoftAccess', 'mm270'): EpcRoofDescriptions.loft_270mm_insulation,
    ('PitchedNormalLoftAccess', 'mm300'): EpcRoofDescriptions.loft_300mm_insulation,
    ('PitchedNormalLoftAccess', 'mm350'): EpcRoofDescriptions.loft_350mm_insulation,
    ('PitchedNormalLoftAccess', 'mm400'): EpcRoofDescriptions.loft_400mm_plus_insulation,
    # Pitched, no loft access, with a loft thickness
    ('PitchedNormalNoLoftAccess', 'mm25'): EpcRoofDescriptions.loft_25mm_insulation,
    ('PitchedNormalNoLoftAccess', 'mm50'): EpcRoofDescriptions.loft_50mm_insulation,
    ('PitchedNormalNoLoftAccess', 'mm75'): EpcRoofDescriptions.loft_75mm_insulation,
    ('PitchedNormalNoLoftAccess', 'mm100'): EpcRoofDescriptions.loft_100mm_insulation,
    ('PitchedNormalNoLoftAccess', 'mm150'): EpcRoofDescriptions.loft_150mm_insulation,
    ('PitchedNormalNoLoftAccess', 'mm200'): EpcRoofDescriptions.loft_200mm_insulation,
    ('PitchedNormalNoLoftAccess', 'mm250'): EpcRoofDescriptions.loft_250mm_insulation,
    ('PitchedNormalNoLoftAccess', 'mm270'): EpcRoofDescriptions.loft_270mm_insulation,
    ('PitchedNormalNoLoftAccess', 'mm300'): EpcRoofDescriptions.loft_300mm_insulation,
    ('PitchedNormalNoLoftAccess', 'mm350'): EpcRoofDescriptions.loft_350mm_insulation,
    ('PitchedNormalNoLoftAccess', 'mm400'): EpcRoofDescriptions.loft_400mm_plus_insulation,
    # All pitched options with as-built or unknown insulation go to
    # EpcRoofDescriptions.pitched_insulated_assumed
    # With access
    ('PitchedNormalLoftAccess', nan): EpcRoofDescriptions.pitched_insulated_assumed,
    ('PitchedNormalLoftAccess', 'AsBuilt'): EpcRoofDescriptions.pitched_insulated_assumed,
    ('PitchedNormalLoftAccess', 'Unknown'): EpcRoofDescriptions.pitched_insulated_assumed,
    # No access
    ('PitchedNormalNoLoftAccess', nan): EpcRoofDescriptions.pitched_insulated_assumed,
    ('PitchedNormalNoLoftAccess', 'AsBuilt'): EpcRoofDescriptions.pitched_insulated_assumed,
    ('PitchedNormalNoLoftAccess', 'Unknown'): EpcRoofDescriptions.pitched_insulated_assumed,
    # Flat
    ('Flat', 'NoInsulation'): EpcRoofDescriptions.flat_no_insulation,
    # Flat - limited insulation
    ('Flat', '12mm'): EpcRoofDescriptions.flat_limited_insulation,
    # Every other thickness key (including the sloping-ceiling 12 mm entry)
    # uses the 'mmNN' spelling, so accept that form for flat roofs as well.
    # Without it a ('Flat', 'mm12') row would be left unmapped by this table.
    ('Flat', 'mm12'): EpcRoofDescriptions.flat_limited_insulation,
    ('Flat', 'mm25'): EpcRoofDescriptions.flat_limited_insulation,
    ('Flat', 'mm50'): EpcRoofDescriptions.flat_limited_insulation,
    # Flat insulated
    ('Flat', 'mm75'): EpcRoofDescriptions.flat_insulated,
    ('Flat', 'mm100'): EpcRoofDescriptions.flat_insulated,
    ('Flat', 'mm150'): EpcRoofDescriptions.flat_insulated,
    ('Flat', 'mm200'): EpcRoofDescriptions.flat_insulated,
    ('Flat', 'mm250'): EpcRoofDescriptions.flat_insulated,
    ('Flat', 'mm300'): EpcRoofDescriptions.flat_insulated,
    ('Flat', 'mm350'): EpcRoofDescriptions.flat_insulated,
    ('Flat', 'mm400'): EpcRoofDescriptions.flat_insulated,
    # Flat - as built or unknown
    ('Flat', 'AsBuilt'): None,  # To be classified
    ('Flat', nan): None,  # To be classified
    ('Flat', 'Unknown'): None,  # To be classified
    # 12mm = very poor & has limited insulation description
    # 25, 50 = poor & has limited insulation description
    # 75, 100, 125mm = average (Flat, insulated)
    # 150, 175, 200, 225, 250mm = good (Flat, insulated)
    # 270mm+ = very good (Flat, insulated)
    # Thatched
    ('PitchedThatched', 'mm50'): EpcRoofDescriptions.thatched_with_additional_insulation,
    ('PitchedThatched', 'mm150'): EpcRoofDescriptions.thatched_with_additional_insulation,
    ('PitchedThatched', 'mm300'): EpcRoofDescriptions.thatched_with_additional_insulation,
    ('PitchedThatched', 'Unknown'): EpcRoofDescriptions.thatched,  # efficiency classified based on age
    # Sloping:
    # Limited (12 very poor, 25-50 poor)
    ('PitchedWithSlopingCeiling', 'mm12'): EpcRoofDescriptions.sloping_pitched_limited_insulation,
    ('PitchedWithSlopingCeiling', 'mm25'): EpcRoofDescriptions.sloping_pitched_limited_insulation,
    ('PitchedWithSlopingCeiling', 'mm50'): EpcRoofDescriptions.sloping_pitched_limited_insulation,
    # Insulated 75mm+ (75 - 125 average, 150 - 250 good, 270+ very good)
    ('PitchedWithSlopingCeiling', 'mm75'): EpcRoofDescriptions.sloping_pitched_insulated,
    ('PitchedWithSlopingCeiling', 'mm100'): EpcRoofDescriptions.sloping_pitched_insulated,
    ('PitchedWithSlopingCeiling', 'mm150'): EpcRoofDescriptions.sloping_pitched_insulated,
    ('PitchedWithSlopingCeiling', 'mm200'): EpcRoofDescriptions.sloping_pitched_insulated,
    ('PitchedWithSlopingCeiling', 'mm250'): EpcRoofDescriptions.sloping_pitched_insulated,
    ('PitchedWithSlopingCeiling', 'mm270'): EpcRoofDescriptions.sloping_pitched_insulated,
    ('PitchedWithSlopingCeiling', 'mm300'): EpcRoofDescriptions.sloping_pitched_insulated,
    ('PitchedWithSlopingCeiling', 'mm350'): EpcRoofDescriptions.sloping_pitched_insulated,
    ('PitchedWithSlopingCeiling', 'mm400'): EpcRoofDescriptions.sloping_pitched_insulated,
    # As built/unknown
    ('PitchedWithSlopingCeiling', 'AsBuilt'): None,  # To be classified
    ('PitchedWithSlopingCeiling', nan): None,  # To be classified
    ('PitchedWithSlopingCeiling', 'Unknown'): None,  # To be classified
}
def classify_flat_roof(age_band: EpcConstructionAgeBand) -> EpcRoofDescriptions:
    """
    Pick the as-built description for a flat roof from the age band's start year.

    Breakdown by construction period:
        2023 onwards          -> Flat, insulated
        2003-2022             -> Flat, insulated
        1983-2002             -> Flat, insulated
        1976-1982             -> Flat, limited insulation
        1967-1975             -> Flat, limited insulation
        1950-1966 and earlier -> Flat, no insulation

    :param age_band: Input age band
    :return: EpcRoofDescriptions
    """
    start = age_band.start_year()
    if start >= 1983:
        description = EpcRoofDescriptions.flat_insulated
    elif start >= 1967:
        description = EpcRoofDescriptions.flat_limited_insulation
    else:
        description = EpcRoofDescriptions.flat_no_insulation
    return description
def classify_sloping_ceiling_roof(age_band: EpcConstructionAgeBand) -> EpcRoofDescriptions:
    """
    Pick the as-built description for a pitched roof with a sloping ceiling
    from the age band's start year.

    Breakdown by construction period:
        2023 onwards          -> Sloping pitched, insulated
        2003-2022             -> Sloping pitched, insulated
        1983-2002             -> Sloping pitched, insulated
        1976-1982             -> Sloping pitched, limited insulation
        1967-1975 and earlier -> Sloping pitched, no insulation

    :param age_band: Input age band
    :return: EpcRoofDescriptions
    """
    start = age_band.start_year()
    if start >= 1983:
        description = EpcRoofDescriptions.sloping_pitched_insulated
    elif start >= 1976:
        description = EpcRoofDescriptions.sloping_pitched_limited_insulation
    else:
        description = EpcRoofDescriptions.sloping_pitched_no_insulation
    return description
# Dispatch table: "Roof Construction" value -> function that picks the as-built
# roof description from a construction age band. Used by fill_roof_as_built.
AS_BUILT_ROOF_CLASSIFIERS = {
# Only need to apply this to flat and sloping ceiling roofs
"Flat": classify_flat_roof,
"PitchedWithSlopingCeiling": classify_sloping_ceiling_roof,
}
# "Roof Construction" value -> holding description that fill_roof_as_built
# consults when a row's construction_age_band is null.
# Both pitched-loft variants share the same loft holding description.
ROOF_UNKNOWN_AGE_FALLBACK = {
"Flat": EpcRoofDescriptions.flat_as_built_unknown,
"PitchedWithSlopingCeiling": EpcRoofDescriptions.sloping_pitched_as_built_unknown,
"PitchedThatched": EpcRoofDescriptions.thatched_as_built_unknown,
"PitchedNormalLoftAccess": EpcRoofDescriptions.loft_as_built_unknown,
"PitchedNormalNoLoftAccess": EpcRoofDescriptions.loft_as_built_unknown,
}
def fill_roof_as_built(row):
    """Resolve the roof description for one row of ``data``.

    A row whose ``landlord_roof_description`` is already non-null is returned
    unchanged. Otherwise:

    * if ``construction_age_band`` is null, the holding description from
      ROOF_UNKNOWN_AGE_FALLBACK for the row's "Roof Construction" is used;
    * else the age-based classifier from AS_BUILT_ROOF_CLASSIFIERS is applied.

    The fallback is consulted *before* the classifier lookup (the same order
    fill_as_built uses for walls). Previously the classifier lookup came
    first and raised for any roof type without a classifier, which made the
    fallback entries for thatched and normal pitched roofs unreachable.

    :param row: One DataFrame row (as passed by ``apply(..., axis=1)``)
    :return: The resolved EpcRoofDescriptions member
    :raises NotImplementedError: if the roof type has neither a usable
        fallback (age unknown) nor a classifier (age known), or if the
        classifier returns None
    """
    # Already resolved
    if not pd.isnull(row.landlord_roof_description):
        return row.landlord_roof_description

    roof_type = row["Roof Construction"]

    # Missing construction age: no classifier can run, use the holding value.
    if pd.isnull(row.construction_age_band):
        fallback = ROOF_UNKNOWN_AGE_FALLBACK.get(roof_type)
        if fallback is None:
            raise NotImplementedError(
                f"No unknown-age fallback for roof type '{roof_type}'"
            )
        return fallback

    classifier = AS_BUILT_ROOF_CLASSIFIERS.get(roof_type)
    if classifier is None:
        raise NotImplementedError(f"No roof classifier for roof type '{roof_type}'")

    output = classifier(row.construction_age_band)
    if output is None:
        raise NotImplementedError(
            f"Roof classification returned None for roof type '{roof_type}'"
        )
    return output
# First pass: direct lookup of each (Roof Construction, Roof Insulation) pair.
data["landlord_roof_description"] = (
    data[["Roof Construction", "Roof Insulation"]].progress_apply(tuple, axis=1).map(roof_mapping)
)

# Second pass: fill in whatever the direct lookup left null.
data["landlord_roof_description"] = data.progress_apply(fill_roof_as_built, axis=1)

# Sanity check
assert not data["landlord_roof_description"].isnull().any(), "Some roof descriptions could not be resolved"
# TODO: 1) Map energy efficiency
# TODO: 2) Flag sloped ceilings
# Variables we want to map
# 'Org Ref', 'Address 1', 'Address 2', 'Address 3', 'Postcode', 'Type',
# 'Attachment', 'Construction Years',
# 'Roof Construction', 'Roof Insulation',
# 'Floor Construction', 'Floor Insulation', 'Glazing', 'Heating',
# 'Boiler Efficiency', 'Main Fuel', 'Controls Adequacy', 'UPRN',
# 'Total Floor Area (m2)'