mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
256 lines
9.5 KiB
Python
256 lines
9.5 KiB
Python
from tqdm import tqdm
|
|
import pandas as pd
|
|
from backend.onboarders.mappings.property_type import parity_map as property_map
|
|
from backend.onboarders.mappings.age_band import parity_map as age_band_map
|
|
from backend.onboarders.mappings.built_form import parity_map as built_form_map
|
|
from backend.onboarders.epc_descriptions import EpcWallDescriptions, EpcConstructionAgeBand
|
|
|
|
# Register tqdm's pandas integration; `progress_apply` is used further down
# in this script to resolve wall descriptions row by row.
tqdm.pandas()
|
|
|
|
|
|
def check_nulls(data, original_column, mapped_column):
    """Verify that every null in ``mapped_column`` comes from a null source value.

    A null in the mapped column is only acceptable when the corresponding
    value in ``original_column`` was itself null; any other null means the
    mapping did not cover a real source value.

    Args:
        data: DataFrame containing both columns.
        original_column: Name of the column the mapping was applied to.
        mapped_column: Name of the column holding the mapped values.

    Returns:
        True when the check passes (with or without nulls present).

    Raises:
        AssertionError: If a row has a null mapped value but a non-null
            original value.
    """
    null_vals = data[pd.isnull(data[mapped_column])]
    if null_vals.empty:
        return True

    # Raised explicitly rather than via `assert` so the validation is not
    # stripped when Python runs with -O; the exception type is unchanged.
    if not pd.isnull(null_vals[original_column]).all():
        raise AssertionError(
            f"Some values in {mapped_column} were not mapped, but original values were not null"
        )

    # Return True on every passing path, not only when there were no nulls.
    return True
|
|
|
|
|
|
# Sample input data: the "Sustainability" sheet of the Peabody extract.
# NOTE(review): the path is absolute and specific to one developer machine.
data = pd.read_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/"
    "Nov 2025 Consulting Project/"
    "2025_11_11 - Peabody - Data Extracts for Domna.xlsx",
    sheet_name="Sustainability",
)
|
|
|
|
# We want to map the parity fields to standard EPC references. This will allow us to
|
|
# 1) Estimate EPCs more accurately
|
|
# 2) Patch incorrect EPCs with ease
|
|
# 3) Indicate already installed measures
|
|
|
|
# ------------ construction_age_band ------------
# Nulls are tolerated here, but only where "Construction Years" was itself null.
data["construction_age_band"] = data["Construction Years"].map(age_band_map)
check_nulls(data, "Construction Years", "construction_age_band")

# ------------ property_type ------------
# Every "Type" value must have a mapping; no nulls are allowed.
data["property_type"] = data["Type"].map(property_map)
assert data["property_type"].notnull().all(), "Some property types were not mapped"

# ------------ built_form ------------
# Every "Attachment" value must have a mapping; no nulls are allowed.
data["built_form"] = data["Attachment"].map(built_form_map)
assert data["built_form"].notnull().all(), "Some built forms were not mapped"
|
|
|
|
# ------------ Wall Construction ------------
|
|
|
|
|
|
# Unique combinations of ("Wall Construction", "Wall Insulation") and the EPC
# wall description each one maps to directly. A value of None marks a
# combination that still has to be classified in a later step.
wall_mapping = {
    (construction, insulation): description
    for construction, by_insulation in {
        # Cavity walls
        "Cavity": {
            "FilledCavity": EpcWallDescriptions.cavity_filled_cavity,
            "Internal": EpcWallDescriptions.cavity_internal_insulation,
            "External": EpcWallDescriptions.cavity_external_insulation,
            "FilledCavityPlusInternal": EpcWallDescriptions.cavity_filled_plus_internal,
            "FilledCavityPlusExternal": EpcWallDescriptions.cavity_filled_plus_external,
            "AsBuilt": None,  # To be classified
            "Unknown": None,  # To be classified
        },
        # System built walls
        "System": {
            "External": EpcWallDescriptions.system_external_insulation,
            "Internal": EpcWallDescriptions.system_internal_insulation,
            "AsBuilt": None,  # To be classified
            "Unknown": None,
        },
        # Timber Frame walls
        "Timber Frame": {
            "Internal": EpcWallDescriptions.timber_frame_internal_insulation,
            "External": EpcWallDescriptions.timber_frame_external_insulation,
            "AsBuilt": None,  # To be classified
            "Unknown": None,
        },
        # Solid Brick walls
        "Solid Brick": {
            "External": EpcWallDescriptions.solid_brick_external_insulation,
            "Internal": EpcWallDescriptions.solid_brick_internal_insulation,
            "AsBuilt": None,  # To be classified
            "Unknown": None,
        },
        # Granite walls
        "Granite": {
            "External": EpcWallDescriptions.granite_whinstone_external_insulation,
            "Internal": EpcWallDescriptions.granite_whinstone_internal_insulation,
            "AsBuilt": None,
            "Unknown": None,
        },
        # Sandstone walls
        "Sandstone": {
            "Internal": EpcWallDescriptions.sandstone_limestone_internal_insulation,
            "External": EpcWallDescriptions.sandstone_limestone_external_insulation,
            "Unknown": None,
            "AsBuilt": None,
        },
        # Cob walls
        "Cob": {
            "AsBuilt": None,
        },
    }.items()
    for insulation, description in by_insulation.items()
}
|
|
|
|
|
|
def map_cavity_wall_insulation(age_band: EpcConstructionAgeBand):
    """Choose the assumed as-built EPC description for a cavity wall.

    The choice is driven purely by the construction age band:

    * ``start_year()`` below 1976 -> ``cavity_no_insulation_assumed``
    * the 1976-1982 band -> ``cavity_partial_insulated_assumed``
    * a band contained in ``from_year_onwards(1983)`` -> ``cavity_insulated_assumed``

    Args:
        age_band: Construction age band of the property.

    Returns:
        The matching ``EpcWallDescriptions`` member.

    Raises:
        NotImplementedError: If none of the rules above matches the band.
    """
    if age_band.start_year() < 1976:
        return EpcWallDescriptions.cavity_no_insulation_assumed

    if age_band == EpcConstructionAgeBand.from_1976_to_1982:
        return EpcWallDescriptions.cavity_partial_insulated_assumed

    if age_band in EpcConstructionAgeBand.from_year_onwards(1983):
        return EpcWallDescriptions.cavity_insulated_assumed

    # Report the band via `.value`, matching the other wall classifiers in this file.
    raise NotImplementedError(
        f"Age band {age_band.value} not handled for cavity wall as built insulation mapping"
    )
|
|
|
|
|
|
def map_solid_wall_insulation(age_band: EpcConstructionAgeBand):
    """Choose the assumed as-built EPC description for a solid brick wall.

    Rules, checked in order:

    * ``start_year()`` below 1976 -> ``solid_brick_no_insulation_assumed``
    * the 1976-1982 band -> ``solid_brick_partial_insulated_assumed``
    * a band contained in ``from_year_onwards(1983)`` -> ``solid_brick_insulated_assumed``

    Raises:
        NotImplementedError: If no rule matches the given band.
    """
    if age_band.start_year() < 1976:
        assumed = EpcWallDescriptions.solid_brick_no_insulation_assumed
    elif age_band == EpcConstructionAgeBand.from_1976_to_1982:
        assumed = EpcWallDescriptions.solid_brick_partial_insulated_assumed
    elif age_band in EpcConstructionAgeBand.from_year_onwards(1983):
        assumed = EpcWallDescriptions.solid_brick_insulated_assumed
    else:
        raise NotImplementedError(
            f"Age band {age_band.value} not handled for solid wall insulation mapping"
        )
    return assumed
|
|
|
|
|
|
def map_timber_frame_wall_insulation(age_band: EpcConstructionAgeBand):
    """Choose the assumed as-built EPC description for a timber frame wall.

    Rules, checked in order (note the thresholds differ from the masonry
    wall classifiers in this file):

    * ``start_year()`` below 1950 -> ``timber_frame_no_insulation_assumed``
    * ``start_year()`` below 1976 -> ``timber_frame_partial_insulated_assumed``
    * a band contained in ``from_year_onwards(1976)`` -> ``timber_frame_insulated_assumed``

    Raises:
        NotImplementedError: If no rule matches the given band.
    """
    if age_band.start_year() < 1950:
        assumed = EpcWallDescriptions.timber_frame_no_insulation_assumed
    elif age_band.start_year() < 1976:
        assumed = EpcWallDescriptions.timber_frame_partial_insulated_assumed
    elif age_band in EpcConstructionAgeBand.from_year_onwards(1976):
        assumed = EpcWallDescriptions.timber_frame_insulated_assumed
    else:
        raise NotImplementedError(
            f"Age band {age_band.value} not handled for timber frame wall insulation mapping"
        )
    return assumed
|
|
|
|
|
|
def map_system_build_wall_insulation(age_band: EpcConstructionAgeBand):
    """Choose the assumed as-built EPC description for a system built wall.

    Rules, checked in order:

    * ``start_year()`` below 1976 -> ``system_no_insulation_assumed``
    * the 1976-1982 band -> ``system_partial_insulated_assumed``
    * a band contained in ``from_year_onwards(1983)`` -> ``system_insulated_assumed``

    Raises:
        NotImplementedError: If no rule matches the given band.
    """
    if age_band.start_year() < 1976:
        assumed = EpcWallDescriptions.system_no_insulation_assumed
    elif age_band == EpcConstructionAgeBand.from_1976_to_1982:
        assumed = EpcWallDescriptions.system_partial_insulated_assumed
    elif age_band in EpcConstructionAgeBand.from_year_onwards(1983):
        assumed = EpcWallDescriptions.system_insulated_assumed
    else:
        raise NotImplementedError(
            f"Age band {age_band.value} not handled for system build wall insulation mapping"
        )
    return assumed
|
|
|
|
|
|
def map_granite_wall_insulation(age_band: EpcConstructionAgeBand):
    """Choose the assumed as-built EPC description for a granite wall.

    Rules, checked in order:

    * ``start_year()`` below 1976 -> ``granite_whinstone_no_insulation_assumed``
    * the 1976-1982 band -> ``granite_whinstone_partial_insulated_assumed``
    * a band contained in ``from_year_onwards(1983)`` -> the insulated-assumed member

    Raises:
        NotImplementedError: If no rule matches the given band.
    """
    if age_band.start_year() < 1976:
        assumed = EpcWallDescriptions.granite_whinstone_no_insulation_assumed
    elif age_band == EpcConstructionAgeBand.from_1976_to_1982:
        assumed = EpcWallDescriptions.granite_whinstone_partial_insulated_assumed
    elif age_band in EpcConstructionAgeBand.from_year_onwards(1983):
        # NOTE(review): this member is spelled "whinestone" while the other
        # granite members used here are spelled "whinstone" - confirm the
        # name against the EpcWallDescriptions definition.
        assumed = EpcWallDescriptions.granite_whinestone_insulated_assumed
    else:
        raise NotImplementedError(
            f"Age band {age_band.value} not handled for granite wall insulation mapping"
        )
    return assumed
|
|
|
|
|
|
def map_sandstone_wall_insulation(age_band: EpcConstructionAgeBand):
    """Choose the assumed as-built EPC description for a sandstone wall.

    Rules, checked in order:

    * ``start_year()`` below 1976 -> ``sandstone_limestone_no_insulation_assumed``
    * the 1976-1982 band -> ``sandstone_limestone_partial_insulated_assumed``
    * a band contained in ``from_year_onwards(1983)`` -> ``sandstone_limestone_insulated_assumed``

    Raises:
        NotImplementedError: If no rule matches the given band.
    """
    if age_band.start_year() < 1976:
        assumed = EpcWallDescriptions.sandstone_limestone_no_insulation_assumed
    elif age_band == EpcConstructionAgeBand.from_1976_to_1982:
        assumed = EpcWallDescriptions.sandstone_limestone_partial_insulated_assumed
    elif age_band in EpcConstructionAgeBand.from_year_onwards(1983):
        assumed = EpcWallDescriptions.sandstone_limestone_insulated_assumed
    else:
        raise NotImplementedError(
            f"Age band {age_band.value} not handled for sandstone wall insulation mapping"
        )
    return assumed
|
|
|
|
|
|
def map_cob_wall_insulation(age_band: EpcConstructionAgeBand):
    """Choose the as-built EPC description for a cob wall.

    Rules, checked in order:

    * ``start_year()`` below 1983 -> ``cob_as_built_average``
    * a band contained in ``from_year_onwards(1983)`` -> ``cob_as_built_good``

    Raises:
        NotImplementedError: If no rule matches the given band.
    """
    if age_band.start_year() < 1983:
        assumed = EpcWallDescriptions.cob_as_built_average
    elif age_band in EpcConstructionAgeBand.from_year_onwards(1983):
        assumed = EpcWallDescriptions.cob_as_built_good
    else:
        raise NotImplementedError(
            f"Age band {age_band.value} not handled for cob wall insulation mapping"
        )
    return assumed
|
|
|
|
|
|
# Dispatch table: "Wall Construction" value -> function that derives the
# as-built wall description from a construction age band.
AS_BUILT_WALL_CLASSIFIERS = {
    "Cavity": map_cavity_wall_insulation,
    "Solid Brick": map_solid_wall_insulation,
    "Timber Frame": map_timber_frame_wall_insulation,
    "System": map_system_build_wall_insulation,
    "Granite": map_granite_wall_insulation,
    "Sandstone": map_sandstone_wall_insulation,
    "Cob": map_cob_wall_insulation,
}
|
|
|
|
# Fallback description per "Wall Construction" value, used when the
# construction age band of a property is missing.
UNKNOWN_AGE_FALLBACK = {
    "Cavity": EpcWallDescriptions.cavity_as_built_unknown,
    "Solid Brick": EpcWallDescriptions.solid_brick_as_built_unknown,
    "Timber Frame": EpcWallDescriptions.timber_frame_as_built_unknown,
    "System": EpcWallDescriptions.system_as_built_unknown,
    "Granite": EpcWallDescriptions.granite_as_built_unknown,
    "Sandstone": EpcWallDescriptions.sandstone_as_built_unknown,
    "Cob": EpcWallDescriptions.cob_as_built_unknown,
}
|
|
|
|
# First pass: look up each (construction, insulation) pair in wall_mapping.
wall_keys = data[["Wall Construction", "Wall Insulation"]].apply(tuple, axis=1)
data["landlord_wall_description"] = wall_keys.map(wall_mapping)
|
|
|
|
|
|
def fill_as_built(row):
    """Resolve the wall description for one row of the landlord data.

    Resolution order:

    1. If ``landlord_wall_description`` is anything other than ``None`` it is
       returned unchanged (this includes NaN, which is not ``None``).
    2. If ``construction_age_band`` is null, the ``UNKNOWN_AGE_FALLBACK``
       entry for the row's "Wall Construction" is returned (``None`` when
       there is no entry).
    3. Otherwise the ``AS_BUILT_WALL_CLASSIFIERS`` function for the wall
       construction is applied to the age band; ``None`` is returned when no
       classifier is registered.

    Args:
        row: A row exposing ``landlord_wall_description``,
            ``construction_age_band`` and the "Wall Construction" key.

    Returns:
        The resolved wall description, or ``None`` if it cannot be resolved.
    """
    existing = row.landlord_wall_description
    if existing is not None:
        # Already resolved via direct mapping
        return existing

    construction = row["Wall Construction"]
    age_band = row.construction_age_band

    # Missing construction age -> conservative fallback
    if pd.isnull(age_band):
        return UNKNOWN_AGE_FALLBACK.get(construction)

    classify = AS_BUILT_WALL_CLASSIFIERS.get(construction)
    return None if classify is None else classify(age_band)
|
|
|
|
|
|
# Second pass: classify the rows the direct mapping left unresolved, then
# require that every row ended up with a wall description.
resolved_walls = data.progress_apply(fill_as_built, axis=1)
data["landlord_wall_description"] = resolved_walls

assert data["landlord_wall_description"].notnull().all(), (
    "Some wall descriptions could not be resolved"
)
|
|
|
|
# Variables we want to map
|
|
# 'Org Ref', 'Address 1', 'Address 2', 'Address 3', 'Postcode', 'Type',
|
|
# 'Attachment', 'Construction Years', 'Wall Construction',
|
|
# 'Wall Insulation', 'Roof Construction', 'Roof Insulation',
|
|
# 'Floor Construction', 'Floor Insulation', 'Glazing', 'Heating',
|
|
# 'Boiler Efficiency', 'Main Fuel', 'Controls Adequacy', 'UPRN',
|
|
# 'Total Floor Area (m2)'
|