add fallback to rows without an age band

This commit is contained in:
Khalim Conn-Kowlessar 2026-01-29 19:06:27 +00:00
parent 28e9f37739
commit 0524d7ab9c
2 changed files with 55 additions and 36 deletions

View file

@ -1,19 +1,19 @@
from backend.onboarders.epc_descriptions import EpcConstructionAgeBand
# Maps the landlord's age-band labels onto EpcConstructionAgeBand members.
# Each label is listed exactly once and maps to the enum member itself (not
# its ``.value``); repeating a key in a dict literal silently keeps only the
# last occurrence, so the earlier ``.value`` entries were dead.
parity_map = {
    "Before 1900": EpcConstructionAgeBand.before_1900,
    "1900-1929": EpcConstructionAgeBand.from_1900_to_1929,
    "1930-1949": EpcConstructionAgeBand.from_1930_to_1949,
    "1950-1966": EpcConstructionAgeBand.from_1950_to_1966,
    "1967-1975": EpcConstructionAgeBand.from_1967_to_1975,
    "1976-1982": EpcConstructionAgeBand.from_1976_to_1982,
    "1983-1990": EpcConstructionAgeBand.from_1983_to_1990,
    "1991-1995": EpcConstructionAgeBand.from_1991_to_1995,
    "1996-2002": EpcConstructionAgeBand.from_1996_to_2002,
    "2003-2006": EpcConstructionAgeBand.from_2003_to_2006,
    "2007-2011": EpcConstructionAgeBand.from_2007_to_2011,
    "2012 onwards": EpcConstructionAgeBand.from_2012_onwards,
    # Newer age bands, under SAP10
    "2012-2022": EpcConstructionAgeBand.from_2012_to_2022,
    "2023 onwards": EpcConstructionAgeBand.from_2023_onwards,
}

View file

@ -1,9 +1,11 @@
from tqdm import tqdm
import pandas as pd
from etl.epc.DataProcessor import construction_age_bounds_map
from backend.onboarders.mappings.property_type import parity_map as property_map
from backend.onboarders.mappings.age_band import parity_map as age_band_map
from backend.onboarders.mappings.built_form import parity_map as built_form_map
from onboarders.epc_descriptions import EpcWallDescriptions, EpcConstructionAgeBand
from backend.onboarders.epc_descriptions import EpcWallDescriptions, EpcConstructionAgeBand
tqdm.pandas()
def check_nulls(data, original_column, mapped_column):
@ -51,41 +53,41 @@ assert pd.isnull(data["built_form"]).sum() == 0, "Some built forms were not mapp
# Unique combinations
wall_mapping = {
# Cavity walls
('Cavity', 'FilledCavity'): EpcWallDescriptions.cavity_filled_cavity.value,
('Cavity', 'Internal'): EpcWallDescriptions.cavity_internal_insulation.value,
('Cavity', 'External'): EpcWallDescriptions.cavity_external_insulation.value,
('Cavity', 'FilledCavityPlusInternal'): EpcWallDescriptions.cavity_filled_plus_internal.value,
('Cavity', 'FilledCavityPlusExternal'): EpcWallDescriptions.cavity_filled_plus_external.value,
('Cavity', 'FilledCavity'): EpcWallDescriptions.cavity_filled_cavity,
('Cavity', 'Internal'): EpcWallDescriptions.cavity_internal_insulation,
('Cavity', 'External'): EpcWallDescriptions.cavity_external_insulation,
('Cavity', 'FilledCavityPlusInternal'): EpcWallDescriptions.cavity_filled_plus_internal,
('Cavity', 'FilledCavityPlusExternal'): EpcWallDescriptions.cavity_filled_plus_external,
('Cavity', 'AsBuilt'): None, # To be classified
('Cavity', 'Unknown'): None, # To be classified
# System built walls
('System', 'External'): EpcWallDescriptions.system_external_insulation.value,
('System', 'Internal'): EpcWallDescriptions.system_internal_insulation.value,
('System', 'External'): EpcWallDescriptions.system_external_insulation,
('System', 'Internal'): EpcWallDescriptions.system_internal_insulation,
('System', 'AsBuilt'): None, # To be classified
('System', 'Unknown'): None,
# Timber Frame walls
('Timber Frame', 'Internal'): EpcWallDescriptions.timber_frame_internal_insulation.value,
('Timber Frame', 'External'): EpcWallDescriptions.timber_frame_external_insulation.value,
('Timber Frame', 'Internal'): EpcWallDescriptions.timber_frame_internal_insulation,
('Timber Frame', 'External'): EpcWallDescriptions.timber_frame_external_insulation,
('Timber Frame', 'AsBuilt'): None, # To be classified
('Timber Frame', 'Unknown'): None,
# Solid Brick walls
('Solid Brick', 'External'): EpcWallDescriptions.solid_brick_external_insulation.value,
('Solid Brick', 'Internal'): EpcWallDescriptions.solid_brick_internal_insulation.value,
('Solid Brick', 'External'): EpcWallDescriptions.solid_brick_external_insulation,
('Solid Brick', 'Internal'): EpcWallDescriptions.solid_brick_internal_insulation,
('Solid Brick', 'AsBuilt'): None, # To be classified
('Solid Brick', 'Unknown'): None,
# Granite walls
('Granite', 'External'): EpcWallDescriptions.granite_whinstone_external_insulation.value,
("Granite", 'Internal'): EpcWallDescriptions.granite_whinstone_internal_insulation.value,
('Granite', 'External'): EpcWallDescriptions.granite_whinstone_external_insulation,
("Granite", 'Internal'): EpcWallDescriptions.granite_whinstone_internal_insulation,
('Granite', 'AsBuilt'): None,
('Granite', 'Unknown'): None,
# Sandstone walls
('Sandstone', 'Internal'): EpcWallDescriptions.sandstone_limestone_internal_insulation.value,
('Sandstone', 'External'): EpcWallDescriptions.sandstone_limestone_external_insulation.value,
('Sandstone', 'Internal'): EpcWallDescriptions.sandstone_limestone_internal_insulation,
('Sandstone', 'External'): EpcWallDescriptions.sandstone_limestone_external_insulation,
('Sandstone', 'Unknown'): None,
('Sandstone', 'AsBuilt'): None,
@ -204,6 +206,16 @@ AS_BUILT_WALL_CLASSIFIERS = {
"Cob": map_cob_wall_insulation,
}
# Wall description to use when a row has no construction age band, keyed by
# the row's "Wall Construction" value. fill_as_built consults this table
# instead of the age-based classifiers in that case.
UNKNOWN_AGE_FALLBACK = {
    # Wall types that fall back to a "no insulation (assumed)" description
    "Cavity": EpcWallDescriptions.cavity_no_insulation_assumed,
    "Solid Brick": EpcWallDescriptions.solid_brick_no_insulation_assumed,
    "Timber Frame": EpcWallDescriptions.timber_frame_no_insulation_assumed,
    "System": EpcWallDescriptions.system_no_insulation_assumed,
    "Granite": EpcWallDescriptions.granite_whinstone_no_insulation_assumed,
    "Sandstone": EpcWallDescriptions.sandstone_limestone_no_insulation_assumed,
    # Cob falls back to the "as built, average" description instead
    "Cob": EpcWallDescriptions.cob_as_built_average,
}
data["landlord_wall_description"] = (
data[["Wall Construction", "Wall Insulation"]]
.apply(tuple, axis=1)
@ -212,21 +224,28 @@ data["landlord_wall_description"] = (
def fill_as_built(row):
    """Resolve the wall description for a single row of ``data``.

    Resolution order:
      1. Keep ``landlord_wall_description`` if it is already set.
      2. If ``construction_age_band`` is null, look the row's
         "Wall Construction" up in ``UNKNOWN_AGE_FALLBACK``.
      3. Otherwise call the matching entry of ``AS_BUILT_WALL_CLASSIFIERS``
         with the row's construction age band.

    Args:
        row: A row as passed by ``DataFrame.apply(..., axis=1)``. Must expose
            ``landlord_wall_description``, ``construction_age_band`` and
            the "Wall Construction" column.

    Returns:
        The resolved wall description, or ``None`` when the wall type has no
        fallback entry (step 2) or no classifier (step 3).
    """
    # Already resolved via direct mapping.
    # NOTE(review): only ``None`` counts as unresolved here; a NaN would be
    # returned unchanged - confirm that is intended.
    if row.landlord_wall_description is not None:
        return row.landlord_wall_description

    wall_type = row["Wall Construction"]

    # Missing construction age → conservative fallback
    if pd.isnull(row.construction_age_band):
        return UNKNOWN_AGE_FALLBACK.get(wall_type)

    classifier = AS_BUILT_WALL_CLASSIFIERS.get(wall_type)
    if classifier is None:
        return None
    return classifier(row.construction_age_band)
# Resolve the rows the direct mapping left unresolved. A single pass is
# enough; progress_apply is the tqdm-instrumented form of DataFrame.apply
# registered by tqdm.pandas().
data["landlord_wall_description"] = data.progress_apply(fill_as_built, axis=1)

# NOTE(review): an unfinished debugging loop over data.iterrows() that looked
# for construction_age_band == "England and Wales: 2012-2021" was dropped
# here. Its body was only the undefined name `de`, which would raise
# NameError on the first matching row.

# Fail fast if any wall description is still missing after the fill.
assert data["landlord_wall_description"].isnull().sum() == 0, (
    "Some wall descriptions could not be resolved"
)
# Variables we want to map
# 'Org Ref', 'Address 1', 'Address 2', 'Address 3', 'Postcode', 'Type',