diff --git a/backend/onboarders/mappings/age_band.py b/backend/onboarders/mappings/age_band.py
index 5106a3fe..e49fede8 100644
--- a/backend/onboarders/mappings/age_band.py
+++ b/backend/onboarders/mappings/age_band.py
@@ -1,19 +1,19 @@
 from backend.onboarders.epc_descriptions import EpcConstructionAgeBand
 
 parity_map = {
-    "Before 1900": EpcConstructionAgeBand.before_1900.value,
-    "1900-1929": EpcConstructionAgeBand.from_1900_to_1929.value,
-    "1930-1949": EpcConstructionAgeBand.from_1930_to_1949.value,
-    "1950-1966": EpcConstructionAgeBand.from_1950_to_1966.value,
-    "1967-1975": EpcConstructionAgeBand.from_1967_to_1975.value,
-    "1976-1982": EpcConstructionAgeBand.from_1976_to_1982.value,
-    "1983-1990": EpcConstructionAgeBand.from_1983_to_1990.value,
-    "1991-1995": EpcConstructionAgeBand.from_1991_to_1995.value,
-    "1996-2002": EpcConstructionAgeBand.from_1996_to_2002.value,
-    "2003-2006": EpcConstructionAgeBand.from_2003_to_2006.value,
-    "2007-2011": EpcConstructionAgeBand.from_2007_to_2011.value,
-    "2012 onwards": EpcConstructionAgeBand.from_2012_onwards.value,
+    "Before 1900": EpcConstructionAgeBand.before_1900,
+    "1900-1929": EpcConstructionAgeBand.from_1900_to_1929,
+    "1930-1949": EpcConstructionAgeBand.from_1930_to_1949,
+    "1950-1966": EpcConstructionAgeBand.from_1950_to_1966,
+    "1967-1975": EpcConstructionAgeBand.from_1967_to_1975,
+    "1976-1982": EpcConstructionAgeBand.from_1976_to_1982,
+    "1983-1990": EpcConstructionAgeBand.from_1983_to_1990,
+    "1991-1995": EpcConstructionAgeBand.from_1991_to_1995,
+    "1996-2002": EpcConstructionAgeBand.from_1996_to_2002,
+    "2003-2006": EpcConstructionAgeBand.from_2003_to_2006,
+    "2007-2011": EpcConstructionAgeBand.from_2007_to_2011,
+    "2012 onwards": EpcConstructionAgeBand.from_2012_onwards,
     # Newer age bands, under SAP10
-    "2012-2022": EpcConstructionAgeBand.from_2012_to_2022.value,
-    "2023 onwards": EpcConstructionAgeBand.from_2023_onwards.value,
+    "2012-2022": EpcConstructionAgeBand.from_2012_to_2022,
+    "2023 onwards": EpcConstructionAgeBand.from_2023_onwards,
 }
diff --git a/backend/onboarders/parity.py b/backend/onboarders/parity.py
index ca84fbec..d3ee8485 100644
--- a/backend/onboarders/parity.py
+++ b/backend/onboarders/parity.py
@@ -1,9 +1,11 @@
+from tqdm import tqdm
 import pandas as pd
-from etl.epc.DataProcessor import construction_age_bounds_map
 from backend.onboarders.mappings.property_type import parity_map as property_map
 from backend.onboarders.mappings.age_band import parity_map as age_band_map
 from backend.onboarders.mappings.built_form import parity_map as built_form_map
-from onboarders.epc_descriptions import EpcWallDescriptions, EpcConstructionAgeBand
+from backend.onboarders.epc_descriptions import EpcWallDescriptions, EpcConstructionAgeBand
+
+tqdm.pandas()
 
 
 def check_nulls(data, original_column, mapped_column):
@@ -51,41 +53,41 @@ assert pd.isnull(data["built_form"]).sum() == 0, "Some built forms were not mapp
 # Unique combindations
 wall_mapping = {
     # Cavity walls
-    ('Cavity', 'FilledCavity'): EpcWallDescriptions.cavity_filled_cavity.value,
-    ('Cavity', 'Internal'): EpcWallDescriptions.cavity_internal_insulation.value,
-    ('Cavity', 'External'): EpcWallDescriptions.cavity_external_insulation.value,
-    ('Cavity', 'FilledCavityPlusInternal'): EpcWallDescriptions.cavity_filled_plus_internal.value,
-    ('Cavity', 'FilledCavityPlusExternal'): EpcWallDescriptions.cavity_filled_plus_external.value,
+    ('Cavity', 'FilledCavity'): EpcWallDescriptions.cavity_filled_cavity,
+    ('Cavity', 'Internal'): EpcWallDescriptions.cavity_internal_insulation,
+    ('Cavity', 'External'): EpcWallDescriptions.cavity_external_insulation,
+    ('Cavity', 'FilledCavityPlusInternal'): EpcWallDescriptions.cavity_filled_plus_internal,
+    ('Cavity', 'FilledCavityPlusExternal'): EpcWallDescriptions.cavity_filled_plus_external,
     ('Cavity', 'AsBuilt'): None, # To be classified
     ('Cavity', 'Unknown'): None, # To be classified
 
     # System built walls
-    ('System', 'External'): EpcWallDescriptions.system_external_insulation.value,
-    ('System', 'Internal'): EpcWallDescriptions.system_internal_insulation.value,
+    ('System', 'External'): EpcWallDescriptions.system_external_insulation,
+    ('System', 'Internal'): EpcWallDescriptions.system_internal_insulation,
     ('System', 'AsBuilt'): None, # To be classified
     ('System', 'Unknown'): None,
 
     # Timber Frame walls
-    ('Timber Frame', 'Internal'): EpcWallDescriptions.timber_frame_internal_insulation.value,
-    ('Timber Frame', 'External'): EpcWallDescriptions.timber_frame_external_insulation.value,
+    ('Timber Frame', 'Internal'): EpcWallDescriptions.timber_frame_internal_insulation,
+    ('Timber Frame', 'External'): EpcWallDescriptions.timber_frame_external_insulation,
     ('Timber Frame', 'AsBuilt'): None, # To be classified
     ('Timber Frame', 'Unknown'): None,
 
     # Solid Brick walls
-    ('Solid Brick', 'External'): EpcWallDescriptions.solid_brick_external_insulation.value,
-    ('Solid Brick', 'Internal'): EpcWallDescriptions.solid_brick_internal_insulation.value,
+    ('Solid Brick', 'External'): EpcWallDescriptions.solid_brick_external_insulation,
+    ('Solid Brick', 'Internal'): EpcWallDescriptions.solid_brick_internal_insulation,
     ('Solid Brick', 'AsBuilt'): None, # To be classified
     ('Solid Brick', 'Unknown'): None,
 
     # Granite walls
-    ('Granite', 'External'): EpcWallDescriptions.granite_whinstone_external_insulation.value,
-    ("Granite", 'Internal'): EpcWallDescriptions.granite_whinstone_internal_insulation.value,
+    ('Granite', 'External'): EpcWallDescriptions.granite_whinstone_external_insulation,
+    ("Granite", 'Internal'): EpcWallDescriptions.granite_whinstone_internal_insulation,
     ('Granite', 'AsBuilt'): None,
     ('Granite', 'Unknown'): None,
 
     # Sandstone walls
-    ('Sandstone', 'Internal'): EpcWallDescriptions.sandstone_limestone_internal_insulation.value,
-    ('Sandstone', 'External'): EpcWallDescriptions.sandstone_limestone_external_insulation.value,
+    ('Sandstone', 'Internal'): EpcWallDescriptions.sandstone_limestone_internal_insulation,
+    ('Sandstone', 'External'): EpcWallDescriptions.sandstone_limestone_external_insulation,
     ('Sandstone', 'Unknown'): None,
     ('Sandstone', 'AsBuilt'): None,
 
@@ -204,6 +206,16 @@ AS_BUILT_WALL_CLASSIFIERS = {
     "Cob": map_cob_wall_insulation,
 }
 
+UNKNOWN_AGE_FALLBACK = {
+    "Cavity": EpcWallDescriptions.cavity_no_insulation_assumed,
+    "Solid Brick": EpcWallDescriptions.solid_brick_no_insulation_assumed,
+    "Timber Frame": EpcWallDescriptions.timber_frame_no_insulation_assumed,
+    "System": EpcWallDescriptions.system_no_insulation_assumed,
+    "Granite": EpcWallDescriptions.granite_whinstone_no_insulation_assumed,
+    "Sandstone": EpcWallDescriptions.sandstone_limestone_no_insulation_assumed,
+    "Cob": EpcWallDescriptions.cob_as_built_average,
+}
+
 data["landlord_wall_description"] = (
     data[["Wall Construction", "Wall Insulation"]]
     .apply(tuple, axis=1)
@@ -212,21 +224,28 @@ data["landlord_wall_description"] = (
 
 
 def fill_as_built(row):
+    # Already resolved via direct mapping
     if row.landlord_wall_description is not None:
         return row.landlord_wall_description
 
-    classifier = AS_BUILT_WALL_CLASSIFIERS.get(row["Wall Construction"])
+    wall_type = row["Wall Construction"]
+
+    # Missing construction age → conservative fallback
+    if pd.isnull(row.construction_age_band):
+        return UNKNOWN_AGE_FALLBACK.get(wall_type)
+
+    classifier = AS_BUILT_WALL_CLASSIFIERS.get(wall_type)
     if classifier is None:
         return None
 
     return classifier(row.construction_age_band)
 
 
-data["landlord_wall_description"] = data.apply(fill_as_built, axis=1)
+data["landlord_wall_description"] = data.progress_apply(fill_as_built, axis=1)
 
-for _, x in data.iterrows():
-    if x["construction_age_band"] == "England and Wales: 2012-2021":
-        de
+assert data["landlord_wall_description"].isnull().sum() == 0, (
+    "Some wall descriptions could not be resolved"
+)
 
 # Variables we want to map
 # 'Org Ref', 'Address 1', 'Address 2', 'Address 3', 'Postcode', 'Type',