diff --git a/backend/onboarders/mappings/as_built_floor_classifiers.py b/backend/onboarders/mappings/as_built_floor_classifiers.py
new file mode 100644
index 00000000..9f14fa7d
--- /dev/null
+++ b/backend/onboarders/mappings/as_built_floor_classifiers.py
@@ -0,0 +1,69 @@
+"""Classifiers that pick an EPC floor description from a construction age band.
+
+Each function reads ``age_band.start_year()`` and compares it with the year
+thresholds below, so an age band is classified by the year it starts in.
+"""
+from backend.onboarders.epc_descriptions import EpcConstructionAgeBand, EpcFloorDescriptions
+
+# Floors of unknown construction count as solid from this start year, suspended before it.
+_SOLID_FLOOR_FROM_YEAR = 1930
+# As-built floors count as "limited insulation (assumed)" from this start year.
+_LIMITED_INSULATION_FROM_YEAR = 1996
+# As-built floors count as "insulated (assumed)" from this start year.
+_INSULATED_FROM_YEAR = 2003
+
+
+def unknown_floor_as_built(age_band: EpcConstructionAgeBand) -> EpcFloorDescriptions:
+    """Return the assumed description for an as-built floor of unknown construction.
+
+    By start year: before 1930 -> suspended, no insulation (assumed);
+    1930-1995 -> solid, no insulation (assumed); 1996-2002 -> solid, limited
+    insulation (assumed); 2003 onwards -> solid, insulated (assumed).
+    """
+    if age_band.start_year() < _SOLID_FLOOR_FROM_YEAR:
+        return EpcFloorDescriptions.suspended_no_insulation_assumed
+
+    # From 1930 the floor is assumed solid, so reuse the solid-floor thresholds
+    # instead of keeping a second copy of them in sync.
+    return solid_floor_as_built(age_band)
+
+
+def unknown_floor_retrofitted(age_band: EpcConstructionAgeBand) -> EpcFloorDescriptions:
+    """Return the insulated (not "assumed") description for a floor of unknown construction.
+
+    By start year: before 1930 -> suspended, insulated; 1930 onwards -> solid, insulated.
+    """
+    if age_band.start_year() >= _SOLID_FLOOR_FROM_YEAR:
+        return EpcFloorDescriptions.solid_insulated
+
+    return EpcFloorDescriptions.suspended_insulated
+
+
+def solid_floor_as_built(age_band: EpcConstructionAgeBand) -> EpcFloorDescriptions:
+    """Return the assumed insulation level for an as-built solid floor.
+
+    By start year: before 1996 -> no insulation (assumed); 1996-2002 -> limited
+    insulation (assumed); 2003 onwards -> insulated (assumed).
+    """
+    year = age_band.start_year()
+
+    if year >= _INSULATED_FROM_YEAR:
+        return EpcFloorDescriptions.solid_insulated_assumed
+    if year >= _LIMITED_INSULATION_FROM_YEAR:
+        return EpcFloorDescriptions.solid_limited_insulation_assumed
+    return EpcFloorDescriptions.solid_no_insulation_assumed
+
+
+def suspended_floor_as_built(age_band: EpcConstructionAgeBand) -> EpcFloorDescriptions:
+    """Return the assumed insulation level for an as-built suspended floor.
+
+    By start year: before 1996 -> no insulation (assumed); 1996-2002 -> limited
+    insulation (assumed); 2003 onwards -> insulated (assumed).
+    """
+    year = age_band.start_year()
+
+    if year >= _INSULATED_FROM_YEAR:
+        return EpcFloorDescriptions.suspended_insulated_assumed
+    if year >= _LIMITED_INSULATION_FROM_YEAR:
+        return EpcFloorDescriptions.suspended_limited_insulation_assumed
+    return EpcFloorDescriptions.suspended_no_insulation_assumed
diff --git a/backend/onboarders/parity.py b/backend/onboarders/parity.py
index 67e65115..dfba0443 100644
--- a/backend/onboarders/parity.py
+++ b/backend/onboarders/parity.py
@@ -9,6 +9,8 @@ from backend.onboarders.epc_descriptions import EpcWallDescriptions, EpcConstruc
WALL_DESCRIPTION_EFFICIENCIES, EpcRoofDescriptions, resolve_roof_efficiency, EpcFloorDescriptions from backend.onboarders.mappings.as_built_wall_classifiers import AS_BUILT_WALL_CLASSIFIERS from backend.onboarders.mappings.as_built_roof_classifiers import AS_BUILT_ROOF_CLASSIFIERS +from backend.onboarders.mappings.as_built_floor_classifiers import unknown_floor_as_built, unknown_floor_retrofitted, \ + solid_floor_as_built, suspended_floor_as_built tqdm.pandas() @@ -379,63 +381,6 @@ floor_mapping = { ('Unknown', 'AsBuilt'): None, # Mapped unknown_floor_as_built } - -# Unknown floor, as built -# Before 1900, 1900 - 1929 -> Suspended, no insulation (assumed) -# 1930-1949, 1950 - 1966, 1967 - 1975, 1976-1982, 1983-1990, 1991-1995, -> Solid, no insulation (assumed) -# 1996 - 2002, Solid, limited insulation (assumed) -# 2003 onwards -> Solid, insulated (assumed) - -def unknown_floor_as_built(age_band: EpcConstructionAgeBand) -> EpcFloorDescriptions: - year = age_band.start_year() - - if year >= 2003: - return EpcFloorDescriptions.solid_insulated_assumed - - if year >= 1930: - return EpcFloorDescriptions.solid_no_insulation_assumed - - return EpcFloorDescriptions.suspended_no_insulation_assumed - - -# before 1900, 1900-1929 -> Suspended, insulated -# Thereafter, 1930 onwards -> Solid, insulated -def unknown_floor_retrofitted(age_band: EpcConstructionAgeBand) -> EpcFloorDescriptions: - year = age_band.start_year() - - if year >= 1930: - return EpcFloorDescriptions.solid_insulated - - return EpcFloorDescriptions.suspended_insulated - - -# 2003 - 2006, 2023 onwards -> Solid, insulated (assumed) -# 1996 - 2022 -> Solid, limited insulation (assumed) -# 1983 - 1990, 1991 - 1995 -> Solid, no insulation (assumed) -def solid_floor_as_built(age_band: EpcConstructionAgeBand) -> EpcFloorDescriptions: - year = age_band.start_year() - - if year >= 2003: - return EpcFloorDescriptions.solid_insulated_assumed - if year >= 1996: - return 
EpcFloorDescriptions.solid_limited_insulation_assumed - return EpcFloorDescriptions.solid_no_insulation_assumed - - -# 2003 -> 2006 -> Suspended, insulated (assumed) -# 1996 - 2022 -> Suspended, limited insulation (assumed) -# 1983 - 1995 -> Suspended, no insulation (assumed) -def suspended_floor_as_built(age_band: EpcConstructionAgeBand) -> EpcFloorDescriptions: - year = age_band.start_year() - - if year >= 2003: - return EpcFloorDescriptions.suspended_insulated_assumed - if year >= 1996: - return EpcFloorDescriptions.suspended_limited_insulation_assumed - - return EpcFloorDescriptions.suspended_no_insulation_assumed - - data["landlord_floor_description"] = ( data[["Floor Construction", "Floor Insulation"]] .progress_apply(tuple, axis=1) @@ -478,6 +423,13 @@ data["landlord_floor_description"] = data.progress_apply( axis=1, ) +# All values should be remapped now +assert data["landlord_floor_description"].isnull().sum() == 0, ( + "Some floor descriptions could not be resolved" +) + +# TODO: Convert everything to values + # Variables we want to map # 'Org Ref', 'Address 1', 'Address 2', 'Address 3', 'Postcode', # 'Floor Construction', 'Floor Insulation', 'Glazing', 'Heating', diff --git a/backend/onboarders/tests/test_floor_remapping.py b/backend/onboarders/tests/test_floor_remapping.py new file mode 100644 index 00000000..26a05d22 --- /dev/null +++ b/backend/onboarders/tests/test_floor_remapping.py @@ -0,0 +1,98 @@ +import pytest + +from backend.onboarders.epc_descriptions import ( + EpcConstructionAgeBand, + EpcFloorDescriptions, +) +from backend.onboarders.mappings.as_built_floor_classifiers import ( + unknown_floor_as_built, + unknown_floor_retrofitted, + solid_floor_as_built, + suspended_floor_as_built, +) + + +@pytest.mark.parametrize( + "age_band,expected", + [ + # Before 1900 / 1900–1929 → suspended, no insulation + (EpcConstructionAgeBand.before_1900, EpcFloorDescriptions.suspended_no_insulation_assumed), + (EpcConstructionAgeBand.from_1900_to_1929, 
EpcFloorDescriptions.suspended_no_insulation_assumed), + + # 1930–1995 → solid, no insulation + (EpcConstructionAgeBand.from_1930_to_1949, EpcFloorDescriptions.solid_no_insulation_assumed), + (EpcConstructionAgeBand.from_1950_to_1966, EpcFloorDescriptions.solid_no_insulation_assumed), + (EpcConstructionAgeBand.from_1967_to_1975, EpcFloorDescriptions.solid_no_insulation_assumed), + (EpcConstructionAgeBand.from_1976_to_1982, EpcFloorDescriptions.solid_no_insulation_assumed), + (EpcConstructionAgeBand.from_1983_to_1990, EpcFloorDescriptions.solid_no_insulation_assumed), + (EpcConstructionAgeBand.from_1991_to_1995, EpcFloorDescriptions.solid_no_insulation_assumed), + + # 1996–2002 → solid, limited insulation + (EpcConstructionAgeBand.from_1996_to_2002, EpcFloorDescriptions.solid_limited_insulation_assumed), + + # 2003+ → solid, insulated + (EpcConstructionAgeBand.from_2003_to_2006, EpcFloorDescriptions.solid_insulated_assumed), + (EpcConstructionAgeBand.from_2012_to_2022, EpcFloorDescriptions.solid_insulated_assumed), + (EpcConstructionAgeBand.from_2023_onwards, EpcFloorDescriptions.solid_insulated_assumed), + ], +) +def test_unknown_floor_as_built(age_band, expected): + assert unknown_floor_as_built(age_band) == expected + + +@pytest.mark.parametrize( + "age_band,expected", + [ + # Pre-1930 → suspended, insulated + (EpcConstructionAgeBand.before_1900, EpcFloorDescriptions.suspended_insulated), + (EpcConstructionAgeBand.from_1900_to_1929, EpcFloorDescriptions.suspended_insulated), + + # 1930+ → solid, insulated + (EpcConstructionAgeBand.from_1930_to_1949, EpcFloorDescriptions.solid_insulated), + (EpcConstructionAgeBand.from_1950_to_1966, EpcFloorDescriptions.solid_insulated), + (EpcConstructionAgeBand.from_1976_to_1982, EpcFloorDescriptions.solid_insulated), + (EpcConstructionAgeBand.from_2023_onwards, EpcFloorDescriptions.solid_insulated), + ], +) +def test_unknown_floor_retrofitted(age_band, expected): + assert unknown_floor_retrofitted(age_band) == expected + + 
+@pytest.mark.parametrize(
+    "age_band,expected",
+    [
+        # 1983–1995 → solid, no insulation (assumed)
+        (EpcConstructionAgeBand.from_1983_to_1990, EpcFloorDescriptions.solid_no_insulation_assumed),
+        (EpcConstructionAgeBand.from_1991_to_1995, EpcFloorDescriptions.solid_no_insulation_assumed),
+
+        # 1996–2002 → solid, limited insulation (assumed)
+        (EpcConstructionAgeBand.from_1996_to_2002, EpcFloorDescriptions.solid_limited_insulation_assumed),
+
+        # 2003+ → solid, insulated (assumed)
+        (EpcConstructionAgeBand.from_2003_to_2006, EpcFloorDescriptions.solid_insulated_assumed),
+        (EpcConstructionAgeBand.from_2012_to_2022, EpcFloorDescriptions.solid_insulated_assumed),
+        (EpcConstructionAgeBand.from_2023_onwards, EpcFloorDescriptions.solid_insulated_assumed),
+    ],
+)
+def test_solid_floor_as_built(age_band, expected):
+    assert solid_floor_as_built(age_band) == expected
+
+
+@pytest.mark.parametrize(
+    "age_band,expected",
+    [
+        # 1983–1995 → suspended, no insulation (assumed)
+        (EpcConstructionAgeBand.from_1983_to_1990, EpcFloorDescriptions.suspended_no_insulation_assumed),
+        (EpcConstructionAgeBand.from_1991_to_1995, EpcFloorDescriptions.suspended_no_insulation_assumed),
+
+        # 1996–2002 → suspended, limited insulation (assumed)
+        (EpcConstructionAgeBand.from_1996_to_2002, EpcFloorDescriptions.suspended_limited_insulation_assumed),
+
+        # 2003+ → suspended, insulated (assumed)
+        (EpcConstructionAgeBand.from_2003_to_2006, EpcFloorDescriptions.suspended_insulated_assumed),
+        (EpcConstructionAgeBand.from_2012_to_2022, EpcFloorDescriptions.suspended_insulated_assumed),
+        (EpcConstructionAgeBand.from_2023_onwards, EpcFloorDescriptions.suspended_insulated_assumed),
+    ],
+)
+def test_suspended_floor_as_built(age_band, expected):
+    assert suspended_floor_as_built(age_band) == expected