Model/backend/onboarders/parity.py

718 lines
32 KiB
Python

import re
from numpy import nan
from tqdm import tqdm
import pandas as pd
from backend.onboarders.mappings.property_type import parity_map as property_map
from backend.onboarders.mappings.age_band import parity_map as age_band_map
from backend.onboarders.mappings.built_form import parity_map as built_form_map
from backend.onboarders.epc_descriptions import EpcWallDescriptions, EpcConstructionAgeBand, EpcEfficiency, \
WALL_DESCRIPTION_EFFICIENCIES, EpcRoofDescriptions, resolve_roof_efficiency, EpcFloorDescriptions
from backend.onboarders.epc.placeholder import EpcFuel, EpcHeatingControls, EpcHeatingSystems
from backend.onboarders.mappings.as_built_wall_classifiers import AS_BUILT_WALL_CLASSIFIERS
from backend.onboarders.mappings.as_built_roof_classifiers import AS_BUILT_ROOF_CLASSIFIERS
from backend.onboarders.mappings.as_built_floor_classifiers import unknown_floor_as_built, unknown_floor_retrofitted, \
solid_floor_as_built, suspended_floor_as_built
# Register tqdm's pandas integration so the `progress_apply` calls used in this script are available.
tqdm.pandas()
def check_nulls(data, original_column, mapped_column):
    """
    Verify that a mapped column is only null where its source column was null.

    :param data: DataFrame containing both columns.
    :param original_column: Name of the raw column the mapping was applied to.
    :param mapped_column: Name of the column produced by the mapping.
    :return: True when the check passes (with or without nulls present).
    :raises AssertionError: If a non-null original value ended up null after mapping.
    """
    # Rows for which the mapping produced no value
    null_vals = data[pd.isnull(data[mapped_column])]
    if null_vals.empty:
        return True
    # We only allow nulls if the original value was null as well.
    # Raised explicitly (rather than via `assert`) so the check survives `python -O`.
    if not pd.isnull(null_vals[original_column]).all():
        raise AssertionError(
            f"Some values in {mapped_column} were not mapped, but original values were not null"
        )
    # Same return value on every successful path
    return True
# Load the sample input: the "Sustainability" sheet of the landlord's data extract
data = pd.read_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody "
    "- Data Extracts for Domna.xlsx",
    sheet_name="Sustainability",
)

# The parity fields are mapped onto standard EPC references, which lets us:
#   1) estimate EPCs more accurately
#   2) patch incorrect EPCs with ease
#   3) indicate measures that are already installed

# ------------ construction_age_band ------------
# Nulls are tolerated here, but only where the source value was itself null
data["construction_age_band"] = data["Construction Years"].map(age_band_map)
check_nulls(data, "Construction Years", "construction_age_band")

# ------------ property_type ------------
# Every row must map to a property type
data["property_type"] = data["Type"].map(property_map)
assert data["property_type"].notnull().all(), "Some property types were not mapped"

# ------------ built_form ------------
# Every row must map to a built form
data["built_form"] = data["Attachment"].map(built_form_map)
assert data["built_form"].notnull().all(), "Some built forms were not mapped"
# ------------ Wall Construction ------------
# Unique (Wall Construction, Wall Insulation) combinations mapped to an EPC wall description.
# A value of None means the description is decided in a later step (see fill_as_built).
wall_mapping = {
    # Cavity walls
    ("Cavity", "FilledCavity"): EpcWallDescriptions.cavity_filled_cavity,
    ("Cavity", "Internal"): EpcWallDescriptions.cavity_internal_insulation,
    ("Cavity", "External"): EpcWallDescriptions.cavity_external_insulation,
    ("Cavity", "FilledCavityPlusInternal"): EpcWallDescriptions.cavity_filled_plus_internal,
    ("Cavity", "FilledCavityPlusExternal"): EpcWallDescriptions.cavity_filled_plus_external,
    ("Cavity", "AsBuilt"): None,  # To be classified
    ("Cavity", "Unknown"): None,  # To be classified
    # System built walls
    ("System", "External"): EpcWallDescriptions.system_external_insulation,
    ("System", "Internal"): EpcWallDescriptions.system_internal_insulation,
    ("System", "AsBuilt"): None,  # To be classified
    ("System", "Unknown"): None,
    # Timber frame walls
    ("Timber Frame", "Internal"): EpcWallDescriptions.timber_frame_internal_insulation,
    ("Timber Frame", "External"): EpcWallDescriptions.timber_frame_external_insulation,
    ("Timber Frame", "AsBuilt"): None,  # To be classified
    ("Timber Frame", "Unknown"): None,
    # Solid brick walls
    ("Solid Brick", "External"): EpcWallDescriptions.solid_brick_external_insulation,
    ("Solid Brick", "Internal"): EpcWallDescriptions.solid_brick_internal_insulation,
    ("Solid Brick", "AsBuilt"): None,  # To be classified
    ("Solid Brick", "Unknown"): None,
    # Granite walls
    ("Granite", "External"): EpcWallDescriptions.granite_whinstone_external_insulation,
    ("Granite", "Internal"): EpcWallDescriptions.granite_whinstone_internal_insulation,
    ("Granite", "AsBuilt"): None,
    ("Granite", "Unknown"): None,
    # Sandstone walls
    ("Sandstone", "Internal"): EpcWallDescriptions.sandstone_limestone_internal_insulation,
    ("Sandstone", "External"): EpcWallDescriptions.sandstone_limestone_external_insulation,
    ("Sandstone", "Unknown"): None,
    ("Sandstone", "AsBuilt"): None,
    # Cob walls
    ("Cob", "AsBuilt"): None,
}

# Description used per wall type when the construction age band is missing
WALL_UNKNOWN_AGE_FALLBACK = {
    "Cavity": EpcWallDescriptions.cavity_as_built_unknown,
    "Solid Brick": EpcWallDescriptions.solid_brick_as_built_unknown,
    "Timber Frame": EpcWallDescriptions.timber_frame_as_built_unknown,
    "System": EpcWallDescriptions.system_as_built_unknown,
    "Granite": EpcWallDescriptions.granite_as_built_unknown,
    "Sandstone": EpcWallDescriptions.sandstone_as_built_unknown,
    "Cob": EpcWallDescriptions.cob_as_built_unknown,
}

# First pass: look each (construction, insulation) pair up directly in wall_mapping
data["landlord_wall_description"] = (
    data[["Wall Construction", "Wall Insulation"]].apply(tuple, axis=1).map(wall_mapping)
)
def fill_as_built(row):
    """
    Resolve the wall description for a single row.

    Returns the description already on the row when there is one; otherwise
    falls back on the wall type (no age band) or on the as-built classifier
    registered for the wall type. Returns None when no classifier exists.
    """
    existing = row.landlord_wall_description
    # NOTE(review): this is an identity check against None, so a NaN description
    # is returned unchanged as if it were resolved. fill_roof_as_built uses
    # pd.isnull for the equivalent check - confirm which behaviour is intended.
    if existing is not None:
        return existing

    construction = row["Wall Construction"]

    # No construction age available -> use the per-wall-type fallback description
    if pd.isnull(row.construction_age_band):
        return WALL_UNKNOWN_AGE_FALLBACK.get(construction)

    as_built_classifier = AS_BUILT_WALL_CLASSIFIERS.get(construction)
    if as_built_classifier is not None:
        return as_built_classifier(row.construction_age_band)
    return None
def resolve_wall_efficiency(
        description: EpcWallDescriptions,
        age_band: EpcConstructionAgeBand | None,
) -> EpcEfficiency:
    """
    Work out the efficiency rating for a wall description.

    The rule registered in WALL_DESCRIPTION_EFFICIENCIES is either a fixed
    EpcEfficiency or a callable taking the age band. EpcEfficiency.NA is
    returned when the description text contains "unknown insulation", when no
    rule is registered, or when the rule needs an age band that is missing.
    """
    label = description.value.lower()
    # Unknown / holding descriptions carry no efficiency
    if "unknown insulation" in label:
        return EpcEfficiency.NA

    efficiency_rule = WALL_DESCRIPTION_EFFICIENCIES.get(description)
    if efficiency_rule is None:
        return EpcEfficiency.NA

    # Fixed rating, independent of age
    if isinstance(efficiency_rule, EpcEfficiency):
        return efficiency_rule

    # Anything else is called with the age band, which therefore must be present
    age_band_missing = age_band is None or pd.isnull(age_band)
    return EpcEfficiency.NA if age_band_missing else efficiency_rule(age_band)
# Second pass: resolve the combinations the direct mapping left open
data["landlord_wall_description"] = data.progress_apply(fill_as_built, axis=1)
assert data["landlord_wall_description"].notnull().all(), (
    "Some wall descriptions could not be resolved"
)

# Derive the wall efficiency from the final description and the age band
data["landlord_wall_efficiency"] = data.progress_apply(
    lambda record: resolve_wall_efficiency(
        record.landlord_wall_description,
        record.construction_age_band,
    ),
    axis=1,
)

# Sanity check: every row must have received an efficiency
assert data["landlord_wall_efficiency"].notnull().all()
# ------------ Roof Construction ------------
# Maps (Roof Construction, Roof Insulation) pairs to an EPC roof description.
# A value of None marks combinations that are classified afterwards (see fill_roof_as_built).
roof_mapping = {
    # Dwelling above
    ("AnotherDwellingAbove", "Another Dwelling Above"): EpcRoofDescriptions.another_dwelling_above,
    ("SameDwellingAbove", "Same Dwelling Above"): EpcRoofDescriptions.another_dwelling_above,
    # Pitched, normal loft access, with a loft thickness
    ("PitchedNormalLoftAccess", "mm25"): EpcRoofDescriptions.loft_25mm_insulation,
    ("PitchedNormalLoftAccess", "mm50"): EpcRoofDescriptions.loft_50mm_insulation,
    ("PitchedNormalLoftAccess", "mm75"): EpcRoofDescriptions.loft_75mm_insulation,
    ("PitchedNormalLoftAccess", "mm100"): EpcRoofDescriptions.loft_100mm_insulation,
    ("PitchedNormalLoftAccess", "mm150"): EpcRoofDescriptions.loft_150mm_insulation,
    ("PitchedNormalLoftAccess", "mm200"): EpcRoofDescriptions.loft_200mm_insulation,
    ("PitchedNormalLoftAccess", "mm250"): EpcRoofDescriptions.loft_250mm_insulation,
    ("PitchedNormalLoftAccess", "mm270"): EpcRoofDescriptions.loft_270mm_insulation,
    ("PitchedNormalLoftAccess", "mm300"): EpcRoofDescriptions.loft_300mm_insulation,
    ("PitchedNormalLoftAccess", "mm350"): EpcRoofDescriptions.loft_350mm_insulation,
    ("PitchedNormalLoftAccess", "mm400"): EpcRoofDescriptions.loft_400mm_plus_insulation,
    # Pitched, no loft access, with a loft thickness
    ("PitchedNormalNoLoftAccess", "mm25"): EpcRoofDescriptions.loft_25mm_insulation,
    ("PitchedNormalNoLoftAccess", "mm50"): EpcRoofDescriptions.loft_50mm_insulation,
    ("PitchedNormalNoLoftAccess", "mm75"): EpcRoofDescriptions.loft_75mm_insulation,
    ("PitchedNormalNoLoftAccess", "mm100"): EpcRoofDescriptions.loft_100mm_insulation,
    ("PitchedNormalNoLoftAccess", "mm150"): EpcRoofDescriptions.loft_150mm_insulation,
    ("PitchedNormalNoLoftAccess", "mm200"): EpcRoofDescriptions.loft_200mm_insulation,
    ("PitchedNormalNoLoftAccess", "mm250"): EpcRoofDescriptions.loft_250mm_insulation,
    ("PitchedNormalNoLoftAccess", "mm270"): EpcRoofDescriptions.loft_270mm_insulation,
    ("PitchedNormalNoLoftAccess", "mm300"): EpcRoofDescriptions.loft_300mm_insulation,
    ("PitchedNormalNoLoftAccess", "mm350"): EpcRoofDescriptions.loft_350mm_insulation,
    ("PitchedNormalNoLoftAccess", "mm400"): EpcRoofDescriptions.loft_400mm_plus_insulation,
    # All pitched options with as-built or unknown insulation go to
    # EpcRoofDescriptions.pitched_insulated_assumed
    # With access
    ("PitchedNormalLoftAccess", nan): EpcRoofDescriptions.pitched_insulated_assumed,
    ("PitchedNormalLoftAccess", "AsBuilt"): EpcRoofDescriptions.pitched_insulated_assumed,
    ("PitchedNormalLoftAccess", "Unknown"): EpcRoofDescriptions.pitched_insulated_assumed,
    # No access
    ("PitchedNormalNoLoftAccess", nan): EpcRoofDescriptions.pitched_insulated_assumed,
    ("PitchedNormalNoLoftAccess", "AsBuilt"): EpcRoofDescriptions.pitched_insulated_assumed,
    ("PitchedNormalNoLoftAccess", "Unknown"): EpcRoofDescriptions.pitched_insulated_assumed,
    # Flat
    # Efficiency bands by thickness:
    #   12mm = very poor & has limited insulation description
    #   25, 50 = poor & has limited insulation description
    #   75, 100, 125mm = average (Flat, insulated)
    #   150, 175, 200, 225, 250mm = good (Flat, insulated)
    #   270mm+ = very good (Flat, insulated)
    ("Flat", "NoInsulation"): EpcRoofDescriptions.flat_no_insulation,
    # Flat - limited insulation
    # Every other thickness key uses the "mm<N>" format (the sloping-ceiling section uses
    # "mm12"), so accept that spelling as well as the original "12mm" key. Without it a
    # flat roof recorded as "mm12" would miss the mapping and be classified as as-built.
    ("Flat", "mm12"): EpcRoofDescriptions.flat_limited_insulation,
    ("Flat", "12mm"): EpcRoofDescriptions.flat_limited_insulation,
    ("Flat", "mm25"): EpcRoofDescriptions.flat_limited_insulation,
    ("Flat", "mm50"): EpcRoofDescriptions.flat_limited_insulation,
    # Flat insulated
    ("Flat", "mm75"): EpcRoofDescriptions.flat_insulated,
    ("Flat", "mm100"): EpcRoofDescriptions.flat_insulated,
    ("Flat", "mm150"): EpcRoofDescriptions.flat_insulated,
    ("Flat", "mm200"): EpcRoofDescriptions.flat_insulated,
    ("Flat", "mm250"): EpcRoofDescriptions.flat_insulated,
    ("Flat", "mm300"): EpcRoofDescriptions.flat_insulated,
    ("Flat", "mm350"): EpcRoofDescriptions.flat_insulated,
    ("Flat", "mm400"): EpcRoofDescriptions.flat_insulated,
    # Flat - as built or unknown
    ("Flat", "AsBuilt"): None,  # To be classified
    ("Flat", nan): None,  # To be classified
    ("Flat", "Unknown"): None,  # To be classified
    # Thatched
    ("PitchedThatched", "mm50"): EpcRoofDescriptions.thatched_with_additional_insulation,
    ("PitchedThatched", "mm150"): EpcRoofDescriptions.thatched_with_additional_insulation,
    ("PitchedThatched", "mm300"): EpcRoofDescriptions.thatched_with_additional_insulation,
    ("PitchedThatched", "Unknown"): EpcRoofDescriptions.thatched,  # efficiency classified based on age
    # Sloping:
    # Limited (12 very poor, 25-50 poor)
    ("PitchedWithSlopingCeiling", "mm12"): EpcRoofDescriptions.sloping_pitched_limited_insulation,
    ("PitchedWithSlopingCeiling", "mm25"): EpcRoofDescriptions.sloping_pitched_limited_insulation,
    ("PitchedWithSlopingCeiling", "mm50"): EpcRoofDescriptions.sloping_pitched_limited_insulation,
    # Insulated 75mm+ (75 - 125 average, 150 - 250 good, 270+ very good)
    ("PitchedWithSlopingCeiling", "mm75"): EpcRoofDescriptions.sloping_pitched_insulated,
    ("PitchedWithSlopingCeiling", "mm100"): EpcRoofDescriptions.sloping_pitched_insulated,
    ("PitchedWithSlopingCeiling", "mm150"): EpcRoofDescriptions.sloping_pitched_insulated,
    ("PitchedWithSlopingCeiling", "mm200"): EpcRoofDescriptions.sloping_pitched_insulated,
    ("PitchedWithSlopingCeiling", "mm250"): EpcRoofDescriptions.sloping_pitched_insulated,
    ("PitchedWithSlopingCeiling", "mm270"): EpcRoofDescriptions.sloping_pitched_insulated,
    ("PitchedWithSlopingCeiling", "mm300"): EpcRoofDescriptions.sloping_pitched_insulated,
    ("PitchedWithSlopingCeiling", "mm350"): EpcRoofDescriptions.sloping_pitched_insulated,
    ("PitchedWithSlopingCeiling", "mm400"): EpcRoofDescriptions.sloping_pitched_insulated,
    # As built/unknown
    ("PitchedWithSlopingCeiling", "AsBuilt"): None,  # To be classified
    ("PitchedWithSlopingCeiling", nan): None,  # To be classified
    ("PitchedWithSlopingCeiling", "Unknown"): None,  # To be classified
}

# Description used per roof type when the construction age band is missing
ROOF_UNKNOWN_AGE_FALLBACK = {
    "Flat": EpcRoofDescriptions.flat_as_built_unknown,
    "PitchedWithSlopingCeiling": EpcRoofDescriptions.sloping_pitched_as_built_unknown,
    "PitchedThatched": EpcRoofDescriptions.thatched_as_built_unknown,
    "PitchedNormalLoftAccess": EpcRoofDescriptions.loft_as_built_unknown,
    "PitchedNormalNoLoftAccess": EpcRoofDescriptions.loft_as_built_unknown,
}
def fill_roof_as_built(row):
    """
    Resolve the roof description for a single row.

    A non-null description already on the row is returned unchanged. Otherwise
    the roof type must have an entry in AS_BUILT_ROOF_CLASSIFIERS; with no age
    band the per-roof-type fallback is returned, else the classifier's result.

    Raises NotImplementedError when the roof type has no classifier, or when
    the classifier returns None.
    """
    current = row.landlord_roof_description
    if pd.notnull(current):
        return current

    roof_type = row["Roof Construction"]
    classify = AS_BUILT_ROOF_CLASSIFIERS.get(roof_type)
    # The classifier lookup is checked before the age band, so an unsupported
    # roof type raises even when the age band is missing
    if classify is None:
        raise NotImplementedError(f"No roof classifier for roof type '{roof_type}'")

    age_band = row.construction_age_band
    if pd.isnull(age_band):
        return ROOF_UNKNOWN_AGE_FALLBACK.get(roof_type)

    description = classify(age_band)
    if description is not None:
        return description
    raise NotImplementedError(
        f"Roof classification returned None for roof type '{roof_type}'"
    )
# First pass: direct lookup of each (construction, insulation) pair in roof_mapping
data["landlord_roof_description"] = (
    data[["Roof Construction", "Roof Insulation"]].progress_apply(tuple, axis=1).map(roof_mapping)
)

# Second pass: classify whatever the direct lookup left null
data["landlord_roof_description"] = data.progress_apply(fill_roof_as_built, axis=1)

# Sanity check: every row must now carry a roof description
assert data["landlord_roof_description"].notnull().all(), (
    "Some roof descriptions could not be resolved"
)
def extract_insulation_thickness(value: str | None) -> int | None:
"""
Extract insulation thickness in mm from a string like 'mm150'.
Returns None if not present or not parseable.
"""
if value is None or pd.isnull(value):
return None
match = re.search(r"(\d+)", str(value))
if not match:
return None
return int(match.group(1))
# Numeric insulation thickness parsed from the raw "Roof Insulation" label
data["roof_insulation_thickness_mm"] = data["Roof Insulation"].apply(extract_insulation_thickness)

# Roof efficiency from the description, age band and insulation thickness
data["landlord_roof_efficiency"] = data.progress_apply(
    lambda record: resolve_roof_efficiency(
        description=record.landlord_roof_description,
        age_band=record.construction_age_band,
        insulation_thickness=record.roof_insulation_thickness_mm,
    ),
    axis=1,
)
assert data["landlord_roof_efficiency"].notnull().all()

# Flag sloping ceiling
data["has_sloping_ceiling"] = data["Roof Construction"].apply(
    lambda roof_type: roof_type == "PitchedWithSlopingCeiling"
)
# ------------ Floor Construction ------------
# Maps (Floor Construction, Floor Insulation) pairs to an EPC floor description.
# Only retrofitted solid / suspended floors map directly; every None entry is
# resolved in a later step (see fill_floor_as_built).
floor_mapping = {
    # Solid floor
    ("Solid", "AsBuilt"): None,  # Mapped
    ("Solid", "Unknown"): None,  # Mapped
    ("Solid", nan): None,  # Mapped
    ("Solid", "RetroFitted"): EpcFloorDescriptions.solid_insulated,
    # Suspended floor
    ("SuspendedTimber", nan): None,  # Mapped suspended_floor_as_built
    ("SuspendedTimber", "AsBuilt"): None,  # Mapped suspended_floor_as_built
    ("SuspendedTimber", "RetroFitted"): EpcFloorDescriptions.suspended_insulated,
    ("SuspendedTimber", "Unknown"): None,  # Mapped suspended_floor_as_built
    ("SuspendedNotTimber", "RetroFitted"): EpcFloorDescriptions.suspended_insulated,
    ("SuspendedNotTimber", nan): None,  # Mapped suspended_floor_as_built
    ("SuspendedNotTimber", "Unknown"): None,  # Mapped suspended_floor_as_built
    ("SuspendedNotTimber", "AsBuilt"): None,  # Mapped suspended_floor_as_built
    # Unknown type - mapped on age
    ("Unknown", "Unknown"): None,  # Mapped unknown_floor_as_built
    ("Unknown", "RetroFitted"): None,  # Mapped unknown_floor_retrofitted
    (nan, nan): None,  # No actual information!
    ("Unknown", "AsBuilt"): None,  # Mapped unknown_floor_as_built
}

# First pass: direct lookup of each (construction, insulation) pair in floor_mapping
data["landlord_floor_description"] = (
    data[["Floor Construction", "Floor Insulation"]].progress_apply(tuple, axis=1).map(floor_mapping)
)
def fill_floor_as_built(row):
    """
    Resolve the floor description for a single row.

    Returns the description already on the row when there is one. Otherwise
    picks EpcFloorDescriptions.unknown when the age band is missing or the
    floor type is not recognised, and an age-based classification for solid,
    suspended and "Unknown" floor types.
    """
    existing = row.landlord_floor_description
    # NOTE(review): identity check against None, so a NaN description is
    # returned unchanged as if it were resolved - confirm this is intended.
    if existing is not None:
        return existing

    age_band = row.construction_age_band
    floor_type = row["Floor Construction"]
    insulation = row["Floor Insulation"]

    # Without an age band nothing can be classified
    if pd.isnull(age_band):
        return EpcFloorDescriptions.unknown

    # Known floor constructions
    if floor_type == "Solid":
        return solid_floor_as_built(age_band)
    if floor_type in {"SuspendedTimber", "SuspendedNotTimber"}:
        return suspended_floor_as_built(age_band)

    # Floor type recorded as "Unknown": the insulation flag picks the classifier
    if floor_type == "Unknown":
        classify = unknown_floor_retrofitted if insulation == "RetroFitted" else unknown_floor_as_built
        return classify(age_band)

    # Anything else (missing or unrecognised floor type)
    return EpcFloorDescriptions.unknown
# Second pass: resolve the floor descriptions the direct lookup left open
data["landlord_floor_description"] = data.progress_apply(fill_floor_as_built, axis=1)

# All values should be remapped now
assert data["landlord_floor_description"].notnull().all(), (
    "Some floor descriptions could not be resolved"
)
# ------------ Glazing ------------
# Maps the raw "Glazing" value to a 5-tuple:
#   (description, energy efficiency, multi_glaze_proportion, glazed_type, glazed_area)
glazing_map = {
    # For SAP 10 assessments, the glazed type and glazed area are no longer populated in the EPC API data,
    # so both are left as None for every entry.
    "Double 2002 or later": ("Fully double glazed", EpcEfficiency.AVERAGE, 1, None, None),
    "Double before 2002": ("Fully double glazed", EpcEfficiency.POOR, 1, None, None),
    "Double but age unknown": ("Fully double glazed", EpcEfficiency.POOR, 1, None, None),
    "Single": ("Single glazed", EpcEfficiency.VERY_POOR, 0, None, None),
    # For triple glazing with age unknown, the performance is only average, whereas if it's a post 2022
    # installation, it's classed as high performance glazing with good efficiency. We'll need to be considerate
    # as to how we make updates to the windows data.
    # Triple known data is high performance glazing with Good efficiency (at least)
    "Triple": ("Fully triple glazed", EpcEfficiency.AVERAGE, 1, None, None),
    # This is also classed as high performance glazing
    "DoubleKnownData": ("High performance glazing", EpcEfficiency.GOOD, 1, None, None),
    # Under SAP 10, secondary glazing is classed as poor efficiency (whereas under SAP 2012 it was generally good)
    "Secondary": ("Full secondary glazing", EpcEfficiency.POOR, 1, None, None),
    "TripleKnownData": ("High performance glazing", EpcEfficiency.GOOD, 1, None, None),
}
# Perform the remapping: each mapped tuple is expanded into one column per element.
data[["landlord_windows_description",
      "landlord_windows_efficiency",
      "landlord_multi_glaze_proportion",
      "landlord_glazed_type",
      "landlord_glazed_area"]] = data["Glazing"].map(glazing_map).progress_apply(pd.Series)
# The columns we wish to produce are the following:
# 1) landlord_windows_description
# 2) landlord_windows_efficiency
# 3) landlord_multi_glaze_proportion - maybe don't need to store this, same for glazing type and area
# ------------ Heating ------------
# Count how many rows fall into each (Heating, Boiler Efficiency, Main Fuel, Controls Adequacy) combination.
agg = data.groupby(['Heating', 'Boiler Efficiency', 'Main Fuel', 'Controls Adequacy']).size().reset_index(name='counts')
# We map to:
# 1) Heating description
# 2) Heating efficiency
# 3) Fuel type
# 4) Heating controls
# 5) Heating controls efficiency
# TODO - when mapping heating controls, we should check the existing heating controls and the efficiency rating.
# For sub optimal heating controls, we're going to make an assumption as to what the heating controls are,
# and the energy efficiency rating we prescribe here may not be accurate. We therefore use this as an upper limit
# as opposed to a guaranteed efficiency rating. To stress, this is only relevant for sub optimal heating
# controls. E.g. it may be programmer and room thermostat.
# Boiler ratings based on efficiency
# 90%+ = A
# 86-89.9% = B -> Mapped to good efficiency
# 78 - 85% = C
# 70 - 77.9% = D
# 65 - 69.9% = E
# 60 - 64.9% = F
# <60% = G
# Maps (Heating, Boiler Efficiency, Main Fuel, Controls Adequacy) to a 5-tuple:
#   (heating system, heating efficiency, fuel, heating controls, heating controls efficiency)
# The number above each entry is its position in the dict, i.e. the index into list(heating_map.keys()).
# Entries whose value is None have no mapping yet.
# NOTE(review): mains gas boilers rated D-G receive the same GOOD heating efficiency as B/C rated ones -
# confirm this is intended.
heating_map = {
    # 0
    ('Boilers', 'A', 'ElectricityNotCommunity', 'Optimal'): (
        EpcHeatingSystems.boiler_and_radiators_electric, EpcEfficiency.VERY_POOR, EpcFuel.electricity_not_community,
        EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD
    ),
    # 1
    ('Boilers', 'A', 'ElectricityNotCommunity', 'Sub Optimal'): (
        EpcHeatingSystems.boiler_and_radiators_electric, EpcEfficiency.VERY_POOR, EpcFuel.electricity_not_community,
        EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE
    ),
    # 2
    ('Boilers', 'A', 'ElectricityNotCommunity', 'Top Spec'): (
        EpcHeatingSystems.boiler_and_radiators_electric, EpcEfficiency.VERY_POOR, EpcFuel.electricity_not_community,
        EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD
    ),
    # 3
    ('Boilers', 'A', 'LPGNotCommunity', 'Optimal'): (
        EpcHeatingSystems.boiler_and_radiators_lpg, EpcEfficiency.POOR, EpcFuel.lpg_not_community,
        EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD
    ),
    # 4
    ('Boilers', 'A', 'MainsGasNotCommunity', 'Optimal'): (
        EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.VERY_GOOD, EpcFuel.mains_gas_not_community,
        EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD
    ),
    # 5
    ('Boilers', 'A', 'MainsGasNotCommunity', 'Sub Optimal'): (
        EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.VERY_GOOD, EpcFuel.mains_gas_not_community,
        EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE
    ),
    # 6
    ('Boilers', 'A', 'MainsGasNotCommunity', 'Top Spec'): (
        EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.VERY_GOOD, EpcFuel.mains_gas_not_community,
        EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD
    ),
    # 7
    ('Boilers', 'B', 'MainsGasNotCommunity', 'Optimal'): (
        EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
        EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD
    ),
    # 8
    ('Boilers', 'B', 'MainsGasNotCommunity', 'Sub Optimal'): (
        EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
        EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE
    ),
    # 9
    ('Boilers', 'B', 'MainsGasNotCommunity', 'Top Spec'): (
        EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
        EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD
    ),
    # 10
    ('Boilers', 'C', 'ElectricityNotCommunity', 'Optimal'): (
        EpcHeatingSystems.boiler_and_radiators_electric, EpcEfficiency.VERY_POOR, EpcFuel.electricity_not_community,
        EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD
    ),
    # 11
    ('Boilers', 'C', 'ElectricityNotCommunity', 'Sub Optimal'): (
        EpcHeatingSystems.boiler_and_radiators_electric, EpcEfficiency.VERY_POOR, EpcFuel.electricity_not_community,
        EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE
    ),
    # 12
    ('Boilers', 'C', 'ElectricityNotCommunity', 'Top Spec'): (
        EpcHeatingSystems.boiler_and_radiators_electric, EpcEfficiency.VERY_POOR, EpcFuel.electricity_not_community,
        EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD
    ),
    # 13
    ('Boilers', 'C', 'LPGNotCommunity', 'Optimal'): (
        EpcHeatingSystems.boiler_and_radiators_lpg, EpcEfficiency.POOR, EpcFuel.lpg_not_community,
        EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD
    ),
    # 14
    ('Boilers', 'C', 'LPGNotCommunity', 'Sub Optimal'): (
        EpcHeatingSystems.boiler_and_radiators_lpg, EpcEfficiency.POOR, EpcFuel.lpg_not_community,
        EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE
    ),
    # 15
    ('Boilers', 'C', 'MainsGasNotCommunity', 'Optimal'): (
        EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
        EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD
    ),
    # 16
    ('Boilers', 'C', 'MainsGasNotCommunity', 'Sub Optimal'): (
        EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
        EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE
    ),
    # 17
    ('Boilers', 'C', 'MainsGasNotCommunity', 'Top Spec'): (
        EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
        EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD
    ),
    # 18 - oil boilers have an average efficiency rating
    ('Boilers', 'C', 'OilNotCommunity', 'Optimal'): (
        EpcHeatingSystems.boiler_radiators_oil, EpcEfficiency.AVERAGE, EpcFuel.oil_not_community,
        EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD
    ),
    # 19
    ('Boilers', 'C', 'OilNotCommunity', 'Sub Optimal'): (
        EpcHeatingSystems.boiler_radiators_oil, EpcEfficiency.AVERAGE, EpcFuel.oil_not_community,
        EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE
    ),
    # 20
    ('Boilers', 'C', 'OilNotCommunity', 'Top Spec'): (
        EpcHeatingSystems.boiler_radiators_oil, EpcEfficiency.AVERAGE, EpcFuel.oil_not_community,
        EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD
    ),
    # 21
    ('Boilers', 'D', 'MainsGasNotCommunity', 'Optimal'): (
        EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
        EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD
    ),
    # 22
    ('Boilers', 'D', 'MainsGasNotCommunity', 'Sub Optimal'): (
        EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
        EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE
    ),
    # 23
    ('Boilers', 'D', 'MainsGasNotCommunity', 'Top Spec'): (
        EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
        EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD
    ),
    # 24
    ('Boilers', 'E', 'ElectricityNotCommunity', 'Optimal'): (
        EpcHeatingSystems.boiler_and_radiators_electric, EpcEfficiency.VERY_POOR, EpcFuel.electricity_not_community,
        EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD
    ),
    # 25
    ('Boilers', 'E', 'MainsGasNotCommunity', 'Optimal'): (
        EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
        EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD
    ),
    # 26
    ('Boilers', 'E', 'MainsGasNotCommunity', 'Sub Optimal'): (
        EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
        EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE
    ),
    # 27
    ('Boilers', 'E', 'MainsGasNotCommunity', 'Top Spec'): (
        EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
        EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD
    ),
    # 28
    ('Boilers', 'E', 'OilNotCommunity', 'Optimal'): (
        EpcHeatingSystems.boiler_radiators_oil, EpcEfficiency.AVERAGE, EpcFuel.oil_not_community,
        EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD
    ),
    # 29
    ('Boilers', 'E', 'OilNotCommunity', 'Sub Optimal'): (
        EpcHeatingSystems.boiler_radiators_oil, EpcEfficiency.AVERAGE, EpcFuel.oil_not_community,
        EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE
    ),
    # 30
    ('Boilers', 'F', 'MainsGasNotCommunity', 'Optimal'): (
        EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
        EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD
    ),
    # 31
    ('Boilers', 'F', 'MainsGasNotCommunity', 'Sub Optimal'): (
        EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
        EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE
    ),
    # 32
    ('Boilers', 'F', 'MainsGasNotCommunity', 'Top Spec'): (
        EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
        EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD
    ),
    # 33
    ('Boilers', 'G', 'MainsGasNotCommunity', 'Optimal'): (
        EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
        EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD
    ),
    # 34
    ('Boilers', 'G', 'MainsGasNotCommunity', 'Sub Optimal'): (
        EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
        EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE
    ),
    # 35 onwards - combinations with no mapping yet (value None)
    ('Boilers', 'G', 'MainsGasNotCommunity', 'Top Spec'): None,
    ('Electric underfloor', 'A', 'ElectricityNotCommunity', 'Optimal'): None,
    ('Electric underfloor', 'A', 'ElectricityNotCommunity', 'Sub Optimal'): None,
    ('Electric underfloor', 'A', 'ElectricityNotCommunity', 'Top Spec'): None,
    ('Heat pumps (warm air)', 'A', 'ElectricityNotCommunity', 'Optimal'): None,
    ('Heat pumps (warm air)', 'A', 'ElectricityNotCommunity', 'Sub Optimal'): None,
    ('Heat pumps (wet)', 'A', 'ElectricityNotCommunity', 'Optimal'): None,
    ('Heat pumps (wet)', 'A', 'ElectricityNotCommunity', 'Sub Optimal'): None,
    ('Heat pumps (wet)', 'A', 'ElectricityNotCommunity', 'Top Spec'): None,
    ('Room heaters', 'A', 'ElectricityNotCommunity', 'Optimal'): None,
    ('Room heaters', 'A', 'ElectricityNotCommunity', 'Sub Optimal'): None,
    ('Room heaters', 'C', 'MainsGasNotCommunity', 'Sub Optimal'): None,
    ('Room heaters', 'F', 'MainsGasNotCommunity', 'Sub Optimal'): None,
    ('Room heaters', 'G', 'MainsGasNotCommunity', 'Optimal'): None,
    ('Room heaters', 'G', 'MainsGasNotCommunity', 'Sub Optimal'): None,
    ('Room heaters', 'G', 'SmokelessCoal', 'Sub Optimal'): None,
    ('Storage heaters', 'A', 'ElectricityNotCommunity', 'Optimal'): None,
    ('Storage heaters', 'A', 'ElectricityNotCommunity', 'Sub Optimal'): None,
    ('Warm Air (not heat pump)', 'G', 'ElectricityNotCommunity', 'Sub Optimal'): None,
    ('Warm Air (not heat pump)', 'G', 'MainsGasNotCommunity', 'Sub Optimal'): None
}
# ------------ Exploratory checks ------------
# Ad-hoc inspection code: the bare `value_counts()` expressions below discard their
# result, so they are only informative when the script is run interactively.

# Pull the properties behind one heating combination (position 25 in heating_map)
# so that its mapping can be checked by hand against individual UPRNs.
example = list(heating_map.keys())[25]
example_data = data[
    (data["Heating"] == example[0])
    & (data["Boiler Efficiency"] == example[1])
    & (data["Main Fuel"] == example[2])
    & (data["Controls Adequacy"] == example[3])
]
print(example_data["UPRN"].values.tolist())

# Distinct (heating, boiler efficiency, fuel) combinations, ignoring controls adequacy
agg_tuples = {
    (row['Heating'], row['Boiler Efficiency'], row['Main Fuel']): None for _, row in agg.iterrows()
}

# Reference EPC certificates used to compare descriptions and efficiency ratings
epcs = pd.read_csv("/Users/khalimconn-kowlessar/Downloads/domestic-E08000003-Manchester/certificates.csv")
epcs[epcs["LODGEMENT_DATE"] > "2025-07-01"]["WINDOWS_DESCRIPTION"].value_counts()
epcs[epcs["LODGEMENT_DATE"] > "2025-07-01"]["GLAZED_AREA"].value_counts()
epcs[
    (epcs["MAINHEAT_DESCRIPTION"] == "Boiler and radiators, mains gas")
]["MAINHEAT_ENERGY_EFF"].value_counts()

# Look up a single address. `na=False` treats missing addresses as "no match"; without it
# a missing "Address 1" puts NaN into the mask and the boolean indexing raises.
z = data[data["Address 1"].str.lower().str.contains("133, mayo", na=False)]

# ------------ Fuel ------------
# ------------ Heating Controls ------------
# ------------ Floor Area ------------
# TODO: Convert everything to values
# Variables we want to map
# 'Org Ref', 'Address 1', 'Address 2', 'Address 3', 'Postcode',
# 'Glazing', 'Heating',
# 'Boiler Efficiency', 'Main Fuel', 'Controls Adequacy', 'UPRN',
# 'Total Floor Area (m2)'
data["Glazing"].value_counts()