mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
718 lines
32 KiB
Python
718 lines
32 KiB
Python
import re
|
|
from numpy import nan
|
|
from tqdm import tqdm
|
|
import pandas as pd
|
|
from backend.onboarders.mappings.property_type import parity_map as property_map
|
|
from backend.onboarders.mappings.age_band import parity_map as age_band_map
|
|
from backend.onboarders.mappings.built_form import parity_map as built_form_map
|
|
from backend.onboarders.epc_descriptions import EpcWallDescriptions, EpcConstructionAgeBand, EpcEfficiency, \
|
|
WALL_DESCRIPTION_EFFICIENCIES, EpcRoofDescriptions, resolve_roof_efficiency, EpcFloorDescriptions
|
|
from backend.onboarders.epc.placeholder import EpcFuel, EpcHeatingControls, EpcHeatingSystems
|
|
from backend.onboarders.mappings.as_built_wall_classifiers import AS_BUILT_WALL_CLASSIFIERS
|
|
from backend.onboarders.mappings.as_built_roof_classifiers import AS_BUILT_ROOF_CLASSIFIERS
|
|
from backend.onboarders.mappings.as_built_floor_classifiers import unknown_floor_as_built, unknown_floor_retrofitted, \
|
|
solid_floor_as_built, suspended_floor_as_built
|
|
|
|
# Register tqdm's pandas integration so that `progress_apply` (used throughout
# this script) is available on DataFrames and Series.
tqdm.pandas()
|
|
|
|
|
|
def check_nulls(data, original_column, mapped_column):
    """
    Verify that a mapped column is only null where its source column was null.

    :param data: DataFrame containing both columns.
    :param original_column: Name of the column the mapping was applied to.
    :param mapped_column: Name of the column produced by the mapping.
    :return: True when every null in ``mapped_column`` comes from a null
        value in ``original_column``.
    :raises AssertionError: If a non-null original value ended up unmapped.
    """
    # We only allow nulls if the original value was null
    unmapped = data[mapped_column].isnull()
    if not unmapped.any():
        return True

    # We make sure all original values were null. The error is raised
    # explicitly (rather than via `assert`) so the check still runs when
    # Python is started with -O.
    if not data.loc[unmapped, original_column].isnull().all():
        raise AssertionError(
            f"Some values in {mapped_column} were not mapped, but original values were not null"
        )

    # Return True on this success path too, so the result is consistent
    # with the early return above.
    return True
|
|
|
|
|
|
# Sample input data
# NOTE(review): absolute, machine-specific path -- the script only runs where this extract exists.
data = pd.read_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody "
    "- Data Extracts for Domna.xlsx",
    sheet_name="Sustainability",
)

# We want to map the parity fields to standard EPC references. This will allow us to
# 1) Estimate EPCs, more accurately
# 2) Patch incorrect EPCs with ease
# 3) Indicate already installed measures

# ------------ construction_age_band ------------
# Nulls are tolerated here, but only where the source value was itself null.
data["construction_age_band"] = data["Construction Years"].map(age_band_map)
check_nulls(data, "Construction Years", "construction_age_band")

# ------------ property_type ------------
# Every property type must map; any null means the mapping table is incomplete.
data["property_type"] = data["Type"].map(property_map)
assert not data["property_type"].isnull().any(), "Some property types were not mapped"

# ------------ built_form ------------
# Same rule as property_type: no nulls allowed after mapping.
data["built_form"] = data["Attachment"].map(built_form_map)
assert not data["built_form"].isnull().any(), "Some built forms were not mapped"
|
|
|
|
# ------------ Wall Construction ------------
|
|
|
|
# Unique combinations of (Wall Construction, Wall Insulation), mapped to the
# matching EPC wall description. A value of None means the combination has no
# direct description here and is resolved later by fill_as_built.
wall_mapping = {
    # Cavity walls
    ('Cavity', 'FilledCavity'): EpcWallDescriptions.cavity_filled_cavity,
    ('Cavity', 'Internal'): EpcWallDescriptions.cavity_internal_insulation,
    ('Cavity', 'External'): EpcWallDescriptions.cavity_external_insulation,
    ('Cavity', 'FilledCavityPlusInternal'): EpcWallDescriptions.cavity_filled_plus_internal,
    ('Cavity', 'FilledCavityPlusExternal'): EpcWallDescriptions.cavity_filled_plus_external,
    ('Cavity', 'AsBuilt'): None,  # To be classified
    ('Cavity', 'Unknown'): None,  # To be classified

    # System built walls
    ('System', 'External'): EpcWallDescriptions.system_external_insulation,
    ('System', 'Internal'): EpcWallDescriptions.system_internal_insulation,
    ('System', 'AsBuilt'): None,  # To be classified
    ('System', 'Unknown'): None,

    # Timber Frame walls
    ('Timber Frame', 'Internal'): EpcWallDescriptions.timber_frame_internal_insulation,
    ('Timber Frame', 'External'): EpcWallDescriptions.timber_frame_external_insulation,
    ('Timber Frame', 'AsBuilt'): None,  # To be classified
    ('Timber Frame', 'Unknown'): None,

    # Solid Brick walls
    ('Solid Brick', 'External'): EpcWallDescriptions.solid_brick_external_insulation,
    ('Solid Brick', 'Internal'): EpcWallDescriptions.solid_brick_internal_insulation,
    ('Solid Brick', 'AsBuilt'): None,  # To be classified
    ('Solid Brick', 'Unknown'): None,

    # Granite walls
    ('Granite', 'External'): EpcWallDescriptions.granite_whinstone_external_insulation,
    ("Granite", 'Internal'): EpcWallDescriptions.granite_whinstone_internal_insulation,
    ('Granite', 'AsBuilt'): None,
    ('Granite', 'Unknown'): None,

    # Sandstone walls
    ('Sandstone', 'Internal'): EpcWallDescriptions.sandstone_limestone_internal_insulation,
    ('Sandstone', 'External'): EpcWallDescriptions.sandstone_limestone_external_insulation,
    ('Sandstone', 'Unknown'): None,
    ('Sandstone', 'AsBuilt'): None,

    # Cob walls
    # NOTE(review): only the 'AsBuilt' combination is listed for Cob -- confirm no other
    # insulation values occur for this wall type.
    ('Cob', 'AsBuilt'): None,
}

# Description used for a wall type when the direct mapping gave None and the
# construction age band is missing (see fill_as_built).
WALL_UNKNOWN_AGE_FALLBACK = {
    "Cavity": EpcWallDescriptions.cavity_as_built_unknown,
    "Solid Brick": EpcWallDescriptions.solid_brick_as_built_unknown,
    "Timber Frame": EpcWallDescriptions.timber_frame_as_built_unknown,
    "System": EpcWallDescriptions.system_as_built_unknown,
    "Granite": EpcWallDescriptions.granite_as_built_unknown,
    "Sandstone": EpcWallDescriptions.sandstone_as_built_unknown,
    "Cob": EpcWallDescriptions.cob_as_built_unknown,
}
|
|
|
|
# Build one (construction, insulation) tuple per row, then look each tuple up
# in wall_mapping.
_wall_keys = data[["Wall Construction", "Wall Insulation"]].apply(tuple, axis=1)
data["landlord_wall_description"] = _wall_keys.map(wall_mapping)
|
|
|
|
|
|
def fill_as_built(row):
    """
    Resolve the wall description for one row of the landlord data.

    A description that is anything other than ``None`` is returned unchanged.
    Otherwise the wall is classified from its construction type: through the
    unknown-age fallback table when the construction age band is null, or
    through the as-built classifier registered for that wall type.

    :param row: A DataFrame row exposing ``landlord_wall_description``,
        ``construction_age_band`` and ``"Wall Construction"``.
    :return: The resolved description, or None when no fallback or classifier
        exists for the wall type.
    """
    existing = row.landlord_wall_description
    # Already resolved via direct mapping
    if existing is not None:
        return existing

    construction = row["Wall Construction"]
    age_band = row.construction_age_band

    # Missing construction age → conservative fallback
    if pd.isnull(age_band):
        return WALL_UNKNOWN_AGE_FALLBACK.get(construction)

    classify = AS_BUILT_WALL_CLASSIFIERS.get(construction)
    return None if classify is None else classify(age_band)
|
|
|
|
|
|
def resolve_wall_efficiency(
    description: EpcWallDescriptions,
    age_band: EpcConstructionAgeBand | None,
) -> EpcEfficiency:
    """
    Work out the efficiency rating for a wall description.

    :param description: The resolved EPC wall description.
    :param age_band: Construction age band, or None / null when unknown.
    :return: ``EpcEfficiency.NA`` for "unknown insulation" descriptions, for
        descriptions without an entry in ``WALL_DESCRIPTION_EFFICIENCIES``,
        and for age-dependent rules when no age band is available. Otherwise
        the fixed efficiency, or the result of calling the rule with the age
        band.
    """
    # Unknown / holding descriptions → efficiency unknown, so skip the lookup
    is_holding = "unknown insulation" in description.value.lower()
    rule = None if is_holding else WALL_DESCRIPTION_EFFICIENCIES.get(description)

    if rule is None:
        return EpcEfficiency.NA

    # A rule is either a fixed efficiency or a callable taking the age band
    if isinstance(rule, EpcEfficiency):
        return rule

    # Rule needs age band but we may not have one
    has_age_band = age_band is not None and not pd.isnull(age_band)
    return rule(age_band) if has_age_band else EpcEfficiency.NA
|
|
|
|
|
|
# Resolve the descriptions the direct mapping left as None
data["landlord_wall_description"] = data.progress_apply(fill_as_built, axis=1)

assert not data["landlord_wall_description"].isnull().any(), (
    "Some wall descriptions could not be resolved"
)

# Derive the efficiency from the resolved description and the age band
data["landlord_wall_efficiency"] = data.progress_apply(
    lambda row: resolve_wall_efficiency(
        description=row.landlord_wall_description,
        age_band=row.construction_age_band,
    ),
    axis=1,
)
# Sanity check
assert not data["landlord_wall_efficiency"].isnull().any()
|
|
|
|
# ------------ Roof Construction ------------
|
|
|
|
|
|
# (Roof Construction, Roof Insulation) -> EPC roof description. A value of None
# means the combination is classified later by fill_roof_as_built.
roof_mapping = {
    # Dwelling above
    ('AnotherDwellingAbove', 'Another Dwelling Above'): EpcRoofDescriptions.another_dwelling_above,
    ('SameDwellingAbove', 'Same Dwelling Above'): EpcRoofDescriptions.another_dwelling_above,
    # Pitched, normal loft access, with a loft thickness
    ('PitchedNormalLoftAccess', 'mm25'): EpcRoofDescriptions.loft_25mm_insulation,
    ('PitchedNormalLoftAccess', 'mm50'): EpcRoofDescriptions.loft_50mm_insulation,
    ('PitchedNormalLoftAccess', 'mm75'): EpcRoofDescriptions.loft_75mm_insulation,
    ('PitchedNormalLoftAccess', 'mm100'): EpcRoofDescriptions.loft_100mm_insulation,
    ('PitchedNormalLoftAccess', 'mm150'): EpcRoofDescriptions.loft_150mm_insulation,
    ('PitchedNormalLoftAccess', 'mm200'): EpcRoofDescriptions.loft_200mm_insulation,
    ('PitchedNormalLoftAccess', 'mm250'): EpcRoofDescriptions.loft_250mm_insulation,
    ('PitchedNormalLoftAccess', 'mm270'): EpcRoofDescriptions.loft_270mm_insulation,
    ('PitchedNormalLoftAccess', 'mm300'): EpcRoofDescriptions.loft_300mm_insulation,
    ('PitchedNormalLoftAccess', 'mm350'): EpcRoofDescriptions.loft_350mm_insulation,
    ('PitchedNormalLoftAccess', 'mm400'): EpcRoofDescriptions.loft_400mm_plus_insulation,

    # Pitched, no loft access, with a loft thickness
    ('PitchedNormalNoLoftAccess', 'mm25'): EpcRoofDescriptions.loft_25mm_insulation,
    ('PitchedNormalNoLoftAccess', 'mm50'): EpcRoofDescriptions.loft_50mm_insulation,
    ('PitchedNormalNoLoftAccess', 'mm75'): EpcRoofDescriptions.loft_75mm_insulation,
    ('PitchedNormalNoLoftAccess', 'mm100'): EpcRoofDescriptions.loft_100mm_insulation,
    ('PitchedNormalNoLoftAccess', 'mm150'): EpcRoofDescriptions.loft_150mm_insulation,
    ('PitchedNormalNoLoftAccess', 'mm200'): EpcRoofDescriptions.loft_200mm_insulation,
    ('PitchedNormalNoLoftAccess', 'mm250'): EpcRoofDescriptions.loft_250mm_insulation,
    ('PitchedNormalNoLoftAccess', 'mm270'): EpcRoofDescriptions.loft_270mm_insulation,
    ('PitchedNormalNoLoftAccess', 'mm300'): EpcRoofDescriptions.loft_300mm_insulation,
    ('PitchedNormalNoLoftAccess', 'mm350'): EpcRoofDescriptions.loft_350mm_insulation,
    ('PitchedNormalNoLoftAccess', 'mm400'): EpcRoofDescriptions.loft_400mm_plus_insulation,

    # All pitched options with asbuilt or unknown go to EpcRoofDescriptions.pitched_insulated_assumed
    # With access
    ('PitchedNormalLoftAccess', nan): EpcRoofDescriptions.pitched_insulated_assumed,
    ('PitchedNormalLoftAccess', 'AsBuilt'): EpcRoofDescriptions.pitched_insulated_assumed,
    ('PitchedNormalLoftAccess', 'Unknown'): EpcRoofDescriptions.pitched_insulated_assumed,
    # No access
    ('PitchedNormalNoLoftAccess', nan): EpcRoofDescriptions.pitched_insulated_assumed,
    ('PitchedNormalNoLoftAccess', 'AsBuilt'): EpcRoofDescriptions.pitched_insulated_assumed,
    ('PitchedNormalNoLoftAccess', 'Unknown'): EpcRoofDescriptions.pitched_insulated_assumed,

    # Flat
    ('Flat', 'NoInsulation'): EpcRoofDescriptions.flat_no_insulation,
    # Flat - limited insulation
    # Every other thickness key in this table uses the 'mmNN' form (including
    # 'mm12' for sloping ceilings), so 'mm12' is accepted here as well. Without
    # it a flat roof recorded as 'mm12' would miss this table and be classified
    # purely by age in fill_roof_as_built. The original '12mm' key is kept in
    # case the data really does contain it.
    ('Flat', '12mm'): EpcRoofDescriptions.flat_limited_insulation,
    ('Flat', 'mm12'): EpcRoofDescriptions.flat_limited_insulation,
    ('Flat', 'mm25'): EpcRoofDescriptions.flat_limited_insulation,
    ('Flat', 'mm50'): EpcRoofDescriptions.flat_limited_insulation,
    # Flat insulated
    ('Flat', 'mm75'): EpcRoofDescriptions.flat_insulated,
    ('Flat', 'mm100'): EpcRoofDescriptions.flat_insulated,
    ('Flat', 'mm150'): EpcRoofDescriptions.flat_insulated,
    ('Flat', 'mm200'): EpcRoofDescriptions.flat_insulated,
    ('Flat', 'mm250'): EpcRoofDescriptions.flat_insulated,
    ('Flat', 'mm300'): EpcRoofDescriptions.flat_insulated,
    ('Flat', 'mm350'): EpcRoofDescriptions.flat_insulated,
    ('Flat', 'mm400'): EpcRoofDescriptions.flat_insulated,
    # Flat - as built or unknown
    ('Flat', 'AsBuilt'): None,  # To be classified
    ('Flat', nan): None,  # To be classified
    ('Flat', 'Unknown'): None,  # To be classified

    # 12mm = very poor & has limited insulation description
    # 25, 50 = poor & has limited insulation description
    # 75, 100, 125mm = average (Flat, insulated)
    # 150, 175, 200, 225, 250mm = good (Flat, insulated)
    # 270mm+ = very good (Flat, insulated)

    # Thatched
    ('PitchedThatched', 'mm50'): EpcRoofDescriptions.thatched_with_additional_insulation,
    ('PitchedThatched', 'mm150'): EpcRoofDescriptions.thatched_with_additional_insulation,
    ('PitchedThatched', 'mm300'): EpcRoofDescriptions.thatched_with_additional_insulation,
    ('PitchedThatched', 'Unknown'): EpcRoofDescriptions.thatched,  # efficiency classified based on age

    # Sloping:
    # Limited (12 very poor, 25-50 poor)
    ('PitchedWithSlopingCeiling', 'mm12'): EpcRoofDescriptions.sloping_pitched_limited_insulation,
    ('PitchedWithSlopingCeiling', 'mm25'): EpcRoofDescriptions.sloping_pitched_limited_insulation,
    ('PitchedWithSlopingCeiling', 'mm50'): EpcRoofDescriptions.sloping_pitched_limited_insulation,
    # Insulated 75mm+ (75 - 125 average, 150 - 250 good, 270+ very good)
    ('PitchedWithSlopingCeiling', 'mm75'): EpcRoofDescriptions.sloping_pitched_insulated,
    ('PitchedWithSlopingCeiling', 'mm100'): EpcRoofDescriptions.sloping_pitched_insulated,
    ('PitchedWithSlopingCeiling', 'mm150'): EpcRoofDescriptions.sloping_pitched_insulated,
    ('PitchedWithSlopingCeiling', 'mm200'): EpcRoofDescriptions.sloping_pitched_insulated,
    ('PitchedWithSlopingCeiling', 'mm250'): EpcRoofDescriptions.sloping_pitched_insulated,
    ('PitchedWithSlopingCeiling', 'mm270'): EpcRoofDescriptions.sloping_pitched_insulated,
    ('PitchedWithSlopingCeiling', 'mm300'): EpcRoofDescriptions.sloping_pitched_insulated,
    ('PitchedWithSlopingCeiling', 'mm350'): EpcRoofDescriptions.sloping_pitched_insulated,
    ('PitchedWithSlopingCeiling', 'mm400'): EpcRoofDescriptions.sloping_pitched_insulated,
    # As built/unknown
    ('PitchedWithSlopingCeiling', 'AsBuilt'): None,  # To be classified
    ('PitchedWithSlopingCeiling', nan): None,  # To be classified
    ('PitchedWithSlopingCeiling', 'Unknown'): None,  # To be classified
}
|
|
|
|
# Description used for a roof type when no direct mapping resolved it and the
# construction age band is missing (see fill_roof_as_built).
ROOF_UNKNOWN_AGE_FALLBACK = {
    "Flat": EpcRoofDescriptions.flat_as_built_unknown,
    "PitchedWithSlopingCeiling": EpcRoofDescriptions.sloping_pitched_as_built_unknown,
    "PitchedThatched": EpcRoofDescriptions.thatched_as_built_unknown,
    "PitchedNormalLoftAccess": EpcRoofDescriptions.loft_as_built_unknown,
    "PitchedNormalNoLoftAccess": EpcRoofDescriptions.loft_as_built_unknown,
}
|
|
|
|
|
|
def fill_roof_as_built(row):
    """
    Resolve the roof description for one row of the landlord data.

    :param row: A DataFrame row exposing ``landlord_roof_description``,
        ``construction_age_band`` and ``"Roof Construction"``.
    :return: The existing description when it is not null; otherwise the
        unknown-age fallback (age band null) or the classifier's result.
    :raises NotImplementedError: If no as-built classifier is registered for
        the roof type, or the classifier returns None.
    """
    current = row.landlord_roof_description
    # Already resolved
    if pd.notnull(current):
        return current

    roof_type = row["Roof Construction"]

    # The classifier must exist even when the age band turns out to be missing
    classify = AS_BUILT_ROOF_CLASSIFIERS.get(roof_type)
    if classify is None:
        raise NotImplementedError(f"No roof classifier for roof type '{roof_type}'")

    age_band = row.construction_age_band
    if pd.isnull(age_band):
        return ROOF_UNKNOWN_AGE_FALLBACK.get(roof_type)

    classified = classify(age_band)
    if classified is not None:
        return classified

    raise NotImplementedError(
        f"Roof classification returned None for roof type '{roof_type}'"
    )
|
|
|
|
|
|
# Direct lookup of each (construction, insulation) pair in roof_mapping
_roof_keys = data[["Roof Construction", "Roof Insulation"]].progress_apply(tuple, axis=1)
data["landlord_roof_description"] = _roof_keys.map(roof_mapping)

# Classify whatever the direct lookup left unresolved
data["landlord_roof_description"] = data.progress_apply(fill_roof_as_built, axis=1)
# Sanity check
assert not data["landlord_roof_description"].isnull().any(), (
    "Some roof descriptions could not be resolved"
)
|
|
|
|
|
|
def extract_insulation_thickness(value: str | None) -> int | None:
|
|
"""
|
|
Extract insulation thickness in mm from a string like 'mm150'.
|
|
Returns None if not present or not parseable.
|
|
"""
|
|
if value is None or pd.isnull(value):
|
|
return None
|
|
|
|
match = re.search(r"(\d+)", str(value))
|
|
if not match:
|
|
return None
|
|
|
|
return int(match.group(1))
|
|
|
|
|
|
# Numeric thickness parsed from the raw insulation label
data["roof_insulation_thickness_mm"] = data["Roof Insulation"].apply(extract_insulation_thickness)

# Efficiency from the description, age band and parsed thickness
data["landlord_roof_efficiency"] = data.progress_apply(
    lambda row: resolve_roof_efficiency(
        description=row.landlord_roof_description,
        age_band=row.construction_age_band,
        insulation_thickness=row.roof_insulation_thickness_mm,
    ),
    axis=1,
)

assert not data["landlord_roof_efficiency"].isnull().any()

# Flag sloping ceiling
data["has_sloping_ceiling"] = data["Roof Construction"].eq("PitchedWithSlopingCeiling")
|
|
|
|
# ------------ Floor Construction ------------
|
|
|
|
# (Floor Construction, Floor Insulation) -> EPC floor description. A value of
# None means the combination is resolved later by fill_floor_as_built.
# NOTE(review): several keys contain NaN; whether those keys match the NaN
# values coming from the data depends on how pandas performs the lookup --
# confirm they resolve as intended.
floor_mapping = {
    # Solid floor
    ('Solid', 'AsBuilt'): None,  # Mapped
    ('Solid', 'Unknown'): None,  # Mapped
    ('Solid', nan): None,  # Mapped
    ('Solid', 'RetroFitted'): EpcFloorDescriptions.solid_insulated,

    # Suspended floor
    ('SuspendedTimber', nan): None,  # Mapped suspended_floor_as_built
    ('SuspendedTimber', 'AsBuilt'): None,  # Mapped suspended_floor_as_built
    ('SuspendedTimber', 'RetroFitted'): EpcFloorDescriptions.suspended_insulated,
    ('SuspendedTimber', 'Unknown'): None,  # Mapped suspended_floor_as_built
    ('SuspendedNotTimber', 'RetroFitted'): EpcFloorDescriptions.suspended_insulated,
    ('SuspendedNotTimber', nan): None,  # Mapped suspended_floor_as_built
    ('SuspendedNotTimber', 'Unknown'): None,  # Mapped suspended_floor_as_built
    ('SuspendedNotTimber', 'AsBuilt'): None,  # Mapped suspended_floor_as_built

    # Unknown type - mapped on age
    ('Unknown', 'Unknown'): None,  # Mapped unknown_floor_as_built
    ('Unknown', 'RetroFitted'): None,  # Mapped unknown_floor_retrofitted
    (nan, nan): None,  # No actual information!
    ('Unknown', 'AsBuilt'): None,  # Mapped unknown_floor_as_built
}
|
|
|
|
# Direct lookup of each (construction, insulation) pair in floor_mapping
_floor_keys = data[["Floor Construction", "Floor Insulation"]].progress_apply(tuple, axis=1)
data["landlord_floor_description"] = _floor_keys.map(floor_mapping)
|
|
|
|
|
|
def fill_floor_as_built(row):
    """
    Resolve the floor description for one row of the landlord data.

    :param row: A DataFrame row exposing ``landlord_floor_description``,
        ``construction_age_band``, ``"Floor Construction"`` and
        ``"Floor Insulation"``.
    :return: The existing description when it is anything other than None;
        ``EpcFloorDescriptions.unknown`` when the age band is null or the
        floor type is not recognised; otherwise the result of the as-built
        classifier for the floor type.
    """
    # 1. Already resolved
    resolved = row.landlord_floor_description
    if resolved is not None:
        return resolved

    age_band = row.construction_age_band
    floor_type = row["Floor Construction"]
    insulation = row["Floor Insulation"]

    # 2. Missing age band → conservative fallback
    if pd.isnull(age_band):
        return EpcFloorDescriptions.unknown

    # 3. Known floor types
    if floor_type == "Solid":
        return solid_floor_as_built(age_band)
    if floor_type in {"SuspendedTimber", "SuspendedNotTimber"}:
        return suspended_floor_as_built(age_band)

    # 5. Truly missing / garbage input
    if floor_type != "Unknown":
        return EpcFloorDescriptions.unknown

    # 4. Unknown floor type: the classifier depends on whether insulation was retrofitted
    classify = unknown_floor_retrofitted if insulation == "RetroFitted" else unknown_floor_as_built
    return classify(age_band)
|
|
|
|
|
|
# Classify whatever the direct lookup left unresolved
data["landlord_floor_description"] = data.progress_apply(fill_floor_as_built, axis=1)

# All values should be remapped now
assert not data["landlord_floor_description"].isnull().any(), (
    "Some floor descriptions could not be resolved"
)
|
|
|
|
# ------------ Glazing ------------
|
|
# Maps the landlord "Glazing" value to a 5-tuple describing the windows.
glazing_map = {
    # (description, energy efficiency, multi_glaze_proportion, glazed_type, glazed_area)
    # For SAP 10 assessments, the glazed type and glazed area are not populated in the EPC API data any more
    "Double 2002 or later": ("Fully double glazed", EpcEfficiency.AVERAGE, 1, None, None),
    "Double before 2002": ("Fully double glazed", EpcEfficiency.POOR, 1, None, None),
    "Double but age unknown": ("Fully double glazed", EpcEfficiency.POOR, 1, None, None),
    "Single": ("Single glazed", EpcEfficiency.VERY_POOR, 0, None, None),
    # For triple glazing, with age unknown, the performance is only average, whereas if it's a post 2022
    # installation, it's classed as high performance glazing with good efficiency. We'll need to be considerate as to
    # how we make updates to the windows data.
    # Triple known data is high performance glazing with Good efficiency (at least)
    "Triple": ("Fully triple glazed", EpcEfficiency.AVERAGE, 1, None, None),
    # This is also classed as high performance glazing
    "DoubleKnownData": ("High performance glazing", EpcEfficiency.GOOD, 1, None, None),
    # Under SAP 10, secondary glazing is classed as poor efficiency (whereas under SAP 2012 it was generally good)
    "Secondary": ("Full secondary glazing", EpcEfficiency.POOR, 1, None, None),
    "TripleKnownData": ("High performance glazing", EpcEfficiency.GOOD, 1, None, None),
}
|
|
|
|
# Expand each mapped glazing tuple into its own set of columns
_glazing_columns = [
    "landlord_windows_description",
    "landlord_windows_efficiency",
    "landlord_multi_glaze_proportion",
    "landlord_glazed_type",
    "landlord_glazed_area",
]
_glazing_values = data["Glazing"].map(glazing_map)
data[_glazing_columns] = _glazing_values.progress_apply(pd.Series)

# Perform the remapping. The columns we wish to produce are the following:
# 1) landlord_windows_description
# 2) landlord_windows_efficiency
# 3) landlord_multi_glaze_proportion - maybe don't need to store this, same for glazing type and area
|
|
|
|
|
|
# ------------ Heating ------------
|
|
|
|
# Count how often each heating / rating / fuel / controls combination occurs
agg = (
    data
    .groupby(['Heating', 'Boiler Efficiency', 'Main Fuel', 'Controls Adequacy'])
    .size()
    .reset_index(name='counts')
)
|
|
|
|
# We map to:
|
|
# 1) Heating description
|
|
# 2) Heating efficiency
|
|
# 3) Fuel type
|
|
# 4) Heating controls
|
|
# 5) Heating controls efficiency
|
|
|
|
# TODO - when mapping heating controls, we should check the existing heating controls and the efficiency rating
|
|
# For sub optimal heating controls, we're going to make an assumption as to what the heating controls are
|
|
# and the energy efficiency rating we prescribe here may not be accurate. We therefore use this as an upper limit
|
|
# as opposed to a guaranteed efficiency rating. To stress, this is only relevant for sub optimal heating
|
|
# controls. E.g. it may be programmer and room thermostat
|
|
|
|
# Boiler ratings based on efficiency
|
|
# 90%+ = A
|
|
# 86-89.9% = B -> Mapped to good efficiency
|
|
# 78 - 85% = C
|
|
# 70 - 77.9% = D
|
|
# 65 - 69.9% = E
|
|
# 60 - 64.9% = F
|
|
# <60% = G
|
|
|
|
# Keyed by (Heating, Boiler Efficiency, Main Fuel, Controls Adequacy). Each value is
# (heating description, heating efficiency, fuel type, heating controls,
#  heating controls efficiency); None marks a combination with no mapping defined yet.
# The numeric comments are the positional index of each key: insertion order matters
# because the exploratory code below indexes into list(heating_map.keys()).
# NOTE(review): mains gas boilers rated D-G are all given EpcEfficiency.GOOD, the same
# as B and C -- confirm this is intended given the rating bands listed above.
heating_map = {
    # 0
    ('Boilers', 'A', 'ElectricityNotCommunity', 'Optimal'): (
        EpcHeatingSystems.boiler_and_radiators_electric, EpcEfficiency.VERY_POOR, EpcFuel.electricity_not_community,
        EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD
    ),
    # 1
    ('Boilers', 'A', 'ElectricityNotCommunity', 'Sub Optimal'): (
        EpcHeatingSystems.boiler_and_radiators_electric, EpcEfficiency.VERY_POOR, EpcFuel.electricity_not_community,
        EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE
    ),
    # 2
    ('Boilers', 'A', 'ElectricityNotCommunity', 'Top Spec'): (
        EpcHeatingSystems.boiler_and_radiators_electric, EpcEfficiency.VERY_POOR, EpcFuel.electricity_not_community,
        EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD
    ),
    # 3
    ('Boilers', 'A', 'LPGNotCommunity', 'Optimal'): (
        EpcHeatingSystems.boiler_and_radiators_lpg, EpcEfficiency.POOR, EpcFuel.lpg_not_community,
        EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD
    ),
    # 4
    ('Boilers', 'A', 'MainsGasNotCommunity', 'Optimal'): (
        EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.VERY_GOOD, EpcFuel.mains_gas_not_community,
        EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD
    ),
    # 5
    ('Boilers', 'A', 'MainsGasNotCommunity', 'Sub Optimal'): (
        EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.VERY_GOOD, EpcFuel.mains_gas_not_community,
        EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE
    ),
    # 6
    ('Boilers', 'A', 'MainsGasNotCommunity', 'Top Spec'): (
        EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.VERY_GOOD, EpcFuel.mains_gas_not_community,
        EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD
    ),
    # 7
    ('Boilers', 'B', 'MainsGasNotCommunity', 'Optimal'): (
        EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
        EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD
    ),
    # 8
    ('Boilers', 'B', 'MainsGasNotCommunity', 'Sub Optimal'): (
        EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
        EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE
    ),
    # 9
    ('Boilers', 'B', 'MainsGasNotCommunity', 'Top Spec'): (
        EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
        EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD
    ),
    # 10
    ('Boilers', 'C', 'ElectricityNotCommunity', 'Optimal'): (
        EpcHeatingSystems.boiler_and_radiators_electric, EpcEfficiency.VERY_POOR, EpcFuel.electricity_not_community,
        EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD
    ),
    # 11
    ('Boilers', 'C', 'ElectricityNotCommunity', 'Sub Optimal'): (
        EpcHeatingSystems.boiler_and_radiators_electric, EpcEfficiency.VERY_POOR, EpcFuel.electricity_not_community,
        EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE
    ),
    # 12
    ('Boilers', 'C', 'ElectricityNotCommunity', 'Top Spec'): (
        EpcHeatingSystems.boiler_and_radiators_electric, EpcEfficiency.VERY_POOR, EpcFuel.electricity_not_community,
        EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD
    ),
    # 13
    ('Boilers', 'C', 'LPGNotCommunity', 'Optimal'): (
        EpcHeatingSystems.boiler_and_radiators_lpg, EpcEfficiency.POOR, EpcFuel.lpg_not_community,
        EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD
    ),
    # 14
    ('Boilers', 'C', 'LPGNotCommunity', 'Sub Optimal'): (
        EpcHeatingSystems.boiler_and_radiators_lpg, EpcEfficiency.POOR, EpcFuel.lpg_not_community,
        EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE
    ),
    # 15
    ('Boilers', 'C', 'MainsGasNotCommunity', 'Optimal'): (
        EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
        EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD
    ),
    # 16
    ('Boilers', 'C', 'MainsGasNotCommunity', 'Sub Optimal'): (
        EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
        EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE
    ),
    # 17
    ('Boilers', 'C', 'MainsGasNotCommunity', 'Top Spec'): (
        EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
        EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD
    ),
    # 18 - oil boilers have an average efficiency rating
    ('Boilers', 'C', 'OilNotCommunity', 'Optimal'): (
        EpcHeatingSystems.boiler_radiators_oil, EpcEfficiency.AVERAGE, EpcFuel.oil_not_community,
        EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD
    ),
    # 19
    ('Boilers', 'C', 'OilNotCommunity', 'Sub Optimal'): (
        EpcHeatingSystems.boiler_radiators_oil, EpcEfficiency.AVERAGE, EpcFuel.oil_not_community,
        EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE
    ),
    # 20
    ('Boilers', 'C', 'OilNotCommunity', 'Top Spec'): (
        EpcHeatingSystems.boiler_radiators_oil, EpcEfficiency.AVERAGE, EpcFuel.oil_not_community,
        EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD
    ),
    # 21
    ('Boilers', 'D', 'MainsGasNotCommunity', 'Optimal'): (
        EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
        EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD
    ),
    # 22
    ('Boilers', 'D', 'MainsGasNotCommunity', 'Sub Optimal'): (
        EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
        EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE
    ),
    # 23
    ('Boilers', 'D', 'MainsGasNotCommunity', 'Top Spec'): (
        EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
        EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD
    ),
    # 24
    ('Boilers', 'E', 'ElectricityNotCommunity', 'Optimal'): (
        EpcHeatingSystems.boiler_and_radiators_electric, EpcEfficiency.VERY_POOR, EpcFuel.electricity_not_community,
        EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD
    ),
    # 25
    ('Boilers', 'E', 'MainsGasNotCommunity', 'Optimal'): (
        EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
        EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD
    ),
    # 26
    ('Boilers', 'E', 'MainsGasNotCommunity', 'Sub Optimal'): (
        EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
        EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE
    ),
    # 27
    ('Boilers', 'E', 'MainsGasNotCommunity', 'Top Spec'): (
        EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
        EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD
    ),
    # 28
    ('Boilers', 'E', 'OilNotCommunity', 'Optimal'): (
        EpcHeatingSystems.boiler_radiators_oil, EpcEfficiency.AVERAGE, EpcFuel.oil_not_community,
        EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD
    ),
    # 29
    ('Boilers', 'E', 'OilNotCommunity', 'Sub Optimal'): (
        EpcHeatingSystems.boiler_radiators_oil, EpcEfficiency.AVERAGE, EpcFuel.oil_not_community,
        EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE
    ),
    # 30
    ('Boilers', 'F', 'MainsGasNotCommunity', 'Optimal'): (
        EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
        EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD
    ),
    # 31
    ('Boilers', 'F', 'MainsGasNotCommunity', 'Sub Optimal'): (
        EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
        EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE
    ),
    # 32
    ('Boilers', 'F', 'MainsGasNotCommunity', 'Top Spec'): (
        EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
        EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD
    ),
    # 33
    ('Boilers', 'G', 'MainsGasNotCommunity', 'Optimal'): (
        EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
        EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD
    ),
    # 34
    ('Boilers', 'G', 'MainsGasNotCommunity', 'Sub Optimal'): (
        EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
        EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE
    ),
    # 35 onwards: no mapping defined yet
    ('Boilers', 'G', 'MainsGasNotCommunity', 'Top Spec'): None,
    ('Electric underfloor', 'A', 'ElectricityNotCommunity', 'Optimal'): None,
    ('Electric underfloor', 'A', 'ElectricityNotCommunity', 'Sub Optimal'): None,
    ('Electric underfloor', 'A', 'ElectricityNotCommunity', 'Top Spec'): None,
    ('Heat pumps (warm air)', 'A', 'ElectricityNotCommunity', 'Optimal'): None,
    ('Heat pumps (warm air)', 'A', 'ElectricityNotCommunity', 'Sub Optimal'): None,
    ('Heat pumps (wet)', 'A', 'ElectricityNotCommunity', 'Optimal'): None,
    ('Heat pumps (wet)', 'A', 'ElectricityNotCommunity', 'Sub Optimal'): None,
    ('Heat pumps (wet)', 'A', 'ElectricityNotCommunity', 'Top Spec'): None,
    ('Room heaters', 'A', 'ElectricityNotCommunity', 'Optimal'): None,
    ('Room heaters', 'A', 'ElectricityNotCommunity', 'Sub Optimal'): None,
    ('Room heaters', 'C', 'MainsGasNotCommunity', 'Sub Optimal'): None,
    ('Room heaters', 'F', 'MainsGasNotCommunity', 'Sub Optimal'): None,
    ('Room heaters', 'G', 'MainsGasNotCommunity', 'Optimal'): None,
    ('Room heaters', 'G', 'MainsGasNotCommunity', 'Sub Optimal'): None,
    ('Room heaters', 'G', 'SmokelessCoal', 'Sub Optimal'): None,
    ('Storage heaters', 'A', 'ElectricityNotCommunity', 'Optimal'): None,
    ('Storage heaters', 'A', 'ElectricityNotCommunity', 'Sub Optimal'): None,
    ('Warm Air (not heat pump)', 'G', 'ElectricityNotCommunity', 'Sub Optimal'): None,
    ('Warm Air (not heat pump)', 'G', 'MainsGasNotCommunity', 'Sub Optimal'): None
}
|
|
|
|
# Pick one heating combination by position and list the UPRNs that match it.
# The position depends on heating_map's insertion order.
example = list(heating_map)[25]

example_data = data[
    data["Heating"].eq(example[0])
    & data["Boiler Efficiency"].eq(example[1])
    & data["Main Fuel"].eq(example[2])
    & data["Controls Adequacy"].eq(example[3])
]

print(example_data["UPRN"].values.tolist())

# Distinct (heating, rating, fuel) triples seen in the data, each mapped to None
agg_tuples = dict.fromkeys(
    zip(agg['Heating'], agg['Boiler Efficiency'], agg['Main Fuel'])
)
# NOTE(review): absolute, machine-specific path
epcs = pd.read_csv("/Users/khalimconn-kowlessar/Downloads/domestic-E08000003-Manchester/certificates.csv")

# Exploratory look-ups: the results are not stored, so these only display
# anything in an interactive session.
epcs.loc[epcs["LODGEMENT_DATE"] > "2025-07-01", "WINDOWS_DESCRIPTION"].value_counts()
epcs.loc[epcs["LODGEMENT_DATE"] > "2025-07-01", "GLAZED_AREA"].value_counts()

epcs.loc[
    epcs["MAINHEAT_DESCRIPTION"] == "Boiler and radiators, mains gas",
    "MAINHEAT_ENERGY_EFF",
].value_counts()

_address_lower = data["Address 1"].str.lower()
z = data[_address_lower.str.contains("133, mayo")]
|
|
|
|
# ------------ Fuel ------------
|
|
|
|
# ------------ Heating Controls ------------
|
|
|
|
# ------------ Floor Area ------------
|
|
# TODO: Convert everything to values
|
|
|
|
# Variables we want to map
|
|
# 'Org Ref', 'Address 1', 'Address 2', 'Address 3', 'Postcode',
|
|
# 'Glazing', 'Heating',
|
|
# 'Boiler Efficiency', 'Main Fuel', 'Controls Adequacy', 'UPRN',
|
|
# 'Total Floor Area (m2)'
|
|
|
|
|
|
# Distribution of the raw glazing categories. The result is not stored, so it
# is only visible in an interactive session. The original ran this identical,
# side-effect-free expression twice; one evaluation is enough.
data["Glazing"].value_counts()
|