mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
95 lines
No EOL
3.8 KiB
Python
95 lines
No EOL
3.8 KiB
Python
import pandas as pd
|
|
from etl.epc.DataProcessor import construction_age_bounds_map
|
|
from backend.onboarders.mappings.property_type import parity_map as property_map
|
|
from backend.onboarders.mappings.age_band import party_map as age_band_map
|
|
from backend.onboarders.mappings.built_form import parity_map as built_form_map
|
|
|
|
|
|
def check_nulls(data: pd.DataFrame, original_column: str, mapped_column: str) -> bool:
    """Verify that every unmapped value stems from a missing original value.

    A null in ``mapped_column`` is only acceptable when the same row is also
    null in ``original_column``.

    Args:
        data: DataFrame holding both columns.
        original_column: Name of the source column that was mapped.
        mapped_column: Name of the column holding the mapped values.

    Returns:
        True when the check passes (including when there are no nulls at all).

    Raises:
        AssertionError: If any row has a non-null original value but a null
            mapped value.
    """
    # We only allow nulls if the original value was null
    unmapped = data[mapped_column].isna()
    had_original_value = data[original_column].notna()

    # Raise explicitly rather than via `assert`, so the check still runs when
    # Python is started with -O (which strips assert statements).
    if (unmapped & had_original_value).any():
        raise AssertionError(
            f"Some values in {mapped_column} were not mapped, but original values were not null"
        )
    return True
|
|
|
|
|
|
# Sample input data: the "Sustainability" sheet of the customer extract workbook.
# NOTE(review): the workbook location is an absolute path on a local machine,
# so this script only runs where that file exists.
data = pd.read_excel(
    io=(
        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody "
        "- Data Extracts for Domna.xlsx"
    ),
    sheet_name="Sustainability",
)
|
|
|
|
# We want to map the parity fields to standard EPC references. This will allow us to:
# 1) Estimate EPCs more accurately
# 2) Patch incorrect EPCs with ease
# 3) Indicate already installed measures
|
|
|
|
# ------------ construction_age_band ------------
# Map to EPC age bands.
#
# Commented-out alternative, kept for reference: derive the band from the year
# of "Construction Date" using the bounds in construction_age_bounds_map.
#
# def construction_date_to_band(year):
#     if pd.isnull(year):
#         return None
#     # Get the year from the date which is numpy datetime format
#     for label, ranges in construction_age_bounds_map.items():
#         if ranges["l"] <= year <= ranges["u"]:
#             return label
#     raise NotImplementedError("year out of bounds")
#
#
# data["construction_age_band"] = pd.to_datetime(data["Construction Date"]).dt.year.apply(construction_date_to_band)

# Active approach: look the "Construction Years" value up directly in the
# age band mapping.
# NOTE(review): age_band_map is imported as `party_map`, whereas the other
# mappings import `parity_map` - confirm the name in the age_band module.
data["construction_age_band"] = data["Construction Years"].map(age_band_map)

# An unmapped band is only acceptable where the source value was itself missing.
check_nulls(
    data,
    original_column="Construction Years",
    mapped_column="construction_age_band",
)
|
|
|
|
# ------------ property_type ------------
# Translate the "Type" column through the property type mapping.
data["property_type"] = data["Type"].map(property_map)

# Every row must end up with a property type; no nulls are tolerated here.
assert data["property_type"].notna().all(), "Some property types were not mapped"
|
|
|
|
# ------------ built_form ------------
# Translate the "Attachment" column through the built form mapping.
data["built_form"] = data["Attachment"].map(built_form_map)

# Every row must end up with a built form; no nulls are tolerated here.
assert data["built_form"].notna().all(), "Some built forms were not mapped"
|
|
|
|
# ------------ Wall Construction ------------
# Build a single "<construction>+<insulation>" label per row; a missing
# insulation value is labelled "Unknown Insulation".
data["walls_combined"] = (
    data["Wall Construction"]
    + "+"
    + data["Wall Insulation"].fillna("Unknown Insulation")
)

# Frequency tables of the two source columns. The results are not stored or
# printed, so they are only visible when run interactively.
data["Wall Construction"].value_counts()
data["Wall Insulation"].value_counts()
|
|
|
|
# Per wall construction: the age bands treated as insulated and as partially
# insulated when the insulation is recorded as "AsBuilt". All lists are
# currently empty. Each construction gets its own dict and its own lists, so
# entries can be filled in independently.
as_built_map = {
    construction: {"insulated_age_bands": [], "partial_insulated_age_bands": []}
    for construction in (
        "Cavity",
        "Solid Brick",
        "System",
        "Timber Frame",
        "Sandstone",
        "Granite",
        "Cob",
    )
}
|
|
|
|
def map_wall_construction(wall_constuction, wall_insulation, construction_age_band):
    """Resolve a wall construction / insulation pair (work in progress).

    Only the "AsBuilt" insulation value is handled so far: the wall
    construction is looked up in ``as_built_map`` and an unknown construction
    raises. No mapped value is produced yet, so every call that does not raise
    returns ``None``, and ``construction_age_band`` is not used yet.

    Args:
        wall_constuction: Wall construction label, used as the key into
            ``as_built_map``.
        wall_insulation: Wall insulation label; only "AsBuilt" triggers the
            lookup.
        construction_age_band: Age band of the property (currently unused).

    Raises:
        NotImplementedError: If ``wall_insulation`` is "AsBuilt" and
            ``wall_constuction`` has no entry in ``as_built_map``.
    """
    if wall_insulation != "AsBuilt":
        return None

    # Deduce based on wall construction and age band
    age_band_rules = as_built_map.get(wall_constuction)
    if age_band_rules is not None:
        # TODO: check whether the age band is in the insulated or partially
        # insulated list, and if neither, assume uninsulated.
        return None

    raise NotImplementedError(f"Wall construction {wall_constuction} not in as built map")
|
|
|
|
|
|
|
|
|
|
# Variables we want to map.
# Bound to a name so the list can be used by later code; previously these were
# bare expression statements whose values were discarded.
VARIABLES_TO_MAP = (
    "Org Ref",
    "Address 1",
    "Address 2",
    "Address 3",
    "Postcode",
    "Type",
    "Attachment",
    "Construction Years",
    "Wall Construction",
    "Wall Insulation",
    "Roof Construction",
    "Roof Insulation",
    "Floor Construction",
    "Floor Insulation",
    "Glazing",
    "Heating",
    "Boiler Efficiency",
    "Main Fuel",
    "Controls Adequacy",
    "UPRN",
    "Total Floor Area (m2)",
)