import pandas as pd from etl.epc.DataProcessor import construction_age_bounds_map from backend.onboarders.mappings.property_type import parity_map as property_map from backend.onboarders.mappings.age_band import party_map as age_band_map from backend.onboarders.mappings.built_form import parity_map as built_form_map def check_nulls(data, original_column, mapped_column): # We only allow nulls if the oroginal value was null null_vals = data[pd.isnull(data[mapped_column])] if null_vals.empty: return True # We make sure all original values were null assert pd.isnull(null_vals[original_column]).all(), ( f"Some values in {mapped_column} were not mapped, but original values were not null" ) # Sample input data data = pd.read_excel( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody " "- Data Extracts for Domna.xlsx", sheet_name="Sustainability" ) # We want to map the parity fields to standard EPC references. This will allow us to # 1) Estimate EPCs, more accurately # 2) Patch incorrect EPCs with ease # 3) Indicate already installed measures # ------------ construction_age_band ------------ # Map to EPC age bands # def construction_date_to_band(year): # if pd.isnull(year): # return None # # Get the year from the date which is numpy datetime format # for label, ranges in construction_age_bounds_map.items(): # if ranges["l"] <= year <= ranges["u"]: # return label # raise NotImplementedError("year out of bounds") # # # data["construction_age_band"] = pd.to_datetime(data["Construction Date"]).dt.year.apply(construction_date_to_band) data["construction_age_band"] = data["Construction Years"].map(age_band_map) check_nulls(data, "Construction Years", "construction_age_band") # ------------ property_type ------------ data["property_type"] = data["Type"].map(property_map) assert pd.isnull(data["property_type"]).sum() == 0, "Some property types were not mapped" # ------------ built_form ------------ data["built_form"] = data["Attachment"].map(built_form_map) assert pd.isnull(data["built_form"]).sum() == 0, "Some built forms were not mapped" # ------------ Wall Construction ------------ data["walls_combined"] = data["Wall Construction"] + "+" + data["Wall Insulation"].fillna("Unknown Insulation") data["Wall Insulation"].value_counts() data["Wall Construction"].value_counts() as_built_map = { "Cavity": {"insulated_age_bands":[], "partial_insulated_age_bands": []}, "Solid Brick": {"insulated_age_bands": [], "partial_insulated_age_bands": []}, "System": {"insulated_age_bands": [], "partial_insulated_age_bands": []}, "Timber Frame": {"insulated_age_bands": [], "partial_insulated_age_bands": []}, "Sandstone": {"insulated_age_bands": [], "partial_insulated_age_bands": []}, "Granite": {"insulated_age_bands": [], "partial_insulated_age_bands": []}, "Cob": {"insulated_age_bands": [], "partial_insulated_age_bands": []}, } def map_wall_construction(wall_constuction, wall_insulation, construction_age_band): if wall_insulation == "AsBuilt": # Deduce based on wall construction and age band bands = as_built_map.get(wall_constuction, None) if bands is None: raise NotImplementedError(f"Wall construction {wall_constuction} not in as built map") # We check if the age band is in insulated or partial insulated, and if neither, we assume uninsulated # Variables we want to map 'Org Ref', 'Address 1', 'Address 2', 'Address 3', 'Postcode', 'Type', 'Attachment', 'Construction Years', 'Wall Construction', 'Wall Insulation', 'Roof Construction', 'Roof Insulation', 'Floor Construction', 'Floor Insulation', 'Glazing', 'Heating', 'Boiler Efficiency', 'Main Fuel', 'Controls Adequacy', 'UPRN', 'Total Floor Area (m2)'