import pandas as pd from etl.epc.DataProcessor import construction_age_bounds_map from backend.onboarders.mappings.property_type import parity_map as property_map from backend.onboarders.mappings.age_band import party_map as age_band_map from backend.onboarders.mappings.built_form import parity_map as built_form_map def check_nulls(data, original_column, mapped_column): # We only allow nulls if the oroginal value was null null_vals = data[pd.isnull(data[mapped_column])] if null_vals.empty: return True # We make sure all original values were null assert pd.isnull(null_vals[original_column]).all(), ( f"Some values in {mapped_column} were not mapped, but original values were not null" ) # Sample input data data = pd.read_excel( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody " "- Data Extracts for Domna.xlsx", sheet_name="Sustainability" ) # We want to map the parity fields to standard EPC references. This will allow us to # 1) Estimate EPCs, more accurately # 2) Patch incorrect EPCs with ease # 3) Indicate already installed measures # ------------ construction_age_band ------------ # Map to EPC age bands # def construction_date_to_band(year): # if pd.isnull(year): # return None # # Get the year from the date which is numpy datetime format # for label, ranges in construction_age_bounds_map.items(): # if ranges["l"] <= year <= ranges["u"]: # return label # raise NotImplementedError("year out of bounds") # # # data["construction_age_band"] = pd.to_datetime(data["Construction Date"]).dt.year.apply(construction_date_to_band) data["construction_age_band"] = data["Construction Years"].map(age_band_map) check_nulls(data, "Construction Years", "construction_age_band") # ------------ property_type ------------ data["property_type"] = data["Type"].map(property_map) assert pd.isnull(data["property_type"]).sum() == 0, "Some property types were not mapped" # ------------ built_form ------------ data["built_form"] = data["Attachment"].map(built_form_map) assert pd.isnull(data["built_form"]).sum() == 0, "Some built forms were not mapped" # ------------ Wall Construction ------------ data["walls_combined"] = data["Wall Construction"] + "+" + data["Wall Insulation"].fillna("Unknown Insulation") data["Wall Insulation"].value_counts() data["Wall Construction"].value_counts() as_built_map = { "Cavity": {"insulated_age_bands": [], "partial_insulated_age_bands": []}, "Solid Brick": {"insulated_age_bands": [], "partial_insulated_age_bands": []}, "System": {"insulated_age_bands": [], "partial_insulated_age_bands": []}, "Timber Frame": {"insulated_age_bands": [], "partial_insulated_age_bands": []}, "Sandstone": {"insulated_age_bands": [], "partial_insulated_age_bands": []}, "Granite": {"insulated_age_bands": [], "partial_insulated_age_bands": []}, "Cob": {"insulated_age_bands": [], "partial_insulated_age_bands": []}, } def map_wall_construction(wall_constuction, wall_insulation, construction_age_band): if wall_insulation == "AsBuilt": # Deduce based on wall construction and age band bands = as_built_map.get(wall_constuction, None) if bands is None: raise NotImplementedError(f"Wall construction {wall_constuction} not in as built map") # We check if the age band is in insulated or partial insulated, and if neither, we assume uninsulated # Variables we want to map # 'Org Ref', 'Address 1', 'Address 2', 'Address 3', 'Postcode', 'Type', # 'Attachment', 'Construction Years', 'Wall Construction', # 'Wall Insulation', 'Roof Construction', 'Roof Insulation', # 'Floor Construction', 'Floor Insulation', 'Glazing', 'Heating', # 'Boiler Efficiency', 'Main Fuel', 'Controls Adequacy', 'UPRN', # 'Total Floor Area (m2)'