"""Decent Homes Standard pilot assessment for Waltham Forest asset data.

Reads per-property JSON exports (``property_info`` + ``elements``) and derives
pass / fail / no_data results for Decent Homes criteria A, C and D, plus a
per-property summary.  Criterion B is not implemented yet.

Run as a script; importing the module performs no I/O.
"""
# Provenance: reconstructed from patch d3f941349aa08bbe46f1f28f7e2440dc3894fe24
# ("implementing decent homes wf", Khalim Conn-Kowlessar, 2025-09-22), file
# etl/customers/waltham_forest/decent_homes_pilot.py.  The same patch also made
# two one-line changes elsewhere that are not part of this module:
#   * backend/engine/engine.py (model_engine): starting_sap is now passed as
#     int(p.data["current-energy-efficiency"]).
#   * etl/bill_savings/KwhData.py (KwhData bool_map): trailing comma added
#     after the "N": "N" entry.
import json
import os
from datetime import datetime
from pprint import pprint

import pandas as pd


# Read in static json, which is transformed by Jun-te's script.
# The location can be overridden with DECENT_HOMES_PILOT_DIR; the default is
# the path the script was originally written against.
folder = os.environ.get(
    "DECENT_HOMES_PILOT_DIR",
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Waltham Forest/Decent Homes Pilot",
)
# NOTE: flats currently raise NotImplementedError (see _property_type), so the
# run stops at the first flat file it meets.
filenames = ["flat 1.json", "house 1.json"]
ASSET_WORKBOOK = "LBWF - Example Asset Data September 2025.xlsx"

# Standardised variables which will form the enums in the db
HHSRS_VARIABLES = [
    "damp_and_mould_growth",
    "excess_cold",
    "excess_heat",
    "asbestos_and_mm_fibres",
    "biocides",
    "carbon_monoxide_and_fuel_combustion_products",
    "lead",
    "radiation",
    "uncombusted_fuel_gas",
    "volatile_organic_compounds",
    "crowding_and_space",
    "entry_by_intruders",
    "lighting",
    "noise",
    "domestic_hygiene_pests_and_refuse",
    "food_safety",
    "personal_hygiene_sanitation_and_drainage",
    "water_supply",
    "falls_associated_with_baths",
    "falls_on_level_surfaces",
    "falls_on_stairs_and_steps",
    "falls_between_levels",
    "electrical_hazards",
    "fire",
    "flames_hot_surfaces_and_materials",
    "collision_and_entrapment",
    "explosions",
    "ergonomics",
    "structural_collapse_and_falling_elements",
]

CRITERION_B_VARIABLES = [
    "external_walls_structure", "lintels", "brickwork_spalling", "wall_finish", "roof_structure", "roof_finish",
    "chimneys", "windows", "external_doors", "kitchens", "bathrooms", "central_heating_boiler",
    "central_heating_distribution_system", "heating_other", "electrical_systems",
]

CRITERION_C_VARIABLES = [
    "kitchen_facilities",
]

# Criterion C explicit age limits (different from component lifespans used elsewhere)
CRITERION_C_AGE_LIMITS = {
    "kitchen_years_max": 20,
    "bathroom_years_max": 30,
}

# Field labels as they appear as keys of data["elements"] in the JSON
LABEL_KITCHEN = "Adequacy of Kitchen and Type in Property"
LABEL_BATHROOM = "Adequacy of Bathroom Location in Property"
LABEL_NOISE = "Adequacy of Noise Insulation in Property"
LABEL_COMMON_CIRC = "Circulation Space in Common Area"  # flats only
LABEL_HEATING = "Heating Improvement Required in Property"
LABEL_HEATING_DIST = "Heating Distribution System in Property"
LABEL_LOFT = "Size in mm of Loft Insulation Thickness in Property"
LABEL_WALL = "Wall Insulation Improvement in External Area"

STANDARD_HHSRS_MAPPING = {"pass": "TYPRISK", "fail": "MODRISK", "no_data": "TOBEASSESS"}

# Criterion A - mapping of HHSRS variables to Waltham forest element codes.
# A variable may be backed by several element codes; all of them are checked.
HHSRS_MAPPING = {
    "damp_and_mould_growth": {"HHSRSDAMP": STANDARD_HHSRS_MAPPING},
    "excess_cold": {"HHSRSCOLD": STANDARD_HHSRS_MAPPING},
    "excess_heat": {"HHSRSHEAT": STANDARD_HHSRS_MAPPING},
    "asbestos_and_mm_fibres": {"HHSRSASB": STANDARD_HHSRS_MAPPING},
    "biocides": {"HHSRSBIOC": STANDARD_HHSRS_MAPPING},
    "carbon_monoxide_and_fuel_combustion_products": {
        "HHSRSCO": STANDARD_HHSRS_MAPPING,
        "HHSRSSO2": STANDARD_HHSRS_MAPPING,
        "HHSRSNO2": STANDARD_HHSRS_MAPPING,
    },
    "lead": {"HHSRSLEAD": STANDARD_HHSRS_MAPPING},
    "radiation": {"HHSRSRADIA": STANDARD_HHSRS_MAPPING},
    "uncombusted_fuel_gas": {"HHSRSFUEL": STANDARD_HHSRS_MAPPING},
    "volatile_organic_compounds": {"HHSRSORGAN": STANDARD_HHSRS_MAPPING},
    "crowding_and_space": {"HHSRSCROWD": STANDARD_HHSRS_MAPPING},
    "entry_by_intruders": {"HHSRSENTRY": STANDARD_HHSRS_MAPPING},
    "lighting": {"HHSRSLIGHT": STANDARD_HHSRS_MAPPING},
    "noise": {"HHSRSNOISE": STANDARD_HHSRS_MAPPING},
    "domestic_hygiene_pests_and_refuse": {"HHSRSDOMES": STANDARD_HHSRS_MAPPING},
    "food_safety": {"HHSRSFOOD": STANDARD_HHSRS_MAPPING},
    "personal_hygiene_sanitation_and_drainage": {"HHSRSPERS": STANDARD_HHSRS_MAPPING},
    "water_supply": {"HHSRSWATER": STANDARD_HHSRS_MAPPING},
    "falls_associated_with_baths": {"HHSRSFBATH": STANDARD_HHSRS_MAPPING},
    "falls_on_level_surfaces": {"HHSRSFLEVE": STANDARD_HHSRS_MAPPING},
    "falls_on_stairs_and_steps": {"HHSRSFSTAI": STANDARD_HHSRS_MAPPING},
    "falls_between_levels": {"HHSRSFBETW": STANDARD_HHSRS_MAPPING},
    "electrical_hazards": {"HHSRSELEC": STANDARD_HHSRS_MAPPING},
    "fire": {"HHSRSFIRE": STANDARD_HHSRS_MAPPING},
    "flames_hot_surfaces_and_materials": {"HHSRSFLAME": STANDARD_HHSRS_MAPPING},
    "collision_and_entrapment": {"HHSRSENTRP": STANDARD_HHSRS_MAPPING, "HHSRSCLOW": STANDARD_HHSRS_MAPPING},
    "explosions": {"HHSRSEXPLO": STANDARD_HHSRS_MAPPING},
    "ergonomics": {"HHSRSPOSI": STANDARD_HHSRS_MAPPING},
    "structural_collapse_and_falling_elements": {"HHSRSSTRUC": STANDARD_HHSRS_MAPPING},
}

# Criterion B
CRITERION_B_MAPPING = {
    # TODO: Needs to be sorted!!! Sketch for the missing entry:
    # "external_walls_structure": {
    #     "EXTWALLSTR": {"pass": "GOOD", "fail": "POOR", "no_data": "Unknown if Structural Defects in External Area"}
    # }
    "lintels": {
        "EXTLINTELS": {"pass": "GOOD", "fail": "POOR", "no_data": "Unknown Condition of Lintels"}
    }
}

# Criterion C (not populated yet, e.g. "kitchen_less_than_20_years_old")
CRITERION_C_MAPPING = {}

COMPONENT_LIFESPANS = {
    "kitchen": {"house": 30, "flat_below_6_storeys": 30, "flat_above_6_storeys": 30},
    "bathroom": {"house": 50, "flat_below_6_storeys": 50, "flat_above_6_storeys": 50},
}

# Substring markers for adequacy_result_by_text.  Negatives are tested first
# because several of them contain a positive marker ("inadequate" contains
# "adequate", "inappropriate" contains "appropriate", ...).
_NEGATIVE_MARKERS = (
    "inadequate", "not adequate", "unsatisfactory", "problems",
    "inappropriate", "not appropriate", "substandard", "sub-standard",
)
_POSITIVE_MARKERS = ("adequate", "standard", "appropriate")

# Variables that feed the overall Criterion C / D verdicts.
_CRITERION_C_RESULT_VARS = (
    "kitchen_less_than_20_years_old",
    "kitchen_adequate_space_and_layout",
    "bathroom_less_than_30_years_old",
    "bathroom_wc_appropriately_located",
    "adequate_external_noise_insulation",
)
_CRITERION_D_RESULT_VARS = (
    "efficient_heating_system_type",
    "efficient_heating_distribution",
    "loft_insulation_sufficient",
    "wall_insulation_sufficient",
)

# Database design
# creation_date, uprn, variable, result, hhsrs_score (optional, numeric), hhsrs_rank (A-J), install_date (for
# components which expire, e.g. kitchen)
decent_homes = []
# One summary per property:
# {"uprn": ..., "creation_date": iso date str, "criterion_a": str, "criterion_b": None, "criterion_c": str,
#  "criterion_d": str, "decent_homes": bool}
property_decent_homes = []


def years_between(d1, d2):
    """Return the number of completed years from ``d2`` up to ``d1``.

    Both arguments need ``year``, ``month`` and ``day`` attributes.  One year
    is subtracted when ``d1`` falls before the anniversary of ``d2``.
    """
    return (d1.year - d2.year) - ((d1.month, d1.day) < (d2.month, d2.day))


def get_element(elements, label):
    """Return the element dict stored under ``label``, or None if absent."""
    return elements.get(label)


def adequacy_result_by_text(attr_desc: str):
    """Classify a free-text attribute description as pass / fail / no_data.

    Returns "no_data" for empty or non-string input, "fail" when the text
    contains any negative marker, "pass" when it contains a positive marker,
    and "no_data" otherwise.  Matching is case-insensitive substring matching.
    """
    if not attr_desc or not isinstance(attr_desc, str):
        return "no_data"
    text = attr_desc.strip().lower()
    if any(marker in text for marker in _NEGATIVE_MARKERS):
        return "fail"
    if any(marker in text for marker in _POSITIVE_MARKERS):
        return "pass"
    return "no_data"


def append_result(decent_homes, variable, result, install_date=None, uprn=None):
    """Append one result record to ``decent_homes`` (mutated in place).

    ``uprn`` ties the record to a property; it defaults to None so existing
    positional callers keep working.
    """
    decent_homes.append({
        "uprn": uprn,
        "variable": variable,
        "result": result,
        "hhsrs_rank": None,
        "hhsrs_score": None,
        "install_date": install_date,
    })


def combine_results(results):
    """Fold individual results into one: any fail -> fail, all pass -> pass.

    An empty collection yields "no_data" rather than a vacuous "pass".
    """
    results = list(results)
    if "fail" in results:
        return "fail"
    if results and all(r == "pass" for r in results):
        return "pass"
    return "no_data"


def evaluate_hhsrs_variable(elements, code_mapping):
    """Return the result for one HHSRS variable.

    ``code_mapping`` maps each element code backing the variable to its
    ``{"pass": ..., "fail": ..., "no_data": ...}`` attribute codes.  Every
    element whose "ELEMENT CODE" is in the mapping is inspected.

    Raises:
        ValueError: if a matching element carries an unmapped attribute code.
    """
    outcomes = []
    for element in elements.values():
        attribute_map = code_mapping.get(element["ELEMENT CODE"])
        if attribute_map is None:
            continue
        attribute_code = element["ATTRIBUTE CODE"]
        outcome_by_code = {code: outcome for outcome, code in attribute_map.items()}
        if attribute_code not in outcome_by_code:
            raise ValueError("Unknown attribute code")
        outcomes.append(outcome_by_code[attribute_code])
    return combine_results(outcomes)


def _property_type(property_info):
    """Map the "PROP TYPE" code to a property type; only houses are supported."""
    prop_type = property_info["PROP TYPE"]
    if prop_type == "HOU":
        return "house"
    if prop_type == "FLA":
        raise NotImplementedError("Implement distinction between below and above 6 storeys")
    raise NotImplementedError("Unknown property type")


def _component_age_result(element, max_age_years, today):
    """Return ``(result, next_due)`` for a component with an "INSTALL DATE".

    ``result`` is "pass" when the age in whole years is <= ``max_age_years``.
    A missing element or a missing / unparseable date gives ("no_data", None).
    """
    if not element:
        return "no_data", None
    try:
        installed = pd.to_datetime(element.get("INSTALL DATE"))
        if pd.isna(installed):
            return "no_data", None
        age_years = years_between(today, installed)
        next_due = installed + pd.DateOffset(years=max_age_years)
    except (ValueError, TypeError, AttributeError):
        return "no_data", None
    return ("pass" if age_years <= max_age_years else "fail"), next_due


def _described_adequacy(element):
    """Adequacy result from an element's description; "no_data" if absent."""
    if not element:
        return "no_data"
    return adequacy_result_by_text(element.get("ATTRIBUTE CODE DESCRIPTION", ""))


def _assess_criterion_c(elements, records, uprn, is_flat, today):
    """Append the Criterion C component results to ``records``."""
    kitchen = get_element(elements, LABEL_KITCHEN)
    bath = get_element(elements, LABEL_BATHROOM)

    # NOTE(review): the value stored under "install_date" is the next renewal
    # date (install date + age limit), as in the original script.
    kitchen_age, kitchen_due = _component_age_result(
        kitchen, CRITERION_C_AGE_LIMITS["kitchen_years_max"], today)
    append_result(records, "kitchen_less_than_20_years_old", kitchen_age, kitchen_due, uprn=uprn)
    append_result(records, "kitchen_adequate_space_and_layout", _described_adequacy(kitchen), uprn=uprn)

    bath_age, bath_due = _component_age_result(
        bath, CRITERION_C_AGE_LIMITS["bathroom_years_max"], today)
    append_result(records, "bathroom_less_than_30_years_old", bath_age, bath_due, uprn=uprn)

    # Known adequate codes pass outright; anything else falls back to the text.
    if bath and bath.get("ATTRIBUTE CODE", "") in {"STDBTHADQ", "ADPBTHADQ"}:
        bathroom_location = "pass"
    else:
        bathroom_location = _described_adequacy(bath)
    append_result(records, "bathroom_wc_appropriately_located", bathroom_location, uprn=uprn)

    append_result(records, "adequate_external_noise_insulation",
                  _described_adequacy(get_element(elements, LABEL_NOISE)), uprn=uprn)

    if is_flat:
        # NOTE(review): flagged "Pls check this" by the author and never run
        # against real flat data; flats are rejected in _property_type first.
        append_result(records, "adequate_common_entrance_areas",
                      _described_adequacy(get_element(elements, LABEL_COMMON_CIRC)), uprn=uprn)


def _assess_criterion_d(elements, records, uprn):
    """Append the Criterion D results (heating and insulation) to ``records``.

    Raises:
        NotImplementedError: if a required element is missing or carries a
            code that has not been observed yet.
    """
    heating = get_element(elements, LABEL_HEATING)
    if not heating:
        raise NotImplementedError("Heating element missing in dataset")
    heat_type_code = heating.get("ATTRIBUTE CODE", "")
    if heat_type_code == "NOTAPPLIC":
        heating_type_result = "pass"
    elif heat_type_code == "WETINSFULL":
        heating_type_result = "fail"
    else:
        raise NotImplementedError("No other observed codes yet")
    append_result(records, "efficient_heating_system_type", heating_type_result, uprn=uprn)

    heating_dist = get_element(elements, LABEL_HEATING_DIST)
    if not heating_dist:
        raise NotImplementedError("Heating distribution element missing in dataset")
    append_result(records, "efficient_heating_distribution", _described_adequacy(heating_dist), uprn=uprn)

    loft = get_element(elements, LABEL_LOFT)
    if not loft:
        raise NotImplementedError("Loft insulation data missing - pls check")
    loft_code = loft.get("ATTRIBUTE CODE", "")
    # NOTE(review): any numeric thickness passes; the original comment mentions
    # an example threshold of >= 270mm that is not enforced.
    if loft_code == "LOFTINSRQD":
        loft_result = "fail"
    elif str(loft_code).isnumeric():
        loft_result = "pass"
    else:
        raise NotImplementedError("Unknown loft insulation code - pls check")
    append_result(records, "loft_insulation_sufficient", loft_result, uprn=uprn)

    wall = get_element(elements, LABEL_WALL)
    if not wall:
        raise NotImplementedError("Wall insulation data missing - pls check")
    append_result(records, "wall_insulation_sufficient", _described_adequacy(wall), uprn=uprn)


def assess_property(data, today=None):
    """Assess one property and return ``(records, summary)``.

    Args:
        data: dict with "property_info" (needs "PROP TYPE"; "UPRN" optional)
            and "elements" (label -> element dict).
        today: reference date for age checks; defaults to today's date.

    Returns:
        ``records``: the per-variable result dicts for this property only.
        ``summary``: the per-property criterion results and overall verdict.

    Raises:
        NotImplementedError: for flats, unknown property types, or missing /
            unobserved Criterion D data.
        ValueError: for an unmapped HHSRS attribute code.
    """
    property_info = data["property_info"]
    elements = data["elements"]
    uprn = property_info.get("UPRN")  # update field name if needed
    _property_type(property_info)  # rejects unsupported property types
    is_flat = property_info["PROP TYPE"] == "FLA"
    if today is None:
        today = pd.Timestamp.today().normalize()

    records = []

    # Criterion A
    for hhsrs_variable, code_mapping in HHSRS_MAPPING.items():
        append_result(records, hhsrs_variable,
                      evaluate_hhsrs_variable(elements, code_mapping), uprn=uprn)

    # Criterion B: not yet implemented

    _assess_criterion_c(elements, records, uprn, is_flat, today)
    _assess_criterion_d(elements, records, uprn)

    by_variable = {r["variable"]: r["result"] for r in records}

    criterion_a_result = combine_results(by_variable[v] for v in HHSRS_MAPPING)

    criterion_c_vars = list(_CRITERION_C_RESULT_VARS)
    if is_flat:
        criterion_c_vars.append("adequate_common_entrance_areas")
    count_fails = sum(1 for v in criterion_c_vars if by_variable[v] == "fail")
    # optionally count no_data too if you want strict interpretation
    criterion_c_result = "fail" if count_fails >= 3 else "pass"

    criterion_d_result = combine_results(by_variable[v] for v in _CRITERION_D_RESULT_VARS)

    summary = {
        "uprn": uprn,
        "creation_date": datetime.now().date().isoformat(),
        "criterion_a": criterion_a_result,
        "criterion_b": None,  # not yet implemented
        "criterion_c": criterion_c_result,
        "criterion_d": criterion_d_result,
        # Every implemented criterion must pass; B is excluded until it exists.
        "decent_homes": (
            criterion_a_result == "pass"
            and criterion_c_result == "pass"
            and criterion_d_result == "pass"
        ),
    }
    return records, summary


def _attribute_codes(asset_data, element_code):
    """Distinct attribute code / description pairs for one element code."""
    rows = asset_data[asset_data["ELEMENT CODE"] == element_code]
    return rows[["ATTRIBUTE CODE", "ATTRIBUTE CODE DESCRIPTION"]].drop_duplicates()


def main():
    """Load the pilot inputs and fill ``decent_homes`` / ``property_decent_homes``."""
    workbook = os.path.join(folder, ASSET_WORKBOOK)
    houses_waltham_forest_data = pd.read_excel(workbook, sheet_name="Houses Asset Data")
    flats_waltham_forest_data = pd.read_excel(workbook, sheet_name="CHINGFORD ROAD 236-254 Asset Bl")

    # Exploration output: which attribute codes occur for these elements.
    print(_attribute_codes(houses_waltham_forest_data, "INTHTIMP"))
    print(_attribute_codes(flats_waltham_forest_data, "INTBTHADEQ"))

    for fn in filenames:
        with open(os.path.join(folder, fn), "rb") as f:
            data = json.load(f)
        pprint(data["elements"])

        records, summary = assess_property(data)
        decent_homes.extend(records)
        property_decent_homes.append(summary)


if __name__ == "__main__":
    main()