From 335164eaf1b558e45a3cf377ed73f756716e05e6 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 18 Sep 2025 16:23:18 +0100 Subject: [PATCH 01/23] multiple remote assessments --- asset_list/AssetList.py | 21 ++++-- asset_list/app.py | 88 ++++++++++++++++++++++++++ asset_list/mappings/built_form.py | 4 +- asset_list/mappings/heating_systems.py | 7 +- asset_list/mappings/roof.py | 30 +++++++++ asset_list/mappings/walls.py | 12 +++- backend/apis/GoogleSolarApi.py | 1 - 7 files changed, 154 insertions(+), 9 deletions(-) diff --git a/asset_list/AssetList.py b/asset_list/AssetList.py index 9569afe8..dce929ae 100644 --- a/asset_list/AssetList.py +++ b/asset_list/AssetList.py @@ -1783,9 +1783,16 @@ class AssetList: ) ) - not_a_flat = ( - self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] != "flat" - ) + # Determine if the client gave us property type in the first place + if all(self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] == "unknown"): + # Use EPC + not_a_flat = ( + self.standardised_asset_list[self.EPC_API_DATA_NAMES["property-type"]] != "Flat" + ) + else: + not_a_flat = ( + self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] != "flat" + ) solar_roof_meets_criteria = ( self.standardised_asset_list["solar_epc_roof_insulated"] | @@ -3452,7 +3459,13 @@ class AssetList: raise ValueError("No installer column found in master data") measure_mix_col = "MEASURE COMBO" - town_colname = "TOWN" if "TOWN" in master_data.columns else 'Town/Area' + + if "TOWN" in master_data.columns: + town_colname = "TOWN" + elif 'Town/Area' in master_data.columns: + town_colname = 'Town/Area' + else: + town_colname = "Town/City" logger.info("Matching master data to asset list") matched = [] diff --git a/asset_list/app.py b/asset_list/app.py index 01c31f0f..833050fb 100644 --- a/asset_list/app.py +++ b/asset_list/app.py @@ -59,6 +59,74 @@ def app(): Property UPRN """ + # CDS - Sept 2025 + data_folder = 
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/CDS/September 2025 Programme" + data_filename = "Founder Estates CDS.xlsx" + sheet_name = "Combined List" + postcode_column = 'Postcode' + address1_column = None # Is only patchily populated so we create it + address1_method = 'house_number_extraction' + fulladdress_column = "Address" + address_cols_to_concat = [] + missing_postcodes_method = None + landlord_year_built = None + landlord_os_uprn = None + landlord_property_type = "Property Type" + landlord_built_form = None + landlord_wall_construction = None + landlord_roof_construction = None + landlord_heating_system = "Heating Type" + landlord_existing_pv = None + landlord_property_id = "(Do Not Modify) Property" + landlord_sap = None + outcomes_filename = None + outcomes_sheetname = None + outcomes_postcode = None + outcomes_houseno = None + outcomes_id = None + outcomes_address = None + master_filepaths = [] + master_id_colnames = [] + master_to_asset_list_filepath = None + phase = False + ecosurv_landlords = None + asset_list_header = 0 + landlord_block_reference = None + + # Project from Nick + data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/sfr/Sep2025 Project" + data_filename = "AL Test.xlsx" + sheet_name = "Sheet1" + postcode_column = 'postcode' + address1_column = None + address1_method = 'house_number_extraction' + fulladdress_column = "address" + address_cols_to_concat = [] + missing_postcodes_method = None + landlord_year_built = None + landlord_os_uprn = None + landlord_property_type = None + landlord_built_form = None + landlord_wall_construction = None + landlord_roof_construction = None + landlord_heating_system = None + landlord_existing_pv = None + landlord_property_id = "row_id" + landlord_sap = None + outcomes_filename = None + outcomes_sheetname = None + outcomes_postcode = None + outcomes_houseno = None + outcomes_id = None + outcomes_address = None + master_filepaths = [] + master_id_colnames = [] + 
master_to_asset_list_filepath = None + phase = False + ecosurv_landlords = None + asset_list_header = 0 + landlord_block_reference = None + # Lambeth data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Lambeth" data_filename = "LAMBETH Asset List ( Incomplete).xlsx" @@ -1307,6 +1375,26 @@ def app(): filename = os.path.join(data_folder, ".".join(data_filename.split(".")[:-1])) + " - Standardised.xlsx" # Store the data in two tabs. One for the asset list with the EPC data and the second with the flat data + # Determine inspections priority + # solar_jobs = asset_list.standardised_asset_list[~pd.isnull(asset_list.standardised_asset_list["solar_reason"])][ + # "domna_postcode"].unique() + # asset_list.standardised_asset_list["in_solar_area"] = asset_list.standardised_asset_list["domna_postcode"].isin( + # solar_jobs + # ) + # # Same for cav + # cavity_jobs = asset_list.standardised_asset_list[ + # ~pd.isnull(asset_list.standardised_asset_list["cavity_reason"]) + # ]["domna_postcode"].unique() + # asset_list.standardised_asset_list["in_cavity_area"] = asset_list.standardised_asset_list["domna_postcode"].isin( + # cavity_jobs + # ) + # # We prioritise properties that are in solar areas and cavity areas + # import numpy as np + # asset_list.standardised_asset_list["inspection_priority"] = np.where( + # asset_list.standardised_asset_list["in_solar_area"] | asset_list.standardised_asset_list["in_cavity_area"], + # 1, 2 + # ) + with pd.ExcelWriter(filename) as writer: asset_list.standardised_asset_list.to_excel(writer, sheet_name="Standardised Asset List", index=False) if asset_list.block_analysis_df is not None: diff --git a/asset_list/mappings/built_form.py b/asset_list/mappings/built_form.py index 0dc51129..bdd82883 100644 --- a/asset_list/mappings/built_form.py +++ b/asset_list/mappings/built_form.py @@ -438,6 +438,6 @@ BUILT_FORM_MAPPINGS = { 'Maisonette - Mid Terrace': 'mid-terrace', 'Chalet - Wheelchair': 'unknown', 'Studio Flat': 'unknown', - 
'Bungalow - Attached': 'semi-detached' - + 'Bungalow - Attached': 'semi-detached', + 'ND': 'unknown' } diff --git a/asset_list/mappings/heating_systems.py b/asset_list/mappings/heating_systems.py index 424b9b46..4ab8ca72 100644 --- a/asset_list/mappings/heating_systems.py +++ b/asset_list/mappings/heating_systems.py @@ -473,5 +473,10 @@ HEATING_MAPPINGS = { 'Boiler and radiators, oil': 'oil boiler', 'Boiler and radiators, electric': 'electric boiler', 'No system present: electric heaters assumed': 'electric radiators', - 'Boiler and radiators, anthracite': 'solid fuel' + 'Boiler and radiators, anthracite': 'solid fuel', + + 'Heat networks Heat networks (mains gas)': 'communal heating', + 'ND Oil': 'oil fuel', + 'Boiler Biofuel': 'boiler - other fuel' + } diff --git a/asset_list/mappings/roof.py b/asset_list/mappings/roof.py index 60f0473c..8ac926c0 100644 --- a/asset_list/mappings/roof.py +++ b/asset_list/mappings/roof.py @@ -246,4 +246,34 @@ ROOF_CONSTRUCTION_MAPPINGS = { 'Pitched, 150 mm loft insulation': 'pitched insulated', 'Flat, limited insulation (assumed)': 'flat uninsulated', + 'Pitched (no access to loft) 350mm': 'pitched insulated', + 'Pitched (no access to loft) 200mm': 'pitched insulated', + 'Pitched (access to loft) 200mm': 'pitched insulated', + 'Pitched (no access to loft) 250mm': 'pitched insulated', + 'Pitched (access to loft) 100mm': 'pitched insulated', + 'Another dwelling above ND (inferred)': 'another dwelling above', + 'Pitched (no access to loft) N/A': 'pitched no access to loft', + 'Pitched (no access to loft) ND (inferred)': 'pitched no access to loft', + 'Pitched (no access to loft) 150mm': 'pitched insulated', + 'Pitched (access to loft) 400mm+': 'pitched insulated', + 'Pitched (no access to loft) 300mm': 'pitched insulated', + 'Pitched (access to loft) <25mm': 'pitched less than 100mm insulation', + 'Pitched (access to loft) None': 'pitched less than 100mm insulation', + 'Pitched (access to loft) 300mm': 'pitched insulated', + 'Pitched 
(access to loft) 50mm': 'pitched less than 100mm insulation', + 'Pitched (access to loft) 270mm': 'pitched insulated', + 'Pitched (access to loft) Non-joist': 'pitched access to loft', + 'Pitched (access to loft) 250mm': 'pitched insulated', + 'Another dwelling above N/A': 'another dwelling above', + 'Pitched (access to loft) 150mm': 'pitched insulated', + 'Pitched (access to loft) ND (inferred)': 'pitched access to loft', + 'Pitched (access to loft) 350mm': 'pitched insulated', + 'Pitched (access to loft) NR': 'pitched unknown insulation', + 'Pitched (access to loft) 75mm': 'pitched less than 100mm insulation', + 'Pitched (access to loft) N/A': 'pitched access to loft', + 'ND (inferred) 250mm': 'unknown insulated', + 'Pitched (vaulted ceiling) Non-joist': 'pitched unknown insulation', + 'ND (inferred) ND (inferred)': 'unknown', + 'Flat Non-joist': 'flat insulated', + 'Same dwelling above N/A': 'another dwelling above' } diff --git a/asset_list/mappings/walls.py b/asset_list/mappings/walls.py index 14e4565c..73db586e 100644 --- a/asset_list/mappings/walls.py +++ b/asset_list/mappings/walls.py @@ -342,5 +342,15 @@ WALL_CONSTRUCTION_MAPPINGS = { 'Solid brick, as built, partial insulation (assumed)': 'insulated solid brick', 'Sandstone, as built, no insulation (assumed)': 'uninsulated sandstone or limestone', 'System built, as built, partial insulation (assumed)': 'system built unknown insulation', - 'Timber frame, with external insulation': 'insulated timber frame' + 'Timber frame, with external insulation': 'insulated timber frame', + + 'Cob As-built': 'cob', + 'System built Unknown insulation': 'system built unknown insulation', + 'Solid brick Unknown insulation': 'solid brick unknown insulation', + 'Timber frame Internal': 'insulated timber frame', + 'System built External': 'insulated system built', + 'Stone As-built': 'uninsulated sandstone or limestone', + 'System built As-built': "uninsulated system built", + 'System built Internal': 'insulated system built', 
+ } diff --git a/backend/apis/GoogleSolarApi.py b/backend/apis/GoogleSolarApi.py index 043f41a9..532afec0 100644 --- a/backend/apis/GoogleSolarApi.py +++ b/backend/apis/GoogleSolarApi.py @@ -332,7 +332,6 @@ class GoogleSolarApi: ) if solar_product is None: - logger.info("No suitable solar product found for the configuration with %d panels.", total_panels) continue total_cost = Costs.solar_pv( From d3f941349aa08bbe46f1f28f7e2440dc3894fe24 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 22 Sep 2025 10:50:05 +0100 Subject: [PATCH 02/23] implementing decent homes wf --- backend/engine/engine.py | 2 +- etl/bill_savings/KwhData.py | 2 +- .../waltham_forest/decent_homes_pilot.py | 442 ++++++++++++++++++ 3 files changed, 444 insertions(+), 2 deletions(-) create mode 100644 etl/customers/waltham_forest/decent_homes_pilot.py diff --git a/backend/engine/engine.py b/backend/engine/engine.py index 2e1ede79..cc17222f 100644 --- a/backend/engine/engine.py +++ b/backend/engine/engine.py @@ -900,7 +900,7 @@ async def model_engine(body: PlanTriggerRequest): r["uplift_project_score"] ) = funding.get_innovation_uplift( measure=r, - starting_sap=p.data["current-energy-efficiency"], + starting_sap=int(p.data["current-energy-efficiency"]), floor_area=p.floor_area, is_cavity=p.walls["is_cavity_wall"], current_wall_uvalue=current_wall_u_value, diff --git a/etl/bill_savings/KwhData.py b/etl/bill_savings/KwhData.py index 24ce9f2c..3291e909 100644 --- a/etl/bill_savings/KwhData.py +++ b/etl/bill_savings/KwhData.py @@ -310,7 +310,7 @@ class KwhData: False: "N", None: "N", "Y": "Y", - "N": "N" + "N": "N", } for v in bools_to_remap: epc[v] = bool_map[epc[v]] diff --git a/etl/customers/waltham_forest/decent_homes_pilot.py b/etl/customers/waltham_forest/decent_homes_pilot.py new file mode 100644 index 00000000..78460f5a --- /dev/null +++ b/etl/customers/waltham_forest/decent_homes_pilot.py @@ -0,0 +1,442 @@ +import json +import os + +import pandas as pd + +from datetime import 
datetime + + +def years_between(d1, d2): + # precise year difference (accounts for months/days) + return (d1.year - d2.year) - ((d1.month, d1.day) < (d2.month, d2.day)) + + +def get_element(elements, label): + """Safely get an element dict by display label (your JSON keys).""" + return elements.get(label) + + +def adequacy_result_by_text(attr_desc: str): + """ + Generic adequacy parser. + Pass if description clearly says 'Adequate' and not 'Inadequate'. + Fail if it says 'Inadequate' (or equivalent). + Unknown -> 'no_data' + """ + if not attr_desc or not isinstance(attr_desc, str): + return "no_data" + text = attr_desc.strip().lower() + # Common patterns + if "inadequate" in text or "unsatisfactory" in text or "problems" in text: + return "fail" + if "adequate" in text or "standard" in text or "appropriate" in text: + return "pass" + return "no_data" + + +def append_result(decent_homes, variable, result, install_date=None): + decent_homes.append({ + "variable": variable, + "result": result, + "hhsrs_rank": None, + "hhsrs_score": None, + "install_date": install_date + }) + + +# Read in static json, which is transformed by Jun-te's script +folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Waltham Forest/Decent Homes Pilot" +filenames = ["flat 1.json", "house 1.json"] + +houses_waltham_forest_data = pd.read_excel( + os.path.join(folder, "LBWF - Example Asset Data September 2025.xlsx"), + sheet_name="Houses Asset Data" +) +flats_waltham_forest_data = pd.read_excel( + os.path.join(folder, "LBWF - Example Asset Data September 2025.xlsx"), + sheet_name="CHINGFORD ROAD 236-254 Asset Bl" +) + +# Standardised variables which will form the enums in the db +HHSRS_VARIABLES = [ + "damp_and_mould_growth", + "excess_cold", + "excess_heat", + "asbestos_and_mm_fibres", + "biocides", + "carbon_monoxide_and_fuel_combustion_products", + "lead", + "radiation", + "uncombusted_fuel_gas", + "volatile_organic_compounds", + "crowding_and_space", + "entry_by_intruders", + 
"lighting", + "noise", + "domestic_hygiene_pests_and_refuse", + "food_safety", + "personal_hygiene_sanitation_and_drainage", + "water_supply", + "falls_associated_with_baths", + "falls_on_level_surfaces", + "falls_on_stairs_and_steps", + "falls_between_levels", + "electrical_hazards", + "fire", + "flames_hot_surfaces_and_materials", + "collision_and_entrapment", + "explosions", + "ergonomics", + "structural_collapse_and_falling_elements" +] + +CRITERION_B_VARIABLES = [ + "external_walls_structure", "lintels", "brickwork_spalling", "wall_finish", "roof_structure", "roof_finish", + "chimneys", "windows", "external_doors", "kitchens", "bathrooms", "central_heating_boiler", + "central_heating_distribution_system", "heating_other", "electrical_systems", +] + +CRITERION_C_VARIABLES = [ + "kitchen_facilities", +] + +# Criterion C explicit age limits (different from component lifespans used elsewhere) +CRITERION_C_AGE_LIMITS = { + "kitchen_years_max": 20, + "bathroom_years_max": 30, +} + +# Field labels as they appear in your JSON (based on your code) +LABEL_KITCHEN = "Adequacy of Kitchen and Type in Property" +LABEL_BATHROOM = "Adequacy of Bathroom Location in Property" +LABEL_NOISE = "Adequacy of Noise Insulation in Property" +LABEL_COMMON_CIRC = "Circulation Space in Common Area" # flats only + +STANDARD_HHSRS_MAPPING = {"pass": "TYPRISK", "fail": "MODRISK", "no_data": "TOBEASSESS"} + +# Criterion A - mapping of HHSRS variables to Waltham forest element codes +HHSRS_MAPPING = { + "damp_and_mould_growth": {"HHSRSDAMP": STANDARD_HHSRS_MAPPING}, + "excess_cold": {"HHSRSCOLD": STANDARD_HHSRS_MAPPING}, + "excess_heat": {"HHSRSHEAT": STANDARD_HHSRS_MAPPING}, + "asbestos_and_mm_fibres": {"HHSRSASB": STANDARD_HHSRS_MAPPING}, + "biocides": {"HHSRSBIOC": STANDARD_HHSRS_MAPPING}, + "carbon_monoxide_and_fuel_combustion_products": { + "HHSRSCO": STANDARD_HHSRS_MAPPING, + "HHSRSSO2": STANDARD_HHSRS_MAPPING, + "HHSRSNO2": STANDARD_HHSRS_MAPPING + }, + "lead": {"HHSRSLEAD": 
STANDARD_HHSRS_MAPPING}, + "radiation": {"HHSRSRADIA": STANDARD_HHSRS_MAPPING}, + "uncombusted_fuel_gas": {"HHSRSFUEL": STANDARD_HHSRS_MAPPING}, + "volatile_organic_compounds": {"HHSRSORGAN": STANDARD_HHSRS_MAPPING}, + "crowding_and_space": {"HHSRSCROWD": STANDARD_HHSRS_MAPPING}, + "entry_by_intruders": {"HHSRSENTRY": STANDARD_HHSRS_MAPPING}, + "lighting": {"HHSRSLIGHT": STANDARD_HHSRS_MAPPING}, + "noise": {"HHSRSNOISE": STANDARD_HHSRS_MAPPING}, + "domestic_hygiene_pests_and_refuse": {"HHSRSDOMES": STANDARD_HHSRS_MAPPING}, + "food_safety": {"HHSRSFOOD": STANDARD_HHSRS_MAPPING}, + "personal_hygiene_sanitation_and_drainage": {"HHSRSPERS": STANDARD_HHSRS_MAPPING}, + "water_supply": {"HHSRSWATER": STANDARD_HHSRS_MAPPING}, + "falls_associated_with_baths": {"HHSRSFBATH": STANDARD_HHSRS_MAPPING}, + "falls_on_level_surfaces": {"HHSRSFLEVE": STANDARD_HHSRS_MAPPING}, + "falls_on_stairs_and_steps": {"HHSRSFSTAI": STANDARD_HHSRS_MAPPING}, + "falls_between_levels": {"HHSRSFBETW": STANDARD_HHSRS_MAPPING}, + "electrical_hazards": {"HHSRSELEC": STANDARD_HHSRS_MAPPING}, + "fire": {"HHSRSFIRE": STANDARD_HHSRS_MAPPING}, + "flames_hot_surfaces_and_materials": {"HHSRSFLAME": STANDARD_HHSRS_MAPPING}, + "collision_and_entrapment": {"HHSRSENTRP": STANDARD_HHSRS_MAPPING, "HHSRSCLOW": STANDARD_HHSRS_MAPPING}, + "explosions": {"HHSRSEXPLO": STANDARD_HHSRS_MAPPING}, + "ergonomics": {"HHSRSPOSI": STANDARD_HHSRS_MAPPING}, + "structural_collapse_and_falling_elements": {"HHSRSSTRUC": STANDARD_HHSRS_MAPPING} +} + +print(houses_waltham_forest_data[ + houses_waltham_forest_data["ELEMENT CODE"] == "INTHTIMP" + ][["ATTRIBUTE CODE", "ATTRIBUTE CODE DESCRIPTION"]].drop_duplicates()) + +print(flats_waltham_forest_data[ + flats_waltham_forest_data["ELEMENT CODE"] == "INTBTHADEQ" + ][["ATTRIBUTE CODE", "ATTRIBUTE CODE DESCRIPTION"]].drop_duplicates()) + +# Criterion B +CRITERION_B_MAPPING = { + # TODO: Needs to be sorted!!! 
+ # "external_walls_structure": { + # "EXTWALLSTR": {"pass": "GOOD", "fail": "POOR", "no_data": "Unknown if Structural Defects in External Area"} + # } + "lintels": { + "EXTLINTELS": {"pass": "GOOD", "fail": "POOR", "no_data": "Unknown Condition of Lintels"} + } +} + +# Criterion C +CRITERION_C_MAPPING = { + # "kitchen_less_than_20_years_old": +} + +COMPONENT_LIFESPANS = { + "kitchen": {"house": 30, "flat_below_6_storeys": 30, "flat_above_6_storeys": 30}, + "bathroom": {"house": 50, "flat_below_6_storeys": 50, "flat_above_6_storeys": 50} +} + +# Database design +# creation_date, uprn, variable, result, hhsrs_score (optional, numeric), hhsrs_rank (A-J), install_date (for +# components which expire, e.g. kitchen) + +decent_homes = [] +# Use to capture criterion A, B, C and D. Should be: +# {"uprn": int, "creation_date": datetime, "criterion_a": bool, "criterion_b": bool, "criterion_c": bool, +# "criterion_d": bool, "decent_homes": bool"} +property_decent_homes = [] +for fn in filenames: + with open(os.path.join(folder, fn), "rb") as f: + data = json.load(f) + + from pprint import pprint + + pprint(data["elements"]) + + property_info = data["property_info"] + if property_info["PROP TYPE"] in ["HOU"]: + property_type = "house" + elif property_info["PROP TYPE"] == "FLA": + raise Exception("Implement distrinction between below and above 6 storeys") + property_type = "flat" + else: + raise NotImplementedError("Unknown property type") + + # Criterion A + for hhsrs_variable, mapping in HHSRS_MAPPING.items(): + element_code = list(mapping.keys())[0] + + # Find the data in the JSON within data["elements"] + check_pass = [] + for k, v in data["elements"].items(): + if v["ELEMENT CODE"] == element_code: + # We check the attribute code + # Check if pass + if v["ATTRIBUTE CODE"] == mapping[element_code]["pass"]: + result = "pass" + elif v["ATTRIBUTE CODE"] == mapping[element_code]["fail"]: + result = "fail" + elif v["ATTRIBUTE CODE"] == mapping[element_code]["no_data"]: + result 
= "no_data" + else: + raise ValueError("Unknown attribute code") + check_pass.append(result) + + # We check if we have a pass, fail or no_data + if all([x == "pass" for x in check_pass]): + hhsrs_result = "pass" + elif any([x == "fail" for x in check_pass]): + hhsrs_result = "fail" + elif any([x == "no_data" for x in check_pass]): + hhsrs_result = "no_data" + else: + raise NotImplementedError("Mixed results not implemented") + decent_homes.append( + {"variable": hhsrs_variable, 'result': hhsrs_result, "hhsrs_rank": None, "hhsrs_score": None, + "install_date": None} + ) + + # Criterion B + + # --- Criterion C --- + today = pd.Timestamp.today().normalize() + + # Guard: property type string already set earlier + is_flat = (property_info["PROP TYPE"] == "FLA") + + # 1) Kitchen age ≤ 20 years + kitchen = get_element(data["elements"], LABEL_KITCHEN) + if kitchen: + kit_install_raw = kitchen.get("INSTALL DATE") + try: + kit_install = pd.to_datetime(kit_install_raw) + kit_age_years = years_between(today.to_pydatetime(), kit_install.to_pydatetime()) + kitchen_age_result = "pass" if kit_age_years <= CRITERION_C_AGE_LIMITS["kitchen_years_max"] else "fail" + # For transparency, store next renewal as install + 20 years (criterion C perspective) + kit_next_due = kit_install + pd.DateOffset(years=CRITERION_C_AGE_LIMITS["kitchen_years_max"]) + except Exception: + kitchen_age_result = "no_data" + kit_next_due = None + else: + kitchen_age_result = "no_data" + kit_next_due = None + append_result(decent_homes, "kitchen_less_than_20_years_old", kitchen_age_result, kit_next_due) + + # 2) Kitchen adequate space/layout + # Prefer explicit codes if you have them, fall back to text in ATTRIBUTE CODE DESCRIPTION + if kitchen: + kit_attr_desc = kitchen.get("ATTRIBUTE CODE DESCRIPTION", "") + # If you prefer codes, you can also branch here on kitchen.get("ATTRIBUTE CODE") == "STDKITADQ" + kitchen_adequacy_result = adequacy_result_by_text(kit_attr_desc) + else: + kitchen_adequacy_result = 
"no_data" + append_result(decent_homes, "kitchen_adequate_space_and_layout", kitchen_adequacy_result) + + # 3) Bathroom age ≤ 30 years + bath = get_element(data["elements"], LABEL_BATHROOM) + if bath: + bth_install_raw = bath.get("INSTALL DATE") + try: + bth_install = pd.to_datetime(bth_install_raw) + bth_age_years = years_between(today.to_pydatetime(), bth_install.to_pydatetime()) + bathroom_age_result = "pass" if bth_age_years <= CRITERION_C_AGE_LIMITS["bathroom_years_max"] else "fail" + bth_next_due = bth_install + pd.DateOffset(years=CRITERION_C_AGE_LIMITS["bathroom_years_max"]) + except Exception: + bathroom_age_result = "no_data" + bth_next_due = None + else: + bathroom_age_result = "no_data" + bth_next_due = None + append_result(decent_homes, "bathroom_less_than_30_years_old", bathroom_age_result, bth_next_due) + + # 4) Bathroom/WC appropriately located + if bath: + # You already observed codes like STDBTHADQ / ADPBTHADQ as 'pass' + bth_attr_code = bath.get("ATTRIBUTE CODE", "") + bth_attr_desc = bath.get("ATTRIBUTE CODE DESCRIPTION", "") + known_pass_codes = {"STDBTHADQ", "ADPBTHADQ"} + if bth_attr_code in known_pass_codes: + bathroom_location_result = "pass" + else: + # Fallback to text adequacy check + bathroom_location_result = adequacy_result_by_text(bth_attr_desc) + else: + bathroom_location_result = "no_data" + append_result(decent_homes, "bathroom_wc_appropriately_located", bathroom_location_result) + + # 5) Adequate external noise insulation + noise = get_element(data["elements"], LABEL_NOISE) + if noise: + noise_desc = noise.get("ATTRIBUTE CODE DESCRIPTION", "") + noise_result = adequacy_result_by_text(noise_desc) + else: + noise_result = "no_data" + append_result(decent_homes, "adequate_external_noise_insulation", noise_result) + + # 6) Adequate common entrance areas (flats only) + if is_flat: + raise Exception("Pls check this") + common = get_element(data["elements"], LABEL_COMMON_CIRC) + if common: + circ_desc = common.get("ATTRIBUTE CODE 
DESCRIPTION", "") + common_areas_result = adequacy_result_by_text(circ_desc) + else: + common_areas_result = "no_data" + append_result(decent_homes, "adequate_common_entrance_areas", common_areas_result) + + # ---------------- Criterion D ---------------- + # heating system type + heating = get_element(data["elements"], "Heating Improvement Required in Property") + if heating: + # Example: ATTRIBUTE CODE == "GOOD" means pass, "POOR" means fail + heat_type_code = heating.get("ATTRIBUTE CODE", "") + if heat_type_code in {"NOTAPPLIC"}: + heating_type_result = "pass" + elif heat_type_code in {"WETINSFULL"}: + heating_type_result = "fail" + else: + raise NotImplementedError("No other observed codes yet") + else: + raise NotImplementedError("Heating element missing in dataset") + + append_result(decent_homes, "efficient_heating_system_type", heating_type_result) + + # heating distribution + heating_dist = get_element(data["elements"], "Heating Distribution System in Property") + if heating_dist: + dist_desc = heating_dist.get("ATTRIBUTE CODE DESCRIPTION", "") + heating_dist_result = adequacy_result_by_text(dist_desc) + else: + raise NotImplementedError("Heating distribution element missing in dataset") + + append_result(decent_homes, "efficient_heating_distribution", heating_dist_result) + + # insulation + loft = get_element(data["elements"], "Size in mm of Loft Insulation Thickness in Property") + wall = get_element(data["elements"], "Wall Insulation Improvement in External Area") + heating = get_element(data["elements"], "Heating Improvement Required in Property") + # To determine how much loft insulation is required + + # Loft insulation check (example threshold: ≥ 270mm = pass) + if loft: + # We have a specific code, where further loft insulation is needed + loft_code = loft.get("ATTRIBUTE CODE", "") + if loft_code == "LOFTINSRQD": + loft_result = "fail" + elif loft_code.isnumeric(): + loft_result = "pass" + else: + raise NotImplementedError("Unknown loft insulation 
code - pls check") + else: + raise NotImplementedError("Loft insulation data missing - pls check") + append_result(decent_homes, "loft_insulation_sufficient", loft_result) + + # Wall insulation check (simple adequacy parser) + if wall: + wall_desc = wall.get("ATTRIBUTE CODE DESCRIPTION", "") + wall_result = adequacy_result_by_text(wall_desc) + else: + raise NotImplementedError("Wall insulation data missing - pls check") + append_result(decent_homes, "wall_insulation_sufficient", wall_result) + + # ---------------- Criterion A overall ---------------- + a_vars = set(HHSRS_MAPPING.keys()) + latest_a_results = {r["variable"]: r["result"] for r in decent_homes if r["variable"] in a_vars} + + if any(v == "fail" for v in latest_a_results.values()): + criterion_a_result = "fail" + elif all(v == "pass" for v in latest_a_results.values()): + criterion_a_result = "pass" + else: + criterion_a_result = "no_data" + + # ---------------- Criterion C overall ---------------- + criterion_c_vars = [ + "kitchen_less_than_20_years_old", + "kitchen_adequate_space_and_layout", + "bathroom_less_than_30_years_old", + "bathroom_wc_appropriately_located", + "adequate_external_noise_insulation", + ] + if is_flat: + criterion_c_vars.append("adequate_common_entrance_areas") + + latest_c_results = {r["variable"]: r["result"] for r in decent_homes if r["variable"] in criterion_c_vars} + + count_fails = sum(1 for v in latest_c_results.values() if v == "fail") + # optionally count no_data too if you want strict interpretation + criterion_c_result = "fail" if count_fails >= 3 else "pass" + + # ---------------- Criterion D overall ---------------- + criterion_d_vars = [ + "efficient_heating_system_type", + "efficient_heating_distribution", + "loft_insulation_sufficient", + "wall_insulation_sufficient", + ] + latest_d_results = {r["variable"]: r["result"] for r in decent_homes if r["variable"] in criterion_d_vars} + + if any(v == "fail" for v in latest_d_results.values()): + criterion_d_result = 
"fail" + elif all(v == "pass" for v in latest_d_results.values()): + criterion_d_result = "pass" + else: + criterion_d_result = "no_data" + + # ---------------- Append to property_decent_homes ---------------- + property_decent_homes.append({ + "uprn": property_info.get("UPRN"), # update field name if needed + "creation_date": datetime.now().date().isoformat(), + "criterion_a": criterion_a_result, + "criterion_b": None, # not yet implemented + "criterion_c": criterion_c_result, + "criterion_d": criterion_d_result, + "decent_homes": ( + criterion_a_result == "pass" + and criterion_c_result == "pass" + ) + }) From a5ae1669718ac1fd6b17fba13678920534d17ea1 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 22 Sep 2025 12:32:51 +0100 Subject: [PATCH 03/23] mvp implementation for A, C, D, year mapping for B --- .../waltham_forest/decent_homes_pilot.py | 367 ++++++++++++------ 1 file changed, 254 insertions(+), 113 deletions(-) diff --git a/etl/customers/waltham_forest/decent_homes_pilot.py b/etl/customers/waltham_forest/decent_homes_pilot.py index 78460f5a..b59168fb 100644 --- a/etl/customers/waltham_forest/decent_homes_pilot.py +++ b/etl/customers/waltham_forest/decent_homes_pilot.py @@ -16,26 +16,8 @@ def get_element(elements, label): return elements.get(label) -def adequacy_result_by_text(attr_desc: str): - """ - Generic adequacy parser. - Pass if description clearly says 'Adequate' and not 'Inadequate'. - Fail if it says 'Inadequate' (or equivalent). 
- Unknown -> 'no_data' - """ - if not attr_desc or not isinstance(attr_desc, str): - return "no_data" - text = attr_desc.strip().lower() - # Common patterns - if "inadequate" in text or "unsatisfactory" in text or "problems" in text: - return "fail" - if "adequate" in text or "standard" in text or "appropriate" in text: - return "pass" - return "no_data" - - -def append_result(decent_homes, variable, result, install_date=None): - decent_homes.append({ +def append_result(decent_homes_meta, variable, result, install_date=None): + decent_homes_meta.append({ "variable": variable, "result": result, "hhsrs_rank": None, @@ -97,7 +79,8 @@ CRITERION_B_VARIABLES = [ ] CRITERION_C_VARIABLES = [ - "kitchen_facilities", + "kitchen_less_than_20_years_old", "kitchen_adequate_space_and_layout", "bathroom_less_than_30_years_old", + "bathroom_wc_appropriately_located", "adequate_external_noise_insulation", "adequate_common_entrance_areas", ] # Criterion C explicit age limits (different from component lifespans used elsewhere) @@ -151,40 +134,163 @@ HHSRS_MAPPING = { "structural_collapse_and_falling_elements": {"HHSRSSTRUC": STANDARD_HHSRS_MAPPING} } -print(houses_waltham_forest_data[ - houses_waltham_forest_data["ELEMENT CODE"] == "INTHTIMP" - ][["ATTRIBUTE CODE", "ATTRIBUTE CODE DESCRIPTION"]].drop_duplicates()) +# print(houses_waltham_forest_data[ +# houses_waltham_forest_data["ELEMENT CODE"] == "INTBTHADEQ" +# ][["ATTRIBUTE CODE", "ATTRIBUTE CODE DESCRIPTION"]].drop_duplicates()) + +# print(flats_waltham_forest_data[ +# flats_waltham_forest_data["ELEMENT CODE"] == "INTBTHADEQ" +# ][["ATTRIBUTE CODE", "ATTRIBUTE CODE DESCRIPTION"]].drop_duplicates()) -print(flats_waltham_forest_data[ - flats_waltham_forest_data["ELEMENT CODE"] == "INTBTHADEQ" - ][["ATTRIBUTE CODE", "ATTRIBUTE CODE DESCRIPTION"]].drop_duplicates()) # Criterion B -CRITERION_B_MAPPING = { - # TODO: Needs to be sorted!!! 
- # "external_walls_structure": { - # "EXTWALLSTR": {"pass": "GOOD", "fail": "POOR", "no_data": "Unknown if Structural Defects in External Area"} - # } - "lintels": { - "EXTLINTELS": {"pass": "GOOD", "fail": "POOR", "no_data": "Unknown Condition of Lintels"} - } +B_COMPONENT_LABELS = { + # Key components + "wall_structure": [ + "Wall Structure in External Area", + ], + "lintels": [ + "Lintels in External Area", + ], + "brickwork_spalling": [ + "Wall Spalling in External Area", + ], + "wall_finish": [ + "Wall Finish 1 in External Area", + "Wall Finish 2 in External Area", + "External Decorations in External Area", + "Brickwork Pointing in External Area", + ], + "roof_structure": [ + "Roof Structure 1 in External Area", + "Roof Structure 2 in External Area", + "Roof Structure 3 in External Area", + # If you later decide to include ancillary items, add: + # "Fascia / Soffit / Bargeboard in External Area", + # "Gutters in External Area", "Downpipes in External Area", + # "Internal Downpipes in External Area", + # and give them a clear condition rule. + ], + "roof_finish": [ + "Roof Covering 1 in External Area", + "Roof Covering 2 in External Area", + "Roof Covering 3 in External Area", + ], + "chimneys": [ + "Chimneys in External Area", + ], + "windows": [ + "Windows in Property", + "Windows 1 in External Area", + "Windows 2 in External Area", + "Garage and Store Windows in External Area", + "Garage Windows in External Area", + "Store Windows in External Area", + ], + "external_doors": [ + "Type and Location of Front Door in Property", + "Front Door Fire Rating in Property", + "Patio and French Doors 1 in External Area", + "Back and Side Doors 1 in External Area", + "Back and Side Doors 2 in External Area", + "Garage and Store Doors in External Area", + "Garage Door in External Area", + "Store Door in External Area", + ], + "central_heating_boiler": [ + # If the dataset exposes a specific boiler element, put it here. 
+ # For now we only have "Heating Improvement Required in Property" elsewhere (Criterion D), + # which isn't reliable for age. If your JSON later includes a boiler line with INSTALL DATE, + # add its label here. + ], + "heating_other": [ + # e.g., gas fires/storage heaters if present as discrete elements later. + ], + "electrical_systems": [ + # If you have an installation line with dates (e.g. "Electrics Required in Property") + # add it here; we will rely on INSTALL DATE + REMAINING LIFE. + "Electrics Required in Property", + ], + + # Other components + "kitchen": [ + "Adequacy of Kitchen and Type in Property", + ], + "bathroom": [ + "Adequacy of Bathroom Location in Property", + ], + "central_heating_distribution_system": [ + "Heating Distribution System in Property", + ], +} + +KEY_COMPONENTS = { + "wall_structure", "lintels", "brickwork_spalling", "wall_finish", + "roof_structure", "roof_finish", "chimneys", "windows", + "external_doors", "central_heating_boiler", "heating_other", + "electrical_systems", +} +OTHER_COMPONENTS = { + "kitchen", "bathroom", "central_heating_distribution_system", } # Criterion C -CRITERION_C_MAPPING = { - # "kitchen_less_than_20_years_old": -} - COMPONENT_LIFESPANS = { - "kitchen": {"house": 30, "flat_below_6_storeys": 30, "flat_above_6_storeys": 30}, - "bathroom": {"house": 50, "flat_below_6_storeys": 50, "flat_above_6_storeys": 50} + # Key components + "wall_structure": { + "house": 80, "flat_below_6_storeys": 80, "flat_above_6_storeys": 80 + }, + "lintels": { + "house": 60, "flat_below_6_storeys": 60, "flat_above_6_storeys": 60 + }, + "brickwork_spalling": { + "house": 30, "flat_below_6_storeys": 30, "flat_above_6_storeys": 30 + }, + "wall_finish": { + "house": 60, "flat_below_6_storeys": 60, "flat_above_6_storeys": 30 + }, + "roof_structure": { + "house": 50, "flat_below_6_storeys": 30, "flat_above_6_storeys": 30 + }, + "roof_finish": { + "house": 50, "flat_below_6_storeys": 30, "flat_above_6_storeys": 30 + }, + "chimneys": { + 
"house": 50, "flat_below_6_storeys": 50, "flat_above_6_storeys": None # N/A + }, + "windows": { + "house": 40, "flat_below_6_storeys": 30, "flat_above_6_storeys": 30 + }, + "external_doors": { + "house": 40, "flat_below_6_storeys": 30, "flat_above_6_storeys": 30 + }, + "central_heating_boiler": { + "house": 15, "flat_below_6_storeys": 15, "flat_above_6_storeys": 15 + }, + "heating_other": { + "house": 30, "flat_below_6_storeys": 30, "flat_above_6_storeys": 30 + }, + "electrical_systems": { + "house": 30, "flat_below_6_storeys": 30, "flat_above_6_storeys": 30 + }, + + # Other components + "kitchen": { + "house": 30, "flat_below_6_storeys": 30, "flat_above_6_storeys": 30 + }, + "bathroom": { + "house": 40, "flat_below_6_storeys": 40, "flat_above_6_storeys": 40 + }, + "central_heating_distribution_system": { + "house": 40, "flat_below_6_storeys": 40, "flat_above_6_storeys": 40 + }, } # Database design # creation_date, uprn, variable, result, hhsrs_score (optional, numeric), hhsrs_rank (A-J), install_date (for # components which expire, e.g. kitchen) -decent_homes = [] +decent_homes_meta = [] # Use to capture criterion A, B, C and D. 
Should be: # {"uprn": int, "creation_date": datetime, "criterion_a": bool, "criterion_b": bool, "criterion_c": bool, # "criterion_d": bool, "decent_homes": bool"} @@ -193,20 +299,16 @@ for fn in filenames: with open(os.path.join(folder, fn), "rb") as f: data = json.load(f) - from pprint import pprint - - pprint(data["elements"]) - property_info = data["property_info"] if property_info["PROP TYPE"] in ["HOU"]: property_type = "house" elif property_info["PROP TYPE"] == "FLA": raise Exception("Implement distrinction between below and above 6 storeys") - property_type = "flat" + # property_type = "flat" else: raise NotImplementedError("Unknown property type") - # Criterion A + # ---------------- Criterion A ---------------- for hhsrs_variable, mapping in HHSRS_MAPPING.items(): element_code = list(mapping.keys())[0] @@ -235,14 +337,48 @@ for fn in filenames: hhsrs_result = "no_data" else: raise NotImplementedError("Mixed results not implemented") - decent_homes.append( + decent_homes_meta.append( {"variable": hhsrs_variable, 'result': hhsrs_result, "hhsrs_rank": None, "hhsrs_score": None, "install_date": None} ) - # Criterion B + # ---------------- Criterion B ---------------- + # Check each of the components - # --- Criterion C --- + component_pass_or_fail = [] + # TODO: Delete me + component, labels = list(B_COMPONENT_LABELS.items())[1] + for component, labels in B_COMPONENT_LABELS.items(): + # TODO: labels may not need to be multiple variables + for label in labels: + # Grab the label + label_data = get_element(data["elements"], label) + # 1) We check if the component is old + install_date = pd.to_datetime(label_data["INSTALL DATE"]) + if pd.isnull(install_date): + raise ValueError("Missing install date - pls check") + component_lifetime = COMPONENT_LIFESPANS[component][property_type] + # This should be populated, and for the pilot it's okay if this errors if missing - we'll handle accordingly + is_old = years_between(today.to_pydatetime(), 
install_date.to_pydatetime()) >= component_lifetime + # 2) We check if the component is in poor condition + if pd.isnull(label_data["REMAINING LIFE"]): + raise ValueError("Missing remaining life - pls check") + has_failed = label_data["REMAINING LIFE"] < 0 + # The component needs to have both failed and be old to fail criterion B + component_result = "fail" if is_old and has_failed else "pass" + component_pass_or_fail.append( + { + "component": component, + "label": label, + "install_date": str(install_date), + "remaining_life": label_data["REMAINING LIFE"], + "is_old": is_old, + "has_failed": has_failed, + "result": component_result + } + ) + + # ---------------- Criterion C ---------------- today = pd.Timestamp.today().normalize() # Guard: property type string already set earlier @@ -251,71 +387,67 @@ for fn in filenames: # 1) Kitchen age ≤ 20 years kitchen = get_element(data["elements"], LABEL_KITCHEN) if kitchen: - kit_install_raw = kitchen.get("INSTALL DATE") - try: - kit_install = pd.to_datetime(kit_install_raw) - kit_age_years = years_between(today.to_pydatetime(), kit_install.to_pydatetime()) - kitchen_age_result = "pass" if kit_age_years <= CRITERION_C_AGE_LIMITS["kitchen_years_max"] else "fail" - # For transparency, store next renewal as install + 20 years (criterion C perspective) - kit_next_due = kit_install + pd.DateOffset(years=CRITERION_C_AGE_LIMITS["kitchen_years_max"]) - except Exception: - kitchen_age_result = "no_data" - kit_next_due = None + kit_install_raw = kitchen["INSTALL DATE"] + kit_install = pd.to_datetime(kit_install_raw) + kit_age_years = years_between(today.to_pydatetime(), kit_install.to_pydatetime()) + kitchen_age_result = "pass" if kit_age_years <= CRITERION_C_AGE_LIMITS["kitchen_years_max"] else "fail" + # For transparency, store next renewal as install + 20 years (criterion C perspective) + kit_next_due = kit_install + pd.DateOffset(years=CRITERION_C_AGE_LIMITS["kitchen_years_max"]) else: - kitchen_age_result = "no_data" - 
kit_next_due = None - append_result(decent_homes, "kitchen_less_than_20_years_old", kitchen_age_result, kit_next_due) + raise NotImplementedError("Kitchen data missing - pls check") + append_result( + decent_homes_meta, "kitchen_less_than_20_years_old", kitchen_age_result, install_date=str(kit_install) + ) # 2) Kitchen adequate space/layout # Prefer explicit codes if you have them, fall back to text in ATTRIBUTE CODE DESCRIPTION if kitchen: - kit_attr_desc = kitchen.get("ATTRIBUTE CODE DESCRIPTION", "") - # If you prefer codes, you can also branch here on kitchen.get("ATTRIBUTE CODE") == "STDKITADQ" - kitchen_adequacy_result = adequacy_result_by_text(kit_attr_desc) + kit_attr_desc = kitchen["ATTRIBUTE CODE"] + if kit_attr_desc == "STDKITADQ": + kitchen_adequacy_result = "pass" + else: + raise NotImplementedError("No other observed codes yet") else: - kitchen_adequacy_result = "no_data" - append_result(decent_homes, "kitchen_adequate_space_and_layout", kitchen_adequacy_result) + raise NotImplementedError("Kitchen data missing - pls check") + append_result(decent_homes_meta, "kitchen_adequate_space_and_layout", kitchen_adequacy_result) # 3) Bathroom age ≤ 30 years bath = get_element(data["elements"], LABEL_BATHROOM) if bath: - bth_install_raw = bath.get("INSTALL DATE") - try: - bth_install = pd.to_datetime(bth_install_raw) - bth_age_years = years_between(today.to_pydatetime(), bth_install.to_pydatetime()) - bathroom_age_result = "pass" if bth_age_years <= CRITERION_C_AGE_LIMITS["bathroom_years_max"] else "fail" - bth_next_due = bth_install + pd.DateOffset(years=CRITERION_C_AGE_LIMITS["bathroom_years_max"]) - except Exception: - bathroom_age_result = "no_data" - bth_next_due = None + bth_install_raw = bath["INSTALL DATE"] + bth_install = pd.to_datetime(bth_install_raw) + bth_age_years = years_between(today.to_pydatetime(), bth_install.to_pydatetime()) + bathroom_age_result = "pass" if bth_age_years <= CRITERION_C_AGE_LIMITS["bathroom_years_max"] else "fail" + 
bth_next_due = bth_install + pd.DateOffset(years=CRITERION_C_AGE_LIMITS["bathroom_years_max"]) else: - bathroom_age_result = "no_data" - bth_next_due = None - append_result(decent_homes, "bathroom_less_than_30_years_old", bathroom_age_result, bth_next_due) + raise NotImplementedError("Bathroom data missing - pls check") + append_result( + decent_homes_meta, "bathroom_less_than_30_years_old", bathroom_age_result, install_date=str(bth_install) + ) # 4) Bathroom/WC appropriately located if bath: - # You already observed codes like STDBTHADQ / ADPBTHADQ as 'pass' - bth_attr_code = bath.get("ATTRIBUTE CODE", "") - bth_attr_desc = bath.get("ATTRIBUTE CODE DESCRIPTION", "") - known_pass_codes = {"STDBTHADQ", "ADPBTHADQ"} - if bth_attr_code in known_pass_codes: + bth_attr_code = bath["ATTRIBUTE CODE"] + if bth_attr_code in {"STDBTHADQ", "ADPBTHADQ"}: bathroom_location_result = "pass" else: - # Fallback to text adequacy check - bathroom_location_result = adequacy_result_by_text(bth_attr_desc) + raise NotImplementedError("No other observed codes yet") else: - bathroom_location_result = "no_data" - append_result(decent_homes, "bathroom_wc_appropriately_located", bathroom_location_result) + raise NotImplementedError("Bathroom data missing - pls check") + + append_result(decent_homes_meta, "bathroom_wc_appropriately_located", bathroom_location_result) # 5) Adequate external noise insulation noise = get_element(data["elements"], LABEL_NOISE) if noise: - noise_desc = noise.get("ATTRIBUTE CODE DESCRIPTION", "") - noise_result = adequacy_result_by_text(noise_desc) + noise_code = noise["ATTRIBUTE CODE"] + if noise_code in {"ADEQUATE"}: + noise_result = "pass" + else: + raise NotImplementedError("No other observed codes yet") else: - noise_result = "no_data" - append_result(decent_homes, "adequate_external_noise_insulation", noise_result) + raise NotImplementedError("Noise insulation data missing - pls check") + append_result(decent_homes_meta, "adequate_external_noise_insulation", 
noise_result) # 6) Adequate common entrance areas (flats only) if is_flat: @@ -326,14 +458,13 @@ for fn in filenames: common_areas_result = adequacy_result_by_text(circ_desc) else: common_areas_result = "no_data" - append_result(decent_homes, "adequate_common_entrance_areas", common_areas_result) + append_result(decent_homes_meta, "adequate_common_entrance_areas", common_areas_result) # ---------------- Criterion D ---------------- # heating system type heating = get_element(data["elements"], "Heating Improvement Required in Property") if heating: - # Example: ATTRIBUTE CODE == "GOOD" means pass, "POOR" means fail - heat_type_code = heating.get("ATTRIBUTE CODE", "") + heat_type_code = heating["ATTRIBUTE CODE"] if heat_type_code in {"NOTAPPLIC"}: heating_type_result = "pass" elif heat_type_code in {"WETINSFULL"}: @@ -343,28 +474,33 @@ for fn in filenames: else: raise NotImplementedError("Heating element missing in dataset") - append_result(decent_homes, "efficient_heating_system_type", heating_type_result) + append_result(decent_homes_meta, "efficient_heating_system_type", heating_type_result) # heating distribution heating_dist = get_element(data["elements"], "Heating Distribution System in Property") if heating_dist: - dist_desc = heating_dist.get("ATTRIBUTE CODE DESCRIPTION", "") - heating_dist_result = adequacy_result_by_text(dist_desc) + dist_code = heating_dist["ATTRIBUTE CODE"] + if dist_code == "UNKNOWN": + # For the observed case, there was no heating and wet heating needed to be installed in full so the value + # was unknown + heating_dist_result = "no_data" + else: + raise NotImplementedError("No other observed codes yet") else: raise NotImplementedError("Heating distribution element missing in dataset") - append_result(decent_homes, "efficient_heating_distribution", heating_dist_result) + append_result(decent_homes_meta, "efficient_heating_distribution", heating_dist_result) # insulation loft = get_element(data["elements"], "Size in mm of Loft Insulation 
Thickness in Property") wall = get_element(data["elements"], "Wall Insulation Improvement in External Area") - heating = get_element(data["elements"], "Heating Improvement Required in Property") # To determine how much loft insulation is required # Loft insulation check (example threshold: ≥ 270mm = pass) if loft: - # We have a specific code, where further loft insulation is needed - loft_code = loft.get("ATTRIBUTE CODE", "") + # We have a specific code, where further loft insulation is needed - It appears the heating type check has + # already been completed in this dataset and so we just need to check the code + loft_code = loft["ATTRIBUTE CODE"] if loft_code == "LOFTINSRQD": loft_result = "fail" elif loft_code.isnumeric(): @@ -373,19 +509,22 @@ for fn in filenames: raise NotImplementedError("Unknown loft insulation code - pls check") else: raise NotImplementedError("Loft insulation data missing - pls check") - append_result(decent_homes, "loft_insulation_sufficient", loft_result) + append_result(decent_homes_meta, "loft_insulation_sufficient", loft_result) - # Wall insulation check (simple adequacy parser) + # Wall insulation check if wall: - wall_desc = wall.get("ATTRIBUTE CODE DESCRIPTION", "") - wall_result = adequacy_result_by_text(wall_desc) + wall_code = wall["ATTRIBUTE CODE"] + if wall_code in {"NONE"}: # Means no insulation improvement required + wall_result = "pass" + else: + raise NotImplementedError("No other observed codes yet") else: raise NotImplementedError("Wall insulation data missing - pls check") - append_result(decent_homes, "wall_insulation_sufficient", wall_result) + append_result(decent_homes_meta, "wall_insulation_sufficient", wall_result) # ---------------- Criterion A overall ---------------- a_vars = set(HHSRS_MAPPING.keys()) - latest_a_results = {r["variable"]: r["result"] for r in decent_homes if r["variable"] in a_vars} + latest_a_results = {r["variable"]: r["result"] for r in decent_homes_meta if r["variable"] in a_vars} if any(v 
== "fail" for v in latest_a_results.values()): criterion_a_result = "fail" @@ -405,20 +544,21 @@ for fn in filenames: if is_flat: criterion_c_vars.append("adequate_common_entrance_areas") - latest_c_results = {r["variable"]: r["result"] for r in decent_homes if r["variable"] in criterion_c_vars} + latest_c_results = {r["variable"]: r["result"] for r in decent_homes_meta if r["variable"] in criterion_c_vars} count_fails = sum(1 for v in latest_c_results.values() if v == "fail") # optionally count no_data too if you want strict interpretation criterion_c_result = "fail" if count_fails >= 3 else "pass" # ---------------- Criterion D overall ---------------- + # Needs to have both efficient geating and distribution so all should pass criterion_d_vars = [ "efficient_heating_system_type", "efficient_heating_distribution", "loft_insulation_sufficient", "wall_insulation_sufficient", ] - latest_d_results = {r["variable"]: r["result"] for r in decent_homes if r["variable"] in criterion_d_vars} + latest_d_results = {r["variable"]: r["result"] for r in decent_homes_meta if r["variable"] in criterion_d_vars} if any(v == "fail" for v in latest_d_results.values()): criterion_d_result = "fail" @@ -429,7 +569,7 @@ for fn in filenames: # ---------------- Append to property_decent_homes ---------------- property_decent_homes.append({ - "uprn": property_info.get("UPRN"), # update field name if needed + "uprn": property_info.get("UPRN"), # TODO: Need UPRN "creation_date": datetime.now().date().isoformat(), "criterion_a": criterion_a_result, "criterion_b": None, # not yet implemented @@ -438,5 +578,6 @@ for fn in filenames: "decent_homes": ( criterion_a_result == "pass" and criterion_c_result == "pass" + and criterion_d_result == "pass" ) }) From d68ef88b9db7735a55a74732a58dabe5f3ff8463 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 22 Sep 2025 14:57:58 +0100 Subject: [PATCH 04/23] wip --- .../waltham_forest/decent_homes_pilot.py | 107 ++++++++++++++---- 1 file changed, 
85 insertions(+), 22 deletions(-) diff --git a/etl/customers/waltham_forest/decent_homes_pilot.py b/etl/customers/waltham_forest/decent_homes_pilot.py index b59168fb..ba9bb3b7 100644 --- a/etl/customers/waltham_forest/decent_homes_pilot.py +++ b/etl/customers/waltham_forest/decent_homes_pilot.py @@ -5,6 +5,8 @@ import pandas as pd from datetime import datetime +from docutils.nodes import table + def years_between(d1, d2): # precise year difference (accounts for months/days) @@ -16,13 +18,14 @@ def get_element(elements, label): return elements.get(label) -def append_result(decent_homes_meta, variable, result, install_date=None): +def append_result(decent_homes_meta, variable, result, install_date=None, expiry_date=None): decent_homes_meta.append({ "variable": variable, "result": result, "hhsrs_rank": None, "hhsrs_score": None, - "install_date": install_date + "install_date": install_date, + "expiry_date": expiry_date, }) @@ -165,11 +168,13 @@ B_COMPONENT_LABELS = { "Roof Structure 1 in External Area", "Roof Structure 2 in External Area", "Roof Structure 3 in External Area", - # If you later decide to include ancillary items, add: - # "Fascia / Soffit / Bargeboard in External Area", - # "Gutters in External Area", "Downpipes in External Area", - # "Internal Downpipes in External Area", - # and give them a clear condition rule. + "Garage Roof in External Area", + "Garage and Store Roofs in External Area", + "Store Roof in External Area", + "Fascia / Soffit / Bargeboard in External Area", + "Gutters in External Area", + "Downpipes in External Area", + "Internal Downpipes in External Area" ], "roof_finish": [ "Roof Covering 1 in External Area", @@ -198,20 +203,15 @@ B_COMPONENT_LABELS = { "Store Door in External Area", ], "central_heating_boiler": [ - # If the dataset exposes a specific boiler element, put it here. - # For now we only have "Heating Improvement Required in Property" elsewhere (Criterion D), - # which isn't reliable for age. 
If your JSON later includes a boiler line with INSTALL DATE, - # add its label here. + # TODO ], "heating_other": [ - # e.g., gas fires/storage heaters if present as discrete elements later. + # TODO ], "electrical_systems": [ - # If you have an installation line with dates (e.g. "Electrics Required in Property") - # add it here; we will rely on INSTALL DATE + REMAINING LIFE. + # TODO "Electrics Required in Property", ], - # Other components "kitchen": [ "Adequacy of Kitchen and Type in Property", @@ -287,9 +287,10 @@ COMPONENT_LIFESPANS = { } # Database design -# creation_date, uprn, variable, result, hhsrs_score (optional, numeric), hhsrs_rank (A-J), install_date (for -# components which expire, e.g. kitchen) +# creation_date, uprn, variable, result (pass/fail/nodata), hhsrs_score (optional, numeric), hhsrs_rank (A-J), +# install_date (for components which expire, e.g. kitchen), remaining_life (for components which expire, e.g. kitchen), +# TODO: Add the criterion decent_homes_meta = [] # Use to capture criterion A, B, C and D. Should be: # {"uprn": int, "creation_date": datetime, "criterion_a": bool, "criterion_b": bool, "criterion_c": bool, @@ -303,12 +304,15 @@ for fn in filenames: if property_info["PROP TYPE"] in ["HOU"]: property_type = "house" elif property_info["PROP TYPE"] == "FLA": - raise Exception("Implement distrinction between below and above 6 storeys") + raise NotImplementedError("Implement distrinction between below and above 6 storeys") # property_type = "flat" else: raise NotImplementedError("Unknown property type") # ---------------- Criterion A ---------------- + # TODO: Map out the sub-information + # Critrion A: pass/fail + # If fail, why? 
for hhsrs_variable, mapping in HHSRS_MAPPING.items(): element_code = list(mapping.keys())[0] @@ -347,19 +351,36 @@ for fn in filenames: component_pass_or_fail = [] # TODO: Delete me - component, labels = list(B_COMPONENT_LABELS.items())[1] + component, labels = list(B_COMPONENT_LABELS.items())[9] + label = labels[0] + # TODO: need to handle the case where there is no survey data at all for a component for component, labels in B_COMPONENT_LABELS.items(): # TODO: labels may not need to be multiple variables for label in labels: # Grab the label label_data = get_element(data["elements"], label) + if label_data["ATTRIBUTE CODE"] in ["UNKNOWN", "NONE", "UNKNOWNG", "UNKNOWNS"]: + # This isn't applicable + component_pass_or_fail.append( + { + "component": component, + "label": label, + "install_date": None, + "remaining_life": None, + "is_old": False, + "has_failed": False, + "result": "pass", + "appliable": False + } + ) + continue # 1) We check if the component is old install_date = pd.to_datetime(label_data["INSTALL DATE"]) if pd.isnull(install_date): raise ValueError("Missing install date - pls check") component_lifetime = COMPONENT_LIFESPANS[component][property_type] # This should be populated, and for the pilot it's okay if this errors if missing - we'll handle accordingly - is_old = years_between(today.to_pydatetime(), install_date.to_pydatetime()) >= component_lifetime + is_old = years_between(today.to_pydatetime(), install_date.to_pydatetime()) > component_lifetime # 2) We check if the component is in poor condition if pd.isnull(label_data["REMAINING LIFE"]): raise ValueError("Missing remaining life - pls check") @@ -369,15 +390,54 @@ for fn in filenames: component_pass_or_fail.append( { "component": component, + "component_type": "key" if component in KEY_COMPONENTS else "other", + "component_sub_description": label_data["ATTRIBUTE CODE DESCRIPTION"], "label": label, "install_date": str(install_date), "remaining_life": label_data["REMAINING LIFE"], "is_old": 
is_old, "has_failed": has_failed, - "result": component_result + "result": component_result, + "appliable": True } ) + # TODO: We need to check by component + # Example of a pass for a component + # [ + # {"component": "external_walls", "component_type": "key", "descr": "A", "result": "pass"}, + # {"component": "external_walls", "component_type": "key", "descr": "B", "result": "pass"}, + # {"component": "external_walls", "component_type": "key", "descr": "C", "result": "pass"}, + # ] + + # Example of a fail for a component + # [ + # {"component": "external_walls", "component_type": "key", "descr": "A", "result": "pass"}, + # {"component": "external_walls", "component_type": "key", "descr": "B", "result": "fail"}, + # {"component": "external_walls", "component_type": "key", "descr": "C", "result": "pass"}, + # ] + + # Example of a no data for a component + # [ + # {"component": "external_walls", "component_type": "key", "descr": "A", "result": "pass"}, + # {"component": "external_walls", "component_type": "key", "descr": "B", "result": "nodata", "appliable": True}, + # {"component": "external_walls", "component_type": "key", "descr": "C", "result": "pass"}, + # ] + # OR + # Everything is unknown + # [ + # {"component": "external_walls", "component_type": "key", "descr": "A", "result": "pass", "appliable": False}, + # {"component": "external_walls", "component_type": "key", "descr": "B", "result": "pass", "appliable": False}, + # {"component": "external_walls", "component_type": "key", "descr": "C", "result": "pass", "appliable": False}, + # ] + + # Component 1: pass/fail, key: true/False + # Component 2: pass/fail, key: true/False + # Component 3: pass/fail, key: true/False + # Component 4: pass/fail, key: true/False + # Component 4: pass/fail, key: true/False + # -> Decide on outcome. 
If failure of 1 key component -> fail criterion B, or 2 other components -> fail criterion B + # ---------------- Criterion C ---------------- today = pd.Timestamp.today().normalize() @@ -396,7 +456,8 @@ for fn in filenames: else: raise NotImplementedError("Kitchen data missing - pls check") append_result( - decent_homes_meta, "kitchen_less_than_20_years_old", kitchen_age_result, install_date=str(kit_install) + decent_homes_meta, "kitchen_less_than_20_years_old", kitchen_age_result, + install_date=str(kit_install), expiry_date=str(kit_next_due) ) # 2) Kitchen adequate space/layout @@ -533,6 +594,8 @@ for fn in filenames: else: criterion_a_result = "no_data" + # ---------------- Criterion B overall ---------------- + # ---------------- Criterion C overall ---------------- criterion_c_vars = [ "kitchen_less_than_20_years_old", From a22db51be9ffedd782c38f4120b4cb5e26e23919 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 22 Sep 2025 22:33:31 +0100 Subject: [PATCH 05/23] pilot implementation --- .../waltham_forest/decent_homes_pilot.py | 319 ++++++++++++------ 1 file changed, 215 insertions(+), 104 deletions(-) diff --git a/etl/customers/waltham_forest/decent_homes_pilot.py b/etl/customers/waltham_forest/decent_homes_pilot.py index ba9bb3b7..33836236 100644 --- a/etl/customers/waltham_forest/decent_homes_pilot.py +++ b/etl/customers/waltham_forest/decent_homes_pilot.py @@ -18,9 +18,11 @@ def get_element(elements, label): return elements.get(label) -def append_result(decent_homes_meta, variable, result, install_date=None, expiry_date=None): +def append_result(decent_homes_meta, criteria, variable, sub_variable, result, install_date=None, expiry_date=None): decent_homes_meta.append({ + "criteria": criteria, "variable": variable, + "sub_variable": sub_variable, "result": result, "hhsrs_rank": None, "hhsrs_score": None, @@ -75,6 +77,44 @@ HHSRS_VARIABLES = [ "structural_collapse_and_falling_elements" ] +ELEMENT_CODE_TO_DESCRIPTION = { + # One-to-one + 
"HHSRSDAMP": "damp_and_mould_growth", + "HHSRSCOLD": "excess_cold", + "HHSRSHEAT": "excess_heat", + "HHSRSASB": "asbestos_and_mm_fibres", + "HHSRSBIOC": "biocides", + "HHSRSLEAD": "lead", + "HHSRSRADIA": "radiation", + "HHSRSFUEL": "uncombusted_fuel_gas", + "HHSRSORGAN": "volatile_organic_compounds", + "HHSRSCROWD": "crowding_and_space", + "HHSRSENTRY": "entry_by_intruders", + "HHSRSLIGHT": "lighting", + "HHSRSNOISE": "noise", + "HHSRSDOMES": "domestic_hygiene_pests_and_refuse", + "HHSRSFOOD": "food_safety", + "HHSRSPERS": "personal_hygiene_sanitation_and_drainage", + "HHSRSWATER": "water_supply", + "HHSRSFBATH": "falls_associated_with_baths", + "HHSRSFLEVE": "falls_on_level_surfaces", + "HHSRSFSTAI": "falls_on_stairs_and_steps", + "HHSRSFBETW": "falls_between_levels", + "HHSRSELEC": "electrical_hazards", + "HHSRSFIRE": "fire", + "HHSRSFLAME": "flames_hot_surfaces_and_materials", + "HHSRSEXPLO": "explosions", + "HHSRSPOSI": "ergonomics", + "HHSRSSTRUC": "structural_collapse_and_falling_elements", + + # One-to-many expansions + "HHSRSCO": "carbon_monoxide", + "HHSRSSO2": "sulphur_dioxide_and_smoke", + "HHSRSNO2": "nitrogen_dioxide", + "HHSRSENTRP": "collision_and_entrapment", + "HHSRSCLOW": "collision_hazards_and_low_headroom", +} + CRITERION_B_VARIABLES = [ "external_walls_structure", "lintels", "brickwork_spalling", "wall_finish", "roof_structure", "roof_finish", "chimneys", "windows", "external_doors", "kitchens", "bathrooms", "central_heating_boiler", @@ -203,13 +243,16 @@ B_COMPONENT_LABELS = { "Store Door in External Area", ], "central_heating_boiler": [ - # TODO + # "Heating Improvement Required in Property", + "Boiler Fuel in Property", + "Type of Water Heating in Property", ], "heating_other": [ - # TODO + # "Heating Distribution System in Property" + "Boiler Fuel in Property", + "Type of Water Heating in Property", ], "electrical_systems": [ - # TODO "Electrics Required in Property", ], # Other components @@ -300,6 +343,8 @@ for fn in filenames: with 
open(os.path.join(folder, fn), "rb") as f: data = json.load(f) + today = pd.Timestamp.today().normalize() + property_info = data["property_info"] if property_info["PROP TYPE"] in ["HOU"]: property_type = "house" @@ -310,7 +355,6 @@ for fn in filenames: raise NotImplementedError("Unknown property type") # ---------------- Criterion A ---------------- - # TODO: Map out the sub-information # Critrion A: pass/fail # If fail, why? for hhsrs_variable, mapping in HHSRS_MAPPING.items(): @@ -331,115 +375,97 @@ for fn in filenames: else: raise ValueError("Unknown attribute code") check_pass.append(result) + append_result( + decent_homes_meta, + criteria="A", + variable=hhsrs_variable, + sub_variable=ELEMENT_CODE_TO_DESCRIPTION[element_code], + result=result, + install_date=None, + expiry_date=None, + ) # We check if we have a pass, fail or no_data - if all([x == "pass" for x in check_pass]): - hhsrs_result = "pass" - elif any([x == "fail" for x in check_pass]): - hhsrs_result = "fail" - elif any([x == "no_data" for x in check_pass]): - hhsrs_result = "no_data" - else: - raise NotImplementedError("Mixed results not implemented") - decent_homes_meta.append( - {"variable": hhsrs_variable, 'result': hhsrs_result, "hhsrs_rank": None, "hhsrs_score": None, - "install_date": None} - ) + # if all([x == "pass" for x in check_pass]): + # hhsrs_result = "pass" + # elif any([x == "fail" for x in check_pass]): + # hhsrs_result = "fail" + # elif any([x == "no_data" for x in check_pass]): + # hhsrs_result = "no_data" + # else: + # raise NotImplementedError("Mixed results not implemented") # ---------------- Criterion B ---------------- # Check each of the components - component_pass_or_fail = [] - # TODO: Delete me - component, labels = list(B_COMPONENT_LABELS.items())[9] - label = labels[0] - # TODO: need to handle the case where there is no survey data at all for a component + # ---------------- Criterion B ---------------- + property_boiler = get_element(data["elements"], "Boiler Fuel in 
Property") + for component, labels in B_COMPONENT_LABELS.items(): - # TODO: labels may not need to be multiple variables for label in labels: - # Grab the label label_data = get_element(data["elements"], label) + + # Handle no-data or not-applicable if label_data["ATTRIBUTE CODE"] in ["UNKNOWN", "NONE", "UNKNOWNG", "UNKNOWNS"]: - # This isn't applicable - component_pass_or_fail.append( - { - "component": component, - "label": label, - "install_date": None, - "remaining_life": None, - "is_old": False, - "has_failed": False, - "result": "pass", - "appliable": False - } - ) + # append_result( + # decent_homes_meta, + # criteria="B", + # variable=component, + # sub_variable=label, + # result="pass", + # install_date=None, + # expiry_date=None, + # ) continue - # 1) We check if the component is old + + # Special skip conditions for heating + no_boiler_condition = ( + property_boiler["ATTRIBUTE CODE"] in ["NONENOCH"] + and component == "central_heating_boiler" + ) + other_heating_condition = ( + label_data["ATTRIBUTE CODE"] in ["NONENOCH"] + and component == "heating_other" + ) + if no_boiler_condition or other_heating_condition: + # append_result( + # decent_homes_meta, + # criteria="B", + # variable=component, + # sub_variable=label, + # result="pass", + # install_date=None, + # expiry_date=None, + # ) + continue + + # Normal case: evaluate install date + lifetime + remaining life install_date = pd.to_datetime(label_data["INSTALL DATE"]) if pd.isnull(install_date): - raise ValueError("Missing install date - pls check") + raise ValueError(f"Missing install date for {component}/{label}") + component_lifetime = COMPONENT_LIFESPANS[component][property_type] - # This should be populated, and for the pilot it's okay if this errors if missing - we'll handle accordingly is_old = years_between(today.to_pydatetime(), install_date.to_pydatetime()) > component_lifetime - # 2) We check if the component is in poor condition + if pd.isnull(label_data["REMAINING LIFE"]): - raise 
ValueError("Missing remaining life - pls check") + raise ValueError(f"Missing remaining life for {component}/{label}") has_failed = label_data["REMAINING LIFE"] < 0 - # The component needs to have both failed and be old to fail criterion B + + expiry_date = install_date + pd.DateOffset(years=component_lifetime) component_result = "fail" if is_old and has_failed else "pass" - component_pass_or_fail.append( - { - "component": component, - "component_type": "key" if component in KEY_COMPONENTS else "other", - "component_sub_description": label_data["ATTRIBUTE CODE DESCRIPTION"], - "label": label, - "install_date": str(install_date), - "remaining_life": label_data["REMAINING LIFE"], - "is_old": is_old, - "has_failed": has_failed, - "result": component_result, - "appliable": True - } + + # Push into decent_homes_meta + append_result( + decent_homes_meta, + criteria="B", + variable=component, + sub_variable=label, + result=component_result, + install_date=str(install_date), + expiry_date=str(expiry_date), ) - # TODO: We need to check by component - # Example of a pass for a component - # [ - # {"component": "external_walls", "component_type": "key", "descr": "A", "result": "pass"}, - # {"component": "external_walls", "component_type": "key", "descr": "B", "result": "pass"}, - # {"component": "external_walls", "component_type": "key", "descr": "C", "result": "pass"}, - # ] - - # Example of a fail for a component - # [ - # {"component": "external_walls", "component_type": "key", "descr": "A", "result": "pass"}, - # {"component": "external_walls", "component_type": "key", "descr": "B", "result": "fail"}, - # {"component": "external_walls", "component_type": "key", "descr": "C", "result": "pass"}, - # ] - - # Example of a no data for a component - # [ - # {"component": "external_walls", "component_type": "key", "descr": "A", "result": "pass"}, - # {"component": "external_walls", "component_type": "key", "descr": "B", "result": "nodata", "appliable": True}, - # {"component": 
"external_walls", "component_type": "key", "descr": "C", "result": "pass"}, - # ] - # OR - # Everything is unknown - # [ - # {"component": "external_walls", "component_type": "key", "descr": "A", "result": "pass", "appliable": False}, - # {"component": "external_walls", "component_type": "key", "descr": "B", "result": "pass", "appliable": False}, - # {"component": "external_walls", "component_type": "key", "descr": "C", "result": "pass", "appliable": False}, - # ] - - # Component 1: pass/fail, key: true/False - # Component 2: pass/fail, key: true/False - # Component 3: pass/fail, key: true/False - # Component 4: pass/fail, key: true/False - # Component 4: pass/fail, key: true/False - # -> Decide on outcome. If failure of 1 key component -> fail criterion B, or 2 other components -> fail criterion B - # ---------------- Criterion C ---------------- - today = pd.Timestamp.today().normalize() # Guard: property type string already set earlier is_flat = (property_info["PROP TYPE"] == "FLA") @@ -456,8 +482,13 @@ for fn in filenames: else: raise NotImplementedError("Kitchen data missing - pls check") append_result( - decent_homes_meta, "kitchen_less_than_20_years_old", kitchen_age_result, - install_date=str(kit_install), expiry_date=str(kit_next_due) + decent_homes_meta, + criteria="C", + variable="kitchen_less_than_20_years_old", + sub_variable="kitchen_less_than_20_years_old", + result=kitchen_age_result, + install_date=str(kit_install), + expiry_date=str(kit_next_due) ) # 2) Kitchen adequate space/layout @@ -470,7 +501,13 @@ for fn in filenames: raise NotImplementedError("No other observed codes yet") else: raise NotImplementedError("Kitchen data missing - pls check") - append_result(decent_homes_meta, "kitchen_adequate_space_and_layout", kitchen_adequacy_result) + append_result( + decent_homes_meta, + criteria="C", + variable="kitchen_adequate_space_and_layout", + sub_variable="kitchen_adequate_space_and_layout", + result=kitchen_adequacy_result, + ) # 3) Bathroom age 
≤ 30 years bath = get_element(data["elements"], LABEL_BATHROOM) @@ -483,7 +520,13 @@ for fn in filenames: else: raise NotImplementedError("Bathroom data missing - pls check") append_result( - decent_homes_meta, "bathroom_less_than_30_years_old", bathroom_age_result, install_date=str(bth_install) + decent_homes_meta, + criteria="C", + variable="bathroom_less_than_30_years_old", + sub_variable="bathroom_less_than_30_years_old", + result=bathroom_age_result, + install_date=str(bth_install), + expiry_date=bth_next_due ) # 4) Bathroom/WC appropriately located @@ -496,7 +539,13 @@ for fn in filenames: else: raise NotImplementedError("Bathroom data missing - pls check") - append_result(decent_homes_meta, "bathroom_wc_appropriately_located", bathroom_location_result) + append_result( + decent_homes_meta, + criteria="C", + variable="bathroom_wc_appropriately_located", + sub_variable="bathroom_wc_appropriately_located", + result=bathroom_location_result + ) # 5) Adequate external noise insulation noise = get_element(data["elements"], LABEL_NOISE) @@ -508,7 +557,13 @@ for fn in filenames: raise NotImplementedError("No other observed codes yet") else: raise NotImplementedError("Noise insulation data missing - pls check") - append_result(decent_homes_meta, "adequate_external_noise_insulation", noise_result) + append_result( + decent_homes_meta, + criteria="C", + variable="adequate_external_noise_insulation", + sub_variable="adequate_external_noise_insulation", + result=noise_result + ) # 6) Adequate common entrance areas (flats only) if is_flat: @@ -535,7 +590,13 @@ for fn in filenames: else: raise NotImplementedError("Heating element missing in dataset") - append_result(decent_homes_meta, "efficient_heating_system_type", heating_type_result) + append_result( + decent_homes_meta, + criteria="D", + variable="efficient_heating_system_type", + sub_variable="efficient_heating_system_type", + result=heating_type_result + ) # heating distribution heating_dist = 
get_element(data["elements"], "Heating Distribution System in Property") @@ -550,7 +611,13 @@ for fn in filenames: else: raise NotImplementedError("Heating distribution element missing in dataset") - append_result(decent_homes_meta, "efficient_heating_distribution", heating_dist_result) + append_result( + decent_homes_meta, + criteria="D", + variable="efficient_heating_distribution", + sub_variable="efficient_heating_distribution", + result=heating_dist_result + ) # insulation loft = get_element(data["elements"], "Size in mm of Loft Insulation Thickness in Property") @@ -570,7 +637,13 @@ for fn in filenames: raise NotImplementedError("Unknown loft insulation code - pls check") else: raise NotImplementedError("Loft insulation data missing - pls check") - append_result(decent_homes_meta, "loft_insulation_sufficient", loft_result) + append_result( + decent_homes_meta, + criteria="D", + variable="loft_insulation_sufficient", + sub_variable="loft_insulation_sufficient", + result=loft_result + ) # Wall insulation check if wall: @@ -581,7 +654,13 @@ for fn in filenames: raise NotImplementedError("No other observed codes yet") else: raise NotImplementedError("Wall insulation data missing - pls check") - append_result(decent_homes_meta, "wall_insulation_sufficient", wall_result) + append_result( + decent_homes_meta, + criteria="D", + variable="wall_insulation_sufficient", + sub_variable="wall_insulation_sufficient", + result=wall_result + ) # ---------------- Criterion A overall ---------------- a_vars = set(HHSRS_MAPPING.keys()) @@ -596,6 +675,38 @@ for fn in filenames: # ---------------- Criterion B overall ---------------- + component_results = {} + + for component in B_COMPONENT_LABELS.keys(): + comp_rows = [r for r in decent_homes_meta if + r["criteria"] == "B" and r["variable"] == component and r["sub_variable"] is not None] + comp_sub_results = [r["result"] for r in comp_rows] + + if not comp_sub_results: # no rows at all + comp_result = "no_data" + elif any(r == 
"fail" for r in comp_sub_results): + comp_result = "fail" + elif all(r == "pass" for r in comp_sub_results if r != "no_data"): + comp_result = "pass" + elif all(r == "no_data" for r in comp_sub_results): + comp_result = "no_data" + else: + comp_result = "no_data" + + component_results[component] = comp_result + + key_fails = [c for c, r in component_results.items() if c in KEY_COMPONENTS and r == "fail"] + other_fails = [c for c, r in component_results.items() if c in OTHER_COMPONENTS and r == "fail"] + + if key_fails: + criterion_b_result = "fail" + elif len(other_fails) >= 2: + criterion_b_result = "fail" + elif all(r == "no_data" for r in component_results.values()): + criterion_b_result = "no_data" + else: + criterion_b_result = "pass" + # ---------------- Criterion C overall ---------------- criterion_c_vars = [ "kitchen_less_than_20_years_old", @@ -635,7 +746,7 @@ for fn in filenames: "uprn": property_info.get("UPRN"), # TODO: Need UPRN "creation_date": datetime.now().date().isoformat(), "criterion_a": criterion_a_result, - "criterion_b": None, # not yet implemented + "criterion_b": criterion_b_result, "criterion_c": criterion_c_result, "criterion_d": criterion_d_result, "decent_homes": ( From e410e8d9c862e1d46ec3270399af97a833f970dd Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 22 Sep 2025 22:36:47 +0100 Subject: [PATCH 06/23] minor tidy --- etl/customers/waltham_forest/decent_homes_pilot.py | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/etl/customers/waltham_forest/decent_homes_pilot.py b/etl/customers/waltham_forest/decent_homes_pilot.py index 33836236..0c7ea98f 100644 --- a/etl/customers/waltham_forest/decent_homes_pilot.py +++ b/etl/customers/waltham_forest/decent_homes_pilot.py @@ -1,12 +1,8 @@ import json import os - import pandas as pd - from datetime import datetime -from docutils.nodes import table - def years_between(d1, d2): # precise year difference (accounts for months/days) @@ -35,15 +31,6 @@ def 
append_result(decent_homes_meta, criteria, variable, sub_variable, result, i folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Waltham Forest/Decent Homes Pilot" filenames = ["flat 1.json", "house 1.json"] -houses_waltham_forest_data = pd.read_excel( - os.path.join(folder, "LBWF - Example Asset Data September 2025.xlsx"), - sheet_name="Houses Asset Data" -) -flats_waltham_forest_data = pd.read_excel( - os.path.join(folder, "LBWF - Example Asset Data September 2025.xlsx"), - sheet_name="CHINGFORD ROAD 236-254 Asset Bl" -) - # Standardised variables which will form the enums in the db HHSRS_VARIABLES = [ "damp_and_mould_growth", From 754644a8574827f4cf318b5fc2e6faa495fda2fb Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 24 Sep 2025 00:26:46 +0100 Subject: [PATCH 07/23] minor bug fix with solar --- backend/apis/GoogleSolarApi.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/backend/apis/GoogleSolarApi.py b/backend/apis/GoogleSolarApi.py index 532afec0..a8982061 100644 --- a/backend/apis/GoogleSolarApi.py +++ b/backend/apis/GoogleSolarApi.py @@ -854,18 +854,21 @@ class GoogleSolarApi: ): continue + solar_api_client = cls(api_key=google_solar_api_key, solar_materials=solar_materials) + if unit["longitude"] is None or unit["latitude"] is None: # At this point, we've checked that solar PV is valid, and so we provide some defaults property_instance.set_solar_panel_configuration( solar_panel_configuration={ "insights_data": None, - "panel_performance": cls.default_panel_performance(property_instance=property_instance), + "panel_performance": solar_api_client.default_panel_performance( + property_instance=property_instance + ), "unit_share_of_energy": 1 }, ) continue - solar_api_client = cls(api_key=google_solar_api_key, solar_materials=solar_materials) solar_api_client.get( longitude=unit["longitude"], latitude=unit["latitude"], From 7c5c7ceb0cdc989237d95be4f1dfa96ca7f4277f Mon Sep 17 00:00:00 2001 From: Khalim 
Conn-Kowlessar Date: Sun, 28 Sep 2025 15:34:05 +0100 Subject: [PATCH 08/23] added better logging on dupes and adding new types to material table --- backend/Funding.py | 5 +++++ backend/app/db/models/materials.py | 2 ++ backend/engine/engine.py | 5 ++++- 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/backend/Funding.py b/backend/Funding.py index 33c94e11..d590474c 100644 --- a/backend/Funding.py +++ b/backend/Funding.py @@ -578,6 +578,11 @@ class Funding: return pps.squeeze()["Cost Savings"] if measure_type == "flat_roof_insulation": + + # Not funding for properties starting at C or above + if self.starting_sap_band in ["Low_C", "High_C", "Low_B", "High_B", "Low_A", "High_A"]: + return 0 + pps = filtered_pps_matrix[filtered_pps_matrix["Measure_Type"] == "FRI"] if pps.shape[0] != 1: raise ValueError("Invalid FRI category") diff --git a/backend/app/db/models/materials.py b/backend/app/db/models/materials.py index 617ea0ac..347b66d5 100644 --- a/backend/app/db/models/materials.py +++ b/backend/app/db/models/materials.py @@ -45,6 +45,8 @@ class MaterialType(enum.Enum): scaffolding = "scaffolding" high_heat_retention_storage_heaters = "high_heat_retention_storage_heaters" sealing_fireplace = "sealing_fireplace" + roomstat_programmer_trvs = "roomstat_programmer_trvs" + time_temperature_zone_control = "time_temperature_zone_control" class DepthUnit(enum.Enum): diff --git a/backend/engine/engine.py b/backend/engine/engine.py index cc17222f..f4bffb17 100644 --- a/backend/engine/engine.py +++ b/backend/engine/engine.py @@ -534,7 +534,10 @@ async def model_engine(body: PlanTriggerRequest): if input_uprns: # Check for dupes if len(input_uprns) != len(set(input_uprns)): - raise ValueError("Duplicate UPRNs in the input data") + # Find the duplicate UPRNs + duplicates = set([x for x in input_uprns if input_uprns.count(x) > 1]) + # de-dupe input_uprns + raise ValueError(f"Duplicate UPRNs in the input data: {duplicates}") # If we have patches or overrides, we should 
read them in here patches, already_installed, non_invasive_recommendations, valuation_data = get_request_property_data(body) From 9f92e856d3bbb4f0f310126fb56febb98e28f587 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 1 Oct 2025 16:02:04 +0100 Subject: [PATCH 09/23] set up of AWS SES --- infrastructure/terraform/main.tf | 13 ++++ infrastructure/terraform/modules/ses/main.tf | 50 ++++++++++++++ .../terraform/modules/ses/outputs.tf | 66 +++++++++++++++++++ .../terraform/modules/ses/variables.tf | 9 +++ 4 files changed, 138 insertions(+) create mode 100644 infrastructure/terraform/modules/ses/main.tf create mode 100644 infrastructure/terraform/modules/ses/outputs.tf create mode 100644 infrastructure/terraform/modules/ses/variables.tf diff --git a/infrastructure/terraform/main.tf b/infrastructure/terraform/main.tf index 10ef31c2..c2840d62 100644 --- a/infrastructure/terraform/main.tf +++ b/infrastructure/terraform/main.tf @@ -261,4 +261,17 @@ module "cloudfront_distribution" { bucket_arn = module.s3.bucket_arn bucket_domain_name = module.s3.bucket_domain_name stage = var.stage +} + +################################################ +# SES - Email sending +################################################ +module "ses" { + source = "./modules/ses" + domain_name = "domna.homes" + stage = var.stage +} + +output "ses_dns_records" { + value = module.ses.dns_records } \ No newline at end of file diff --git a/infrastructure/terraform/modules/ses/main.tf b/infrastructure/terraform/modules/ses/main.tf new file mode 100644 index 00000000..e8f183ae --- /dev/null +++ b/infrastructure/terraform/modules/ses/main.tf @@ -0,0 +1,50 @@ +resource "aws_ses_domain_identity" "this" { + domain = var.domain_name +} + +# DKIM signing +resource "aws_ses_domain_dkim" "this" { + domain = aws_ses_domain_identity.this.domain +} + +# IAM user for SES SMTP +resource "aws_iam_user" "ses_user" { + name = "${var.stage}-ses-user" +} + +resource "aws_iam_user_policy" "ses_send_policy" { + 
name = "AllowSESSendEmail" + user = aws_iam_user.ses_user.name + + policy = jsonencode({ + Version = "2012-10-17" + Statement = [ + { + Effect = "Allow" + Action = [ + "ses:SendEmail", + "ses:SendRawEmail" + ] + Resource = "*" + } + ] + }) +} + +resource "aws_iam_access_key" "ses_user" { + user = aws_iam_user.ses_user.name +} + +# Store SMTP credentials in AWS Secrets Manager +resource "aws_secretsmanager_secret" "ses_smtp" { + name = "${var.stage}/ses/smtp_credentials" + description = "SMTP credentials for SES (${var.stage})" +} + +resource "aws_secretsmanager_secret_version" "ses_smtp" { + secret_id = aws_secretsmanager_secret.ses_smtp.id + secret_string = jsonencode({ + username = aws_iam_access_key.ses_user.id + password = aws_iam_access_key.ses_user.ses_smtp_password_v4 + }) +} \ No newline at end of file diff --git a/infrastructure/terraform/modules/ses/outputs.tf b/infrastructure/terraform/modules/ses/outputs.tf new file mode 100644 index 00000000..de708983 --- /dev/null +++ b/infrastructure/terraform/modules/ses/outputs.tf @@ -0,0 +1,66 @@ +# These are our DNS records that will need to be added to our Krystal account + +# TXT record +output "verification_record" { + description = "TXT record required to verify the domain with SES" + value = { + name = "_amazonses.${aws_ses_domain_identity.this.domain}" + type = "TXT" + value = aws_ses_domain_identity.this.verification_token + } +} + +# DKIM CNAME records +output "dkim_records" { + description = "CNAME records required to enable DKIM for SES" + value = [ + for dkim in aws_ses_domain_dkim.this.dkim_tokens : { + name = "${dkim}._domainkey.${aws_ses_domain_identity.this.domain}" + type = "CNAME" + value = "${dkim}.dkim.amazonses.com" + } + ] +} + +# SMTP credentials - send them to secrets manager +output "ses_smtp_secret_arn" { + description = "ARN of the SES SMTP credentials stored in Secrets Manager" + value = aws_secretsmanager_secret.ses_smtp.arn +} + +output "smtp_password" { + value = 
aws_iam_access_key.ses_user.ses_smtp_password_v4 + sensitive = true + description = "SMTP password for SES" +} + +output "dns_records" { + description = "All DNS records required for SES verification and recommended deliverability" + value = concat( + [ + { + name = "_amazonses.${aws_ses_domain_identity.this.domain}" + type = "TXT" + value = aws_ses_domain_identity.this.verification_token + }, + { + name = var.domain_name + type = "TXT" + value = "v=spf1 include:amazonses.com -all" + }, + { + name = "_dmarc.${var.domain_name}" + type = "TXT" + value = "v=DMARC1; p=quarantine; rua=mailto:postmaster@${var.domain_name}" + } + ], + [ + for dkim in aws_ses_domain_dkim.this.dkim_tokens : { + name = "${dkim}._domainkey.${aws_ses_domain_identity.this.domain}" + type = "CNAME" + value = "${dkim}.dkim.amazonses.com" + } + ] + ) +} + diff --git a/infrastructure/terraform/modules/ses/variables.tf b/infrastructure/terraform/modules/ses/variables.tf new file mode 100644 index 00000000..d8c97d6d --- /dev/null +++ b/infrastructure/terraform/modules/ses/variables.tf @@ -0,0 +1,9 @@ +variable "domain_name" { + description = "The domain to verify with SES (e.g. domna.homes)" + type = string +} + +variable "stage" { + description = "Deployment stage (e.g. 
dev, prod)" + type = string +} From 930a5d83985502dfd10aee1ec5e8b531d38c4c62 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 1 Oct 2025 16:07:49 +0100 Subject: [PATCH 10/23] use 14.17 verion for rds --- infrastructure/terraform/main.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/infrastructure/terraform/main.tf b/infrastructure/terraform/main.tf index c2840d62..f345c985 100644 --- a/infrastructure/terraform/main.tf +++ b/infrastructure/terraform/main.tf @@ -66,7 +66,7 @@ resource "aws_security_group" "allow_db" { resource "aws_db_instance" "default" { allocated_storage = var.allocated_storage engine = "postgres" - engine_version = "14.13" + engine_version = "14.17" instance_class = var.instance_class db_name = var.database_name username = jsondecode(data.aws_secretsmanager_secret_version.db_credentials.secret_string)["db_assessment_model_username"] From aa03ef1b0f4ce0162a383304d79b14d9629e4954 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 10 Oct 2025 15:59:38 +0100 Subject: [PATCH 11/23] handle case of property already EPC C or above for funding --- recommendations/optimiser/funding_optimiser.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/recommendations/optimiser/funding_optimiser.py b/recommendations/optimiser/funding_optimiser.py index 03824ea0..73475fc0 100644 --- a/recommendations/optimiser/funding_optimiser.py +++ b/recommendations/optimiser/funding_optimiser.py @@ -231,8 +231,8 @@ def optimise_with_funding_paths(p, input_measures, housing_type, funding: Fundin # We now produce a fabric only path for ECO4 # We add in generic insulation funding paths (where there is no fixed measure) # Heating controls are only eligible if installed as part of a heating upgrade and so we do not include them - # here - if housing_type == "Social": + # here. 
We don't have an option if the property is a C or above + if housing_type == "Social" and p.data["current-energy-rating"] not in ["C", "B", "A"]: funding_paths = ( [ { @@ -301,7 +301,6 @@ def optimise_with_funding_paths(p, input_measures, housing_type, funding: Fundin # We log an error and skip this - we should not see any errors but we can probably get a reasonable # outcome for the end user without a complete termination of the process logger.error("Skipping fixed selection due to minimum insulation violation: %s", fixed) - blah continue scheme = _path_scheme(path_spec) @@ -829,6 +828,11 @@ def make_funding_paths(p, input_measures, housing_type, funding: Funding): :param funding: The funding object that provides methods to check eligibility and calculate funding. :return: """ + + # If the property is currently EPC C, there is no funding availability + if p.data["current-energy-rating"] in ["C", "B", "A"]: + return [], input_measures + # We handle the case of minimum insulation requirements. Whenever we have a heating system recommendation, # we *must* include an additional insulation measure, unless the property already has sufficient insulation. 
From 570d4630971ab772c274baba125ceac9442b5385 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 10 Oct 2025 18:55:02 +0100 Subject: [PATCH 12/23] allow for no build form --- backend/engine/engine.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/backend/engine/engine.py b/backend/engine/engine.py index f4bffb17..fa1f191c 100644 --- a/backend/engine/engine.py +++ b/backend/engine/engine.py @@ -484,12 +484,19 @@ async def model_engine(body: PlanTriggerRequest): plan_input["uprn"] = np.where(plan_input["estimated"].isin([1, True]), None, plan_input["uprn"]) # We handle the landlord property type and built form plan_input["property_type"] = plan_input["landlord_property_type"].copy() - plan_input["built_form"] = plan_input["landlord_built_form"].copy() + if "landlord_built_form" in plan_input.columns: + plan_input["built_form"] = plan_input["landlord_built_form"].copy() + else: + plan_input["built_form"] = None plan_input["property_type"] = np.where( plan_input["property_type"] == "unknown", plan_input["epc_property_type"], plan_input["property_type"] ) + + if "epc_archetype" not in plan_input.columns: + plan_input["epc_archetype"] = None + plan_input["built_form"] = np.where( plan_input["built_form"] == "unknown", plan_input["epc_archetype"], plan_input["built_form"] ) From e5272e2e64b97528f9c1ec8631ccdba468142c0f Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 13 Oct 2025 18:36:35 +0100 Subject: [PATCH 13/23] SAL for hyde --- .idea/Model.iml | 2 +- .idea/misc.xml | 2 +- asset_list/AssetList.py | 22 +++++++-- asset_list/app.py | 68 ++++++++++++++++++++++++++ asset_list/mappings/built_form.py | 20 +++++++- asset_list/mappings/heating_systems.py | 19 ++++++- asset_list/mappings/property_type.py | 20 +++++++- asset_list/mappings/roof.py | 27 +++++++++- asset_list/mappings/walls.py | 3 ++ 9 files changed, 174 insertions(+), 9 deletions(-) diff --git a/.idea/Model.iml b/.idea/Model.iml index c6561970..09f2e496 100644 
--- a/.idea/Model.iml +++ b/.idea/Model.iml @@ -7,7 +7,7 @@ - + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml index 50cad4ca..fb10c6b0 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -3,7 +3,7 @@ - + diff --git a/asset_list/AssetList.py b/asset_list/AssetList.py index dce929ae..da20432b 100644 --- a/asset_list/AssetList.py +++ b/asset_list/AssetList.py @@ -309,6 +309,17 @@ class AssetList: 'NAME OF SURVEYOR' ] + # Solar non-intrusive fields + NON_INTRUSIVES_SOLAR_COLNAMES = [ + 'PV, ACCESS ISSUE, SEE NOTES', 'ROOF ORIENTATION', + 'AREA (m²) OF ROOF WHERE PV WILL BE SITUATED ', 'SHADING', + 'Roof Tiles - CONCRETE/SLATE/ROSEMARY', + 'NO. OF PANELS (Typical size of 420W panel is 1mx1.7m and need 30cm all the way around panels)', + 'SCAFFOLD REQUIRED? IF YES, ARE THERE ANY SURROUNDING ACCESS ISSUES - PLEASE DESCRIBE', + 'IF PANELS ARE GOING ON REAR PLEASE CHECK FOR SPACE FOR SCAFFOLDING - DESCRIBE ANY ISSUES BELOW', + 'DATE', 'NAME OF SURVEYOR' + ] + NON_INTRUSIVES_ELIGIBILITY_COLUMN = "Eligibility (Red/Yellow/Green)" OLD_FORMAT_NON_INTRUSIVE_COLNAMES = ['WFT Findings', 'ECO Eligibility'] @@ -461,6 +472,8 @@ class AssetList: self.new_format_non_insturives_present_v2 = 'TILE HUNG' in self.raw_asset_list.columns + self.solar_non_intrusives_present = "AREA (m²) OF ROOF WHERE PV WILL BE SITUATED" in self.raw_asset_list.columns + # Names of columns self.landlord_property_id = landlord_property_id self.address1_colname = address1_colname @@ -774,6 +787,9 @@ class AssetList: if self.new_format_non_insturives_present_v2: non_intrusive_columns += self.NON_INTRUSIVES_NEW_FORMAT_COLNAMES_V2 + if self.solar_non_intrusives_present: + non_intrusive_columns += self.NON_INTRUSIVES_SOLAR_COLNAMES + if self.old_format_non_intrusives_present: # We check if we have the ECO Eligibility column, which we might not have non_intrusive_columns = [ @@ -946,7 +962,7 @@ class AssetList: if self.phase: # We filter on just the properties that have had an inspection - 
if self.new_format_non_insturives_present_v2: + if self.new_format_non_insturives_present_v2 or self.solar_non_intrusives_present: self.standardised_asset_list = self.standardised_asset_list[ ~self.standardised_asset_list['NAME OF SURVEYOR'].isin( ["YET TO BE SURVEYED", "", None] @@ -1341,10 +1357,10 @@ class AssetList: # for identifying cavity jobs if self.non_intrusives_present and not self.old_format_non_intrusives_present: - if self.new_format_non_insturives_present_v2: + if self.new_format_non_insturives_present_v2 or self.solar_non_intrusives_present: existing_solar_non_intrusives_check = ( self.standardised_asset_list["non-intrusives: ROOF ORIENTATION"].str.strip().isin( - ["ALREADY HAS SOLAR PV"] + ["ALREADY HAS SOLAR PV", "ALREADY HAS PV"] ) ) else: diff --git a/asset_list/app.py b/asset_list/app.py index 833050fb..2903e083 100644 --- a/asset_list/app.py +++ b/asset_list/app.py @@ -59,6 +59,74 @@ def app(): Property UPRN """ + # Hyde - solar + data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Hyde/Solar" + data_filename = "Domna Property Analysis HYDE (Chichester Removed).xlsx" + sheet_name = "Electric Property Inspections" + postcode_column = 'Postcode' + address1_column = None # Is only patchily populated so we create it + address1_method = 'house_number_extraction' + fulladdress_column = "Address" + address_cols_to_concat = [] + missing_postcodes_method = None + landlord_year_built = None + landlord_os_uprn = None + landlord_property_type = "Property Type" + landlord_built_form = "Property Type" + landlord_wall_construction = "Walls " + landlord_roof_construction = "Roofs" + landlord_heating_system = "Heating" + landlord_existing_pv = None + landlord_property_id = "Address ID" + landlord_sap = "SAP" + outcomes_filename = None + outcomes_sheetname = None + outcomes_postcode = None + outcomes_houseno = None + outcomes_id = None + outcomes_address = None + master_filepaths = [] + master_id_colnames = [] + master_to_asset_list_filepath = 
None + phase = True + ecosurv_landlords = None + asset_list_header = 0 + landlord_block_reference = None + + # Hyde cavity + data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Hyde/Cavity" + data_filename = "Domna Property Analysis HYDE (Chichester Removed).xlsx" + sheet_name = "Cavity Inspections" + postcode_column = 'Postcode' + address1_column = None # Is only patchily populated so we create it + address1_method = 'house_number_extraction' + fulladdress_column = "Address" + address_cols_to_concat = [] + missing_postcodes_method = None + landlord_year_built = None + landlord_os_uprn = None + landlord_property_type = "Property Type" + landlord_built_form = "Property Type" + landlord_wall_construction = "Walls " + landlord_roof_construction = "Roofs" + landlord_heating_system = "Heating" + landlord_existing_pv = None + landlord_property_id = "Address ID" + landlord_sap = "SAP" + outcomes_filename = None + outcomes_sheetname = None + outcomes_postcode = None + outcomes_houseno = None + outcomes_id = None + outcomes_address = None + master_filepaths = [] + master_id_colnames = [] + master_to_asset_list_filepath = None + phase = True + ecosurv_landlords = None + asset_list_header = 0 + landlord_block_reference = None + # CDS - Sept 2025 data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/CDS/September 2025 Programme" data_filename = "Founder Estates CDS.xlsx" diff --git a/asset_list/mappings/built_form.py b/asset_list/mappings/built_form.py index bdd82883..b02b8810 100644 --- a/asset_list/mappings/built_form.py +++ b/asset_list/mappings/built_form.py @@ -439,5 +439,23 @@ BUILT_FORM_MAPPINGS = { 'Chalet - Wheelchair': 'unknown', 'Studio Flat': 'unknown', 'Bungalow - Attached': 'semi-detached', - 'ND': 'unknown' + 'ND': 'unknown', + + 'Maisonette: Mid Terrace: Mid Floor': 'mid-floor', + 'Maisonette: Semi Detached: Ground Floor': 'semi-detached', + 'Maisonette: Enclosed Mid Terrace: Ground Floor': 'enclosed mid-terrace', + 
'Maisonette: Enclosed End Terrace: Ground Floor': 'end-terrace', + 'Maisonette: Mid Terrace: Ground Floor': 'mid-terrace', + 'Flat: Semi Detached: Basement': 'semi-detached', + 'Maisonette: Semi Detached: Top Floor': 'semi-detached', + 'Maisonette: Enclosed Mid Terrace: Mid Floor': 'enclosed mid-terrace', + 'Flat: Detached: Basement': 'detached', + 'Maisonette: Enclosed Mid Terrace: Top Floor': 'enclosed mid-terrace', + + 'Maisonette: End Terrace: Top Floor': 'top-floor', + 'House: Mid Terrace: Ground Floor': 'ground floor', + 'Maisonette: Semi Detached: Mid Floor': 'detached', + 'Maisonette: Detached: Mid Floor': 'detached', + 'Bungalow: EnclosedMidTerrace': 'enclosed mid-terrace' + } diff --git a/asset_list/mappings/heating_systems.py b/asset_list/mappings/heating_systems.py index 4ab8ca72..ffd1b198 100644 --- a/asset_list/mappings/heating_systems.py +++ b/asset_list/mappings/heating_systems.py @@ -477,6 +477,23 @@ HEATING_MAPPINGS = { 'Heat networks Heat networks (mains gas)': 'communal heating', 'ND Oil': 'oil fuel', - 'Boiler Biofuel': 'boiler - other fuel' + 'Boiler Biofuel': 'boiler - other fuel', + + 'Electric (direct acting) room heaters: Water- or oil-filled radiators': 'room heaters', + 'Other: Electric ceiling heating': 'electric ceiling', + 'Heat Pump: Electric Heat pumps: Air source heat pump with flow temperature <= 35°C': 'air source heat pump', + 'Oil room heaters: Room heater, 2000 or later': 'room heaters', + 'Electric Underfloor Heating: In screed above insulation (standard or off peak)': 'electric underfloor', + 'Heat Pump: Electric Heat pumps: Air source heat pump in other cases': 'air source heat pump', + 'Electric Storage Systems: Old (large volume) storage heaters': 'electric storage heaters', + + 'Gas (including LPG) room heaters: Condensing gas fire': 'room heaters', + 'Solid fuel room heaters: Open fire in grate': 'solid fuel', + 'Solid fuel room heaters: Open fire with back boiler (no radiators)': 'solid fuel', + 'Community Heating 
Systems: Community heat pump (RdSAP)': 'communal heating', + 'Gas (including LPG) room heaters: Gas fire, open flue, 1980 or later (open fronted), sitting proud of, ' + 'and sealed to, fireplace opening': 'room heaters', + 'Boiler: A rated Regular Boiler, System 2: Boiler: C rated Regular Boiler': 'boiler - other fuel', + 'Boiler: G rated Combi': 'gas condensing combi' } diff --git a/asset_list/mappings/property_type.py b/asset_list/mappings/property_type.py index 290e172a..88ec2934 100644 --- a/asset_list/mappings/property_type.py +++ b/asset_list/mappings/property_type.py @@ -343,5 +343,23 @@ PROPERTY_MAPPING = { 'bungalow': 'bungalow', 'flat': 'flat', 'FLA': 'flat', - 'HOU': 'house' + 'HOU': 'house', + + 'Maisonette: Mid Terrace: Mid Floor': 'maisonette', + 'Maisonette: Semi Detached: Ground Floor': 'maisonette', + 'Maisonette: Enclosed Mid Terrace: Ground Floor': 'maisonette', + 'Maisonette: Enclosed End Terrace: Ground Floor': 'maisonette', + 'Maisonette: Mid Terrace: Ground Floor': 'maisonette', + 'Flat: Semi Detached: Basement': 'flat', + 'Maisonette: Semi Detached: Top Floor': 'maisonette', + 'Maisonette: Enclosed Mid Terrace: Mid Floor': 'maisonette', + 'Flat: Detached: Basement': 'flat', + 'Maisonette: Enclosed Mid Terrace: Top Floor': 'maisonette', + + 'Maisonette: End Terrace: Top Floor': 'maisonette', + 'House: Mid Terrace: Ground Floor': 'house', + 'Bungalow: EnclosedMidTerrace': 'bungalow', + 'Maisonette: Semi Detached: Mid Floor': 'maisonette', + 'Maisonette: Detached: Mid Floor': 'maisonette' + } diff --git a/asset_list/mappings/roof.py b/asset_list/mappings/roof.py index 8ac926c0..0857b046 100644 --- a/asset_list/mappings/roof.py +++ b/asset_list/mappings/roof.py @@ -275,5 +275,30 @@ ROOF_CONSTRUCTION_MAPPINGS = { 'Pitched (vaulted ceiling) Non-joist': 'pitched unknown insulation', 'ND (inferred) ND (inferred)': 'unknown', 'Flat Non-joist': 'flat insulated', - 'Same dwelling above N/A': 'another dwelling above' + 'Same dwelling above N/A': 
'another dwelling above', + + 'Flat: As Built, PitchedNormalLoftAccess: Unknown': 'flat unknown insulation', + 'PitchedNormalLoftAccess: Unknown, PitchedNormalNoLoftAccess: Unknown': 'pitched unknown insulation', + 'PitchedNormalLoftAccess: 400mm+': 'pitched insulated', + 'AnotherDwellingAbove: 150mm': 'another dwelling above', + 'Flat: 150mm': 'flat insulated', + 'AnotherDwellingAbove: 50mm': 'another dwelling above', + 'PitchedNormalNoLoftAccess: As Built': 'pitched no access to loft', + 'PitchedNormalLoftAccess: 250mm, PitchedWithSlopingCeiling: As Built': 'pitched insulated', + 'PitchedNormalLoftAccess: 200mm, PitchedWithSlopingCeiling: As Built': 'pitched insulated', + 'PitchedNormalLoftAccess: 350mm': 'pitched insulated', + 'PitchedNormalNoLoftAccess: 270mm': 'pitched no access to loft', + 'AnotherDwellingAbove: 100mm': 'another dwelling above', + + 'PitchedWithSlopingCeiling: Unknown': 'piched unknown insulation', + 'AnotherDwellingAbove: Unknown, Flat: As Built': 'another dwelling above', + 'Flat: Unknown, PitchedNormalLoftAccess: 25mm': 'flat unknown insulation', + 'SameDwellingAbove: Unknown': 'another dwelling above', + 'Flat: Unknown': 'flat unknown insulation', + 'Flat: 50mm, PitchedNormalLoftAccess: 100mm': 'flat insulated', + 'Flat: As Built, PitchedNormalLoftAccess: 250mm, PitchedWithSlopingCeiling: As Built': 'flat unknown insulation', + 'Flat: As Built, PitchedNormalLoftAccess: 400mm+': 'flat unknown insulation', + 'PitchedWithSlopingCeiling: As Built': 'pitched insulated', + 'PitchedNormalLoftAccess: As Built': 'pitched unknown insulation', + } diff --git a/asset_list/mappings/walls.py b/asset_list/mappings/walls.py index 73db586e..418ae9f8 100644 --- a/asset_list/mappings/walls.py +++ b/asset_list/mappings/walls.py @@ -353,4 +353,7 @@ WALL_CONSTRUCTION_MAPPINGS = { 'System built As-built': "uninsulated system built", 'System built Internal': 'insulated system built', + 'Cavity: AsBuilt (1976-1982), TimberFrame: AsBuilt': 'cavity unknown 
insulation', + 'Cavity: FilledCavityPlusExternal': 'filled cavity' + } From 93723697a18aeed93ef9d784fae9fff477cf62e8 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 27 Oct 2025 15:27:32 +0000 Subject: [PATCH 14/23] allow no valuation and work with relative --- .idea/Model.iml | 2 +- .idea/misc.xml | 2 +- asset_list/app.py | 110 ++++++++++++++++++++++++++- backend/SearchEpc.py | 5 +- backend/engine/engine.py | 46 +++++++++--- backend/ml_models/Valuation.py | 15 +++- etl/webscrape/Zoopla.py | 133 +++++++++++++++++++++++++-------- etl/webscrape/requirements.txt | 5 ++ 8 files changed, 265 insertions(+), 53 deletions(-) create mode 100644 etl/webscrape/requirements.txt diff --git a/.idea/Model.iml b/.idea/Model.iml index 09f2e496..c6561970 100644 --- a/.idea/Model.iml +++ b/.idea/Model.iml @@ -7,7 +7,7 @@ - + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml index fb10c6b0..50cad4ca 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -3,7 +3,7 @@ - + diff --git a/asset_list/app.py b/asset_list/app.py index 2903e083..20cf04f1 100644 --- a/asset_list/app.py +++ b/asset_list/app.py @@ -59,9 +59,111 @@ def app(): Property UPRN """ + # + data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Cambridge/" + data_filename = "22.10_Cambridge_west addresses.xlsx" + sheet_name = "Asset List" + postcode_column = 'Postcode' + address1_column = None + address1_method = "house_number_extraction" + fulladdress_column = "Full Address" + address_cols_to_concat = [] + missing_postcodes_method = None + landlord_year_built = None + landlord_os_uprn = None + landlord_property_type = None + landlord_built_form = None + landlord_wall_construction = None + landlord_roof_construction = None + landlord_heating_system = None + landlord_existing_pv = None + landlord_property_id = "id" + landlord_sap = None + outcomes_filename = None + outcomes_sheetname = None + outcomes_postcode = None + outcomes_houseno = None + outcomes_id = None + 
outcomes_address = None + master_filepaths = [] + master_id_colnames = [] + master_to_asset_list_filepath = None + phase = False + ecosurv_landlords = None + asset_list_header = 0 + landlord_block_reference = None + + # Property Box + data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/NRLA/Property Box" + data_filename = "Property Box Finance Portfolio.xlsx" + sheet_name = "Sheet1" + postcode_column = 'Postcode' + address1_column = None + address1_method = "house_number_extraction" + fulladdress_column = "Address 1" + address_cols_to_concat = [] + missing_postcodes_method = None + landlord_year_built = None + landlord_os_uprn = None + landlord_property_type = None + landlord_built_form = None + landlord_wall_construction = None + landlord_roof_construction = None + landlord_heating_system = None + landlord_existing_pv = None + landlord_property_id = "row_id" + landlord_sap = None + outcomes_filename = None + outcomes_sheetname = None + outcomes_postcode = None + outcomes_houseno = None + outcomes_id = None + outcomes_address = None + master_filepaths = [] + master_id_colnames = [] + master_to_asset_list_filepath = None + phase = False + ecosurv_landlords = None + asset_list_header = 0 + landlord_block_reference = "block_id" + + # CDS - able-to-pay + data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/CDS/Able to pay" + data_filename = "CDS_ASSET LIST_(2314).xlsx" + sheet_name = "Sheet1" + postcode_column = 'Property Address - Postcode' + address1_column = "Property Address - Line 1" + address1_method = None + fulladdress_column = "Property Address - Line 1" + address_cols_to_concat = [] + missing_postcodes_method = None + landlord_year_built = None + landlord_os_uprn = None + landlord_property_type = None + landlord_built_form = None + landlord_wall_construction = None + landlord_roof_construction = None + landlord_heating_system = None + landlord_existing_pv = None + landlord_property_id = "row_id" + landlord_sap = None + 
outcomes_filename = None + outcomes_sheetname = None + outcomes_postcode = None + outcomes_houseno = None + outcomes_id = None + outcomes_address = None + master_filepaths = [] + master_id_colnames = [] + master_to_asset_list_filepath = None + phase = False + ecosurv_landlords = None + asset_list_header = 0 + landlord_block_reference = None + # Hyde - solar data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Hyde/Solar" - data_filename = "Domna Property Analysis HYDE (Chichester Removed).xlsx" + data_filename = "Domna Property Analysis HYDE (Chichester Removed)V2-Completed.xlsx" sheet_name = "Electric Property Inspections" postcode_column = 'Postcode' address1_column = None # Is only patchily populated so we create it @@ -88,14 +190,14 @@ def app(): master_filepaths = [] master_id_colnames = [] master_to_asset_list_filepath = None - phase = True + phase = False ecosurv_landlords = None asset_list_header = 0 landlord_block_reference = None # Hyde cavity data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Hyde/Cavity" - data_filename = "Domna Property Analysis HYDE (Chichester Removed).xlsx" + data_filename = "Domna Property Analysis HYDE (Chichester Removed)V2-Completed.xlsx" sheet_name = "Cavity Inspections" postcode_column = 'Postcode' address1_column = None # Is only patchily populated so we create it @@ -122,7 +224,7 @@ def app(): master_filepaths = [] master_id_colnames = [] master_to_asset_list_filepath = None - phase = True + phase = False ecosurv_landlords = None asset_list_header = 0 landlord_block_reference = None diff --git a/backend/SearchEpc.py b/backend/SearchEpc.py index 16dd8f04..1a14e87a 100644 --- a/backend/SearchEpc.py +++ b/backend/SearchEpc.py @@ -347,7 +347,8 @@ class SearchEpc: # We update the data with the correct uprn if self.uprn: for x in api_response["response"]["rows"]: - x["uprn"] = self.uprn + if pd.isnull(x["uprn"]): + x["uprn"] = self.uprn data["rows"].extend(api_response["response"]["rows"]) @@ 
-357,6 +358,8 @@ class SearchEpc: row for row in data["rows"] if row["lmk-key"] not in seen and not seen.add(row["lmk-key"]) ] + # Overwrite the data + self.data = data if data["rows"]: api_response["msg"] = self.SUCCESS diff --git a/backend/engine/engine.py b/backend/engine/engine.py index fa1f191c..f2674290 100644 --- a/backend/engine/engine.py +++ b/backend/engine/engine.py @@ -145,14 +145,17 @@ def extract_portfolio_aggregation_data( cost = sum([r["total"] for r in default_recommendations]) sap_point_improvement = sum([r["sap_points"] for r in default_recommendations]) - lower_bound_valuation_uplift = ( - property_value_increase_ranges[p.id]["lower_bound_increased_value"] - - property_value_increase_ranges[p.id]["current_value"] - ) - upper_bound_valuation_uplift = ( - property_value_increase_ranges[p.id]["upper_bound_increased_value"] - - property_value_increase_ranges[p.id]["current_value"] - ) + if not pd.isnull(property_value_increase_ranges[p.id]["current_value"]): + lower_bound_valuation_uplift = ( + property_value_increase_ranges[p.id]["lower_bound_increased_value"] - + property_value_increase_ranges[p.id]["current_value"] + ) + upper_bound_valuation_uplift = ( + property_value_increase_ranges[p.id]["upper_bound_increased_value"] - + property_value_increase_ranges[p.id]["current_value"] + ) + else: + lower_bound_valuation_uplift, upper_bound_valuation_uplift = 0, 0 agg_data.append({ "pre_retrofit_epc": p.data["current-energy-rating"], @@ -523,6 +526,7 @@ async def model_engine(body: PlanTriggerRequest): plan_input["built_form"] = plan_input["built_form"].map(built_form_map) plan_input = plan_input.to_dict("records") + else: raise ValueError("Other formats not yet supported") @@ -549,6 +553,13 @@ async def model_engine(body: PlanTriggerRequest): # If we have patches or overrides, we should read them in here patches, already_installed, non_invasive_recommendations, valuation_data = get_request_property_data(body) + if body.file_type == "xlsx" and 
body.file_format == "domna_asset_list": + # We check if we have valution data + if not valuation_data and body.valuation_file_path in [None, ""]: + # We check plan_input + if "domna_valuation" in plan_input[0]: + valuation_data = [{"uprn": x["uprn"], "valuation": x["domna_valuation"]} for x in plan_input] + cleaning_data = read_dataframe_from_s3_parquet( bucket_name=get_settings().DATA_BUCKET, file_key="sap_change_model/cleaning_dataset.parquet", ) @@ -563,12 +574,22 @@ async def model_engine(body: PlanTriggerRequest): if uprn: uprn = int(float(uprn)) + address1 = config.get("address", None) + # Handle domna address list format + if pd.isnull(address1) and body.file_format == "domna_asset_list": + address1 = config.get("domna_full_address", None) + + address1 = str(int(address1)) if isinstance(address1, float) else str(address1) + + full_address = config["domna_full_address"] if body.file_format == "domna_asset_list" else None + epc_searcher = SearchEpc( - address1=str(config["address"]), + address1=address1, postcode=config["postcode"], uprn=uprn, auth_token=get_settings().EPC_AUTH_TOKEN, os_api_key="", + full_address=full_address ) epc_searcher.ordnance_survey_client.built_form = config.get("built_form", None) epc_searcher.ordnance_survey_client.property_type = config.get("property_type", None) @@ -1176,9 +1197,10 @@ async def model_engine(body: PlanTriggerRequest): upload_funding(session, p, new_plan_id, recommendations_to_upload) - property_valuation_increases.append( - valuations["average_increased_value"] - valuations["current_value"] - ) + if valuations["current_value"] > 0: + property_valuation_increases.append( + valuations["average_increased_value"] - valuations["current_value"] + ) # Commit the session after each batch session.commit() diff --git a/backend/ml_models/Valuation.py b/backend/ml_models/Valuation.py index 8c57900f..17db0dae 100644 --- a/backend/ml_models/Valuation.py +++ b/backend/ml_models/Valuation.py @@ -219,12 +219,19 @@ class 
PropertyValuation: current_epc = property_instance.data["current-energy-rating"] if not current_value: + # In this case, we return a % improvement rather than an absolute + relative_improvement = cls.estimate_valuation_improvement( + current_value=1, + current_epc=current_epc, + target_epc=target_epc, + total_cost=1 + ) return { "current_value": 0, - "lower_bound_increased_value": 0, - "upper_bound_increased_value": 0, - "average_increased_value": 0, - "average_increase": 0 + "lower_bound_increased_value": relative_improvement["lower_bound_increased_value"] - 1, + "upper_bound_increased_value": relative_improvement["upper_bound_increased_value"] - 1, + "average_increased_value": relative_improvement["average_increased_value"] - 1, + "average_increase": relative_improvement["average_increase"] } return cls.estimate_valuation_improvement(current_value, current_epc, target_epc, total_cost) diff --git a/etl/webscrape/Zoopla.py b/etl/webscrape/Zoopla.py index bb86c759..7b3fd5b6 100644 --- a/etl/webscrape/Zoopla.py +++ b/etl/webscrape/Zoopla.py @@ -1,38 +1,111 @@ -# Initial Code - -from seleniumbase import SB +from bs4 import BeautifulSoup +import pandas as pd import time +from stealth_requests import StealthSession +import random +from multiprocessing import Pool +from tqdm import tqdm -uprns = [ - 100071297618, - 100080893397, - 100060778033, - 200004793081, - 100071265143, - 100071297618, - 100080893397, - 100060778033, - 200004793081, - 100071265143, -] +ENGINES = ["safari", "chrome"] -estimate_list = [] -for uprn in uprns: +def scrape_all_estimates(session, url): + # Rotate impersonation per request + resp = session.get(url, impersonate=ENGINES[random.randint(0, 1)]) + page_source = BeautifulSoup(resp.text, "html.parser") + estimates = page_source.find_all("div", {"data-testid": "sale-estimate"}) + is_blocked = len(estimates) == 0 + return estimates, is_blocked - # Probably can change the timings here - time.sleep(5) - with SB(uc=True) as sb: - 
sb.uc_open_with_reconnect( - f"https://www.zoopla.co.uk/property/uprn/{uprn}/", - 3, + +def parallel_task(url): + # No impersonate argument here + with StealthSession() as session: + estimates, is_blocked = scrape_all_estimates(session, url) + + while is_blocked: + print(f"Blocked by Zoopla for URL: {url}") + time.sleep(random.uniform(0, 1)) + estimates, is_blocked = scrape_all_estimates(session, url) + + low_estimate = estimates[0].find("span", {"data-testid": "low-estimate-blurred"}).text + middle_estimate = estimates[0].find("p", {"data-testid": "estimate-blurred"}).text + high_estimate = estimates[0].find("span", {"data-testid": "high-estimate-blurred"}).text + + return { + "URL": url, + "Low Estimate": low_estimate, + "Middle Estimate": middle_estimate, + "High Estimate": high_estimate, + } + + +def parse_price(p): + p = p.replace("£", "").strip().lower() + if p.endswith("k"): + return float(p[:-1]) * 1000 + elif p.endswith("m"): + return float(p[:-1]) * 1_000_000 + else: + return float(p) + + +# def parallel_task(url): +# with StealthSession(impersonate=ENGINES[random.randint(0, 1)]) as session: +# estimates, is_blocked = scrape_all_estimates(session, url) +# +# while is_blocked: +# # Will need to wait and retry if blocked by Zoopla +# print(f"Blocked by Zoopla for URL: {url}") +# sleep_factor = random.uniform(0, 1) # Random delay to avoid detection +# time.sleep(sleep_factor * 1) +# estimates, is_blocked = scrape_all_estimates(session, url) +# +# low_estimate = ( +# estimates[0].find("span", {"data-testid": "low-estimate-blurred"}).text +# ) # Find all span elements with data-testid="low-estimate" +# middle_estimate = ( +# estimates[0].find("p", {"data-testid": "estimate-blurred"}).text +# ) # Find all span elements with data-testid="middle-estimate" +# high_estimate = ( +# estimates[0].find("span", {"data-testid": "high-estimate-blurred"}).text +# ) # Find all span elements with data-testid="high-estimate-blurred" +# +# return { +# "URL": url, +# "Low 
Estimate": low_estimate, +# "Middle Estimate": middle_estimate, +# "High Estimate": high_estimate, +# } + + +if __name__ == "__main__": + # Get a SAL + asset_list = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/NRLA/Property Box/Property Box Finance Portfolio - " + "Standardised.xlsx", + sheet_name="Standardised Asset List" + ) + asset_list["epc_os_uprn"] = asset_list["epc_os_uprn"].astype(int).astype(str) + uprns = asset_list["epc_os_uprn"].tolist() + urls = [f"https://www.zoopla.co.uk/property/uprn/{uprn}/" for uprn in uprns] + + with Pool(processes=5) as pool: + estimates_list = list( + tqdm( + pool.imap(parallel_task, urls), + total=len(urls), + ) ) - soup = sb.get_beautiful_soup() + df = pd.DataFrame(estimates_list) + # Extract UPRN from URL + df["uprn"] = df["URL"].str.extract(r"uprn/(\d+)/") + df["valuation"] = df["Middle Estimate"].apply(parse_price) + df.to_csv("zoopla_estimates.csv", index=False) - estimates = soup.find_all("div", {"data-testid": "sale-estimate"}) - # Can change the way we extract the text here - estimate_text = ( - estimates[-1].find_all("p")[-1].find_all("span")[-1]["aria-label"] - ) - estimate_list.append(estimate_text) + df["uprn"] = df["uprn"].astype(int).astype(str) + + asset_list.merge(df[["uprn", "valuation"]], left_on="epc_os_uprn", right_on="uprn", how="left").to_excel( + "Property Box Finance Portfolio - Standardised - with valuations.xlsx", index=False + ) diff --git a/etl/webscrape/requirements.txt b/etl/webscrape/requirements.txt new file mode 100644 index 00000000..4027a224 --- /dev/null +++ b/etl/webscrape/requirements.txt @@ -0,0 +1,5 @@ +beautifulsoup4>=4.12.0 +pandas>=2.0.0 +stealth-requests>=1.0.7 +tqdm>=4.65.0 +openpyxl \ No newline at end of file From 504a714fc65ffe17f5f2c373ffeb3f0b2065956f Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 28 Oct 2025 13:43:10 +0000 Subject: [PATCH 15/23] fixed ranking algo for unfunded --- backend/engine/engine.py | 16 ++++++++++------ 1 
file changed, 10 insertions(+), 6 deletions(-) diff --git a/backend/engine/engine.py b/backend/engine/engine.py index f2674290..f4152852 100644 --- a/backend/engine/engine.py +++ b/backend/engine/engine.py @@ -959,15 +959,19 @@ async def model_engine(body: PlanTriggerRequest): ) # Given the solutions we select the optimal one + # 1) If the scheme is ECO4, the full project funding and uplift are deducted from the cost + # 2) If the sheme is GBIS, the partial project funding and uplift are deducted from the cost + # 3) Otherwise, no funding is deducted from the cost solutions["cost_less_full_project_funding"] = np.where( - solutions["scheme"] == "eco4", - solutions["total_cost"] - solutions["full_project_funding"] - solutions["total_uplift"], - solutions["total_cost"] - solutions["partial_project_funding"] - solutions["total_uplift"] + solutions["scheme"] == "none", + solutions["total_cost"], + np.where( + solutions["scheme"] == "eco4", + solutions["total_cost"] - solutions["full_project_funding"] - solutions["total_uplift"], + solutions["total_cost"] - solutions["partial_project_funding"] - solutions["total_uplift"] + ) ) - solutions["cost_less_full_project_funding"] = ( - solutions["total_cost"] - solutions["full_project_funding"] - solutions["total_uplift"] - ) solutions = solutions.sort_values("cost_less_full_project_funding", ascending=True) if solutions["meets_upgrade_target"].any(): From b8fc16dac569a9282df0095168ae0868bb4cadf0 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 28 Oct 2025 14:41:28 +0000 Subject: [PATCH 16/23] added new measure --- backend/app/db/models/materials.py | 1 + 1 file changed, 1 insertion(+) diff --git a/backend/app/db/models/materials.py b/backend/app/db/models/materials.py index 347b66d5..9b38addd 100644 --- a/backend/app/db/models/materials.py +++ b/backend/app/db/models/materials.py @@ -19,6 +19,7 @@ class MaterialType(enum.Enum): flat_roof_insulation = "flat_roof_insulation" room_roof_insulation = 
"room_roof_insulation" windows_glazing = "windows_glazing" + secondary_glazing = "secondary_glazing" cavity_wall_extraction = "cavity_wall_extraction" iwi_wall_demolition = "iwi_wall_demolition" From f20b22187086ca4be5578b8123ae4af5de4b9e7e Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 28 Oct 2025 17:24:51 +0000 Subject: [PATCH 17/23] fixed fetching of incorrect find my epc data --- asset_list/app.py | 34 ++++++++++++++++++++++++++++ asset_list/mappings/built_form.py | 4 +++- asset_list/mappings/property_type.py | 4 +++- etl/find_my_epc/RetrieveFindMyEpc.py | 29 ++++++++++++++++-------- 4 files changed, 60 insertions(+), 11 deletions(-) diff --git a/asset_list/app.py b/asset_list/app.py index 20cf04f1..bb5cb427 100644 --- a/asset_list/app.py +++ b/asset_list/app.py @@ -59,6 +59,40 @@ def app(): Property UPRN """ + # Stonewater Solar + data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/October 2025 Solar" + data_filename = "Copy of AP Stonewater Ammended address list - PV AM Amended - Khalim initial review.xlsx" + sheet_name = "Proposed Sheet" + postcode_column = 'Postcode' + address1_column = None + address1_method = "house_number_extraction" + fulladdress_column = "Address" + address_cols_to_concat = [] + missing_postcodes_method = None + landlord_year_built = None + landlord_os_uprn = None + landlord_property_type = "Property Type" + landlord_built_form = "Property Type" + landlord_wall_construction = "Walls" + landlord_roof_construction = "Roofs" + landlord_heating_system = "Heating" + landlord_existing_pv = None + landlord_property_id = "Asset Id" + landlord_sap = "SAP" + outcomes_filename = None + outcomes_sheetname = None + outcomes_postcode = None + outcomes_houseno = None + outcomes_id = None + outcomes_address = None + master_filepaths = [] + master_id_colnames = [] + master_to_asset_list_filepath = None + phase = False + ecosurv_landlords = None + asset_list_header = 0 + landlord_block_reference = None + # 
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Cambridge/" data_filename = "22.10_Cambridge_west addresses.xlsx" diff --git a/asset_list/mappings/built_form.py b/asset_list/mappings/built_form.py index b02b8810..2556d755 100644 --- a/asset_list/mappings/built_form.py +++ b/asset_list/mappings/built_form.py @@ -456,6 +456,8 @@ BUILT_FORM_MAPPINGS = { 'House: Mid Terrace: Ground Floor': 'ground floor', 'Maisonette: Semi Detached: Mid Floor': 'detached', 'Maisonette: Detached: Mid Floor': 'detached', - 'Bungalow: EnclosedMidTerrace': 'enclosed mid-terrace' + 'Bungalow: EnclosedMidTerrace': 'enclosed mid-terrace', + + 'House: EnclosedMidTerrace': 'enclosed mid-terrace' } diff --git a/asset_list/mappings/property_type.py b/asset_list/mappings/property_type.py index 88ec2934..1c236d96 100644 --- a/asset_list/mappings/property_type.py +++ b/asset_list/mappings/property_type.py @@ -360,6 +360,8 @@ PROPERTY_MAPPING = { 'House: Mid Terrace: Ground Floor': 'house', 'Bungalow: EnclosedMidTerrace': 'bungalow', 'Maisonette: Semi Detached: Mid Floor': 'maisonette', - 'Maisonette: Detached: Mid Floor': 'maisonette' + 'Maisonette: Detached: Mid Floor': 'maisonette', + + 'House: EnclosedMidTerrace': 'house' } diff --git a/etl/find_my_epc/RetrieveFindMyEpc.py b/etl/find_my_epc/RetrieveFindMyEpc.py index 21794284..5bb5e39b 100644 --- a/etl/find_my_epc/RetrieveFindMyEpc.py +++ b/etl/find_my_epc/RetrieveFindMyEpc.py @@ -718,15 +718,26 @@ class RetrieveFindMyEpc: find_epc_data = searcher.retrieve_newest_find_my_epc_data() except Exception as e: logger.error(f"Error retrieving find my epc data: {e}") - if epc["address1"] == epc["address"]: - # There's no benefit of using the same address, so we split on comma - address1 = epc["address"].split(",")[0] - else: - address1 = epc["address1"] - # We attempt with the backup add - searcher = cls(address=address1, postcode=epc["postcode"]) - find_epc_data = searcher.retrieve_newest_find_my_epc_data() - 
logger.info("Successfully retrieved find my epc data using backup address") + + # We try two backup approaches. The first is to trim the final section off the end of the address + address1 = ",".join(epc["address"].split(",")[:-1]) + try: + searcher = cls(address=address1, postcode=epc["postcode"]) + find_epc_data = searcher.retrieve_newest_find_my_epc_data() + logger.info("Successfully retrieved find my epc data using trimmed address") + except Exception as e2: + logger.error(f"Error retrieving find my epc data using trimmed address: {e2}") + # Attempt final approach + + if epc["address1"] == epc["address"]: + # There's no benefit of using the same address, so we split on comma + address1 = epc["address"].split(",")[0] + else: + address1 = epc["address1"] + # We attempt with the backup add + searcher = cls(address=address1, postcode=epc["postcode"]) + find_epc_data = searcher.retrieve_newest_find_my_epc_data() + logger.info("Successfully retrieved find my epc data using backup address") non_invasive_recommendations = { "uprn": epc["uprn"], From 1c94db54ef5e0020f494943876157528695869a3 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 28 Oct 2025 17:56:43 +0000 Subject: [PATCH 18/23] corrected eligibiltiy criteria for EPC E, F, G EWI projects --- recommendations/optimiser/funding_optimiser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/recommendations/optimiser/funding_optimiser.py b/recommendations/optimiser/funding_optimiser.py index 73475fc0..8fbb13b2 100644 --- a/recommendations/optimiser/funding_optimiser.py +++ b/recommendations/optimiser/funding_optimiser.py @@ -896,7 +896,7 @@ def make_funding_paths(p, input_measures, housing_type, funding: Funding): # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # 1) The package must include EWI or IWI if the property is private rental sector # We check if we have any EWI or IWI measures available - only for EPC E or below - if p.data["current-energy-rating"] not in ["E", "F", "G"]: + 
if p.data["current-energy-rating"] in ["E", "F", "G"]: ewi_or_iwi = [{"OR": []}] reference_measures = [] # If we have EWI we add it in From ef934f6b7c13918e014182ee043514a18604c019 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 28 Oct 2025 19:21:53 +0000 Subject: [PATCH 19/23] debugged funding test --- backend/SearchEpc.py | 15 +- .../test_data/innovation_measure_fixtures.py | 40 +- backend/tests/test_funding.py | 102 +- backend/tests/test_integration.py | 1061 +++++++++-------- backend/tests/test_search_epc.py | 3 +- recommendations/tests/test_optimisers.py | 142 ++- 6 files changed, 756 insertions(+), 607 deletions(-) diff --git a/backend/SearchEpc.py b/backend/SearchEpc.py index 1a14e87a..60999e94 100644 --- a/backend/SearchEpc.py +++ b/backend/SearchEpc.py @@ -418,7 +418,20 @@ class SearchEpc: address, [", ".join([r["address"]]) for r in rows], score_cutoff=0 ) # Pick the largest score - if best_match1[1] >= best_match2[1]: + if best_match1[1] == best_match2[1]: + # if thery're the same, we'll work under the assumption that the addresses are the same and we'll + # take whichever has the newest EPC + rows_filtered = [ + r for r in rows + if (", ".join([r["address"], r["posttown"]]) == best_match1[0]) or + (r["address"] == best_match2[0]) + ] + rows_filtered = [ + r for r in rows_filtered + if r["lodgement-datetime"] == max([x["lodgement-datetime"] for x in rows_filtered]) + ] + + elif best_match1[1] > best_match2[1]: # Get all of the scores rows_filtered = [r for r in rows if ", ".join([r["address"], r["posttown"]]) == best_match1[0]] else: diff --git a/backend/tests/test_data/innovation_measure_fixtures.py b/backend/tests/test_data/innovation_measure_fixtures.py index 886421c4..a66cc7ec 100644 --- a/backend/tests/test_data/innovation_measure_fixtures.py +++ b/backend/tests/test_data/innovation_measure_fixtures.py @@ -4,7 +4,7 @@ innovation_scenarios = [ # 1) Innovation PV, non-eligible heating system in place, EPC D - not eligible { 
"description": "Innovation PV, non-eligible heating system in place, EPC D", - "measures": [{"type": "solar_pv", "is_innovation": True, "uplift": 0.45}], + "measures": [{"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45}], "starting_sap": 60, "mainheat_description": "Electric storage heaters", "heating_control_description": "Manual charge control", @@ -16,7 +16,7 @@ innovation_scenarios = [ # 2) Innovation PV, eligible heating system in place, EPC D - eligible { "description": "Innovation PV, eligible heating system in place, EPC D", - "measures": [{"type": "solar_pv", "is_innovation": True, "uplift": 0.45}], + "measures": [{"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45}], "starting_sap": 60, "mainheat_description": "Air source heat pump, radiators", "heating_control_description": "Programmer, room thermostat and TRVs", @@ -29,8 +29,8 @@ innovation_scenarios = [ { "description": "Innovation PV + HHRSH upgrade, EPC E", "measures": [ - {"type": "solar_pv", "is_innovation": True, "uplift": 0.45}, - {"type": "high_heat_retention_storage_heater", "is_innovation": True, "uplift": 0.1} + {"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45}, + {"type": "high_heat_retention_storage_heater", "is_innovation": False, "innovation_uplift": 0} ], "starting_sap": 50, "mainheat_description": "Electric storage heaters", @@ -44,8 +44,8 @@ innovation_scenarios = [ { "description": "Innovation PV + HHRSH upgrade, EPC E", "measures": [ - {"type": "solar_pv", "is_innovation": True, "uplift": 0.45}, - {"type": "high_heat_retention_storage_heater", "is_innovation": True, "uplift": 0.1} + {"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45}, + {"type": "high_heat_retention_storage_heater", "is_innovation": False, "innovation_uplift": 0} ], "starting_sap": 50, "mainheat_description": "Electric storage heaters", @@ -58,7 +58,7 @@ innovation_scenarios = [ # 5) Innovation PV, needs wall insulation, no wall insulation 
measure - not eligible { "description": "Innovation PV, wall insulation recommended, but not installed", - "measures": [{"type": "solar_pv", "is_innovation": True, "uplift": 0.45}], + "measures": [{"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45}], "starting_sap": 60, "mainheat_description": "Air source heat pump, radiators", "heating_control_description": "Programmer, room thermostat and TRVs", @@ -71,8 +71,8 @@ innovation_scenarios = [ { "description": "Innovation PV, wall insulation recommended and installed", "measures": [ - {"type": "solar_pv", "is_innovation": True, "uplift": 0.45}, - {"type": "internal_wall_insulation", "is_innovation": False, "uplift": 0.25} + {"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45}, + {"type": "internal_wall_insulation", "is_innovation": False, "innovation_uplift": 0.25} ], "starting_sap": 60, "mainheat_description": "Air source heat pump, radiators", @@ -85,7 +85,7 @@ innovation_scenarios = [ # 7) Innovation PV, needs roof insulation, no roof insulation measure - not eligible { "description": "Innovation PV, roof insulation recommended, not installed", - "measures": [{"type": "solar_pv", "is_innovation": True, "uplift": 0.45}], + "measures": [{"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45}], "starting_sap": 60, "mainheat_description": "Air source heat pump, radiators", "heating_control_description": "Programmer, room thermostat and TRVs", @@ -98,8 +98,8 @@ innovation_scenarios = [ { "description": "Innovation PV, roof insulation recommended and installed", "measures": [ - {"type": "solar_pv", "is_innovation": True, "uplift": 0.45}, - {"type": "loft_insulation", "is_innovation": False, "uplift": 0} + {"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45}, + {"type": "loft_insulation", "is_innovation": False, "innovation_uplift": 0} ], "starting_sap": 60, "mainheat_description": "Air source heat pump, radiators", @@ -112,7 +112,7 @@ 
innovation_scenarios = [ # 9) Innovation PV, needs both roof + wall insulation, no insulation - not eligible { "description": "Innovation PV, both insulations recommended, none installed", - "measures": [{"type": "solar_pv", "is_innovation": True, "uplift": 0.45}], + "measures": [{"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45}], "starting_sap": 60, "mainheat_description": "Air source heat pump, radiators", "heating_control_description": "Programmer, room thermostat and TRVs", @@ -125,8 +125,8 @@ innovation_scenarios = [ { "description": "Innovation PV, both insulations recommended, only wall done", "measures": [ - {"type": "solar_pv", "is_innovation": True, "uplift": 0.45}, - {"type": "internal_wall_insulation", "is_innovation": False, "uplift": 0.25} + {"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45}, + {"type": "internal_wall_insulation", "is_innovation": False, "innovation_uplift": 0.25} ], "starting_sap": 60, "mainheat_description": "Air source heat pump, radiators", @@ -140,8 +140,8 @@ innovation_scenarios = [ { "description": "Innovation PV, both insulations recommended, only roof done", "measures": [ - {"type": "solar_pv", "is_innovation": True, "uplift": 0.45}, - {"type": "loft_insulation", "is_innovation": False, "uplift": 0} + {"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45}, + {"type": "loft_insulation", "is_innovation": False, "innovation_uplift": 0} ], "starting_sap": 60, "mainheat_description": "Air source heat pump, radiators", @@ -155,9 +155,9 @@ innovation_scenarios = [ { "description": "Innovation PV, both insulations recommended and installed", "measures": [ - {"type": "solar_pv", "is_innovation": True, "uplift": 0.45}, - {"type": "internal_wall_insulation", "is_innovation": False, "uplift": 0.25}, - {"type": "loft_insulation", "is_innovation": False, "uplift": 0} + {"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45}, + {"type": "internal_wall_insulation", 
"is_innovation": False, "innovation_uplift": 0.25}, + {"type": "loft_insulation", "is_innovation": False, "innovation_uplift": 0} ], "starting_sap": 60, "mainheat_description": "Air source heat pump, radiators", diff --git a/backend/tests/test_funding.py b/backend/tests/test_funding.py index 59d65a28..d84480ce 100644 --- a/backend/tests/test_funding.py +++ b/backend/tests/test_funding.py @@ -120,7 +120,7 @@ def test_eco4_prs_eligible_with_swi( # 3) is getting a solid was measure # so it's eligible for ECO4 - measures = [{"type": "internal_wall_insulation", "is_innovation": False, "uplift": 0}] + measures = [{"type": "internal_wall_insulation", "is_innovation": False, "innovation_uplift": 0}] funding.check_funding( measures=measures, starting_sap=50, # EPC E @@ -162,7 +162,7 @@ def test_eco4_prs_not_eligible_high_epc( tenure="Private", ) - measures = [{"type": "internal_wall_insulation", "is_innovation": False, "uplift": 0}] + measures = [{"type": "internal_wall_insulation", "is_innovation": False, "innovation_uplift": 0}] funding.check_funding( measures=measures, starting_sap=72, # EPC C (too high) @@ -203,7 +203,7 @@ def test_gbis_prs_general_eligibility( tenure="Private", ) - measures = [{"type": "internal_wall_insulation", "is_innovation": False, "uplift": 0}] + measures = [{"type": "internal_wall_insulation", "is_innovation": False, "innovation_uplift": 0}] funding.check_funding( measures=measures, starting_sap=65, # EPC D @@ -244,7 +244,7 @@ def test_gbis_prs_low_income_caveat( tenure="Private", ) - measures = [{"type": "cavity_wall_insulation", "is_innovation": False, "uplift": 0}] + measures = [{"type": "cavity_wall_insulation", "is_innovation": False, "innovation_uplift": 0}] funding.check_funding( measures=measures, starting_sap=60, # EPC D @@ -290,7 +290,7 @@ def test_eco4_sh_epc_e_eligible( tenure="Social", ) - measures = [{"type": "internal_wall_insulation", "is_innovation": False, "uplift": 0}] + measures = [{"type": "internal_wall_insulation", 
"is_innovation": False, "innovation_uplift": 0}] funding.check_funding( measures=measures, starting_sap=50, # EPC E @@ -330,7 +330,7 @@ def test_eco4_sh_epc_d_requires_innovation( tenure="Social", ) - measures = [{"type": "internal_wall_insulation", "is_innovation": False, "uplift": 0}] + measures = [{"type": "internal_wall_insulation", "is_innovation": False, "innovation_uplift": 0}] funding.check_funding( measures=measures, starting_sap=60, # EPC D @@ -365,7 +365,7 @@ def test_eco4_sh_epc_d_requires_innovation( gbis_private_solid_abs_rate=28, tenure="Social", ) - measures2 = [{"type": "internal_wall_insulation", "is_innovation": True, "uplift": 0.25}] + measures2 = [{"type": "internal_wall_insulation", "is_innovation": True, "innovation_uplift": 0.25}] funding2.check_funding( measures=measures2, starting_sap=60, # EPC D @@ -403,7 +403,7 @@ def test_eco4_sh_epc_d_requires_innovation( gbis_private_solid_abs_rate=28, tenure="Social", ) - measures3 = [{"type": "solar_pv", "is_innovation": True, "uplift": 0.45}] + measures3 = [{"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45}] funding3.check_funding( measures=measures3, starting_sap=60, # EPC D @@ -439,7 +439,7 @@ def test_eco4_sh_epc_d_requires_innovation( tenure="Social", ) - measures4 = [{"type": "solar_pv", "is_innovation": True, "uplift": 0.45}, ] + measures4 = [{"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45}, ] funding4.check_funding( measures=measures4, starting_sap=60, # EPC D @@ -476,8 +476,8 @@ def test_eco4_sh_epc_d_requires_innovation( ) measures5 = [ - {"type": "solar_pv", "is_innovation": True, "uplift": 0.45}, - {"type": "high_heat_retention_storage_heater", "is_innovation": False, "uplift": 0} + {"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45}, + {"type": "high_heat_retention_storage_heater", "is_innovation": False, "innovation_uplift": 0} ] funding5.check_funding( measures=measures5, @@ -516,7 +516,7 @@ def 
test_eco4_sh_epc_d_requires_innovation( ) measures6 = [ - {"type": "solar_pv", "is_innovation": True, "uplift": 0.45}, + {"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45}, ] funding6.check_funding( measures=measures6, @@ -556,9 +556,9 @@ def test_eco4_sh_epc_d_requires_innovation( tenure="Social", ) measures7 = [ - {"type": "solar_pv", "is_innovation": True, "uplift": 0.45}, - {"type": "cavity_wall_insulation", "is_innovation": False, "uplift": 0.25}, - {"type": "loft_insulation", "is_innovation": False, "uplift": 0} + {"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45}, + {"type": "cavity_wall_insulation", "is_innovation": False, "innovation_uplift": 0.25}, + {"type": "loft_insulation", "is_innovation": False, "innovation_uplift": 0} ] funding7.check_funding( measures=measures7, @@ -599,7 +599,7 @@ def test_eco4_sh_solar_pv_requires_heating( tenure="Social", ) - measures = [{"type": "solar_pv", "is_innovation": True, "uplift": 0.45}] + measures = [{"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45}] funding.check_funding( measures=measures, starting_sap=60, # EPC D @@ -641,8 +641,8 @@ def test_eco4_sh_solar_pv_with_heating_is_ok( ) measures = [ - {"type": "solar_pv", "is_innovation": True, "uplift": 0.45}, - {"type": "air_source_heat_pump", "is_innovation": False, "uplift": 0} + {"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45}, + {"type": "air_source_heat_pump", "is_innovation": False, "innovation_uplift": 0} ] funding.check_funding( measures=measures, @@ -684,7 +684,7 @@ def test_eco4_upgrade_requirement_e_to_c_pass( tenure="Private", ) - measures = [{"type": "internal_wall_insulation", "is_innovation": False, "uplift": 0}] + measures = [{"type": "internal_wall_insulation", "is_innovation": False, "innovation_uplift": 0}] # E (SAP 50) → C (SAP 70) meets upgrade rule funding.check_funding( @@ -727,7 +727,7 @@ def test_eco4_upgrade_requirement_e_to_d_fail( tenure="Private", ) - 
measures = [{"type": "internal_wall_insulation", "is_innovation": False, "uplift": 0}] + measures = [{"type": "internal_wall_insulation", "is_innovation": False, "innovation_uplift": 0}] # E (SAP 50) → D (SAP 65) does NOT meet ECO4 upgrade rule funding.check_funding( @@ -770,7 +770,7 @@ def test_eco4_upgrade_requirement_f_to_d_pass( tenure="Private", ) - measures = [{"type": "internal_wall_insulation", "is_innovation": False, "uplift": 0}] + measures = [{"type": "internal_wall_insulation", "is_innovation": False, "innovation_uplift": 0}] # F (SAP 35) → D (SAP 60) is OK for ECO4 funding.check_funding( @@ -813,7 +813,7 @@ def test_eco4_upgrade_requirement_f_to_e_fail( tenure="Private", ) - measures = [{"type": "internal_wall_insulation", "is_innovation": False, "uplift": 0}] + measures = [{"type": "internal_wall_insulation", "is_innovation": False, "innovation_uplift": 0}] # F (SAP 35) → E (SAP 50) does NOT meet ECO4 rule funding.check_funding( @@ -859,7 +859,7 @@ def test_epc_d_social_no_innovation_no_heating( ) measures = [ - {"type": "solar_pv", "is_innovation": True, "uplift": 0.45} + {"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45} ] funding.check_funding( @@ -905,10 +905,10 @@ def test_epc_d_social_with_heating_and_insulation( # Should NOT be eligible as the ASHP is not an innovation measure measures = [ - {"type": "solar_pv", "is_innovation": True, "uplift": 0.45}, - {"type": "internal_wall_insulation", "is_innovation": False, "uplift": 0}, - {"type": "loft_insulation", "is_innovation": False, "uplift": 0}, - {"type": "air_source_heat_pump", "is_innovation": False, "uplift": 0} + {"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45}, + {"type": "internal_wall_insulation", "is_innovation": False, "innovation_uplift": 0}, + {"type": "loft_insulation", "is_innovation": False, "innovation_uplift": 0}, + {"type": "air_source_heat_pump", "is_innovation": False, "innovation_uplift": 0} ] funding.check_funding( @@ -954,9 +954,9 
@@ def test_epc_d_social_solar_with_only_minimum_insulation_should_fail( # Solar PV innovation with insulation, but no heating system upgrade => not eligible measures = [ - {"type": "solar_pv", "is_innovation": True, "uplift": 0.45}, - {"type": "internal_wall_insulation", "is_innovation": False, "uplift": 0}, - {"type": "loft_insulation", "is_innovation": False, "uplift": 0} + {"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45}, + {"type": "internal_wall_insulation", "is_innovation": False, "innovation_uplift": 0}, + {"type": "loft_insulation", "is_innovation": False, "innovation_uplift": 0} ] funding.check_funding( @@ -1002,8 +1002,8 @@ def test_epc_d_social_solar_with_ashp_and_no_insulation_should_fail( # Solar PV innovation with heating, but no insulation when insulation is recommended => not eligible measures = [ - {"type": "solar_pv", "is_innovation": True, "uplift": 0.45}, - {"type": "air_source_heat_pump", "is_innovation": False, "uplift": 0} + {"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45}, + {"type": "air_source_heat_pump", "is_innovation": False, "innovation_uplift": 0} ] funding.check_funding( @@ -1050,10 +1050,10 @@ def test_epc_d_social_solar_with_heating_and_minimum_insulation_should_pass( # Innovation solar + insulation measures + eligible heating upgrade = not valid because the heat pump isn;t # an innovation measure measures = [ - {"type": "solar_pv", "is_innovation": True, "uplift": 0.45}, - {"type": "internal_wall_insulation", "is_innovation": False, "uplift": 0}, - {"type": "loft_insulation", "is_innovation": False, "uplift": 0}, - {"type": "air_source_heat_pump", "is_innovation": False, "uplift": 0} + {"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45}, + {"type": "internal_wall_insulation", "is_innovation": False, "innovation_uplift": 0}, + {"type": "loft_insulation", "is_innovation": False, "innovation_uplift": 0}, + {"type": "air_source_heat_pump", "is_innovation": False, 
"innovation_uplift": 0} ] funding.check_funding( @@ -1095,10 +1095,10 @@ def test_epc_d_social_solar_with_heating_and_minimum_insulation_should_pass( # Innovation solar + insulation measures + eligible heating upgrade = should be valid because the # heat pump is an innovation measure measures2 = [ - {"type": "solar_pv", "is_innovation": True, "uplift": 0.45}, - {"type": "internal_wall_insulation", "is_innovation": False, "uplift": 0}, - {"type": "loft_insulation", "is_innovation": False, "uplift": 0}, - {"type": "air_source_heat_pump", "is_innovation": True, "uplift": 0.25} + {"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45}, + {"type": "internal_wall_insulation", "is_innovation": False, "innovation_uplift": 0}, + {"type": "loft_insulation", "is_innovation": False, "innovation_uplift": 0}, + {"type": "air_source_heat_pump", "is_innovation": True, "innovation_uplift": 0.25} ] funding2.check_funding( @@ -1203,11 +1203,11 @@ def test_uplift( # # TODO: Add a scenario with multiple measures, where some are innovation, some are not and we have # TODO: Make sure private works too measures = [ - {"type": "solar_pv", "is_innovation": True, "uplift": 0.45}, - {"type": "internal_wall_insulation", "is_innovation": False, "uplift": 0}, - {"type": "loft_insulation", "is_innovation": False, "uplift": 0}, - {"type": "air_source_heat_pump", "is_innovation": False, "uplift": 0}, - {"type": "cavity_wall_insulation", "is_innovation": False, "uplift": 0.25}, + {"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45}, + {"type": "internal_wall_insulation", "is_innovation": False, "innovation_uplift": 0}, + {"type": "loft_insulation", "is_innovation": False, "innovation_uplift": 0}, + {"type": "air_source_heat_pump", "is_innovation": False, "innovation_uplift": 0}, + {"type": "cavity_wall_insulation", "is_innovation": False, "innovation_uplift": 0.25}, ] funding.check_funding( @@ -1229,7 +1229,7 @@ def test_uplift( ) assert funding.eco4_funding == 
5302.3949999999995 - assert funding.full_project_abs == 392.77 # is 280 + the 112.77 innovation uplift + assert funding.full_project_abs == 280 # Doesn't include the eco4 uplift assert funding.eco4_uplift == 112.77 @@ -1311,7 +1311,7 @@ def test_private_epc_e_solar_needs_heating( tenure="Private", ) - measures = [{"type": "solar_pv", "is_innovation": True, "uplift": 0.45}] + measures = [{"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45}] funding.check_funding( measures=measures, starting_sap=54, # EPC E - eligible for private on EPC @@ -1360,10 +1360,10 @@ def test_private_epc_e_solar_with_heating_and_minimum_insulation_produces_uplift ) measures = [ - {"type": "solar_pv", "is_innovation": True, "uplift": 0.45}, - {"type": "air_source_heat_pump", "is_innovation": False, "uplift": 0}, - {"type": "cavity_wall_insulation", "is_innovation": False, "uplift": 0}, - {"type": "loft_insulation", "is_innovation": False, "uplift": 0}, + {"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45}, + {"type": "air_source_heat_pump", "is_innovation": False, "innovation_uplift": 0}, + {"type": "cavity_wall_insulation", "is_innovation": False, "innovation_uplift": 0}, + {"type": "loft_insulation", "is_innovation": False, "innovation_uplift": 0}, ] funding.check_funding( diff --git a/backend/tests/test_integration.py b/backend/tests/test_integration.py index e6bcfce8..60778132 100644 --- a/backend/tests/test_integration.py +++ b/backend/tests/test_integration.py @@ -1,531 +1,532 @@ -import ast -import json -from copy import deepcopy -from dataclasses import replace -from datetime import datetime - -import random -from tqdm import tqdm -import pandas as pd -import numpy as np -from etl.epc.Record import EPCRecord -from backend.SearchEpc import SearchEpc -from sqlalchemy.exc import IntegrityError, OperationalError -from sqlalchemy.orm import sessionmaker -from starlette.responses import Response - -from backend.app.config import get_settings, 
get_prediction_buckets -from backend.app.db.connection import db_engine -from backend.app.db.functions.materials_functions import get_materials -from backend.app.db.functions.portfolio_functions import aggregate_portfolio_recommendations -from backend.app.db.functions.property_functions import ( - create_property, create_property_details_epc, create_property_targets, update_property_data, - update_or_create_property_spatial_details -) -from backend.app.db.functions.recommendations_functions import ( - create_plan, upload_recommendations, create_scenario -) -from backend.app.db.functions.funding_functions import upload_funding -from backend.app.db.functions.energy_assessment_functions import get_latest_assessment_by_uprn -from backend.app.db.models.portfolio import rating_lookup -from backend.app.plan.schemas import PlanTriggerRequest, WALL_INSULATION_MEASURES, ROOF_INSULATION_MEASURES -from backend.app.plan.utils import get_cleaned -from backend.app.utils import sap_to_epc -import backend.app.assumptions as assumptions - -from backend.ml_models.api import ModelApi -from backend.Property import Property -from backend.apis.GoogleSolarApi import GoogleSolarApi - -from recommendations.optimiser.CostOptimiser import CostOptimiser -from recommendations.optimiser.GainOptimiser import GainOptimiser -import recommendations.optimiser.optimiser_functions as optimiser_functions -from recommendations.Recommendations import Recommendations -from utils.logger import setup_logger -from utils.s3 import read_dataframe_from_s3_parquet, read_csv_from_s3, read_excel_from_s3 -from backend.ml_models.Valuation import PropertyValuation - -from etl.bill_savings.KwhData import KwhData -from etl.spatial.OpenUprnClient import OpenUprnClient -from etl.find_my_epc.RetrieveFindMyEpc import RetrieveFindMyEpc - -from backend.Funding import Funding -from recommendations.optimiser.funding_optimiser import optimise_with_funding_paths -from recommendations.recommendation_utils import 
convert_thickness_to_numeric, get_wall_u_value - -# Input data (temp) -import pickle - -import pandas as pd - -with open("local_data_for_deletion.pkl", 'rb') as f: - local_data = pickle.load(f) - -cleaning_data = local_data["cleaning_data"] -materials = local_data["materials"] -cleaned = local_data["cleaned"] -project_scores_matrix = local_data["project_scores_matrix"] -partial_project_scores_matrix = local_data["partial_project_scores_matrix"] -whlg_eligible_postcodes = local_data["whlg_eligible_postcodes"] - -with open("kwh_client_for_deletion.pkl", "rb") as f: - kwh_client = pickle.load(f) - -epc_data = pd.read_csv( - "/Users/khalimconn-kowlessar/Downloads/all-domestic-certificates/domestic-E06000002-Middlesbrough/certificates.csv", - low_memory=False -) - -# TODO: Store this for cleaning -costs_by_floor_area = epc_data[ - pd.to_datetime(epc_data["LODGEMENT_DATE"]) >= "2024-01-01" - ][["TOTAL_FLOOR_AREA", "CURRENT_ENERGY_EFFICIENCY", "LIGHTING_COST_CURRENT", "HEATING_COST_CURRENT", - "HOT_WATER_COST_CURRENT"]].copy() - -costs_by_floor_area.columns = [c.lower().replace("_", "-") for c in costs_by_floor_area.columns] -for c in ["lighting-cost-current", "heating-cost-current", "hot-water-cost-current"]: - costs_by_floor_area[c + "_scaled"] = costs_by_floor_area[c] / costs_by_floor_area["total-floor-area"] - -costs_by_floor_area = costs_by_floor_area.groupby("current-energy-efficiency")[ - ["lighting-cost-current_scaled", "heating-cost-current_scaled", "hot-water-cost-current_scaled"] -].mean().reset_index() - -sample_epc_data = epc_data[pd.to_datetime(epc_data["LODGEMENT_DATE"]) >= "2015-01-01"].drop_duplicates("UPRN").sample( - 1000).reset_index(drop=True) - -# TODO: In Property find_energy_sources, sort out biomass community heating - what fuel type -# TODO: We might be able to remove find_energy_sources entirely and remove estimate_electrical_consumption. It's used -# in the google solar api but is it really needed? I don't think it's super accurate. 
It might be better to -# just use an average energy consumption by floor area for UK households? -# Load the input properties -input_properties = [] -for row_id, config in tqdm(sample_epc_data.iterrows(), total=len(sample_epc_data)): - epc = { - k.lower().replace("_", "-"): v if not pd.isnull(v) else None for k, v in config.items() - } - # Avoid the data load inside of EPCRecord - something we should pull out - for x in ["number-habitable-rooms", "floor-height", "number-heated-rooms"]: - if pd.isnull(epc[x]): - if x == "floor-height": - epc[x] = 2.4 - if x == "number-habitable-rooms": - epc[x] = 3 - if x == "number-heated-rooms": - epc[x] = 3 - - epc_records = {'original_epc': epc, 'full_sap_epc': {}, 'old_data': []} - - prepared_epc = EPCRecord( - epc_records=epc_records, - run_mode="newdata", - cleaning_data=cleaning_data, - ) - - input_properties.append( - Property( - id=row_id, - is_new=True, - address=epc["address"], - postcode=epc["postcode"], - epc_record=prepared_epc, - already_installed={}, - property_valuation={}, - non_invasive_recommendations=[], - energy_assessment=None, - **Property.extract_kwargs(config), # TODO: Depraecate this - ) - ) - -# For each property, insert the default solar configuration -for p in tqdm(input_properties): - solar_api = GoogleSolarApi( - api_key=None, solar_materials=[m for m in materials if m["type"] == "solar_pv"], max_retries=5 - ) - panel_performance = solar_api.default_panel_performance(property_instance=p) - p.set_solar_panel_configuration( - solar_panel_configuration={ - "insights_data": None, "panel_performance": panel_performance, "unit_share_of_energy": 1 - }, - ) - -# We mock kwh preds -mocked_kwh_predictions = {"heating_kwh_predictions": [], "hotwater_kwh_predictions": []} -for p in tqdm(input_properties): - mocked_kwh_predictions["heating_kwh_predictions"].append({ - "id": p.uprn, "predictions": random.sample(range(100, 3000), 1)[0] - }) - mocked_kwh_predictions["hotwater_kwh_predictions"].append({ - "id": 
p.uprn, "predictions": random.sample(range(100, 3000), 1)[0] - }) -mocked_kwh_predictions["heating_kwh_predictions"] = pd.DataFrame(mocked_kwh_predictions["heating_kwh_predictions"]) -mocked_kwh_predictions["hotwater_kwh_predictions"] = pd.DataFrame(mocked_kwh_predictions["hotwater_kwh_predictions"]) - -# TODO: We might want to implement this generally, via an ETL process -for p in input_properties: - for col in ["lighting-cost-current", "heating-cost-current", "hot-water-cost-current"]: - if pd.isnull(p.data[col]): - min_diff = abs( - (costs_by_floor_area["current-energy-efficiency"] - p.data["current-energy-efficiency"]) - ).min() - df = costs_by_floor_area[ - abs((costs_by_floor_area["current-energy-efficiency"] - p.data[ - "current-energy-efficiency"])) == min_diff - ] - if df.shape[0] > 1: - df = df.head(1) - p.data[col] = (df[col + "_scaled"] * p.data["total-floor-area"]).values[0] - -[ - p.set_features(cleaned=cleaned, kwh_client=kwh_client, kwh_predictions=mocked_kwh_predictions) for p in - input_properties -] +# import ast +# import json +# from copy import deepcopy +# from dataclasses import replace +# from datetime import datetime +# +# import random +# from tqdm import tqdm +# import pandas as pd +# import numpy as np +# from etl.epc.Record import EPCRecord +# from backend.SearchEpc import SearchEpc +# from sqlalchemy.exc import IntegrityError, OperationalError +# from sqlalchemy.orm import sessionmaker +# from starlette.responses import Response +# +# from backend.app.config import get_settings, get_prediction_buckets +# from backend.app.db.connection import db_engine +# from backend.app.db.functions.materials_functions import get_materials +# from backend.app.db.functions.portfolio_functions import aggregate_portfolio_recommendations +# from backend.app.db.functions.property_functions import ( +# create_property, create_property_details_epc, create_property_targets, update_property_data, +# update_or_create_property_spatial_details +# ) +# from 
backend.app.db.functions.recommendations_functions import ( +# create_plan, upload_recommendations, create_scenario +# ) +# from backend.app.db.functions.funding_functions import upload_funding +# from backend.app.db.functions.energy_assessment_functions import get_latest_assessment_by_uprn +# from backend.app.db.models.portfolio import rating_lookup +# from backend.app.plan.schemas import PlanTriggerRequest, WALL_INSULATION_MEASURES, ROOF_INSULATION_MEASURES +# from backend.app.plan.utils import get_cleaned +# from backend.app.utils import sap_to_epc +# import backend.app.assumptions as assumptions +# +# from backend.ml_models.api import ModelApi +# from backend.Property import Property +# from backend.apis.GoogleSolarApi import GoogleSolarApi +# +# from recommendations.optimiser.CostOptimiser import CostOptimiser +# from recommendations.optimiser.GainOptimiser import GainOptimiser +# import recommendations.optimiser.optimiser_functions as optimiser_functions +# from recommendations.Recommendations import Recommendations +# from utils.logger import setup_logger +# from utils.s3 import read_dataframe_from_s3_parquet, read_csv_from_s3, read_excel_from_s3 +# from backend.ml_models.Valuation import PropertyValuation +# +# from etl.bill_savings.KwhData import KwhData +# from etl.spatial.OpenUprnClient import OpenUprnClient +# from etl.find_my_epc.RetrieveFindMyEpc import RetrieveFindMyEpc +# +# from backend.Funding import Funding +# from recommendations.optimiser.funding_optimiser import optimise_with_funding_paths +# from recommendations.recommendation_utils import convert_thickness_to_numeric, get_wall_u_value +# +# # Input data (temp) +# import pickle +# +# import pandas as pd +# +# with open("local_data_for_deletion.pkl", 'rb') as f: +# local_data = pickle.load(f) +# +# cleaning_data = local_data["cleaning_data"] +# materials = local_data["materials"] +# cleaned = local_data["cleaned"] +# project_scores_matrix = local_data["project_scores_matrix"] +# 
partial_project_scores_matrix = local_data["partial_project_scores_matrix"] +# whlg_eligible_postcodes = local_data["whlg_eligible_postcodes"] +# +# with open("kwh_client_for_deletion.pkl", "rb") as f: +# kwh_client = pickle.load(f) +# +# epc_data = pd.read_csv( +# "/Users/khalimconn-kowlessar/Downloads/all-domestic-certificates/domestic-E06000002-Middlesbrough/certificates +# .csv", +# low_memory=False +# ) +# +# # TODO: Store this for cleaning +# costs_by_floor_area = epc_data[ +# pd.to_datetime(epc_data["LODGEMENT_DATE"]) >= "2024-01-01" +# ][["TOTAL_FLOOR_AREA", "CURRENT_ENERGY_EFFICIENCY", "LIGHTING_COST_CURRENT", "HEATING_COST_CURRENT", +# "HOT_WATER_COST_CURRENT"]].copy() +# +# costs_by_floor_area.columns = [c.lower().replace("_", "-") for c in costs_by_floor_area.columns] +# for c in ["lighting-cost-current", "heating-cost-current", "hot-water-cost-current"]: +# costs_by_floor_area[c + "_scaled"] = costs_by_floor_area[c] / costs_by_floor_area["total-floor-area"] +# +# costs_by_floor_area = costs_by_floor_area.groupby("current-energy-efficiency")[ +# ["lighting-cost-current_scaled", "heating-cost-current_scaled", "hot-water-cost-current_scaled"] +# ].mean().reset_index() +# +# sample_epc_data = epc_data[pd.to_datetime(epc_data["LODGEMENT_DATE"]) >= "2015-01-01"].drop_duplicates("UPRN").sample( +# 1000).reset_index(drop=True) +# +# # TODO: In Property find_energy_sources, sort out biomass community heating - what fuel type +# # TODO: We might be able to remove find_energy_sources entirely and remove estimate_electrical_consumption. It's used +# # in the google solar api but is it really needed? I don't think it's super accurate. It might be better to +# # just use an average energy consumption by floor area for UK households? 
+# # Load the input properties +# input_properties = [] +# for row_id, config in tqdm(sample_epc_data.iterrows(), total=len(sample_epc_data)): +# epc = { +# k.lower().replace("_", "-"): v if not pd.isnull(v) else None for k, v in config.items() +# } +# # Avoid the data load inside of EPCRecord - something we should pull out +# for x in ["number-habitable-rooms", "floor-height", "number-heated-rooms"]: +# if pd.isnull(epc[x]): +# if x == "floor-height": +# epc[x] = 2.4 +# if x == "number-habitable-rooms": +# epc[x] = 3 +# if x == "number-heated-rooms": +# epc[x] = 3 +# +# epc_records = {'original_epc': epc, 'full_sap_epc': {}, 'old_data': []} +# +# prepared_epc = EPCRecord( +# epc_records=epc_records, +# run_mode="newdata", +# cleaning_data=cleaning_data, +# ) +# +# input_properties.append( +# Property( +# id=row_id, +# is_new=True, +# address=epc["address"], +# postcode=epc["postcode"], +# epc_record=prepared_epc, +# already_installed={}, +# property_valuation={}, +# non_invasive_recommendations=[], +# energy_assessment=None, +# **Property.extract_kwargs(config), # TODO: Depraecate this +# ) +# ) +# +# # For each property, insert the default solar configuration +# for p in tqdm(input_properties): +# solar_api = GoogleSolarApi( +# api_key=None, solar_materials=[m for m in materials if m["type"] == "solar_pv"], max_retries=5 +# ) +# panel_performance = solar_api.default_panel_performance(property_instance=p) +# p.set_solar_panel_configuration( +# solar_panel_configuration={ +# "insights_data": None, "panel_performance": panel_performance, "unit_share_of_energy": 1 +# }, +# ) +# +# # We mock kwh preds +# mocked_kwh_predictions = {"heating_kwh_predictions": [], "hotwater_kwh_predictions": []} +# for p in tqdm(input_properties): +# mocked_kwh_predictions["heating_kwh_predictions"].append({ +# "id": p.uprn, "predictions": random.sample(range(100, 3000), 1)[0] +# }) +# mocked_kwh_predictions["hotwater_kwh_predictions"].append({ +# "id": p.uprn, "predictions": 
random.sample(range(100, 3000), 1)[0] +# }) +# mocked_kwh_predictions["heating_kwh_predictions"] = pd.DataFrame(mocked_kwh_predictions["heating_kwh_predictions"]) +# mocked_kwh_predictions["hotwater_kwh_predictions"] = pd.DataFrame(mocked_kwh_predictions["hotwater_kwh_predictions"]) +# +# # TODO: We might want to implement this generally, via an ETL process # for p in input_properties: -# p.set_features(cleaned=cleaned, kwh_client=kwh_client, kwh_predictions=mocked_kwh_predictions) - -# Run the recommendations -recommendations = {} -recommendations_scoring_data = [] -representative_recommendations = {} -for p in tqdm(input_properties): - if p.data["property-type"] == "House" and pd.isnull(p.data["built-form"]): - p.data["built-form"] = "Semi-Detached" - recommender = Recommendations( - property_instance=p, - materials=materials, - exclusions=[], - inclusions=[], - default_u_values=True - ) - property_recommendations, property_representative_recommendations = recommender.recommend() - - if not property_recommendations: - continue - - recommendations[p.id] = property_recommendations - representative_recommendations[p.id] = property_representative_recommendations - - p.create_base_difference_epc_record(cleaned_lookup=cleaned) - p.adjust_difference_record_with_recommendations( - property_recommendations, property_representative_recommendations - ) - - recommendations_scoring_data.extend(p.recommendations_scoring_data) - -recommendations_scoring_data = pd.DataFrame(recommendations_scoring_data) -recommendations_scoring_data = recommendations_scoring_data.drop( - columns=[ - "rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending", - "carbon_ending" - ] -) - -model_predictions_mocked = { - "sap_change_predictions": None, - "heat_demand_predictions": None, - "carbon_change_predictions": None, - "heating_kwh_predictions": None, - "hotwater_kwh_predictions": None, -} - -for k in model_predictions_mocked.keys(): - model_predictions_mocked[k] 
= recommendations_scoring_data[["id"]].copy() - model_predictions_mocked[k][['property_id', 'recommendation_id']] = ( - model_predictions_mocked[k]['id'].str.split('+', expand=True) - ) - model_predictions_mocked[k]['phase'] = model_predictions_mocked[k]['recommendation_id'].apply( - ModelApi.extract_phase) - - if k in ["heating_kwh_predictions", "hotwater_kwh_predictions"]: - model_predictions_mocked[k]["predictions"] = random.choices(range(100, 3000), - k=len(recommendations_scoring_data)) - continue - - model_predictions_mocked[k] = model_predictions_mocked[k].sort_values(["property_id", "phase"], ascending=True) - preds = [] - for p_id in model_predictions_mocked[k]["property_id"].unique(): - # We add some amount each time - p = [p for p in input_properties if str(p.id) == p_id][0] - if k == "sap_change_predictions": - start = p.data["current-energy-efficiency"] - elif k == "heat_demand_predictions": - start = p.data["energy-consumption-current"] - else: - start = p.data["co2-emissions-current"] - df = model_predictions_mocked[k][model_predictions_mocked[k]["property_id"] == p_id].copy() - # Add some amount each time - to_add = random.choices(range(0, 15), k=len(df)) - to_add = np.cumsum(to_add) - df["predictions"] = start + to_add - preds.append(df) - preds = pd.concat(preds) - model_predictions_mocked[k] = preds - -for property_id in tqdm(recommendations.keys(), total=len(recommendations)): - property_instance = [p for p in input_properties if p.id == property_id][0] - - recommendations_with_impact, impact_summary = ( - Recommendations.calculate_recommendation_impact( - property_instance=property_instance, - all_predictions=model_predictions_mocked, - recommendations=recommendations, - representative_recommendations=representative_recommendations - ) - ) - - # We use the impact_summary to update the simulation_epcs with the new SAP, heat demand, carbon, cost etc - # at each phase - property_instance.update_simulation_epcs(impact_summary) - 
recommendations[property_id] = recommendations_with_impact - -for property_id in tqdm([p.id for p in input_properties]): - property_recommendations = recommendations.get(property_id, []) - property_instance = [p for p in input_properties if p.id == property_id][0] - - property_current_energy_bill = ( - Recommendations.calculate_recommendation_tenant_savings( - property_instance=property_instance, - kwh_simulation_predictions=model_predictions_mocked, - property_recommendations=property_recommendations, - ashp_cop=2.8 - ) - ) - property_instance.current_energy_bill = property_current_energy_bill - -body = PlanTriggerRequest( - **{'budget': None, 'goal': 'Increasing EPC', 'housing_type': 'Social', 'goal_value': 'B', 'portfolio_id': 0, - 'trigger_file_path': '', 'already_installed_file_path': '', - 'patches_file_path': None, 'non_invasive_recommendations_file_path': None, - 'valuation_file_path': '', - 'required_measures': [], 'scenario_name': 'EPC B', 'scenario_id': None, - 'multi_plan': True, 'optimise': True, 'default_u_values': True, 'ashp_cop': 2.8, - 'event_type': 'remote_assessment', 'simulate_sap_10': False, 'file_type': None, 'file_format': None, - 'sheet_name': None, 'sheet_count': None, 'index_start': None, 'index_end': None} -) - -for p in tqdm(input_properties): - if not recommendations.get(p.id): - continue - - # we need to double unlist because we have a list of lists - property_measure_types = {rec["type"] for recs in recommendations[p.id] for rec in recs} - property_required_measures = [m for m in recommendations[p.id] if m[0]["type"] in body.required_measures] - measures_to_optimise = [m for m in recommendations[p.id] if m[0]["type"] not in body.required_measures] - - # If a measure requiring ventilation is selected, and the property does not have ventilation, we enfore - # its inclusion - needs_ventilation = any( - x in property_measure_types for x in assumptions.measures_needing_ventilation - ) and not p.has_ventilation - - if not 
measures_to_optimise: - # Nothing to do, we just reshape the recommendations - recommendations[p.id] = optimiser_functions.flatten_recommendations_with_defaults( - p.id, recommendations, set() - ) - continue - - fixed_gain = optimiser_functions.calculate_fixed_gain( - property_required_measures, recommendations, p, needs_ventilation - ) - gain = optimiser_functions.calculate_gain(body=body, p=p, fixed_gain=fixed_gain) - - funding = Funding( - tenure="Social", - project_scores_matrix=project_scores_matrix, - partial_project_scores_matrix=partial_project_scores_matrix, - whlg_eligible_postcodes=whlg_eligible_postcodes, - eco4_social_cavity_abs_rate=12.5, - eco4_social_solid_abs_rate=17, - eco4_private_cavity_abs_rate=12.5, - eco4_private_solid_abs_rate=17, - gbis_social_cavity_abs_rate=21, - gbis_social_solid_abs_rate=25, - gbis_private_cavity_abs_rate=21, - gbis_private_solid_abs_rate=28, - ) - - li_thickness = convert_thickness_to_numeric( - p.roof["insulation_thickness"], p.roof["is_pitched"], p.roof["is_flat"] - ) - current_wall_u_value = p.walls["thermal_transmittance"] - if current_wall_u_value is None: - current_wall_u_value = get_wall_u_value( - clean_description=p.walls["clean_description"], - age_band=p.age_band, - is_granite_or_whinstone=p.walls["is_granite_or_whinstone"], - is_sandstone_or_limestone=p.walls["is_sandstone_or_limestone"], - ) - - # We insert the innovation uplift - measures_to_optimise_with_uplift = deepcopy(measures_to_optimise) - - # TODO: Turn this into a function and store the innovaiton uplift - for group in measures_to_optimise_with_uplift: - for r in group: - - if r["type"] in ["mechanical_ventilation", "low_energy_lighting", "secondary_heating", - "extension_cavity_wall_insulation", "draught_proofing", "sealing_open_fireplace"]: - ( - r["partial_project_score"], - r["partial_project_funding"], - r["innovation_uplift"], - r["uplift_project_score"], - ) = ( - 0, 0, 0, 0 - ) - continue - - ( - r["partial_project_score"], 
r["partial_project_funding"], r["innovation_uplift"], - r["uplift_project_score"] - ) = funding.get_innovation_uplift( - measure=r, - starting_sap=p.data["current-energy-efficiency"], - floor_area=p.floor_area, - is_cavity=p.walls["is_cavity_wall"], - current_wall_uvalue=current_wall_u_value, - is_partial="partial" in p.walls["clean_description"].lower(), - existing_li_thickness=li_thickness, - mainheating=p.main_heating, - main_fuel=p.main_fuel, - mainheat_energy_eff=p.data["mainheat-energy-eff"], - ) - - input_measures = optimiser_functions.prepare_input_measures( - measures_to_optimise_with_uplift, body.goal, needs_ventilation, funding=True - ) - - # When the goal is Increasing EPC, we can run the funding optimiser - if body.goal == "Increasing EPC": - - solutions = optimise_with_funding_paths( - p=p, - input_measures=input_measures, - housing_type=body.housing_type, - budget=body.budget, - target_gain=gain, - funding=funding - ) - - # Given the solutions we select the optimal one - solutions["cost_less_full_project_funding"] = np.where( - solutions["scheme"] == "eco4", - solutions["total_cost"] - solutions["full_project_funding"] - solutions["total_uplift"], - solutions["total_cost"] - solutions["partial_project_funding"] - solutions["total_uplift"] - ) - - solutions["cost_less_full_project_funding"] = ( - solutions["total_cost"] - solutions["full_project_funding"] - solutions["total_uplift"] - ) - solutions = solutions.sort_values("cost_less_full_project_funding", ascending=True) - - if solutions["meets_upgrade_target"].any(): - # If we have a solution that meets the upgrade target, we select that one - optimal_solution = solutions[solutions["meets_upgrade_target"]].iloc[0] - else: - # Pick the cheapest - optimal_solution = solutions.iloc[0] - - # This is the list of measures that we will recommend - scheme = optimal_solution["scheme"] - funded_measures = optimal_solution["items"] if scheme != "none" else [] - solution = optimal_solution["items"] + 
optimal_solution["unfunded_items"] - # This is the total amount of funding that the project will produce (including uplifts) (£) - project_funding = optimal_solution["full_project_funding"] if scheme == "eco4" else \ - optimal_solution["partial_project_funding"] - # This is the total amount of funding associated to the uplift (£) - total_uplift = optimal_solution["total_uplift"] - # This is the funding scheme selected - # This is the full project ABS - full_project_score = optimal_solution["project_score"] - # This is the partial project ABS - partial_project_score = optimal_solution["partial_project_score"] - # This is the uplift score ABS - uplift_project_score = optimal_solution["total_uplift_score"] - else: - # We optimise and then we determine eligibility for funding, based on the measures selected - optimiser = ( - GainOptimiser( - input_measures, max_cost=body.budget, max_gain=gain, allow_slack=False - ) if body.budget else CostOptimiser(input_measures, min_gain=gain) - ) - optimiser.setup() - optimiser.solve() - solution = optimiser.solution - - recommendation_types = [] - for measures in input_measures: - for measure in measures: - recommendation_types.append(measure["type"]) - recommendation_types = set(recommendation_types) - - has_wall_insulation_recommendation = any( - (m in recommendation_types or "+".join([m, "mechanical_ventilation"])) for m in - WALL_INSULATION_MEASURES - ) - has_roof_insulation_recommendation = any( - (m in recommendation_types or "+".join([m, "mechanical_ventilation"])) for m in - ROOF_INSULATION_MEASURES - ) - - funding.check_funding( - measures=solution, - starting_sap=p.data["current-energy-efficiency"], - ending_sap=p.data["current-energy-efficiency"] + sum([x["gain"] for x in solution]), - floor_area=p.floor_area, - mainheat_description=p.main_heating["clean_description"], - heating_control_description=p.main_heating_controls["clean_description"], - is_cavity=p.walls["is_cavity_wall"], - 
current_wall_uvalue=current_wall_u_value, - is_partial="partial" in p.walls["clean_description"].lower(), - existing_li_thickness=li_thickness, - mainheating=p.main_heating, - main_fuel=p.main_fuel, - mainheat_energy_eff=p.data["mainheat-energy-eff"], - has_wall_insulation_recommendation=has_wall_insulation_recommendation, - has_roof_insulation_recommendation=has_roof_insulation_recommendation, - ) - - # Determine the scheme - scheme = "none" - if funding.eco4_eligible: - scheme = "eco4" - if scheme == "none" and funding.gbis_eligible: - scheme = "gbis" - - funded_measures = solution if scheme in ["gbis", "eco4"] else [] - project_funding = 0 if funding.full_project_abs is not None else funding.full_project_abs - total_uplift = funding.eco4_uplift - full_project_score = 0 if funding.full_project_abs is not None else funding.full_project_abs - partial_project_score = funding.partial_project_abs - uplift_project_score = funding.eco4_uplift if scheme == "eco4" else funding.gbis_uplift - - selected = {r["id"] for r in solution} - - if property_required_measures: - solution = optimiser_functions.add_required_measures( - property_id=p.id, property_required_measures=property_required_measures, - recommendations=recommendations, selected=selected, - ) - - # Add best practice measures (ventilation/trickle vents) - selected = optimiser_functions.add_best_practice_measures(p.id, solution, recommendations, selected) - # Final flattening - Don't do this! 
- # recommendations[p.id] = optimiser_functions.flatten_recommendations_with_defaults( - # p.id, recommendations, selected - # ) - - # TODO: functionise - for measure in funded_measures: - if "+mechanical_ventilation" in measure["type"]: - measure["type"] = measure["type"].split("+mechanical_ventilation")[0] - - p.insert_funding( - scheme=scheme, - funded_measures=funded_measures, - project_funding=project_funding, - total_uplift=total_uplift, - full_project_score=full_project_score, - partial_project_score=partial_project_score, - uplift_project_score=uplift_project_score - ) +# for col in ["lighting-cost-current", "heating-cost-current", "hot-water-cost-current"]: +# if pd.isnull(p.data[col]): +# min_diff = abs( +# (costs_by_floor_area["current-energy-efficiency"] - p.data["current-energy-efficiency"]) +# ).min() +# df = costs_by_floor_area[ +# abs((costs_by_floor_area["current-energy-efficiency"] - p.data[ +# "current-energy-efficiency"])) == min_diff +# ] +# if df.shape[0] > 1: +# df = df.head(1) +# p.data[col] = (df[col + "_scaled"] * p.data["total-floor-area"]).values[0] +# +# [ +# p.set_features(cleaned=cleaned, kwh_client=kwh_client, kwh_predictions=mocked_kwh_predictions) for p in +# input_properties +# ] +# # for p in input_properties: +# # p.set_features(cleaned=cleaned, kwh_client=kwh_client, kwh_predictions=mocked_kwh_predictions) +# +# # Run the recommendations +# recommendations = {} +# recommendations_scoring_data = [] +# representative_recommendations = {} +# for p in tqdm(input_properties): +# if p.data["property-type"] == "House" and pd.isnull(p.data["built-form"]): +# p.data["built-form"] = "Semi-Detached" +# recommender = Recommendations( +# property_instance=p, +# materials=materials, +# exclusions=[], +# inclusions=[], +# default_u_values=True +# ) +# property_recommendations, property_representative_recommendations = recommender.recommend() +# +# if not property_recommendations: +# continue +# +# recommendations[p.id] = 
property_recommendations +# representative_recommendations[p.id] = property_representative_recommendations +# +# p.create_base_difference_epc_record(cleaned_lookup=cleaned) +# p.adjust_difference_record_with_recommendations( +# property_recommendations, property_representative_recommendations +# ) +# +# recommendations_scoring_data.extend(p.recommendations_scoring_data) +# +# recommendations_scoring_data = pd.DataFrame(recommendations_scoring_data) +# recommendations_scoring_data = recommendations_scoring_data.drop( +# columns=[ +# "rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending", +# "carbon_ending" +# ] +# ) +# +# model_predictions_mocked = { +# "sap_change_predictions": None, +# "heat_demand_predictions": None, +# "carbon_change_predictions": None, +# "heating_kwh_predictions": None, +# "hotwater_kwh_predictions": None, +# } +# +# for k in model_predictions_mocked.keys(): +# model_predictions_mocked[k] = recommendations_scoring_data[["id"]].copy() +# model_predictions_mocked[k][['property_id', 'recommendation_id']] = ( +# model_predictions_mocked[k]['id'].str.split('+', expand=True) +# ) +# model_predictions_mocked[k]['phase'] = model_predictions_mocked[k]['recommendation_id'].apply( +# ModelApi.extract_phase) +# +# if k in ["heating_kwh_predictions", "hotwater_kwh_predictions"]: +# model_predictions_mocked[k]["predictions"] = random.choices(range(100, 3000), +# k=len(recommendations_scoring_data)) +# continue +# +# model_predictions_mocked[k] = model_predictions_mocked[k].sort_values(["property_id", "phase"], ascending=True) +# preds = [] +# for p_id in model_predictions_mocked[k]["property_id"].unique(): +# # We add some amount each time +# p = [p for p in input_properties if str(p.id) == p_id][0] +# if k == "sap_change_predictions": +# start = p.data["current-energy-efficiency"] +# elif k == "heat_demand_predictions": +# start = p.data["energy-consumption-current"] +# else: +# start = p.data["co2-emissions-current"] +# 
df = model_predictions_mocked[k][model_predictions_mocked[k]["property_id"] == p_id].copy() +# # Add some amount each time +# to_add = random.choices(range(0, 15), k=len(df)) +# to_add = np.cumsum(to_add) +# df["predictions"] = start + to_add +# preds.append(df) +# preds = pd.concat(preds) +# model_predictions_mocked[k] = preds +# +# for property_id in tqdm(recommendations.keys(), total=len(recommendations)): +# property_instance = [p for p in input_properties if p.id == property_id][0] +# +# recommendations_with_impact, impact_summary = ( +# Recommendations.calculate_recommendation_impact( +# property_instance=property_instance, +# all_predictions=model_predictions_mocked, +# recommendations=recommendations, +# representative_recommendations=representative_recommendations +# ) +# ) +# +# # We use the impact_summary to update the simulation_epcs with the new SAP, heat demand, carbon, cost etc +# # at each phase +# property_instance.update_simulation_epcs(impact_summary) +# recommendations[property_id] = recommendations_with_impact +# +# for property_id in tqdm([p.id for p in input_properties]): +# property_recommendations = recommendations.get(property_id, []) +# property_instance = [p for p in input_properties if p.id == property_id][0] +# +# property_current_energy_bill = ( +# Recommendations.calculate_recommendation_tenant_savings( +# property_instance=property_instance, +# kwh_simulation_predictions=model_predictions_mocked, +# property_recommendations=property_recommendations, +# ashp_cop=2.8 +# ) +# ) +# property_instance.current_energy_bill = property_current_energy_bill +# +# body = PlanTriggerRequest( +# **{'budget': None, 'goal': 'Increasing EPC', 'housing_type': 'Social', 'goal_value': 'B', 'portfolio_id': 0, +# 'trigger_file_path': '', 'already_installed_file_path': '', +# 'patches_file_path': None, 'non_invasive_recommendations_file_path': None, +# 'valuation_file_path': '', +# 'required_measures': [], 'scenario_name': 'EPC B', 'scenario_id': None, +# 
'multi_plan': True, 'optimise': True, 'default_u_values': True, 'ashp_cop': 2.8, +# 'event_type': 'remote_assessment', 'simulate_sap_10': False, 'file_type': None, 'file_format': None, +# 'sheet_name': None, 'sheet_count': None, 'index_start': None, 'index_end': None} +# ) +# +# for p in tqdm(input_properties): +# if not recommendations.get(p.id): +# continue +# +# # we need to double unlist because we have a list of lists +# property_measure_types = {rec["type"] for recs in recommendations[p.id] for rec in recs} +# property_required_measures = [m for m in recommendations[p.id] if m[0]["type"] in body.required_measures] +# measures_to_optimise = [m for m in recommendations[p.id] if m[0]["type"] not in body.required_measures] +# +# # If a measure requiring ventilation is selected, and the property does not have ventilation, we enfore +# # its inclusion +# needs_ventilation = any( +# x in property_measure_types for x in assumptions.measures_needing_ventilation +# ) and not p.has_ventilation +# +# if not measures_to_optimise: +# # Nothing to do, we just reshape the recommendations +# recommendations[p.id] = optimiser_functions.flatten_recommendations_with_defaults( +# p.id, recommendations, set() +# ) +# continue +# +# fixed_gain = optimiser_functions.calculate_fixed_gain( +# property_required_measures, recommendations, p, needs_ventilation +# ) +# gain = optimiser_functions.calculate_gain(body=body, p=p, fixed_gain=fixed_gain) +# +# funding = Funding( +# tenure="Social", +# project_scores_matrix=project_scores_matrix, +# partial_project_scores_matrix=partial_project_scores_matrix, +# whlg_eligible_postcodes=whlg_eligible_postcodes, +# eco4_social_cavity_abs_rate=12.5, +# eco4_social_solid_abs_rate=17, +# eco4_private_cavity_abs_rate=12.5, +# eco4_private_solid_abs_rate=17, +# gbis_social_cavity_abs_rate=21, +# gbis_social_solid_abs_rate=25, +# gbis_private_cavity_abs_rate=21, +# gbis_private_solid_abs_rate=28, +# ) +# +# li_thickness = convert_thickness_to_numeric( 
+# p.roof["insulation_thickness"], p.roof["is_pitched"], p.roof["is_flat"] +# ) +# current_wall_u_value = p.walls["thermal_transmittance"] +# if current_wall_u_value is None: +# current_wall_u_value = get_wall_u_value( +# clean_description=p.walls["clean_description"], +# age_band=p.age_band, +# is_granite_or_whinstone=p.walls["is_granite_or_whinstone"], +# is_sandstone_or_limestone=p.walls["is_sandstone_or_limestone"], +# ) +# +# # We insert the innovation uplift +# measures_to_optimise_with_uplift = deepcopy(measures_to_optimise) +# +# # TODO: Turn this into a function and store the innovaiton uplift +# for group in measures_to_optimise_with_uplift: +# for r in group: +# +# if r["type"] in ["mechanical_ventilation", "low_energy_lighting", "secondary_heating", +# "extension_cavity_wall_insulation", "draught_proofing", "sealing_open_fireplace"]: +# ( +# r["partial_project_score"], +# r["partial_project_funding"], +# r["innovation_uplift"], +# r["uplift_project_score"], +# ) = ( +# 0, 0, 0, 0 +# ) +# continue +# +# ( +# r["partial_project_score"], r["partial_project_funding"], r["innovation_uplift"], +# r["uplift_project_score"] +# ) = funding.get_innovation_uplift( +# measure=r, +# starting_sap=p.data["current-energy-efficiency"], +# floor_area=p.floor_area, +# is_cavity=p.walls["is_cavity_wall"], +# current_wall_uvalue=current_wall_u_value, +# is_partial="partial" in p.walls["clean_description"].lower(), +# existing_li_thickness=li_thickness, +# mainheating=p.main_heating, +# main_fuel=p.main_fuel, +# mainheat_energy_eff=p.data["mainheat-energy-eff"], +# ) +# +# input_measures = optimiser_functions.prepare_input_measures( +# measures_to_optimise_with_uplift, body.goal, needs_ventilation, funding=True +# ) +# +# # When the goal is Increasing EPC, we can run the funding optimiser +# if body.goal == "Increasing EPC": +# +# solutions = optimise_with_funding_paths( +# p=p, +# input_measures=input_measures, +# housing_type=body.housing_type, +# budget=body.budget, +# 
target_gain=gain, +# funding=funding +# ) +# +# # Given the solutions we select the optimal one +# solutions["cost_less_full_project_funding"] = np.where( +# solutions["scheme"] == "eco4", +# solutions["total_cost"] - solutions["full_project_funding"] - solutions["total_uplift"], +# solutions["total_cost"] - solutions["partial_project_funding"] - solutions["total_uplift"] +# ) +# +# solutions["cost_less_full_project_funding"] = ( +# solutions["total_cost"] - solutions["full_project_funding"] - solutions["total_uplift"] +# ) +# solutions = solutions.sort_values("cost_less_full_project_funding", ascending=True) +# +# if solutions["meets_upgrade_target"].any(): +# # If we have a solution that meets the upgrade target, we select that one +# optimal_solution = solutions[solutions["meets_upgrade_target"]].iloc[0] +# else: +# # Pick the cheapest +# optimal_solution = solutions.iloc[0] +# +# # This is the list of measures that we will recommend +# scheme = optimal_solution["scheme"] +# funded_measures = optimal_solution["items"] if scheme != "none" else [] +# solution = optimal_solution["items"] + optimal_solution["unfunded_items"] +# # This is the total amount of funding that the project will produce (including uplifts) (£) +# project_funding = optimal_solution["full_project_funding"] if scheme == "eco4" else \ +# optimal_solution["partial_project_funding"] +# # This is the total amount of funding associated to the uplift (£) +# total_uplift = optimal_solution["total_uplift"] +# # This is the funding scheme selected +# # This is the full project ABS +# full_project_score = optimal_solution["project_score"] +# # This is the partial project ABS +# partial_project_score = optimal_solution["partial_project_score"] +# # This is the uplift score ABS +# uplift_project_score = optimal_solution["total_uplift_score"] +# else: +# # We optimise and then we determine eligibility for funding, based on the measures selected +# optimiser = ( +# GainOptimiser( +# input_measures, 
max_cost=body.budget, max_gain=gain, allow_slack=False +# ) if body.budget else CostOptimiser(input_measures, min_gain=gain) +# ) +# optimiser.setup() +# optimiser.solve() +# solution = optimiser.solution +# +# recommendation_types = [] +# for measures in input_measures: +# for measure in measures: +# recommendation_types.append(measure["type"]) +# recommendation_types = set(recommendation_types) +# +# has_wall_insulation_recommendation = any( +# (m in recommendation_types or "+".join([m, "mechanical_ventilation"])) for m in +# WALL_INSULATION_MEASURES +# ) +# has_roof_insulation_recommendation = any( +# (m in recommendation_types or "+".join([m, "mechanical_ventilation"])) for m in +# ROOF_INSULATION_MEASURES +# ) +# +# funding.check_funding( +# measures=solution, +# starting_sap=p.data["current-energy-efficiency"], +# ending_sap=p.data["current-energy-efficiency"] + sum([x["gain"] for x in solution]), +# floor_area=p.floor_area, +# mainheat_description=p.main_heating["clean_description"], +# heating_control_description=p.main_heating_controls["clean_description"], +# is_cavity=p.walls["is_cavity_wall"], +# current_wall_uvalue=current_wall_u_value, +# is_partial="partial" in p.walls["clean_description"].lower(), +# existing_li_thickness=li_thickness, +# mainheating=p.main_heating, +# main_fuel=p.main_fuel, +# mainheat_energy_eff=p.data["mainheat-energy-eff"], +# has_wall_insulation_recommendation=has_wall_insulation_recommendation, +# has_roof_insulation_recommendation=has_roof_insulation_recommendation, +# ) +# +# # Determine the scheme +# scheme = "none" +# if funding.eco4_eligible: +# scheme = "eco4" +# if scheme == "none" and funding.gbis_eligible: +# scheme = "gbis" +# +# funded_measures = solution if scheme in ["gbis", "eco4"] else [] +# project_funding = 0 if funding.full_project_abs is not None else funding.full_project_abs +# total_uplift = funding.eco4_uplift +# full_project_score = 0 if funding.full_project_abs is not None else funding.full_project_abs 
+# partial_project_score = funding.partial_project_abs +# uplift_project_score = funding.eco4_uplift if scheme == "eco4" else funding.gbis_uplift +# +# selected = {r["id"] for r in solution} +# +# if property_required_measures: +# solution = optimiser_functions.add_required_measures( +# property_id=p.id, property_required_measures=property_required_measures, +# recommendations=recommendations, selected=selected, +# ) +# +# # Add best practice measures (ventilation/trickle vents) +# selected = optimiser_functions.add_best_practice_measures(p.id, solution, recommendations, selected) +# # Final flattening - Don't do this! +# # recommendations[p.id] = optimiser_functions.flatten_recommendations_with_defaults( +# # p.id, recommendations, selected +# # ) +# +# # TODO: functionise +# for measure in funded_measures: +# if "+mechanical_ventilation" in measure["type"]: +# measure["type"] = measure["type"].split("+mechanical_ventilation")[0] +# +# p.insert_funding( +# scheme=scheme, +# funded_measures=funded_measures, +# project_funding=project_funding, +# total_uplift=total_uplift, +# full_project_score=full_project_score, +# partial_project_score=partial_project_score, +# uplift_project_score=uplift_project_score +# ) diff --git a/backend/tests/test_search_epc.py b/backend/tests/test_search_epc.py index 9bb7c39a..a0fef7e9 100644 --- a/backend/tests/test_search_epc.py +++ b/backend/tests/test_search_epc.py @@ -26,7 +26,7 @@ class TestSearchEpcIntegration: # Test case 2: Another valid address and postcode # In this case, the newest EPC, does not have a uprn associated to it. If we did a search by # uprn, we would get an old EPC - ("Flat 8, Hainton House", "DN32 9AQ", 10090082018, True, + ("Flat 8, Hainton House", "DN32 9AQ", "", True, "bd1149a20a73397184f07a9955f872424826e70f4870c058d71be887766ee1f8", 2), # Test case 3: When we make a request to the API for this property, we get back results for # flats 1, 2 and 3. 
We have some logic to handle the response so that we get back flat 1 @@ -56,7 +56,6 @@ class TestSearchEpcIntegration: # We check that we have the correct epc assert epc_searcher.newest_epc["lmk-key"] == lmk_key - assert epc_searcher.newest_epc["uprn"] == uprn assert len(epc_searcher.older_epcs) == n_old_epcs def test_search_housenumber(self): diff --git a/recommendations/tests/test_optimisers.py b/recommendations/tests/test_optimisers.py index df5cc2e1..e81aac69 100644 --- a/recommendations/tests/test_optimisers.py +++ b/recommendations/tests/test_optimisers.py @@ -144,6 +144,15 @@ class DummyProp: self.has_ventilation = False self.floor_area = 70.0 self.main_heating_controls = {"clean_description": "time and temperature zone control"} + self.walls = {'original_description': 'Solid brick, as built, no insulation (assumed)', + 'thermal_transmittance': None, + 'thermal_transmittance_unit': None, 'is_cavity_wall': False, 'is_filled_cavity': False, + 'is_solid_brick': True, + 'is_system_built': False, 'is_timber_frame': False, 'is_granite_or_whinstone': False, + 'is_as_built': True, + 'is_cob': False, 'is_assumed': True, 'is_sandstone_or_limestone': False, + 'insulation_thickness': 'none', + 'external_insulation': False, 'internal_insulation': False} self.main_heating = { 'original_description': 'Boiler and radiators, mains gas', @@ -230,6 +239,7 @@ def property_recommendations(): 'quantity_unit': 'm2', 'total': 19090.810139104888, 'labour_hours': 0.0, 'labour_days': 0.0}], 'type': 'external_wall_insulation', 'measure_type': 'external_wall_insulation', + "innovation_rate": 0, 'description': 'Install 150mm EWI Pro EPS external wall insulation system with Brick ' 'Slip finish on external walls', 'starting_u_value': 1.7, 'new_u_value': 0.32, 'already_installed': False, @@ -258,6 +268,7 @@ def property_recommendations(): 'quantity_unit': 'm2', 'total': 5694.929118083911, 'labour_hours': 134.37473199973275, 'labour_days': 4.199210374991648}], 'type': 
'internal_wall_insulation', 'measure_type': 'internal_wall_insulation', + "innovation_rate": 0, 'description': 'Install 95mm ' 'SWIP EcoBatt & ' 'Plastered ' @@ -314,6 +325,7 @@ def property_recommendations(): 'quantity_unit': 'm2', 'total': 645.0, 'labour_hours': 8, 'labour_days': 1}], 'type': 'loft_insulation', 'measure_type': 'loft_insulation', + "innovation_rate": 0, 'description': 'Install 300mm of Knauf Loft Roll 44 glass fibre roll in your loft', 'starting_u_value': 2.3, 'new_u_value': 2.3, 'sap_points': np.float64(2.4), 'already_installed': False, @@ -338,6 +350,7 @@ def property_recommendations(): 'plant_cost': 0.0, 'total_cost': 350.0, 'notes': None, 'is_installer_quote': True, 'total': 700.0, 'quantity': 2, 'quantity_unit': 'part'}], 'type': 'mechanical_ventilation', 'measure_type': 'mechanical_ventilation', + "innovation_rate": 0, 'description': 'Install 2 ' 'Mechanical ' 'Extract ' @@ -387,6 +400,7 @@ def property_recommendations(): 'labour_hours': 70.08999999999999, 'labour_days': 2.920416666666666}], 'type': 'suspended_floor_insulation', 'measure_type': 'suspended_floor_insulation', + "innovation_rate": 0, 'description': 'Install 75mm Q-bot underfloor insulation insulation in suspended ' 'floor', 'starting_u_value': 0.83, 'new_u_value': 0.22, 'sap_points': 2, 'survey': True, @@ -401,6 +415,7 @@ def property_recommendations(): 'energy_cost_savings': np.float64(76.04936470588231)}], [ {'phase': 4, 'parts': [], 'type': 'low_energy_lighting', 'measure_type': 'low_energy_lighting', + "innovation_rate": 0, 'description': 'Install low energy lighting in -886 outlets', 'starting_u_value': None, 'new_u_value': None, 'already_installed': False, 'sap_points': 2, 'kwh_savings': -48508.5, 'energy_cost_savings': -12481.237049999998, @@ -413,6 +428,7 @@ def property_recommendations(): 'recommendation_id': '5_phase=4', 'efficiency': -1705.5500000000002, 'heat_demand': np.float64(5.099999999999994)}], [ {'type': 'heating', 'phase': 5, 'measure_type': 
'time_temperature_zone_control', + "innovation_rate": 0, 'parts': [], 'description': 'Upgrade heating controls to Smart Thermostats, room sensors and ' 'smart radiator valves (time & temperature zone control)', @@ -431,6 +447,7 @@ def property_recommendations(): 'energy_cost_savings': np.float64(65.29581176470589)}], [ {'phase': 6, 'parts': [], 'type': 'secondary_heating', 'measure_type': 'secondary_heating', + "innovation_rate": 0, 'description': 'Remove the secondary heating system', 'starting_u_value': None, 'new_u_value': None, 'sap_points': np.float64(3.6), 'already_installed': False, 'total': 30.0, 'subtotal': 25.0, 'vat': 5.0, 'labour_hours': 3.0, @@ -443,6 +460,7 @@ def property_recommendations(): 'kwh_savings': np.float64(196.29999999999927), 'energy_cost_savings': np.float64(14.61857647058821)}], [ {'phase': 7, 'parts': [], 'type': 'solar_pv', 'measure_type': 'solar_pv', + "innovation_rate": 0, 'description': 'Install a 4.0 kilowatt-peak (kWp) solar panel system.', 'starting_u_value': None, 'new_u_value': None, 'sap_points': np.float64(13.0), 'already_installed': False, 'total': 6013.139999999999, 'subtotal': 5010.95, 'vat': 0, @@ -455,6 +473,7 @@ def property_recommendations(): 'kwh_savings': np.float64(2040.8566307499998), 'energy_cost_savings': np.float64(525.1124110919749)}, {'phase': 7, 'parts': [], 'type': 'solar_pv', 'measure_type': 'solar_pv', + "innovation_rate": 0, 'description': 'Install a 4.0 kilowatt-peak (kWp) solar panel system, with a battery.', 'starting_u_value': None, 'new_u_value': None, 'sap_points': np.float64(13.0), 'already_installed': False, 'total': 10537.008, 'subtotal': 8780.84, 'vat': 0, @@ -467,6 +486,7 @@ def property_recommendations(): 'kwh_savings': np.float64(2857.1992830499994), 'energy_cost_savings': np.float64(735.1573755287648)}, {'phase': 7, 'parts': [], 'type': 'solar_pv', 'measure_type': 'solar_pv', + "innovation_rate": 0, 'description': 'Install a 3.6 kilowatt-peak (kWp) solar panel system.', 'starting_u_value': 
None, 'new_u_value': None, 'sap_points': np.float64(12.0), 'already_installed': False, 'total': 5826.491999999999, 'subtotal': 4855.41, 'vat': 0, @@ -478,6 +498,7 @@ def property_recommendations(): 'heat_demand': np.float64(83.69999999999999), 'kwh_savings': np.float64(1846.33397), 'energy_cost_savings': np.float64(475.0617304809999)}, {'phase': 7, 'parts': [], 'type': 'solar_pv', 'measure_type': 'solar_pv', + "innovation_rate": 0, 'description': 'Install a 3.6 kilowatt-peak (kWp) solar panel system, with a battery.', 'starting_u_value': None, 'new_u_value': None, 'sap_points': np.float64(12.0), 'already_installed': False, 'total': 10350.359999999999, 'subtotal': 8625.3, 'vat': 0, @@ -489,6 +510,7 @@ def property_recommendations(): 'heat_demand': np.float64(83.69999999999999), 'kwh_savings': np.float64(2584.867558), 'energy_cost_savings': np.float64(665.0864226734)}, {'phase': 7, 'parts': [], 'type': 'solar_pv', 'measure_type': 'solar_pv', + "innovation_rate": 0, 'description': 'Install a 3.2 kilowatt-peak (kWp) solar panel system.', 'starting_u_value': None, 'new_u_value': None, 'sap_points': np.float64(11.0), 'already_installed': False, 'total': 5642.604, 'subtotal': 4702.17, 'vat': 0, @@ -500,6 +522,7 @@ def property_recommendations(): 'kwh_savings': np.float64(1650.2708274), 'energy_cost_savings': np.float64(424.61468389001993)}, {'phase': 7, 'parts': [], 'type': 'solar_pv', 'measure_type': 'solar_pv', + "innovation_rate": 0, 'description': 'Install a 3.2 kilowatt-peak (kWp) solar panel system, with a battery.', 'starting_u_value': None, 'new_u_value': None, 'sap_points': np.float64(11.0), 'already_installed': False, 'total': 10166.472, 'subtotal': 8472.06, 'vat': 0, @@ -511,6 +534,7 @@ def property_recommendations(): 'heat_demand': np.float64(78.3), 'kwh_savings': np.float64(2310.3791583599996), 'energy_cost_savings': np.float64(594.4605574460278)}, {'phase': 7, 'parts': [], 'type': 'solar_pv', 'measure_type': 'solar_pv', + "innovation_rate": 0, 'description': 
'Install a 2.8 kilowatt-peak (kWp) solar panel system.', 'starting_u_value': None, 'new_u_value': None, 'sap_points': np.float64(9.0), 'already_installed': False, 'total': 5458.727999999999, 'subtotal': 4548.94, 'vat': 0, @@ -522,6 +546,7 @@ def property_recommendations(): 'kwh_savings': np.float64(1453.5933906), 'energy_cost_savings': np.float64(374.00957940138)}, {'phase': 7, 'parts': [], 'type': 'solar_pv', 'measure_type': 'solar_pv', + "innovation_rate": 0, 'description': 'Install a 2.8 kilowatt-peak (kWp) solar panel system, with a battery.', 'starting_u_value': None, 'new_u_value': None, 'sap_points': np.float64(9.0), 'already_installed': False, 'total': 9982.596, 'subtotal': 8318.83, 'vat': 0, @@ -533,6 +558,7 @@ def property_recommendations(): 'heat_demand': np.float64(64.0), 'kwh_savings': np.float64(2035.03074684), 'energy_cost_savings': np.float64(523.6134111619319)}, {'phase': 7, 'parts': [], 'type': 'solar_pv', 'measure_type': 'solar_pv', + "innovation_rate": 0, 'description': 'Install a 2.4 kilowatt-peak (kWp) solar panel system.', 'starting_u_value': None, 'new_u_value': None, 'sap_points': np.float64(8.0), 'already_installed': False, 'total': 5274.852, 'subtotal': 4395.71, 'vat': 0, @@ -544,6 +570,7 @@ def property_recommendations(): 'kwh_savings': np.float64(1255.12594), 'energy_cost_savings': np.float64(322.94390436199996)}, {'phase': 7, 'parts': [], 'type': 'solar_pv', 'measure_type': 'solar_pv', + "innovation_rate": 0, 'description': 'Install a 2.4 kilowatt-peak (kWp) solar panel system, with a battery.', 'starting_u_value': None, 'new_u_value': None, 'sap_points': np.float64(8.0), 'already_installed': False, 'total': 9798.72, 'subtotal': 8165.6, 'vat': 0, @@ -555,6 +582,7 @@ def property_recommendations(): 'heat_demand': np.float64(54.3), 'kwh_savings': np.float64(1757.1763159999998), 'energy_cost_savings': np.float64(452.1214661067999)}, {'phase': 7, 'parts': [], 'type': 'solar_pv', 'measure_type': 'solar_pv', + "innovation_rate": 0, 
'description': 'Install a 2.0 kilowatt-peak (kWp) solar panel system.', 'starting_u_value': None, 'new_u_value': None, 'sap_points': np.float64(7.0), 'already_installed': False, 'total': 5090.976, 'subtotal': 4242.48, 'vat': 0, @@ -566,6 +594,7 @@ def property_recommendations(): 'kwh_savings': np.float64(1048.341318), 'energy_cost_savings': np.float64(269.7382211214)}, {'phase': 7, 'parts': [], 'type': 'solar_pv', 'measure_type': 'solar_pv', + "innovation_rate": 0, 'description': 'Install a 2.0 kilowatt-peak (kWp) solar panel system, with a battery.', 'starting_u_value': None, 'new_u_value': None, 'sap_points': np.float64(7.0), 'already_installed': False, 'total': 9614.844, 'subtotal': 8012.369999999999, 'vat': 0, @@ -586,10 +615,20 @@ def _attach_costs_and_uplifts(recs, funding, p): for group in out: for r in group: if r["type"] in ["mechanical_ventilation", "low_energy_lighting", "secondary_heating"]: - r["innovation_uplift"] = 0 + ( + r["partial_project_score"], + r["partial_project_funding"], + r["innovation_uplift"], + r["uplift_project_score"], + ) = ( + 0, 0, 0, 0 + ) continue - r["uplift"] = 0.0 # fixed for determinism in test - r["innovation_uplift"] = funding.get_innovation_uplift( + + ( + r["partial_project_score"], r["partial_project_funding"], r["innovation_uplift"], + r["uplift_project_score"] + ) = funding.get_innovation_uplift( measure=r, starting_sap=55, floor_area=70.0, @@ -663,3 +702,100 @@ def test_social_fabric_only_returns_only_fabric_types(p, funding, property_recom unfunded_rows = solutions[ solutions["path"].apply(lambda x: isinstance(x, dict) and x.get("reference") == "unfunded:all")] assert not unfunded_rows.empty + + +def test_private_solid_wall_no_innovation_epc_d(p, funding, mock_project_scores_matrix, mock_partial_scores_matrix): + """ + We have a specific test for this case which was implemented incorrectly originally. + This is an EPC D property and so shouldn't be eligible for ECO4. Instead, only GBIS should be considered. 
+ """ + + # Overwrite the data - copied from real example + p2 = deepcopy(p) + p2.data = { + "current-energy-rating": "D", + "current-energy-efficiency": 68, + "mainheat-energy-eff": "Good", + } + p2.walls = {'original_description': 'Sandstone or limestone, as built, no insulation (assumed)', + 'clean_description': 'Sandstone or limestone, as built, no insulation', 'thermal_transmittance': None, + 'thermal_transmittance_unit': None, 'is_cavity_wall': False, 'is_filled_cavity': False, + 'is_solid_brick': False, 'is_system_built': False, 'is_timber_frame': False, + 'is_granite_or_whinstone': False, 'is_as_built': True, 'is_cob': False, 'is_assumed': True, + 'is_sandstone_or_limestone': True, 'is_park_home': False, 'insulation_thickness': 'none', + 'external_insulation': False, 'internal_insulation': False} + + funding2 = Funding( + tenure="Private", + project_scores_matrix=mock_project_scores_matrix, + partial_project_scores_matrix=mock_partial_scores_matrix, + whlg_eligible_postcodes=pd.DataFrame([{"Postcode": "ab12cd"}]), + eco4_social_cavity_abs_rate=12.5, + eco4_social_solid_abs_rate=17, + eco4_private_cavity_abs_rate=12.5, + eco4_private_solid_abs_rate=17, + gbis_social_cavity_abs_rate=21, + gbis_social_solid_abs_rate=25, + gbis_private_cavity_abs_rate=21, + gbis_private_solid_abs_rate=28, + ) + + input_measures = [ + [{'id': '0_phase=0', 'cost': np.float64(4441.202499013676), 'gain': np.float64(3.4000000000000057), + 'type': 'internal_wall_insulation+mechanical_ventilation', 'innovation_uplift': np.float64(0.0), + 'cost_minus_uplift': np.float64(4441.202499013676), 'raw_cost': 3881.2024990136756, + 'partial_project_funding': np.float64(2300.1000000000004), 'partial_project_score': np.float64(135.3), + 'uplift_project_score': np.float64(0.0)}], [ + {'id': '2_phase=2', 'cost': np.float64(2280.0), 'gain': np.float64(0.4), 'type': 'secondary_glazing', + 'innovation_uplift': np.float64(0.0), 'cost_minus_uplift': np.float64(2280.0), + 'raw_cost': np.float64(2280.0), 
'partial_project_funding': np.float64(1421.1999999999998), + 'partial_project_score': np.float64(83.6), 'uplift_project_score': np.float64(0.0)}], [ + {'id': '3_phase=3', 'cost': np.float64(604.5840000000001), 'gain': np.float64(1.2), + 'type': 'time_temperature_zone_control', 'innovation_uplift': np.float64(0.0), + 'cost_minus_uplift': np.float64(604.5840000000001), 'raw_cost': 604.5840000000001, + 'partial_project_funding': np.float64(702.0999999999999), 'partial_project_score': np.float64(41.3), + 'uplift_project_score': np.float64(0.0)}], [ + {'id': '4_phase=4', 'cost': 60.0, 'gain': np.float64(0.0), 'type': 'secondary_heating', + 'innovation_uplift': 0, 'cost_minus_uplift': 60.0, 'raw_cost': 60.0, 'partial_project_funding': 0, + 'partial_project_score': 0, 'uplift_project_score': 0}] + ] + + solutions = optimise_with_funding_paths( + p=p2, + input_measures=input_measures, + housing_type="Private", + budget=None, + target_gain=1.5, + funding=funding2 + ) + + # 3) basic shape assertions + assert isinstance(solutions, pd.DataFrame) + assert not solutions.empty + + # We should have 2 rows + assert solutions.shape[0] == 2 + + # We should only have None or GBIS + assert set(solutions["scheme"].unique()) == {"none", "gbis"} + + meets_upgrade_gbis = solutions[solutions["meets_upgrade_target"] & solutions["is_eligible"]] + assert meets_upgrade_gbis.shape[0] == 1 + + # Check exact result + assert meets_upgrade_gbis.squeeze().to_dict() == { + 'fixed_ids': ['0_phase=0'], 'items': [ + {'id': '0_phase=0', 'cost': 3881.2024990136756, 'gain': np.float64(3.4000000000000057), + 'type': 'internal_wall_insulation+mechanical_ventilation', 'innovation_uplift': np.float64(0.0), + 'cost_minus_uplift': np.float64(4441.202499013676), 'raw_cost': 3881.2024990136756, + 'partial_project_funding': np.float64(2300.1000000000004), 'partial_project_score': np.float64(135.3), + 'uplift_project_score': np.float64(0.0)}], 'total_cost': 3881.2024990136756, + 'total_gain': 3.4000000000000057, 
'path': [{'AND': ['internal_wall_insulation+mechanical_ventilation'], + 'reference': + 'internal_wall_insulation+mechanical_ventilation:gbis'}], + 'scheme': 'gbis', 'is_eligible': True, 'unfunded_items': [], 'meets_upgrade_target': True, 'starting_sap': 68, + 'floor_area': 70.0, 'ending_sap': 71.4, 'starting_band': 'High_D', 'ending_band': 'Low_C', + 'floor_area_band': '0-72', 'project_score': 540.0, 'full_project_funding': 0.0, + 'partial_project_funding': 2300.1000000000004, 'partial_project_score': 135.3, 'total_uplift': 0.0, + 'total_uplift_score': 0.0 + } From a8905f442ee41d90c619b1d405fc0769b0e44f8d Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 28 Oct 2025 19:28:26 +0000 Subject: [PATCH 20/23] fixed funding test cases --- .../tests/test_optimiser_functions.py | 34 ++++++++++++++----- 1 file changed, 25 insertions(+), 9 deletions(-) diff --git a/recommendations/tests/test_optimiser_functions.py b/recommendations/tests/test_optimiser_functions.py index 430acaa8..031bb9ac 100644 --- a/recommendations/tests/test_optimiser_functions.py +++ b/recommendations/tests/test_optimiser_functions.py @@ -12,7 +12,10 @@ class TestPrepareInputMeasures: recs = [ [ # loft insulation measure {"recommendation_id": "loft1", "type": "loft_insulation", "total": 100, "kwh_savings": 200, - "energy_cost_savings": 10, "has_battery": False, "measure_type": "loft_insulation"}, + "energy_cost_savings": 10, "has_battery": False, "measure_type": "loft_insulation", + "partial_project_funding": 0, "partial_project_score": 0, + "uplift_project_score": 0, + }, ], ] measures = optimiser_functions.prepare_input_measures(recs, goal="Energy Savings", needs_ventilation=False) @@ -27,9 +30,12 @@ class TestPrepareInputMeasures: ["internal_wall_insulation"]) recs = [ [{"recommendation_id": "wall1", "type": "internal_wall_insulation", "total": 500, "kwh_savings": 300, - "energy_cost_savings": 5, "has_battery": False, "measure_type": "internal_wall_insulation"}], + "energy_cost_savings": 
5, "has_battery": False, "measure_type": "internal_wall_insulation", + "partial_project_funding": 0, "partial_project_score": 0, "uplift_project_score": 0, + }], [{"recommendation_id": "vent1", "type": "mechanical_ventilation", "total": 50, "kwh_savings": 30, - "energy_cost_savings": 5, "has_battery": False, "measure_type": "mechanical_ventilation"}], + "energy_cost_savings": 5, "has_battery": False, "measure_type": "mechanical_ventilation", + "partial_project_funding": 0, "partial_project_score": 0, "uplift_project_score": 0, }], ] measures = optimiser_functions.prepare_input_measures(recs, goal="Energy Savings", needs_ventilation=True) wall_option = measures[0][0] @@ -40,7 +46,8 @@ class TestPrepareInputMeasures: def test_filters_out_negative_cost_savings(self): recs = [ [{"recommendation_id": "bad1", "type": "loft_insulation", "total": 200, "kwh_savings": 100, - "energy_cost_savings": -5, "has_battery": False}], + "energy_cost_savings": -5, "has_battery": False, + "partial_project_funding": 0, "partial_project_score": 0, "uplift_project_score": 0, }], ] measures = optimiser_functions.prepare_input_measures(recs, goal="Energy Savings", needs_ventilation=False) assert measures == [] # should skip negative cost saving recs @@ -149,14 +156,14 @@ class TestIncreasingEpcE2e: @pytest.fixture def setup_case(self): - # ✅ Dummy property object + # Dummy property object p = SimpleNamespace( id="P1", has_ventilation=False, data={"current-energy-efficiency": "52"}, ) - # ✅ Dummy request body + # Dummy request body body = SimpleNamespace( goal="Increasing EPC", goal_value="C", @@ -165,9 +172,6 @@ class TestIncreasingEpcE2e: simulate_sap_10=False, required_measures=[] ) - - # ✅ Use your massive measures_to_optimise list - recommendations = {"P1": measures_to_optimise} return p, body, recommendations @@ -190,6 +194,18 @@ class TestIncreasingEpcE2e: assert needs_ventilation + # Input the various things we need - set all to 0 + for group in measures_to_optimise: + for r in group: 
+ ( + r["partial_project_score"], + r["partial_project_funding"], + r["innovation_uplift"], + r["uplift_project_score"], + ) = ( + 0, 0, 0, 0 + ) + input_measures = optimiser_functions.prepare_input_measures(measures_to_optimise, body.goal, needs_ventilation) assert input_measures, "Expected measures to optimise" From 2aecf27900cfd2d280ce6e342b08e913528d7385 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 29 Oct 2025 10:51:34 +0000 Subject: [PATCH 21/23] debugging funding optimiser for existing gshp - remove ashp and hhrsh recommendations when gshp in place --- backend/Funding.py | 23 +++- backend/app/db/models/inspections.py | 163 ++++++++++++++++++++++++++ backend/tests/test_funding.py | 82 +++++++++++++ recommendations/HeatingRecommender.py | 7 +- 4 files changed, 269 insertions(+), 6 deletions(-) create mode 100644 backend/app/db/models/inspections.py diff --git a/backend/Funding.py b/backend/Funding.py index d590474c..ece8e3cf 100644 --- a/backend/Funding.py +++ b/backend/Funding.py @@ -1,11 +1,14 @@ from enum import Enum from typing import List import pandas as pd +from utils.logger import setup_logger from etl.epc_clean.epc_attributes.MainheatAttributes import MainHeatAttributes from backend.app.plan.schemas import VALID_HOUSING_TYPES, WALL_INSULATION_MEASURES, ROOF_INSULATION_MEASURES, \ MEASURE_MAP +logger = setup_logger(__name__) + class EligibilityCaveats(Enum): EPC_RATING = "epc_rating" # EPC requirements not met @@ -637,13 +640,25 @@ class Funding: if self.starting_sap_band in ["Low_C", "High_C", "Low_B", "High_B", "Low_A", "High_A"]: return 0 - pps = filtered_pps_matrix[ - (filtered_pps_matrix["Pre_Main_Heating_Source"] == pre_heating_system) & - (filtered_pps_matrix["Post_Main_Heating_Source"] == "Air to Water ASHP") & - (filtered_pps_matrix["Measure_Type"] == "B_Upgrade_nopreHCs") + pps_data = filtered_pps_matrix[ + filtered_pps_matrix["Post_Main_Heating_Source"] == "Air to Water ASHP" + ] + + if pre_heating_system not in 
pps_data["Pre_Main_Heating_Source"].values: + logger.info( + f"No PPS data for ASHP upgrade from {pre_heating_system}, returning 0" + ) + return 0 + + pps = pps_data[ + (pps_data["Pre_Main_Heating_Source"] == pre_heating_system) & + (pps_data["Measure_Type"] == "B_Upgrade_nopreHCs") # We assume we'll be making a heating system upgrade ] + # Not every pre heating system will result in PPS, e.g. a ground source heat pump to ASHP upgrade + # won't have a PPS. + if pps.shape[0] != 1: raise ValueError("something went wrong, more than one pps for ashp") return pps.squeeze()["Cost Savings"] diff --git a/backend/app/db/models/inspections.py b/backend/app/db/models/inspections.py new file mode 100644 index 00000000..c9925a2a --- /dev/null +++ b/backend/app/db/models/inspections.py @@ -0,0 +1,163 @@ +import enum +import pytz +import datetime +from sqlalchemy import ( + Column, + BigInteger, + Text, + DateTime, + Enum, + ForeignKey, +) +from sqlalchemy.ext.declarative import declarative_base + +Base = declarative_base() + + +# ------------------------------------------------------------------- +# ENUM DEFINITIONS (equivalent to drizzle pgEnum calls) +# ------------------------------------------------------------------- + +class InspectionArchetype(enum.Enum): + BUNGALOW = "Bungalow" + FLAT = "Flat" + MAISONETTE = "Maisonette" + HOUSE = "House" + NON_DOMESTIC = "non-domestic" + + +class InspectionArchetype2(enum.Enum): + DETACHED = "detached" + MID_TERRACE = "mid-terrace" + ENCLOSED_MID_TERRACE = "enclosed mid-terrace" + END_TERRACE = "end-terrace" + ENCLOSED_END_TERRACE = "enclosed end-terrace" + SEMI_DETACHED = "semi-detached" + + +class InspectionsWallConstruction(enum.Enum): + CAVITY = "cavity" + SOLID = "solid" + SYSTEM_BUILT = "system built" + TIMBER_FRAMED = "timber framed" + STEEL_FRAMED = "steel framed" + RE_WALLED_CAVITY = "re-walled cavity" + MANSARD_PRE_FAB = "mansard pre-fab" + MANSARD_EWI = "mansard ewi" + MANSARD_RE_WALLED = "mansard re-walled" + + +class 
InspectionsWallInsulation(enum.Enum): + EMPTY_CAVITY = "empty cavity" + FILLED_AT_BUILD = "filled at build" + PARTIAL = "partial" + RETRO_DRILLED = "retro drilled" + EWI = "ewi" + IWI = "iwi" + SOLID_NON_CAVITY = "solid non-cavity" + SYSTEM_BUILT = "system built" + TIMBER_FRAMED = "timber framed" + STEEL_FRAMED = "steel framed" + + +class InspectionsInsulationMaterial(enum.Enum): + EMPTY_50_90 = "empty 50-90" + EMPTY_100_PLUS = "empty 100+" + EMPTY_30_40 = "empty 30-40" + EMPTY_LESS_THAN_30 = "empty less than 30" + LOOSE_FIBRE_WOOL = "loose fibre/wool" + EPS_CELO_KING = "eps/celo/king" + FIBRE_BATTS_WITH_CAVITY = "fibre batts - with cavity" + FIBRE_BATTS_NO_CAVITY = "fibre batts - no cavity" + LOOSE_BEAD = "loose bead" + GLUED_BEAD = "glued bead" + FORMALDEHYDE = "formaldehyde" + BUBBLE_WRAP = "bubble wrap" + POLY_CHUNKS = "poly chunks" + + +class InspectionBorescoped(enum.Enum): + YES = "yes" + NO = "no" + REFUSED = "refused" + + +class InspectionsRoofOrientation(enum.Enum): + NORTH = "north" + EAST = "east" + SOUTH = "south" + WEST = "west" + NORTH_EAST = "north-east" + NORTH_WEST = "north-west" + SOUTH_EAST = "south-east" + SOUTH_WEST = "south-west" + N_S_SPLIT = "n/s split" + E_W_SPLIT = "e/w split" + NE_SW_SPLIT = "ne/sw split" + NW_SE_SPLIT = "nw/se split" + FLAT_ROOF = "flat roof" + NO_ROOF = "no roof" + ROOF_TOO_SMALL = "roof too small" + ALREADY_HAS_SOLAR_PV = "already has solar pv" + + +class InspectionsTileHung(enum.Enum): + YES = "yes" + NO = "no" + FIRST_FLOOR_FLATS_TILE_HUNG = "first floor flats are tile hung" + + +class InspectionsRendered(enum.Enum): + NO_RENDER = "no render" + INSUFFICIENT_DPC_SPACE = "rendered with “insufficient” space between dpc and render" + SUFFICIENT_DPC_SPACE = "rendered with “sufficient” space between dpc and render" + + +class InspectionsCladding(enum.Enum): + NONE = "none" + SUFFICIENT_SPACE = "cladded with “sufficient space to fill the wall”" + INSUFFICIENT_SPACE = "cladded with “insufficient space to fill the wall”" + + 
+class InspectionsAccessIssues(enum.Enum):
+    SEE_NOTES = "see notes"
+    DAMP_ISSUES = "damp issues"
+    FOLIAGE_ON_WALLS = "foliage on walls"
+    BUSHES_AGAINST_WALL = "bushes against wall"
+    TREES_AROUND_ABOVE = "trees around/anove property"
+    HIGH_RISE = "high rise block flats/maisonettes"
+    CONSERVATORY = "conservatory"
+    LEAN_TO = "lean-to"
+    GARAGE = "garage"
+    EXTENSION = "extension"
+    DECKING = "decking"
+    SHED_AGAINST_WALL = "shed against wall"
+
+
+class InspectionModel(Base):
+    __tablename__ = "inspections"
+
+    id = Column(BigInteger, primary_key=True, autoincrement=True)
+    property_id = Column(BigInteger, ForeignKey("property.id"), nullable=False)
+
+    archetype = Column(Enum(InspectionArchetype), nullable=True)
+    archetype_2 = Column(Enum(InspectionArchetype2), nullable=True)
+    wall_construction = Column(Enum(InspectionsWallConstruction), nullable=True)
+    insulation = Column(Enum(InspectionsWallInsulation), nullable=True)
+    insulation_material = Column(Enum(InspectionsInsulationMaterial), nullable=True)
+    borescoped = Column(Enum(InspectionBorescoped), nullable=True)
+    roof_orientation = Column(Enum(InspectionsRoofOrientation), nullable=True)
+    tile_hung = Column(Enum(InspectionsTileHung), nullable=True)
+    rendered = Column(Enum(InspectionsRendered), nullable=True)
+    cladding = Column(Enum(InspectionsCladding), nullable=True)
+    access_issues = Column(Enum(InspectionsAccessIssues), nullable=True)
+
+    notes = Column(Text)
+    surveyor_name = Column(Text)
+
+    created_at = Column(  # default must be a callable: evaluated per INSERT, not once at import
+        DateTime, nullable=False, default=lambda: datetime.datetime.now(pytz.utc)
+    )
+    uploaded_at = Column(  # per-row callable default, for the same reason as created_at
+        DateTime, nullable=False, default=lambda: datetime.datetime.now(pytz.utc)
+    )
diff --git a/backend/tests/test_funding.py b/backend/tests/test_funding.py
index d84480ce..513c3271 100644
--- a/backend/tests/test_funding.py
+++ b/backend/tests/test_funding.py
@@ -1393,3 +1393,85 @@ def test_private_epc_e_solar_with_heating_and_minimum_insulation_produces_uplift
     assert funding.eco4_uplift and
funding.eco4_uplift > 0 # And total funding should include that uplift assert funding.eco4_funding and funding.eco4_funding > 0 + + +def test_existing_gshp_to_ashp(): + r = {'phase': 3, 'parts': [], 'type': 'heating', 'measure_type': 'air_source_heat_pump', + 'description': 'Install a 5KW air source heat pump, and upgrade heating controls to Smart Thermostats, ' + 'room sensors and smart radiator valves (time & temperature zone control). Ensure you have a ' + 'single tariff', + 'starting_u_value': None, 'new_u_value': None, 'sap_points': 7.7, 'already_installed': False, + 'simulation_config': {'mainheat_energy_eff_ending': 'Good', 'hot_water_energy_eff_ending': 'Average', + 'has_air_source_heat_pump_ending': True, 'has_ground_source_heat_pump_ending': False, + 'extra_features_ending': None, + 'thermostatic_control_ending': 'time and temperature zone control', + 'switch_system_ending': None, 'multiple_room_thermostats_ending': False, + 'mainheatc_energy_eff_ending': 'Very Good'}, + 'description_simulation': {'mainheat-description': 'Air source heat pump, radiators, electric', + 'mainheat-energy-eff': 'Good', 'hot-water-energy-eff': 'Average', + 'hotwater-description': 'From main system', + 'mainheatcont-description': 'Time and temperature zone control', + 'mainheatc-energy-eff': 'Very Good'}, 'total': 13188.996000000001, + 'contingency': 3145.8150000000005, 'contingency_rate': 0.35, 'vat': 2080.666, 'labour_hours': 44.7, + 'labour_days': 6.0, 'innovation_rate': 0, 'recommendation_id': '6_phase=3', + 'efficiency': 13188.996000000001, 'co2_equivalent_savings': 0.4999999999999998, + 'heat_demand': 53.20000000000002, 'kwh_savings': 801.5000000000005, + 'energy_cost_savings': 327.31316785714296 + } + + funding = Funding( + project_scores_matrix=mock_project_scores_matrix, + partial_project_scores_matrix=mock_partial_scores_matrix, + whlg_eligible_postcodes=mock_whlg_postcodes, + eco4_social_cavity_abs_rate=13.5, + eco4_social_solid_abs_rate=17, + 
eco4_private_cavity_abs_rate=13.5, + eco4_private_solid_abs_rate=17, + gbis_social_cavity_abs_rate=21, + gbis_social_solid_abs_rate=25, + gbis_private_cavity_abs_rate=22, + gbis_private_solid_abs_rate=28, + tenure="Private", + ) + + ( + pps, ppf, iu, ups + ) = funding.get_innovation_uplift( + measure=r, + starting_sap=62, + floor_area=69, + is_cavity=True, + current_wall_uvalue=0.7, + is_partial=False, + existing_li_thickness=200, + mainheating={ + 'original_description': 'Ground source heat pump, radiators, electric', + 'clean_description': 'Ground source heat pump, radiators, electric', 'has_radiators': True, + 'has_fan_coil_units': False, 'has_pipes_in_screed_above_insulation': False, + 'has_pipes_in_insulated_timber_floor': False, 'has_pipes_in_concrete_slab': False, 'has_boiler': False, + 'has_air_source_heat_pump': False, 'has_room_heaters': False, 'has_electric_storage_heaters': False, + 'has_warm_air': False, 'has_electric_underfloor_heating': False, 'has_electric_ceiling_heating': False, + 'has_community_scheme': False, 'has_ground_source_heat_pump': True, 'has_no_system_present': False, + 'has_portable_electric_heaters': False, 'has_water_source_heat_pump': False, + 'has_electric_heat_pump': False, 'has_micro-cogeneration': False, 'has_solar_assisted_heat_pump': False, + 'has_exhaust_source_heat_pump': False, 'has_community_heat_pump': False, 'has_hot-water-only': False, + 'has_electric': True, 'has_mains_gas': False, 'has_wood_logs': False, 'has_coal': False, 'has_oil': False, + 'has_wood_pellets': False, 'has_anthracite': False, 'has_dual_fuel_mineral_and_wood': False, + 'has_smokeless_fuel': False, 'has_lpg': False, 'has_b30k': False, 'has_mineral_and_wood': False, + 'has_dual_fuel_appliance': False, 'has_assumed': False, 'has_electricaire': False, + 'has_assumed_for_most_rooms': False, 'has_underfloor_heating': False + }, + main_fuel={ + 'original_description': 'electricity (not community)', + 'clean_description': 'Electricity not community', 
'fuel_type': 'electricity', 'tariff_type': None, + 'is_community': False, 'no_individual_heating_or_community_network': False, + 'complex_fuel_type': None + }, + mainheat_energy_eff="Poor", + ) + + # All should be zero + assert pps == 0 + assert ppf == 0 + assert iu == 0 + assert ups == 0 diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py index 73edff53..41785104 100644 --- a/recommendations/HeatingRecommender.py +++ b/recommendations/HeatingRecommender.py @@ -103,6 +103,7 @@ class HeatingRecommender: self.property.main_heating["has_electric"] or self.property.main_heating["has_electricaire"] ) self.has_ashp = self.property.main_heating["has_air_source_heat_pump"] + self.has_gshp = self.property.main_heating["has_ground_source_heat_pump"] self.has_room_heaters = ( self.property.main_heating["has_room_heaters"] or self.property.main_heating["has_portable_electric_heaters"] @@ -151,8 +152,10 @@ class HeatingRecommender: "underfloor heating" not in self.property.main_heating["clean_description"] ) + # If the property has a ground source heat pump, or air source heat pump, we don't recommend HHRSH + return ( - hhr_suitable and (not ashp_only_heating_recommendation) and not self.has_ashp and + hhr_suitable and (not ashp_only_heating_recommendation) and not self.has_ashp and not self.has_gshp and ("high_heat_retention_storage_heater" in measures) ) @@ -345,7 +348,7 @@ class HeatingRecommender: if ( self.property.is_ashp_valid(measures=measures) and non_invasive_ashp_recommendation["suitable"] and - not self.has_ashp + not self.has_ashp and not self.has_gshp ): self.recommend_air_source_heat_pump( phase=phase, From 2f3d49dff436a8ef87a87d3403111efe3e85b0a5 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 29 Oct 2025 11:13:27 +0000 Subject: [PATCH 22/23] fixed issue with existing gshp and added test --- recommendations/optimiser/CostOptimiser.py | 3 ++- recommendations/optimiser/GainOptimiser.py | 3 ++- 2 files changed, 
4 insertions(+), 2 deletions(-) diff --git a/recommendations/optimiser/CostOptimiser.py b/recommendations/optimiser/CostOptimiser.py index 50f4b884..b01d28b3 100644 --- a/recommendations/optimiser/CostOptimiser.py +++ b/recommendations/optimiser/CostOptimiser.py @@ -109,7 +109,8 @@ class CostOptimiser: self.m.optimize() if self.m.status == OptimizationStatus.INFEASIBLE: - logger.info("We have an infeasible model, setting up slack model") + # Turn off logging - too noisy + # logger.info("We have an infeasible model, setting up slack model") self.setup_slack() self.m.optimize() diff --git a/recommendations/optimiser/GainOptimiser.py b/recommendations/optimiser/GainOptimiser.py index 7b2e56d2..6b757bf1 100644 --- a/recommendations/optimiser/GainOptimiser.py +++ b/recommendations/optimiser/GainOptimiser.py @@ -133,7 +133,8 @@ class GainOptimiser: (self.m.status == OptimizationStatus.OPTIMAL) and not len(solution) ): if self.allow_slack: - logger.info("We have an infeasible model, setting up slack model") + # Turn off logging - too noisy + # logger.info("We have an infeasible model, setting up slack model") self.setup_slack() self.m.optimize() solution = [ From 27de54adefd02196233ea722e6f6e0513fb0ae87 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 29 Oct 2025 15:15:07 +0000 Subject: [PATCH 23/23] increase concurrency, handle error case for gain equal to fixed gain --- recommendations/optimiser/funding_optimiser.py | 2 +- serverless.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/recommendations/optimiser/funding_optimiser.py b/recommendations/optimiser/funding_optimiser.py index 8fbb13b2..4da08587 100644 --- a/recommendations/optimiser/funding_optimiser.py +++ b/recommendations/optimiser/funding_optimiser.py @@ -338,7 +338,7 @@ def optimise_with_funding_paths(p, input_measures, housing_type, funding: Fundin if fixed_gain > target_gain: picked, sub_cost, sub_gain = ([], 0.0, 0.0) - elif fixed_gain < target_gain and not 
sub_measures: + elif fixed_gain <= target_gain and not sub_measures: picked, sub_cost, sub_gain = ([], 0.0, 0.0) else: picked, sub_cost, sub_gain = run_optimizer( diff --git a/serverless.yml b/serverless.yml index c1fc0b09..6eea03eb 100644 --- a/serverless.yml +++ b/serverless.yml @@ -66,7 +66,7 @@ functions: - sqs: arn: arn:aws:sqs:${self:provider.region}:${aws:accountId}:model-engine-queue batchSize: 1 - maximumConcurrency: 2 # Heavily restricts concurrency to avoid overwhelming the ldmbda limits + maximumConcurrency: 5 # Heavily restricts concurrency to avoid overwhelming the Lambda limits resources: