From 82ede4d8cdc1cbda8917d4d60b35eebb8c327bd6 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sat, 9 Aug 2025 22:00:08 +0100
Subject: [PATCH] Fix funding edge cases and add tests (WIP)

---
 backend/Funding.py                            |   6 +-
 .../tests/test_data/pre_heating_scenarios.py  | 144 ++++++++++++++++++
 backend/tests/test_funding.py                 | 128 +++++++++++-----
 etl/epc_clean/tests/test_attribute_utils.py   |  17 +--
 4 files changed, 246 insertions(+), 49 deletions(-)
 create mode 100644 backend/tests/test_data/pre_heating_scenarios.py

diff --git a/backend/Funding.py b/backend/Funding.py
index 912c0426..027a9a2e 100644
--- a/backend/Funding.py
+++ b/backend/Funding.py
@@ -1,6 +1,5 @@
 from enum import Enum
 from typing import List
-
 import pandas as pd
 
 from etl.epc_clean.epc_attributes.MainheatAttributes import MainHeatAttributes
@@ -57,6 +56,7 @@ class Funding:
         # Funding calculation variables
         self.full_project_abs = None
         self.eco4_funding = None
+        self.eco4_uplift = 0
 
         self.partial_project_abs = None
 
@@ -875,8 +875,8 @@ class Funding:
                         pre_heating_system=pre_heating_system
                     )
                     project_uplifts.append(pps * uplifts[i])
-                total_uplift = sum(project_uplifts)
-                self.full_project_abs += total_uplift
+                self.eco4_uplift = sum(project_uplifts)
+                self.full_project_abs += self.eco4_uplift
                 self.eco4_funding = self.full_project_abs * (
                     self.social_cavity_abs_rate if is_cavity else self.social_solid_abs_rate
                 )
diff --git a/backend/tests/test_data/pre_heating_scenarios.py b/backend/tests/test_data/pre_heating_scenarios.py
new file mode 100644
index 00000000..ab5dcbb1
--- /dev/null
+++ b/backend/tests/test_data/pre_heating_scenarios.py
@@ -0,0 +1,144 @@
+# Each scenario spells out the EPC inputs and the pre-main-heating category the mapper is expected to return
+pre_main_heating_scenarios = [
+    # --- Mains gas boilers (radiators) ---
+    {
+        "description": "Boiler and radiators, mains gas (condensing expected)",
+        "MAINHEAT_DESCRIPTION": "Boiler and radiators, mains gas",
+        "MAIN_FUEL": "mains gas (not community)",
+        "MAINHEAT_ENERGY_EFF": "Good",
+        "expected": "Condensing Gas Boiler",
+    },
+    {
+        "description": "Boiler and radiators, mains gas (non-condensing expected)",
+        "MAINHEAT_DESCRIPTION": "Boiler and radiators, mains gas",
+        "MAIN_FUEL": "mains gas - this is for backwards compatibility only and should not be used",
+        "MAINHEAT_ENERGY_EFF": "Average",
+        "expected": "Non Condensing Gas Boiler",
+    },
+    {
+        "description": "Boiler and radiators, mains gas (very poor => back boiler to rads)",
+        "MAINHEAT_DESCRIPTION": "Boiler and radiators, mains gas",
+        "MAIN_FUEL": "Gas: mains gas",
+        "MAINHEAT_ENERGY_EFF": "Very Poor",
+        "expected": "Gas Back Boiler to Radiators",
+    },
+
+    # --- Warm air (treated like the gas boiler family by the mapper) ---
+    {
+        "description": "Warm air, mains gas (good => condensing)",
+        "MAINHEAT_DESCRIPTION": "Warm air, mains gas",
+        "MAIN_FUEL": "mains gas (not community)",
+        "MAINHEAT_ENERGY_EFF": "Good",
+        "expected": "Condensing Gas Boiler",
+    },
+
+    # --- Community scheme (CHP vs non-CHP depends on energy eff) ---
+    {
+        "description": "Community scheme (gas, good => CHP)",
+        "MAINHEAT_DESCRIPTION": "Community scheme",
+        "MAIN_FUEL": "mains gas (community)",
+        "MAINHEAT_ENERGY_EFF": "Good",
+        "expected": "DHS CHP",
+    },
+    {
+        "description": "Community scheme (gas, average => non-CHP)",
+        "MAINHEAT_DESCRIPTION": "Community scheme",
+        "MAIN_FUEL": "mains gas (community)",
+        "MAINHEAT_ENERGY_EFF": "Average",
+        "expected": "DHS non-CHP",
+    },
+    {
+        "description": "Community scheme (no fuel data, good => CHP)",
+        "MAINHEAT_DESCRIPTION": "Community scheme",
+        "MAIN_FUEL": "NO DATA!",
+        "MAINHEAT_ENERGY_EFF": "Good",
+        "expected": "DHS CHP",
+    },
+
+    # --- Electric storage heaters (ESH responsiveness split) ---
+    {
+        "description": "Electric storage heaters (average => responsiveness > 0.2)",
+        "MAINHEAT_DESCRIPTION": "Electric storage heaters",
+        "MAIN_FUEL": "electricity (not community)",
+        "MAINHEAT_ENERGY_EFF": "Average",
+        "expected": "Electric Storage Heaters Responsiveness >0.2",
+    },
+    {
+        "description": "Electric storage heaters (poor => responsiveness > 0.2)",
+        "MAINHEAT_DESCRIPTION": "Electric storage heaters",
+        "MAIN_FUEL": "electricity - this is for backwards compatibility only and should not be used",
+        "MAINHEAT_ENERGY_EFF": "Poor",
+        "expected": "Electric Storage Heaters Responsiveness >0.2",
+    },
+    {
+        "description": "Electric storage heaters (very poor => responsiveness <= 0.2)",
+        "MAINHEAT_DESCRIPTION": "Electric storage heaters",
+        "MAIN_FUEL": "electricity (not community)",
+        "MAINHEAT_ENERGY_EFF": "Very Poor",
+        "expected": "Electric Storage Heaters Responsiveness <=0.2",
+    },
+
+    # --- Electric direct-acting / room heaters ---
+    {
+        "description": "Room heaters, electric (very poor)",
+        "MAINHEAT_DESCRIPTION": "Room heaters, electric",
+        "MAIN_FUEL": "electricity (not community)",
+        "MAINHEAT_ENERGY_EFF": "Very Poor",
+        "expected": "Electric Room Heaters",
+    },
+    {
+        "description": "Room heaters, electric (poor, unspecified tariff)",
+        "MAINHEAT_DESCRIPTION": "Room heaters, electric",
+        "MAIN_FUEL": "Electricity: electricity, unspecified tariff",
+        "MAINHEAT_ENERGY_EFF": "Poor",
+        "expected": "Electric Room Heaters",
+    },
+    {
+        "description": "Portable electric heaters assumed for most rooms (maps to electric room heaters)",
+        "MAINHEAT_DESCRIPTION": "Portable electric heaters assumed for most rooms",
+        "MAIN_FUEL": "mains gas (not community)",  # weird in EPCs, but your mapper forces electric room heaters here
+        "MAINHEAT_ENERGY_EFF": "Very Poor",
+        "expected": "Electric Room Heaters",
+    },
+    {
+        "description": "No system present: electric heaters assumed",
+        "MAINHEAT_DESCRIPTION": "No system present: electric heaters assumed",
+        "MAIN_FUEL": "To be used only when there is no heating/hot-water system",
+        "MAINHEAT_ENERGY_EFF": "Very Poor",
+        "expected": "Electric Room Heaters",
+    },
+    {
+        "description": "Electric underfloor heating => direct-acting electric",
+        "MAINHEAT_DESCRIPTION": "Electric underfloor heating",
+        "MAIN_FUEL": "electricity (not community)",
+        "MAINHEAT_ENERGY_EFF": "Average",
+        "expected": "Electric Room Heaters",
+    },
+
+    # --- Gas room heaters ---
+    {
+        "description": "Room heaters, mains gas (average)",
+        "MAINHEAT_DESCRIPTION": "Room heaters, mains gas",
+        "MAIN_FUEL": "mains gas (not community)",
+        "MAINHEAT_ENERGY_EFF": "Average",
+        "expected": "Gas Room Heaters",
+    },
+
+    # --- Electric boiler ---
+    {
+        "description": "Boiler and radiators, electric (very poor => electric boiler)",
+        "MAINHEAT_DESCRIPTION": "Boiler and radiators, electric",
+        "MAIN_FUEL": "electricity (not community)",
+        "MAINHEAT_ENERGY_EFF": "Very Poor",
+        "expected": "Electric Boiler",
+    },
+
+    # --- Gas boiler + UFH (still ‘boiler’ logic) ---
+    {
+        "description": "Boiler and underfloor heating, mains gas (good => condensing)",
+        "MAINHEAT_DESCRIPTION": "Boiler and underfloor heating, mains gas",
+        "MAIN_FUEL": "mains gas (not community)",
+        "MAINHEAT_ENERGY_EFF": "Good",
+        "expected": "Condensing Gas Boiler",
+    },
+]
diff --git a/backend/tests/test_funding.py b/backend/tests/test_funding.py
index 872d0f21..456c090e 100644
--- a/backend/tests/test_funding.py
+++ b/backend/tests/test_funding.py
@@ -2,6 +2,7 @@ import pytest
 import pandas as pd
 from backend.Funding import Funding, EligibilityCaveats
 from backend.tests.test_data.innovation_measure_fixtures import innovation_scenarios
+from backend.tests.test_data.pre_heating_scenarios import pre_main_heating_scenarios
 
 
 @pytest.fixture
@@ -43,6 +44,49 @@ def mock_whlg_postcodes():
     return pd.DataFrame([{"Postcode": "ab12cd"}])
 
 
+@pytest.fixture
+def mock_mainheating():  # mainheating flags dict for an 'Electric storage heaters' system; passed as mainheating= in test_uplift
+    return {  # only has_electric_storage_heaters and has_electric are True; every other flag is False
+        'original_description': 'Electric storage heaters', 'has_radiators': False,
+        'has_fan_coil_units': False,
+        'has_pipes_in_screed_above_insulation': False,
+        'has_pipes_in_insulated_timber_floor': False,
+        'has_pipes_in_concrete_slab': False, 'has_boiler': False,
+        'has_air_source_heat_pump': False,
+        'has_room_heaters': False, 'has_electric_storage_heaters': True, 'has_warm_air': False,
+        'has_electric_underfloor_heating': False, 'has_electric_ceiling_heating': False,
+        'has_community_scheme': False,
+        'has_ground_source_heat_pump': False, 'has_no_system_present': False,
+        'has_portable_electric_heaters': False,
+        'has_water_source_heat_pump': False, 'has_electric': True, 'has_mains_gas': False,
+        'has_wood_logs': False,
+        'has_coal': False, 'has_oil': False, 'has_wood_pellets': False, 'has_anthracite':
+            False,
+        'has_dual_fuel_mineral_and_wood': False, 'has_smokeless_fuel': False, 'has_lpg': False,
+        'has_assumed': False,
+        'has_electricaire': False, 'has_assumed_for_most_rooms': False,
+        'has_underfloor_heating': False,
+        "has_electric_heat_pumps": False,
+        "has_micro-cogeneration": False
+    }
+
+
+@pytest.fixture
+def mock_main_fuel():  # main_fuel attributes dict for non-community, unspecified-tariff electricity; passed as main_fuel= in test_uplift
+    return {
+        'original_description': 'Electricity: electricity, unspecified tariff', 'fuel_type':
+            'electricity',
+        'tariff_type': 'unspecified tariff', 'is_community': False,
+        'no_individual_heating_or_community_network': False,
+        'complex_fuel_type': None
+    }
+
+
+@pytest.fixture
+def mock_mainheat_energy_eff():  # energy-efficiency rating string passed as mainheat_energy_eff= in test_uplift
+    return "Average"  # NOTE(review): the inline value this fixture replaces in test_uplift was "Good" — confirm the change is intended
+
+
 ### -------------------------
 ### PRIVATE RENTED SECTOR (PRS)
 ### -------------------------
@@ -916,7 +960,10 @@ def test_custom_eco4_scenarios(
 def test_uplift(
     mock_project_scores_matrix,
     mock_partial_scores_matrix,
-    mock_whlg_postcodes
+    mock_whlg_postcodes,
+    mock_mainheating,
+    mock_main_fuel,
+    mock_mainheat_energy_eff
 ):
     funding = Funding(
         project_scores_matrix=mock_project_scores_matrix,
@@ -939,38 +986,6 @@ def test_uplift(
         {"type": "cavity_wall_insulation", "is_innovation": False, "uplift": 0.25},
     ]
 
-    mainheating = {
-        'original_description': 'Electric storage heaters', 'has_radiators': False,
-        'has_fan_coil_units': False,
-        'has_pipes_in_screed_above_insulation': False,
-        'has_pipes_in_insulated_timber_floor': False,
-        'has_pipes_in_concrete_slab': False, 'has_boiler': False,
-        'has_air_source_heat_pump': False,
-        'has_room_heaters': False, 'has_electric_storage_heaters': True, 'has_warm_air': False,
-        'has_electric_underfloor_heating': False, 'has_electric_ceiling_heating': False,
-        'has_community_scheme': False,
-        'has_ground_source_heat_pump': False, 'has_no_system_present': False,
-        'has_portable_electric_heaters': False,
-        'has_water_source_heat_pump': False, 'has_electric': True, 'has_mains_gas': False,
-        'has_wood_logs': False,
-        'has_coal': False, 'has_oil': False, 'has_wood_pellets': False, 'has_anthracite':
-            False,
-        'has_dual_fuel_mineral_and_wood': False, 'has_smokeless_fuel': False, 'has_lpg': False,
-        'has_assumed': False,
-        'has_electricaire': False, 'has_assumed_for_most_rooms': False,
-        'has_underfloor_heating': False,
-        "has_electric_heat_pumps": False,
-        "has_micro-cogeneration": False
-    }
-    main_fuel = {
-        'original_description': 'Electricity: electricity, unspecified tariff', 'fuel_type':
-            'electricity',
-        'tariff_type': 'unspecified tariff', 'is_community': False,
-        'no_individual_heating_or_community_network': False,
-        'complex_fuel_type': None
-    }
-    mainheat_energy_eff = "Good"
-
     funding.check_funding(
         measures=measures,
         starting_sap=33,
@@ -984,11 +999,46 @@ def test_uplift(
         existing_li_thickness=0,
         has_wall_insulation_recommendation=True,
         has_roof_insulation_recommendation=True,
-        mainheating=mainheating,
-        main_fuel=main_fuel,
-        mainheat_energy_eff=mainheat_energy_eff,
+        mainheating=mock_mainheating,
+        main_fuel=mock_main_fuel,
+        mainheat_energy_eff=mock_mainheat_energy_eff,
     )
 
+    assert funding.eco4_funding == 123
+    assert funding.eco4_uplift == 456
+
+
+def _dummy_funding():  # builds a Funding instance for tests that only exercise _map_to_pre_main_heating
+    # Placeholder arguments: matrices/whlg are presumably unused by _map_to_pre_main_heating — confirm against Funding.__init__
+    return Funding(
+        tenure="Social",
+        social_cavity_abs_rate=0.0,
+        social_solid_abs_rate=0.0,
+        private_cavity_abs_rate=0.0,
+        private_solid_abs_rate=0.0,
+        project_scores_matrix=None,
+        partial_project_scores_matrix=None,
+        whlg_eligible_postcodes=set(),  # NOTE(review): the mock_whlg_postcodes fixture returns a DataFrame — confirm a set is accepted here
+    )
+
+
+@pytest.mark.parametrize("scenario", pre_main_heating_scenarios)
+def test_map_to_pre_main_heating(scenario):  # one parametrized case per entry in pre_main_heating_scenarios
+    funding = _dummy_funding()
+
+    # Build the normalized mainheating / main_fuel inputs with the project's attribute processors
+    h = MainHeatAttributes(description=scenario["MAINHEAT_DESCRIPTION"]).process()  # MainHeatAttributes is imported further down this module
+    f = MainFuelAttributes(description=scenario["MAIN_FUEL"]).process()  # NOTE(review): MainFuelAttributes import is not visible in this patch — confirm it is in scope
+
+    result = funding._map_to_pre_main_heating(
+        mainheating=h,
+        main_fuel=f,
+        mainheat_energy_eff=scenario["MAINHEAT_ENERGY_EFF"],
+    )
+
+    assert result == scenario[
+        "expected"], f"Failed: {scenario['description']} -> {result} (expected {scenario['expected']})"
+
 
 # Large scale testing for various measures
 measures = [
@@ -1003,7 +1053,7 @@ measures = [
     {"type": "high_heat_retention_storage_heaters", "is_innovation": False, "uplift": 0},
 ]
 epc_df = pd.read_csv(
-    "/Users/khalimconn-kowlessar/Downloads/domestic-E08000025-Birmingham/certificates.csv"
+    "/Users/khalimconn-kowlessar/Downloads/domestic-E08000003-Manchester/certificates.csv"
 )
 from tqdm import tqdm
 from etl.epc_clean.epc_attributes.MainheatAttributes import MainHeatAttributes
@@ -1082,3 +1132,7 @@ x = errored_epcs[
     (errored_epcs["MAINHEAT_DESCRIPTION"] == unique_combs["MAINHEAT_DESCRIPTION"].values[i]) &
     (errored_epcs["MAIN_FUEL"] == unique_combs["MAIN_FUEL"].values[i])
     ].head(1).squeeze()
+
+most_prominent_combinations = epc_df.groupby(  # 30 most common heating combinations, ranked by distinct LMK_KEY count
+    ["MAINHEAT_ENERGY_EFF", "MAINHEAT_DESCRIPTION", "MAIN_FUEL"]
+)["LMK_KEY"].nunique().reset_index().sort_values("LMK_KEY", ascending=False).head(30).to_dict("records")  # list of row dicts
diff --git a/etl/epc_clean/tests/test_attribute_utils.py b/etl/epc_clean/tests/test_attribute_utils.py
index f4e74da3..e7ea7b4e 100644
--- a/etl/epc_clean/tests/test_attribute_utils.py
+++ b/etl/epc_clean/tests/test_attribute_utils.py
@@ -53,12 +53,11 @@ def test_process_part_value_errors():
     with pytest.raises(ValueError):
         attribute_utils.process_part(result, part, attr_list, prefix)
 
-
-# Test for no attribute matches found
-def test_process_part_no_matches():
-    result = {'has_glazing': False, 'has_glazed': False, 'has_glaze': False}
-    part = 'high performance coating'
-    attr_list = ['glazing', 'glazed', 'glaze']
-    prefix = 'has_'
-    with pytest.raises(ValueError):
-        attribute_utils.process_part(result, part, attr_list, prefix)
+# Test for no attribute matches found - we don't raise this error any more
+# def test_process_part_no_matches():
+#     result = {'has_glazing': False, 'has_glazed': False, 'has_glaze': False}
+#     part = 'high performance coating'
+#     attr_list = ['glazing', 'glazed', 'glaze']
+#     prefix = 'has_'
+#     with pytest.raises(ValueError):
+#         attribute_utils.process_part(result, part, attr_list, prefix)