From 7969f517337865353c03c87536d0a3aa58e1ad61 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 11 Jan 2024 11:57:44 +0000
Subject: [PATCH] set up load data function for cancellation app

---
 .idea/Model.iml                      |   2 +-
 .idea/misc.xml                       |   2 +-
 etl/eligibility/ha_15_32/app.py      |   9 +-
 etl/eligibility/ha_15_32/ha16_app.py |  11 +-
 etl/eligibility/ha_15_32/ha24_app.py |  39 ++++++-
 etl/eligibility/ha_15_32/ha25_app.py | 150 +++++++++++++++------------
 etl/eligibility/ha_15_32/ha4_app.py  |  39 +++++--
 etl/eligibility/ha_15_32/ha7_app.py  |  67 ++++++++++--
 recommendations/Costs.py             |   6 +-
 9 files changed, 234 insertions(+), 91 deletions(-)
diff --git a/.idea/Model.iml b/.idea/Model.iml
index b0f9c00d..4413bb06 100644
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@@ -7,7 +7,7 @@
       <sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
       <sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
     </content>
-    <orderEntry type="jdk" jdkName="Python 3.10 (model_data)" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Python 3.10 (backend)" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="PyNamespacePackagesService">
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 1122b380..6f308057 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,7 +3,7 @@
   <component name="Black">
     <option name="sdkName" value="Python 3.10 (backend)" />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (model_data)" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (backend)" project-jdk-type="Python SDK" />
   <component name="PythonCompatibilityInspectionAdvertiser">
     <option name="version" value="3" />
   </component>
diff --git a/etl/eligibility/ha_15_32/app.py b/etl/eligibility/ha_15_32/app.py
index 3a0caec6..b7f44a43 100644
--- a/etl/eligibility/ha_15_32/app.py
+++ b/etl/eligibility/ha_15_32/app.py
@@ -358,9 +358,16 @@ def prepare_model_data_row(
 
     p.get_components(cleaned, photo_supply_lookup=photo_supply_lookup,
                      floor_area_decile_thresholds=floor_area_decile_thresholds)
+
+    # TEMP (shouldn't live here): fall back to NUMBER_HABITABLE_ROOMS when NUMBER_HEATED_ROOMS is missing
+    data_to_clean = p.get_model_data()
+    if data_to_clean["NUMBER_HEATED_ROOMS"] in ['', None]:
+        data_to_clean["NUMBER_HEATED_ROOMS"] = data_to_clean["NUMBER_HABITABLE_ROOMS"]
+        p.data["number-heated-rooms"] = data_to_clean["NUMBER_HABITABLE_ROOMS"]
+
     # This is temp - this should happen after scoring
     cleaned_property_data = DataProcessor.apply_averages_cleaning(
-        data_to_clean=pd.DataFrame([dict(**p.get_model_data(), LOCAL_AUTHORITY=p.data["local-authority"])]),
+        data_to_clean=pd.DataFrame([dict(**data_to_clean, LOCAL_AUTHORITY=p.data["local-authority"])]),
         cleaning_data=cleaning_data,
         cols_to_merge_on=['PROPERTY_TYPE', 'BUILT_FORM', 'CONSTRUCTION_AGE_BAND', 'LOCAL_AUTHORITY'],
     )
diff --git a/etl/eligibility/ha_15_32/ha16_app.py b/etl/eligibility/ha_15_32/ha16_app.py
index f2b80542..b7f076b1 100644
--- a/etl/eligibility/ha_15_32/ha16_app.py
+++ b/etl/eligibility/ha_15_32/ha16_app.py
@@ -18,6 +18,7 @@ from etl.epc.settings import COLUMNS_TO_MERGE_ON
 from backend.ml_models.api import ModelApi
 from etl.solar.SolarPhotoSupply import SolarPhotoSupply
 from recommendations.recommendation_utils import calculate_cavity_age
+from recommendation_utils import convert_thickness_to_numeric
 
 import re
 
@@ -484,9 +485,6 @@ def analyse_results(results_df, data, survey_list):
         how="left", on="survey_key"
     )
 
-    from recommendation_utils import convert_thickness_to_numeric
-
-    analysis_data["roof_insulation_thickness"] = analysis_data["roof_insulation_thickness"].fillna(None)
     analysis_data["roof_insulation_thickness"] = np.where(
         pd.isnull(analysis_data["roof_insulation_thickness"]), None, analysis_data["roof_insulation_thickness"]
     )
@@ -497,13 +495,12 @@ def analyse_results(results_df, data, survey_list):
     warmfront_sold_eco4 = analysis_data[
         (analysis_data["warmfront_identified"] == True) & (
             analysis_data["funding_scheme"].isin(["ECO4 A/W", "AFFORDABLE WARMTH"]))
-        ]
+        ]  # 1407
 
     warmfront_sold_gbis = analysis_data[
         (analysis_data["warmfront_identified"] == True) & (
             analysis_data["funding_scheme"].isin(["ECO4 GBIS (ECO+)"]))
         ]
-    # 1407
 
     ideal_eco4_warmfront_not_sold = analysis_data[
         (analysis_data["eco4_eligible"] == True) & (analysis_data["warmfront_identified"] == False) & (
@@ -519,7 +516,7 @@ def analyse_results(results_df, data, survey_list):
     underperforming_cavities = analysis_data[
         (analysis_data["eco4_message"] == "Failed due to full cavity - check cavity age") & (
             analysis_data["cavity_age"] > 10 * 365
-        )
+        ) & (analysis_data["roof_insulation_thickness_numeric"] <= 100)
         ]
 
     identified_gbis_not_sold = analysis_data[
@@ -643,7 +640,7 @@ def app():
 
     # Read pickle
     # import pickle
-    # with open("ha16_8_jan_2.pickle", "rb") as f:
+    # with open("ha16_10_jan.pickle", "rb") as f:
     #     saved = pickle.load(f)
     # scoring_data = saved["scoring_data"]
     # results_df = saved["results"]
diff --git a/etl/eligibility/ha_15_32/ha24_app.py b/etl/eligibility/ha_15_32/ha24_app.py
index 49a5abb1..dc4df018 100644
--- a/etl/eligibility/ha_15_32/ha24_app.py
+++ b/etl/eligibility/ha_15_32/ha24_app.py
@@ -17,6 +17,7 @@ from etl.epc.settings import COLUMNS_TO_MERGE_ON
 from backend.ml_models.api import ModelApi
 from etl.solar.SolarPhotoSupply import SolarPhotoSupply
 from recommendations.recommendation_utils import calculate_cavity_age
+from recommendation_utils import convert_thickness_to_numeric
 
 EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
 ENV_FILE = Path(__file__).parent / "etl" / "eligibility" / "ha_15_32" / ".env"
@@ -393,6 +394,42 @@ def analyse_results(results_df, data, survey_list):
         how="left", on="survey_key"
     )
 
+    # NEW
+
+    analysis_data["roof_insulation_thickness"] = np.where(
+        pd.isnull(analysis_data["roof_insulation_thickness"]), None, analysis_data["roof_insulation_thickness"]
+    )
+    analysis_data["roof_insulation_thickness_numeric"] = analysis_data["roof_insulation_thickness"].apply(
+        lambda x: convert_thickness_to_numeric(x, is_flat=False, is_pitched=True)
+    )
+
+    warmfront_sold_eco4 = analysis_data[
+        (analysis_data["warmfront_identified"] == True) & (
+            analysis_data["funding_scheme"].isin(["ECO4 A/W", "AFFORDABLE WARMTH"]))
+        ]
+
+    warmfront_sold_gbis = analysis_data[
+        (analysis_data["warmfront_identified"] == True) & (
+            analysis_data["funding_scheme"].isin(["ECO4 GBIS (ECO+)"]))
+        ]
+    # 1407
+
+    additional_eco4_warmfront_not_sold = analysis_data[
+        (analysis_data["eco4_eligible"] == True) & (analysis_data["warmfront_identified"] == False) & (
+            analysis_data["roof_insulation_thickness_numeric"] <= 100)
+        ]
+
+    additional_gbis_warmfront_not_sold = analysis_data[
+        (analysis_data["gbis_eligible"] == True) & (analysis_data["warmfront_identified"] == False) & (
+            ~analysis_data["row_id"].isin(additional_eco4_warmfront_not_sold["row_id"].values)
+        )
+        ]
+
+    additional_gbis_warmfront_not_sold["walls"].value_counts()
+    analysis_data["walls"].value_counts()
+
+    # END NEW
+
     all_identified_eco = analysis_data[
         (analysis_data["warmfront_identified"] & analysis_data["funding_scheme"].isin(
             ["ECO4 A/W"])) |
@@ -480,7 +517,7 @@ def app():
 
     # Read in pickle
     # import pickle
-    # with open("ha24_8_jan.pickle", "rb") as f:
+    # with open("ha24_10_jan.pickle", "rb") as f:
     #     saved = pickle.load(f)
     # scoring_data = saved["scoring_data"]
     # results_df = saved["results"]
diff --git a/etl/eligibility/ha_15_32/ha25_app.py b/etl/eligibility/ha_15_32/ha25_app.py
index 541f77d3..c67c6b6b 100644
--- a/etl/eligibility/ha_15_32/ha25_app.py
+++ b/etl/eligibility/ha_15_32/ha25_app.py
@@ -17,6 +17,8 @@ from etl.epc.DataProcessor import DataProcessor
 from etl.epc.settings import COLUMNS_TO_MERGE_ON
 from backend.ml_models.api import ModelApi
 from etl.solar.SolarPhotoSupply import SolarPhotoSupply
+from recommendations.recommendation_utils import calculate_cavity_age
+from recommendation_utils import convert_thickness_to_numeric
 
 import re
 
@@ -341,7 +343,7 @@ def get_epc_data(data, cleaned, cleaning_data, created_at, photo_supply_lookup,
         "Guest Room": {"property-type": None, "built-form": None}
     }
 
-    for _, property_meta in tqdm(data.iterrows(), total=len(data)):
+    for _, property_meta in tqdm(data, total=len(data)):
 
         searcher = SearchEpc(
             address1=property_meta["HouseNo"],
@@ -368,22 +370,35 @@ def get_epc_data(data, cleaned, cleaning_data, created_at, photo_supply_lookup,
         older_epcs = searcher.older_epcs
         full_sap_epc = searcher.full_sap_epc
         # We also want to get the penultimate epc
-        penultimate_epc, _ = searcher.filter_newest_epc(older_epcs)
-        if not penultimate_epc:
-            penultimate_epc = newest_epc
+        # penultimate_epc, _ = searcher.filter_newest_epc(older_epcs)
+        # if not penultimate_epc:
+        #     penultimate_epc = newest_epc
 
         eligibility = Eligibility(epc=newest_epc, cleaned=cleaned)
         eligibility.check_gbis_warmfront()
         eligibility.check_eco4_warmfront()
 
-        if (not eligibility.eco4_warmfront["eligible"]) and (not eligibility.gbis_warmfront):
-            eligibility = Eligibility(epc=penultimate_epc, cleaned=cleaned)
-            eligibility.check_gbis_warmfront()
-            eligibility.check_eco4_warmfront()
-            # If this is the case, we need to update the older epcs
-            # We don't update just to make data cleaning easier
-            if penultimate_epc.get("estimated") is None:
-                older_epcs = [x for x in searcher.data["rows"] if x["lmk-key"] != penultimate_epc["lmk-key"]]
+        # if (not eligibility.eco4_warmfront["eligible"]) and (not eligibility.gbis_warmfront):
+        #     eligibility = Eligibility(epc=penultimate_epc, cleaned=cleaned)
+        #     eligibility.check_gbis_warmfront()
+        #     eligibility.check_eco4_warmfront()
+        #     # If this is the case, we need to update the older epcs
+        #     # We don't update just to make data cleaning easier
+        #     if penultimate_epc.get("estimated") is None:
+        #         older_epcs = [x for x in searcher.data["rows"] if x["lmk-key"] != penultimate_epc["lmk-key"]]
+
+        # If the property is a cavity wall and it's filled, we produce an estimate for the age of the cavity
+
+        # Loft MUST be suitable
+        cavity_age = None
+        if (
+            eligibility.walls["is_cavity_wall"] and
+            eligibility.walls["is_filled_cavity"] and
+            eligibility.loft["suitability"] and
+            eligibility.eco4_warmfront["message"] == "Failed due to full cavity - check cavity age"
+        ):
+            # We check the age of the cavity and if it's particularly old, we flag it
+            cavity_age = calculate_cavity_age(newest_epc, older_epcs, cleaned)
 
         # Full checks
         eligibility.check_gbis()
@@ -396,6 +411,15 @@ def get_epc_data(data, cleaned, cleaning_data, created_at, photo_supply_lookup,
             if eligibility.epc["construction-age-band"] in ["", None]:
                 eligibility.epc["construction-age-band"] = map_year_to_age_band(property_meta["Build Yr"])
 
+            # TEMP (not the right place for this): default a missing extension-count to 0
+            if eligibility.epc["extension-count"] in ["", None]:
+                eligibility.epc["extension-count"] = 0
+
+            # TEMP (not the right place for this): assume "Mid-Terrace" when built-form is missing and there are no older EPCs
+            if eligibility.epc["built-form"] in ["", None]:
+                if not older_epcs:
+                    eligibility.epc["built-form"] = "Mid-Terrace"
+
             scoring_dictionary = prepare_model_data_row(
                 property_id=property_meta["row_id"],
                 modelling_epc=eligibility.epc,
@@ -431,6 +455,9 @@ def get_epc_data(data, cleaned, cleaning_data, created_at, photo_supply_lookup,
                 "heating": eligibility.epc["mainheat-description"],
                 "tenure": eligibility.tenure,
                 "date_epc": eligibility.epc["lodgement-date"],
+                "cavity_age": cavity_age,
+                **eligibility.walls,
+                **eligibility.roof,
             }
         )
 
@@ -657,6 +684,8 @@ def get_epc_data_for_lost_surveys(
                 "heating": eligibility.epc["mainheat-description"],
                 "tenure": eligibility.tenure,
                 "date_epc": eligibility.epc["lodgement-date"],
+                **eligibility.walls,
+                **eligibility.roof,
             }
         )
 
@@ -758,58 +787,51 @@ def analyse_results(results_df, data, eco4_prospects_survey_list):
         results_df, how="left", on="row_id"
     )
 
-    warmfront_identified = analysis_data[analysis_data["warmfront_identified"]]
+    # NEW
+    analysis_data["roof_insulation_thickness"] = np.where(
+        pd.isnull(analysis_data["roof_insulation_thickness"]), None, analysis_data["roof_insulation_thickness"]
+    )
+    analysis_data["roof_insulation_thickness_numeric"] = analysis_data["roof_insulation_thickness"].apply(
+        lambda x: convert_thickness_to_numeric(x, is_flat=False, is_pitched=True)
+    )
 
-    identified_eco = analysis_data[analysis_data["eco4_eligible"] == True]
-    identified_eco = identified_eco[identified_eco["eco4_message"] == "subject to post retrofit sap"]
+    warmfront_identified = analysis_data[
+        (analysis_data["warmfront_identified"] == True)
+    ]  # 2204
 
-    identified_gbis = analysis_data[
-        (analysis_data["gbis_eligible"] == True) & (analysis_data["eco4_eligible"] == False)
+    # Because we don't know which property is for which scheme, we'll just look at what we found
+    ideal_eco4 = analysis_data[
+        (analysis_data["eco4_eligible"] == True) &
+        (analysis_data["roof_insulation_thickness_numeric"] <= 100) &
+        (analysis_data["sap"] <= 54)
+        ]  # 335
+
+    gbis = analysis_data[
+        (analysis_data["gbis_eligible"] == True) &
+        ~analysis_data["row_id"].isin(ideal_eco4["row_id"].values)
         ]
 
-    # Take just unfilled cavities and remove filled potentials
-    identified_gbis["walls"].value_counts()
-
-    identified_gbis["walls"].value_counts()
-
-    # Of the ECO jobs, what proportion to we get right
-
-    success_rate = (warmfront_identified["eco4_eligible"] | warmfront_identified["gbis_eligible"]).sum() / \
-                   warmfront_identified.shape[
-                       0]
-
-    # No gbis for this
-    # gbis_success_rate = warmfront_identified_gbis["gbis_eligible"].sum() / warmfront_identified_gbis.shape[0]
-
-    # Additional identified
-    additional_identified_eco = analysis_data[
-        (analysis_data["eco4_eligible"] == True) & (analysis_data["warmfront_identified"] == False)
-        ]
-
-    additional_identified_eco["eligibility_classification"].value_counts()
-
-    additional_identified_gbis = analysis_data[
-        (analysis_data["gbis_eligible"] == True) & (analysis_data["eco4_eligible"] == False) & (
-            analysis_data["warmfront_identified"] == False
-        )
-        ].shape[0]
-
-    # Future
-    additional_identified_eco_future = analysis_data[
-        (analysis_data["eco4_eligible_future"] == True) & (analysis_data["warmfront_identified"] == False)
-        ].shape[0]
-    additional_identified_gbis_future = analysis_data[
-        (analysis_data["gbis_eligible_future"] == True) & (analysis_data["eco4_eligible_future"] == False) & (
-            analysis_data["warmfront_identified"] == False
-        )
-        ].shape[0]
+    ideal_eco4 = ideal_eco4[ideal_eco4["sap"] <= 54]
 
 
 def analyse_lost_surveys(results_df):
-    identified_eco = results_df[results_df["eco4_eligible"] == True]
-    # 59 for lost surveys
-    identified_gbis = results_df[results_df["gbis_eligible"] == True]
-    # 107
+    results_df["roof_insulation_thickness"] = np.where(
+        pd.isnull(results_df["roof_insulation_thickness"]), None, results_df["roof_insulation_thickness"]
+    )
+    results_df["roof_insulation_thickness_numeric"] = results_df["roof_insulation_thickness"].apply(
+        lambda x: convert_thickness_to_numeric(x, is_flat=False, is_pitched=True)
+    )
+
+    ideal_eco4 = results_df[
+        (results_df["eco4_eligible"] == True) &
+        (results_df["roof_insulation_thickness_numeric"] <= 100) &
+        (results_df["sap"] <= 54)
+        ]  # 25
+
+    gbis = results_df[
+        (results_df["gbis_eligible"] == True) &
+        ~results_df["row_id"].isin(ideal_eco4["row_id"].values)
+        ]  # 82
 
 
 def app():
@@ -837,7 +859,7 @@ def app():
     # Pickle the outputs
     # Old data was ha25.pickle
     # import pickle
-    # with open("ha25_9_jan.pickle", "wb") as f:
+    # with open("ha25_10_jan.pickle", "wb") as f:
     #     pickle.dump(
     #         {
     #             "results_df": results_df,
@@ -848,9 +870,9 @@ def app():
     #     )
 
     # Load in pickle
-    # import pickle
-    # with open("ha25_9_jan.pickle", "rb") as f:
-    #     saved = pickle.load(f)
-    # results_df = saved["results_df"]
-    # scoring_data = saved["scoring_data"]
-    # nodata = saved["nodata"]
+    import pickle
+    with open("ha25_10_jan.pickle", "rb") as f:
+        saved = pickle.load(f)
+    results_df = saved["results_df"]
+    scoring_data = saved["scoring_data"]
+    nodata = saved["nodata"]
diff --git a/etl/eligibility/ha_15_32/ha4_app.py b/etl/eligibility/ha_15_32/ha4_app.py
index 92b03539..d2702dd8 100644
--- a/etl/eligibility/ha_15_32/ha4_app.py
+++ b/etl/eligibility/ha_15_32/ha4_app.py
@@ -1,3 +1,4 @@
+import os
 import msgpack
 from pathlib import Path
 from datetime import datetime
@@ -6,7 +7,7 @@ import pandas as pd
 from utils.s3 import read_from_s3
 from utils.logger import setup_logger
 from dotenv import load_dotenv
-from backend.app.utils import read_parquet_from_s3
+from utils.s3 import read_dataframe_from_s3_parquet
 from tqdm import tqdm
 from backend.SearchEpc import SearchEpc
 from etl.eligibility.Eligibility import Eligibility
@@ -14,9 +15,13 @@ from etl.eligibility.ha_15_32.app import prepare_model_data_row
 from etl.epc.DataProcessor import DataProcessor
 from etl.epc.settings import COLUMNS_TO_MERGE_ON
 from backend.ml_models.api import ModelApi
+from etl.solar.SolarPhotoSupply import SolarPhotoSupply
+from recommendations.recommendation_utils import calculate_cavity_age
+from recommendation_utils import convert_thickness_to_numeric
 
 import re
 
+EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
 ENV_FILE = Path(__file__).parent / "etl" / "eligibility" / "ha_15_32" / ".env"
 
 logger = setup_logger()
@@ -52,7 +57,7 @@ def standardise_ha_4(data):
     return data
 
 
-def get_ha_4_data(data, cleaned, cleaning_data, created_at):
+def get_ha_4_data(data, cleaned, cleaning_data, created_at, photo_supply_lookup, floor_area_decile_thresholds):
     scoring_data = []
     results = []
     nodata = []
@@ -62,19 +67,33 @@ def get_ha_4_data(data, cleaned, cleaning_data, created_at):
         searcher = SearchEpc(
             address1=property_meta["Address Line 1"],
             postcode=property_meta["Post Code"],
-            size=1000
+            auth_token=EPC_AUTH_TOKEN,
+            os_api_key=None,
+            property_type=property_type_lookup.get(house["Archetype"]),
         )
 
-        searcher.search()
+        searcher.find_property(skip_os=True)
 
-        if searcher.data is None:
+        if searcher.newest_epc is None:
             searcher = SearchEpc(
                 address1=property_meta["Location Name"],
                 postcode=property_meta["Post Code"],
-                size=1000
+                auth_token=EPC_AUTH_TOKEN,
+                os_api_key=None,
+                property_type=property_type_lookup.get(house["Archetype"]),
             )
             searcher.search()
 
+        if searcher.newest_epc is None:
+            nodata.append(house["row_id"])
+            continue
+
+        newest_epc = searcher.newest_epc
+        older_epcs = searcher.older_epcs
+        full_sap_epc = searcher.full_sap_epc
+
+        searcher.search()
+
         if searcher.data is None:
             nodata.append(property_meta.to_dict())
             continue
@@ -273,17 +292,21 @@ def app():
     )
     cleaned = msgpack.unpackb(cleaned, raw=False)
 
-    cleaning_data = read_parquet_from_s3(
+    cleaning_data = read_dataframe_from_s3_parquet(
         bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet",
     )
 
     created_at = datetime.now().isoformat()
 
+    photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket="retrofit-data-dev")
+
     results_df, scoring_data, nodata = get_ha_4_data(
         data=data,
         cleaned=cleaned,
         cleaning_data=cleaning_data,
-        created_at=created_at
+        created_at=created_at,
+        photo_supply_lookup=photo_supply_lookup,
+        floor_area_decile_thresholds=floor_area_decile_thresholds
     )
 
     # Store the data locally as a pickle
diff --git a/etl/eligibility/ha_15_32/ha7_app.py b/etl/eligibility/ha_15_32/ha7_app.py
index 544f614d..54d0dbb0 100644
--- a/etl/eligibility/ha_15_32/ha7_app.py
+++ b/etl/eligibility/ha_15_32/ha7_app.py
@@ -17,6 +17,8 @@ from etl.epc.DataProcessor import DataProcessor
 from etl.epc.settings import COLUMNS_TO_MERGE_ON
 from backend.ml_models.api import ModelApi
 from etl.solar.SolarPhotoSupply import SolarPhotoSupply
+from recommendations.recommendation_utils import calculate_cavity_age
+from recommendation_utils import convert_thickness_to_numeric
 
 ENV_FILE = Path(__file__).parent / "etl" / "eligibility" / "ha_15_32" / ".env"
 
@@ -112,6 +114,19 @@ def get_ha7_data(data, cleaned, cleaning_data, created_at, photo_supply_lookup,
         eligibility.check_gbis_warmfront()
         eligibility.check_eco4_warmfront()
 
+        # If the property is a cavity wall and it's filled, we produce an estimate for the age of the cavity
+
+        # Loft MUST be suitable
+        cavity_age = None
+        if (
+            eligibility.walls["is_cavity_wall"] and
+            eligibility.walls["is_filled_cavity"] and
+            eligibility.loft["suitability"] and
+            eligibility.eco4_warmfront["message"] == "Failed due to full cavity - check cavity age"
+        ):
+            # We check the age of the cavity and if it's particularly old, we flag it
+            cavity_age = calculate_cavity_age(newest_epc, older_epcs, cleaned)
+
         # If the house is not identified, we do a full gbis and eco4 check
         eligibility.check_gbis()
         eligibility.check_eco4()
@@ -151,6 +166,9 @@ def get_ha7_data(data, cleaned, cleaning_data, created_at, photo_supply_lookup,
                 "tenure": eligibility.tenure,
                 "date_epc": eligibility.epc["lodgement-date"],
                 **newest_epc,
+                "cavity_age": cavity_age,
+                **eligibility.walls,
+                **eligibility.roof,
             }
         )
 
@@ -250,21 +268,56 @@ def get_ha7_data(data, cleaned, cleaning_data, created_at, photo_supply_lookup,
 
 
 def analyse_ha_7(results_df, data):
-    df = results_df.merge(
+    analysis_data = results_df.merge(
         data[["row_id", "row_code", "Property Type", "Construction Year Band"]], how="left", on="row_id"
     )
-    warmfront_identification = df["row_code"].value_counts()
-    warmfront_identified = df[df["row_code"] == "potential ECO4"]
+
+    # NEW
+
+    analysis_data["roof_insulation_thickness"] = np.where(
+        pd.isnull(analysis_data["roof_insulation_thickness"]), None, analysis_data["roof_insulation_thickness"]
+    )
+    analysis_data["roof_insulation_thickness_numeric"] = analysis_data["roof_insulation_thickness"].apply(
+        lambda x: convert_thickness_to_numeric(x, is_flat=False, is_pitched=True)
+    )
+
+    ideal_eco4 = analysis_data[
+        (analysis_data["eco4_eligible"] == True) & (
+            analysis_data["roof_insulation_thickness_numeric"] <= 100)
+        ]
+
+    secondary_eco4_warmfront_not_sold = analysis_data[
+        (analysis_data["eco4_eligible"] == True) & (
+            analysis_data["roof_insulation_thickness_numeric"] > 100)
+        ]
+
+    # underperforming cavities
+    underperforming_cavities = analysis_data[
+        (analysis_data["eco4_message"] == "Failed due to full cavity - check cavity age") & (
+            analysis_data["cavity_age"] > 9 * 365
+        ) & (analysis_data["roof_insulation_thickness_numeric"] <= 100)
+        ]
+
+    identified_gbis_not_sold = analysis_data[
+        (analysis_data["gbis_eligible"] == True) & (
+            analysis_data["eco4_eligible"] == False
+        )
+        ]
+
+    # END NEW
+
+    warmfront_identification = analysis_data["row_code"].value_counts()
+    warmfront_identified = analysis_data[analysis_data["row_code"] == "potential ECO4"]
     warmfront_identified["walls"].value_counts(normalize=True)
 
-    df["Construction Year Band"].value_counts(normalize=True)
+    analysis_data["Construction Year Band"].value_counts(normalize=True)
 
     # Number of days from today
 
     days_to_today = (datetime.now() - pd.to_datetime(warmfront_identified["date_epc"])).dt.days
     days_to_today.mean()
 
-    property_types = df["Property Type"].value_counts()
+    property_types = analysis_data["Property Type"].value_counts()
 
     n_identified = (results_df["gbis_eligible"] | results_df["eco4_eligible"]).sum()
 
@@ -312,12 +365,12 @@ def app():
 
     # Pickle results
     # import pickle
-    # with open("ha7_results.pkl", "wb") as f:
+    # with open("ha7_results_jan_10.pkl", "wb") as f:
     #     pickle.dump({"results_df": results_df, "scoring_data": scoring_data, "nodata": nodata}, f)
 
     # Read in the old data
     # import pickle
-    # with open("ha7_results.pkl", "rb") as f:
+    # with open("ha7_results_jan_10.pkl", "rb") as f:
     #     old_data = pickle.load(f)
     # results_df = old_data["results_df"]
     # scoring_data = old_data["scoring_data"]
diff --git a/recommendations/Costs.py b/recommendations/Costs.py
index 8dbb9cc9..e2b26448 100644
--- a/recommendations/Costs.py
+++ b/recommendations/Costs.py
@@ -176,12 +176,16 @@ class Costs:
         """
         material_cost_per_m2 = material["material_cost"]
 
+        # We inflate material costs by 50% to reflect recent price increases
+        material_cost_per_m2 = material_cost_per_m2 * 1.5
+
         base_material_cost = material_cost_per_m2 * floor_area
         labour_cost = material["labour_cost"] * floor_area * self.labour_adjustment_factor
 
         subtotal_before_profit = base_material_cost + labour_cost
 
-        contingency_cost = subtotal_before_profit * self.CONTINGENCY
+        # We use the high-risk contingency because access may be difficult and existing insulation may need clearing
+        contingency_cost = subtotal_before_profit * self.HIGH_RISK_CONTINGENCY
         preliminaries_cost = subtotal_before_profit * self.PRELIMINARIES
         profit_cost = subtotal_before_profit * self.PROFIT_MARGIN