fml fml

2026-07-27 23:35:01 +00:00 · 2024-03-14 17:36:09 +00:00 · 2024-03-14 17:36:09 +00:00 · 9b255029b3
commit 9b255029b3
parent bee07a253b
1 changed files with 96 additions and 45 deletions
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@ -20,6 +20,9 @@ from backend.ml_models.api import ModelApi
 from etl.solar.SolarPhotoSupply import SolarPhotoSupply
 from recommendations.recommendation_utils import calculate_cavity_age
 from etl.epc.Record import EPCRecord
+from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes
+from etl.epc.DataProcessor import EPCDataProcessor
+from datetime import datetime

 EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
 ENV_FILE = Path(__file__).parent / "etl" / "eligibility" / "ha_15_32" / ".env"
@ -5188,9 +5191,6 @@ def classify_loft(x):


 def fml_analysis(loader):
-    from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes
-    from etl.epc.DataProcessor import EPCDataProcessor
-    from datetime import datetime
    assumed_ciga_pass_rate = 0.731
    has_bruh = ["HA7", "HA14", "HA25", "HA39", "HA16"]

@ -5216,15 +5216,20 @@ def fml_analysis(loader):
            bucket_name="retrofit-datalake-dev",
            s3_file_name=f"ha-analysis/revised/{ha_name}/epc_data.pickle"
        )
+        # Make sure there are no duplicated UPRNs. A basic drop_duplicates is enough here because duplicates
+        # shouldn't be a huge issue at this point
+        epc_data = epc_data.drop_duplicates("uprn")

        # time from the inspection to now
        epc_data["epc_age"] = (datetime.now() - pd.to_datetime(epc_data["inspection-date"])).dt.days
        if "estimated" not in epc_data.columns:
-            epc_data["estimated"] = None
+            # For all after HA7, we don't use estimated surveys
+            epc_data["estimated"] = False

        fuck_this = fml.merge(
            epc_data, how="left", on="asset_list_row_id"
        )
+        fuck_this["estimated"] = fuck_this["estimated"].fillna(True)
        if fuck_this.shape[0] != fml.shape[0]:
            raise Exception("What the fuck bruv")

@ -5259,7 +5264,15 @@ def fml_analysis(loader):
            )
        insulation_thicknesses = pd.DataFrame(insulation_thicknesses)

+        before_merge_shape = fuck_this.shape[0]
        fuck_this = fuck_this.merge(insulation_thicknesses, how="left", on="uprn")
+
+        if fuck_this.shape[0] != before_merge_shape:
+            raise Exception("SOMETHING WENT WRONG")
+
+        if any(fuck_this["ECO Eligibility"].str.contains("subject to archetype")):
+            blah
+
        # clean roof insulation
        fuck_this["roof_insulation_thickness"] = fuck_this["roof_insulation_thickness"].fillna("0")
        fuck_this["roof_insulation_thickness"] = fuck_this[
@ -5283,7 +5296,7 @@ def fml_analysis(loader):
        #
        # fuck_this['age_lower_bound'] = fuck_this['construction-age-band'].apply(extract_lower_bound)

-        had_survey = fuck_this[pd.isnull(fuck_this["estimated"])]
+        had_survey = fuck_this[fuck_this["estimated"] == False]

        # proportion with a survey:
        proportion_with_survey = 100 * had_survey.shape[0] / fuck_this.shape[0]
@ -5294,27 +5307,11 @@ def fml_analysis(loader):
            had_survey["ECO Eligibility"] == "eco4"
            ]

-        # Walls:
-        # Cavity wall, as built, insulated (assumed)
-        # Cavity wall, as built, no insulation (assumed)
-        # Cavity wall, as built, partial insulation (assumed)
-
-        # Roof:
-        # Less than 100mm = high confidence
-        # Less than 270mm & EPC at least 5 years old = medium confidence
-        # Otherwise, low confidence
-
-        # SAP criteria is EPC C or below
-
-        # Pre is 54 or below
-
-        no_ciga_check_needed_with_archetype = no_ciga_check_needed[
+        no_ciga_check_needed_eligible = no_ciga_check_needed[
            (no_ciga_check_needed["walls-description"].isin(no_ciga_cavity_descriptions)) &
            (no_ciga_check_needed["roof_classiciation"].isin(["high", "medium"])) &
            (no_ciga_check_needed["current-energy-efficiency"].astype(float) <= 80)
            ]
-        if not no_ciga_check_needed_with_archetype.empty:
-            raise Exception("SORT ME OUT")

        # Characterise no CIGA check needed

@ -5327,9 +5324,20 @@ def fml_analysis(loader):
        ciga_check_passed = had_survey[
            had_survey["ECO Eligibility"] == "eco4 - passed ciga"
            ]
+        # These should be treated the same as ones that have passed their CIGA checks, from a detection perspective
+        ciga_check_passed_eligible = ciga_check_passed[
+            (ciga_check_passed["walls-description"].str.lower().str.contains("cavity") == True) &
+            (ciga_check_passed["roof_classiciation"].isin(["high", "medium"])) &
+            (ciga_check_passed["current-energy-efficiency"].astype(float) <= 80)
+            ]

-        if not ciga_check_passed.empty:
-            raise Exception("SORT ME BRUV")
+        if not loader.data[ha_name]["ciga_list"].empty:
+
+            proportions = loader.data[ha_name]["ciga_list"]["Guarantee"].value_counts(normalize=True)
+            ha_ciga_pass_rate = proportions[proportions.index == "No"].values[0]
+
+        else:
+            ha_ciga_pass_rate = assumed_ciga_pass_rate

        # We take just the cavity walls
        # UCL paper: https://discovery.ucl.ac.uk/id/eprint/10110371/
@ -5338,53 +5346,96 @@ def fml_analysis(loader):
        # differ between variables; floor and wall type errors occur in ~10-15% of EPCs,
        # compared with ~5% for wall insulation and glazing performance

-        ciga_check_needed_plausible = ciga_check_needed[
+        ciga_check_needed_eligible = ciga_check_needed[
            (ciga_check_needed["walls-description"].str.lower().str.contains("cavity") == True) &
            (ciga_check_needed["roof_classiciation"].isin(["high", "medium"])) &
            (ciga_check_needed["current-energy-efficiency"].astype(float) <= 80)
            ]

-        if not loader.data[ha_name]["ciga_list"].empty:
-            raise NotImplementedError("SORT OUT THE CIGA BRUV")
-        else:
-            ha_ciga_pass_rate = assumed_ciga_pass_rate
-
-        ciga_check_expectation = np.round(ciga_check_needed_plausible.shape[0] * ha_ciga_pass_rate)
-        without_ciga_expectation = no_ciga_check_needed_with_archetype.shape[0]
+        ciga_check_expectation = np.round(ciga_check_needed_eligible.shape[0] * ha_ciga_pass_rate)
+        without_ciga_expectation = no_ciga_check_needed_eligible.shape[0]
+        passed_ciga_expectation = ciga_check_passed_eligible.shape[0]

        # Need to add on the non-ciga
-        total_expectation = ciga_check_expectation + without_ciga_expectation
+        total_expectation = ciga_check_expectation + without_ciga_expectation + passed_ciga_expectation

        if proportion_with_survey < 100:
            # We estimate the rest
            without_survey_needing_ciga = fuck_this[
-                (pd.isnull(fuck_this["estimated"]) == False) &
+                (fuck_this["estimated"] == True) &
                (fuck_this["ECO Eligibility"].str.contains("subject to ciga") == True)
                ]

-            # We apply the same conversion rate as the properties with a survey
-            without_survey_without_ciga_expected = np.round(
-                without_survey_needing_ciga.shape[0] * (ciga_check_expectation / ciga_check_needed.shape[0])
-            )
+            if without_survey_needing_ciga.empty:
+                without_survey_without_ciga_expected = 0
+            else:
+                # We apply the same conversion rate as the properties with a survey
+                without_survey_without_ciga_expected = np.round(
+                    without_survey_needing_ciga.shape[0] * (ciga_check_expectation / ciga_check_needed.shape[0])
+                )

-            total_expectation += without_survey_without_ciga_expected
-
-            without_survey_without_ciga = fuck_this[
-                (pd.isnull(fuck_this["estimated"]) == False) & (fuck_this["ECO Eligibility"].isin(["eco4"]))
+            without_survey_passed_ciga = fuck_this[
+                (fuck_this["estimated"] == True) &
+                (fuck_this["ECO Eligibility"] == "eco4 - passed ciga")
                ]

-            if not without_survey_without_ciga.empty:
-                raise Exception("Estimate the rest!!")
+            if without_survey_passed_ciga.empty:
+                without_survey_passed_ciga_expected = 0
+            else:
+                # We apply the same conversion rate as the properties with a survey
+                without_survey_passed_ciga_expected = np.round(
+                    without_survey_passed_ciga.shape[0] * (passed_ciga_expectation / ciga_check_passed.shape[0])
+                )
+
+            # Finally, no ciga needed
+            without_survey_eco4 = fuck_this[
+                (fuck_this["estimated"] == True) &
+                (fuck_this["ECO Eligibility"] == "eco4")
+                ]
+
+            if without_survey_eco4.empty:
+                without_survey_eco4_expected = 0
+            else:
+                # We apply the same conversion rate as the properties with a survey
+                without_survey_eco4_expected = np.round(
+                    without_survey_eco4.shape[0] * (without_ciga_expectation / no_ciga_check_needed.shape[0])
+                )
+
+            total_expectation = (
+                total_expectation +
+                without_survey_without_ciga_expected +
+                without_survey_passed_ciga_expected +
+                without_survey_eco4_expected
+            )
+
+        surveys = loader.data[ha_name]["survey_list"]
+        sold_now = 0
+        if not surveys.empty:
+            sold_now = surveys[
+                surveys["installation_status"].str.lower().str.contains("eco4")
+            ].shape[0]
+
+        sales_since_nov = sold_now - original_figures["No. of Tech surveys complete - Eco 4"].values[0]

        results.append(
            {
                "HA Name": ha_name,
                "Original ECO4 Estimate - Remaining": original_remaining,
+                "Of which sold": sales_since_nov,
+                "Of which ECO4 Eligible - Remaining": int(total_expectation),
                "Proportion with a survey": proportion_with_survey,
-                "total_expectation": total_expectation
            }
        )

+    results_df = pd.DataFrame(results)
+
+    results_df["Delta vs November"] = 100 * (
+        results_df["Of which ECO4 Eligible - Remaining"] - results_df["Original ECO4 Estimate - Remaining"]
+    ) / results_df["Original ECO4 Estimate - Remaining"]
+
+    # TODO: Split into high and low confidence?
+    #
+

 def app():
    """