debugging calico epc search to handle the strict blocks

2026-07-27 23:35:01 +00:00 · 2025-06-19 22:48:25 +01:00 · 2025-06-19 22:48:25 +01:00 · 980f439f49
commit 980f439f49
parent 383a4852e2
7 changed files with 236 additions and 56 deletions
--- a/asset_list/AssetList.py
+++ b/asset_list/AssetList.py
@ -298,7 +298,7 @@ class AssetList:
        "Has the property been re-walled?", "Is the property tile hung?", "Does the property have a render?",
        "Does the property have cladding?", "Gable Wall Obstructions",
        "Does the property have foliage that needs removal?",
-        "Potential unsafe environment", "Date of Inspection"
+        "Potential unsafe environment", "Date of Inspection", "Borescoped?"
    ]

    NON_INTRUSIVES_ELIGIBILITY_COLUMN = "Eligibility (Red/Yellow/Green)"
@ -354,6 +354,7 @@ class AssetList:
    # Work type prefixes:
    # Empties
    EMPTY_CAVITY_NON_INTRUSIVE = "Non-Intrusive Data Shows Empty Cavity"
+    EMPTY_CAVITY_NON_INTRUSIVE_YEAR = 'Non-Intrusive Data Shows Empty Cavity, built after 2002'
    EPC_EMPTY_INSPECTIONS_RETRO_DRILLED = "EPC Shows Empty Cavity, inspections show retro drilled"
    EPC_EMPTY_INSPECTIONS_FILLED = "EPC Shows Empty Cavity, inspections show filled or other"
    EPC_EMPTY_INSPECTIONS_FILLED_AT_BUILD = "EPC Shows Empty Cavity, inspections show filled at build"
@ -1280,7 +1281,8 @@ class AssetList:
            )

            self.standardised_asset_list["SAP Category"] = np.where(
-                pd.isnull(self.standardised_asset_list[self.STANDARD_SAP]),
+                pd.isnull(self.standardised_asset_list[self.STANDARD_SAP]) &
+                pd.isnull(self.standardised_asset_list[self.EPC_API_DATA_NAMES["current-energy-efficiency"]]),
                "SAP Unknown",
                self.standardised_asset_list["SAP Category"]
            )
@ -1745,8 +1747,6 @@ class AssetList:
            self.standardised_asset_list["solar_epc_loft_needs_topup"]
        )

-        z = self.standardised_asset_list[self.standardised_asset_list["landlord_property_id"] == "DW150120029"]
-
        self.standardised_asset_list["solar_eligible"] = (
            # Property isn't a flag
            not_a_flat &
@ -2035,14 +2035,15 @@ class AssetList:
        self.standardised_asset_list["cavity_reason"] = np.where(
            (self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] == "flat") &
            (self.standardised_asset_list["SAP Category"] == "SAP Rating 76 or more"),
-            None,
+            self.standardised_asset_list["cavity_reason"] + " - (unlikely to quality)",
            self.standardised_asset_list["cavity_reason"]
        )
+
        # Split cavity_reason on the colon and check if the first part is equal to one of the two options above
        # that indicates empties
        self.standardised_asset_list["identified_empty_cavity"] = (
            self.standardised_asset_list["cavity_reason"].str.split(":").str[0].isin(
-                [self.EMPTY_CAVITY_NON_INTRUSIVE, self.EPC_EMPTY]
+                [self.EMPTY_CAVITY_NON_INTRUSIVE, self.EMPTY_CAVITY_NON_INTRUSIVE_YEAR, self.EPC_EMPTY]
            )
        )

@ -2078,6 +2079,7 @@ class AssetList:
        NUM_RE = re.compile(r'\b\d+[A-Za-z]?\b')  # captures 12, 12A, etc.

        expanded_rows = []
+
        for _, row in blocks.iterrows():
            addr = str(row[self.STANDARD_ADDRESS_1])

@ -2194,16 +2196,9 @@ class AssetList:
        # if we have any blocks, where work is eligible, we flag them now
        # These blocks may be refecence via the landlord_block_reference field, or by property types being
        # blocks of flats
-        has_landlord_block_reference = self.landlord_block_reference is not None
+        has_landlord_block_reference = sum(~pd.isnull(self.standardised_asset_list[self.STANDARD_BLOCK_REFERENCE]))

        if has_landlord_block_reference:
-
-            # # If we blocks of flats, without a landlord block reference, we create this
-            # self.fill_landlord_block_reference(has_blocks_of_flats)
-            #
-            # # If we have blocks of flats, we split these out into individual units
-            # self.split_blocks()
-
            # For blocks that have a 50% allocation, we create project codes
            self.block_analysis()
            # find any block refs with more than 50% emptires
@ -2265,13 +2260,18 @@ class AssetList:

        block_analysis = []
        for block_reference, group in self.standardised_asset_list.groupby(self.STANDARD_BLOCK_REFERENCE):
+
            cavity_breakdown = group["cavity_reason"].fillna("No Eligibility").value_counts(normalize=True) * 100

            if all(cavity_breakdown.index == "No Eligibility"):
                continue

            # We check the % of empty vs not empty as right now, we're focused on empty
-            n_empties = ((group["identified_empty_cavity"] == True) & (~pd.isnull(group["cavity_reason"]))).sum()
+            n_empties = (
+                (group["identified_empty_cavity"] == True) &
+                (~pd.isnull(group["cavity_reason"])) &
+                (~group["cavity_reason"].str.contains("(unlikely to quality)", case=False, na=False, regex=False))
+            ).sum()

            works = group["hubspot_status"]
            above_threshold = works.map(LABEL_TO_ENUM.get).dropna()
@ -2293,6 +2293,36 @@ class AssetList:
        block_analysis = block_analysis.fillna(0)

        # We flag which properties are eligible for works. We need at least 50%
+        block_analysis["Eligible for Works"] = (
+            block_analysis["Percentage of Empties"] >= 0.50
+        )
+        block_analysis = block_analysis.sort_values("Percentage of Empties", ascending=False)
+
+        # For properties that are NOT eligible, we should update the cavity reason
+        ineligible_blocks = block_analysis[
+            ~block_analysis["Eligible for Works"]
+        ]["Block Reference"].values
+
+        eligible_blocks = block_analysis[
+            block_analysis["Eligible for Works"]
+        ]["Block Reference"].values
+
+        self.standardised_asset_list["cavity_reason"] = np.where(
+            self.standardised_asset_list[self.STANDARD_BLOCK_REFERENCE].isin(ineligible_blocks),
+            self.standardised_asset_list["cavity_reason"] + " (Flat in block with less than 50% eligible)",
+            self.standardised_asset_list["cavity_reason"]
+        )
+
+        # If the property is in an eligible block of flats, but the property itself is not eligible, we flag this
+        # The criterion is:
+        # - The property should be in a block of flats
+
+        self.standardised_asset_list["cavity_reason"] = np.where(
+            self.standardised_asset_list[self.STANDARD_BLOCK_REFERENCE].isin(eligible_blocks),
+            self.standardised_asset_list["cavity_reason"]
+            + " " + "(Flat in block with more than 50% eligible, but not eligible itself)",
+            self.standardised_asset_list["cavity_reason"]
+        )

        self.block_analysis_df = block_analysis

@ -2434,13 +2464,13 @@ class AssetList:
        )

        # Format the two date columns
-        programme_data["survey_week"] = pd.to_datetime(programme_data["survey_week"], errors="coerce")
+        programme_data["survey_date"] = pd.to_datetime(programme_data["survey_date"], errors="coerce")
        programme_data[self.EPC_API_DATA_NAMES["inspection-date"]] = pd.to_datetime(
            programme_data[self.EPC_API_DATA_NAMES["inspection-date"]],
            errors="coerce"
        )
        # Convert to dd/mm/yyyy format
-        programme_data["survey_week"] = programme_data["survey_week"].dt.strftime("%d/%m/%Y")
+        programme_data["survey_date"] = programme_data["survey_date"].dt.strftime("%d/%m/%Y")
        programme_data[self.EPC_API_DATA_NAMES["inspection-date"]] = (
            programme_data[self.EPC_API_DATA_NAMES["inspection-date"]].dt.strftime("%d/%m/%Y")
        )
@ -2457,12 +2487,14 @@ class AssetList:
            ready_to_be_scheduled = (
                (
                    programme_data["hubspot_status"] == hubspot_config.HubspotProcessStatus.READY_TO_BE_SCHEDULED.label
-                ) & (~pd.isnull(programme_data["survey_week"]) & ~pd.isnull(programme_data["surveyor"]))
+                ) & (~pd.isnull(programme_data["survey_date"]))
            )
-            completed_works = (
-                programme_data["hubspot_status"] != hubspot_config.HubspotProcessStatus.READY_TO_BE_SCHEDULED.label
-            )
-            programme_data = programme_data[ready_to_be_scheduled | completed_works]
+            # completed_works = (
+            #     (programme_data["hubspot_status"] !=
+            #     hubspot_config.HubspotProcessStatus.READY_TO_BE_SCHEDULED.label) &
+            #     (~pd.isnull(programme_data["hubspot_status"]))
+            # )
+            programme_data = programme_data[ready_to_be_scheduled]

        # Merge on the contact details
        programme_data = programme_data.merge(
@ -2505,11 +2537,13 @@ class AssetList:
                self.CRM_HISTORICAL_CAVITY_PRODUCT["name"]
            )
        else:
+            # We shouldn't have any missing products
            programme_data = programme_data[
-                ~pd.isnull(programme_data["domna_product"]) &
-                ~pd.isnull(programme_data["surveyor"]) &
-                ~pd.isnull(programme_data["survey_week"])
-                ]
+                ~pd.isnull(programme_data["survey_date"])
+            ]
+
+            if pd.isnull(programme_data["domna_product"]).sum():
+                raise ValueError("Missing products")
            programme_data = programme_data.drop(columns=["solar_product", "cavity_product"])

        product_df = (
@ -2686,7 +2720,7 @@ class AssetList:
            'Last EPC: Room Height <LISTING last_epc__room_height>': self.EPC_API_DATA_NAMES["floor-height"],
            'Last EPC: Age Band <LISTING last_epc__age_band>': self.EPC_API_DATA_NAMES["construction-age-band"],
            'Pipeline <DEAL pipeline>': 'Pipeline <DEAL pipeline>',
-            'Expected Commencement Date <DEAL expected_commencement_date>': "survey_week",
+            'Expected Commencement Date <DEAL expected_commencement_date>': "survey_date",
            'Deal Name <DEAL dealname>': "dealname",  # Need to create this,
            'Product ID <LINE_ITEM hs_product_id>': 'Product ID <LINE_ITEM hs_product_id>',
            'Name <LINE_ITEM name>': 'Name <LINE_ITEM name>',
@ -2724,7 +2758,11 @@ class AssetList:
        # The listing owner email is the same as the surveyor email (deal owner), so they can see the listing
        programme_data['Listing Owner Email <LISTING hubspot_owner_id>'] = programme_data['Deal Owner']
        programme_data['Amount <DEAL amount>'] = 0
-        programme_data["Deal Owner"] = programme_data["Deal Owner"].str.lower()
+        programme_data["Deal Owner"] = np.where(
+            ~pd.isnull(programme_data["Deal Owner"]),
+            programme_data["Deal Owner"].astype(str).str.lower(),
+            programme_data["Deal Owner"]
+        )

        # We make sure we have all of the columns that we need
        missed_columns = [c for c in hubspot_config.CRM_UPLOAD_COLUMNS if c not in programme_data.columns]
--- a/asset_list/app.py
+++ b/asset_list/app.py
@ -553,13 +553,9 @@ def app():
    )

    asset_list.merge_data(epc_df)
-
    asset_list.extract_attributes()
-
    asset_list.identify_worktypes()

-    pprint(asset_list.work_type_figures)
-
    # We now flag the status of the property
    asset_list.label_property_status()
    asset_list.analyse_geographies()
--- a/asset_list/hubspot/prepare_for_hubspot.py
+++ b/asset_list/hubspot/prepare_for_hubspot.py
@ -19,19 +19,19 @@ def app():

    # inputs:
    reconcile_programme = False  # If True, the hubspot upload will include all properties with a project code
-    customer_domain = "https://sandwell.gov.uk"
+    customer_domain = "https://livewest.co.uk"
    installer_name = "J & J CRUMP"
    asset_list_filepath = (
-        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Sandwell/Hubspot/Sandwell BC - Full Asset List MAIN - "
-        "Standardised.xlsx"
+        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Livewest/Hubspot/Livewest South-West - Standardised "
+        "V2.xlsx"
    )
-    asset_list_sheet_name = "Proposed Program"
-    asset_list_header = 1
+    asset_list_sheet_name = "Standardised Asset List"
+    asset_list_header = 0

    contact_details_filepath = (
-        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Sandwell/Hubspot/Sandwell Contact Details.xlsx"
+        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Livewest/Hubspot/23.06 Livewest Contacts.xlsx"
    )
-    contacts_sheet_name = "Sheet1"
+    contacts_sheet_name = "Contact Information"
    contacts_landlord_property_id = "landlord_property_id"
    contacts_phone_number_column = "phone_number"
    contacts_secondary_phone_number_column = "secondary_phone_number"
--- a/asset_list/utils.py
+++ b/asset_list/utils.py
@ -79,7 +79,13 @@ def get_data(
                uprn=uprn
            )
            # Force the skipping of estimating the EPC
-            searcher.ordnance_survey_client.property_type = None
+            # We check if the property was split
+            if home["is_expended_block"]:
+                searcher.ordnance_survey_client.property_type = "Flat"
+                searcher.property_type = "Flat"
+                searcher.set_strict_property_type_search()
+            else:
+                searcher.ordnance_survey_client.property_type = None
            searcher.ordnance_survey_client.built_form = None

            searcher.find_property(skip_os=True)
--- a/backend/Funding.py
+++ b/backend/Funding.py
@ -5,7 +5,7 @@ from typing import List
 from backend.app.plan.schemas import HousingType


-class Funding:
+class FundingOld:
    """
    Given a property, this class identifies if the home is possibly eligible for funding under
    the various funding schemes. It will also calculate the expected amount of funding available
@ -413,13 +413,32 @@ class Funding:
        self.whlg()


-class Funding2:
+class Funding:
    """
    New class to handle funding calculation
    """

-    def __init__(self, tenure: HousingType):
+    def __init__(
+        self,
+        tenure: HousingType,
+        social_cavity_abs_rate: float,
+        social_solid_abs_rate: float,
+        private_cavity_abs_rate: float,
+        private_solid_abs_rate: float,
+        project_scores_matrix,
+        whlg_eligible_postcodes
+    ):
        self.tenure = tenure
+        self.social_cavity_abs_rate = social_cavity_abs_rate
+        self.social_solid_abs_rate = social_solid_abs_rate
+        self.private_cavity_abs_rate = private_cavity_abs_rate
+        self.private_solid_abs_rate = private_solid_abs_rate
+        # Bands start as None; check_funding sets them from the SAP scores and floor area
+        self.starting_sap_band = None
+        self.ending_sap_band = None
+        self.floor_area_band = None
+        self.project_scores_matrix = project_scores_matrix
+        self.whlg_eligible_postcodes = whlg_eligible_postcodes

    @staticmethod
    def get_sap_band(sap_score_number):
@ -446,8 +465,22 @@ class Funding2:

        return None

+    @staticmethod
+    def get_floor_area_band(floor_area):
+        """
+        Map a floor area onto its band label: "0-72", "73-97", "98-199" or "200".
+        :param floor_area: numeric floor area; each band's upper bound is inclusive
+        :return: label of the first band whose upper bound is >= floor_area, otherwise "200"
+        """
+        upper_bounds = ((72, "0-72"), (97, "73-97"), (199, "98-199"))
+        for upper_bound, band_label in upper_bounds:
+            if floor_area <= upper_bound:
+                return band_label
+        return "200"
+
+    @staticmethod
    def eco4_prs_eligibility(
-        self, starting_sap: int, measures: List, mainheat_description: str, heating_control_description: str
+        starting_sap: int, measures: List, mainheat_description: str, heating_control_description: str
    ):
        """
        Handles the eligibility criteria for private rental properties under eco
@ -481,31 +514,53 @@ class Funding2:

        return False

+    def calculate_full_project_abs(self):
+        """Return "Cost Savings" from the first matrix row matching the stored bands; ValueError if none match."""
+        # Filter the project scores matrix
+        data = self.project_scores_matrix[
+            (self.project_scores_matrix["Floor Area Segment"] == self.floor_area_band) &
+            (self.project_scores_matrix["Starting Band"] == self.starting_sap_band) &
+            (self.project_scores_matrix["Finishing Band"] == self.ending_sap_band)
+            ]
+
+        if data.empty:
+            raise ValueError("Missing abs rate, check the project scores matrix")
+
+        return data["Cost Savings"].values[0]
+
    def check_funding(
        self, measures: List,
        starting_sap: int,
        ending_sap: int,
+        floor_area: float,
        mainheat_description: str,
-        heating_control_description: str
+        heating_control_description: str,
+        is_cavity: bool
    ):
        """
        Given a list of measures, this function will check if the package of measures is fundable
        :param measures:
        :param starting_sap:
        :param ending_sap:
+        :param floor_area: floor area, converted to a band via get_floor_area_band
+        :param mainheat_description: forwarded to eco4_prs_eligibility for private tenure
+        :param heating_control_description: forwarded to eco4_prs_eligibility for private tenure
+        :param is_cavity: Indicates if the property has cavity wall insulation
        :return:
        """

-        starting_band = self.get_sap_band(starting_sap)
-        ending_band = self.get_sap_band(ending_sap)
+        # If it's an E or D, should get to an EPC C
+        if starting_sap >= 55 and ending_sap < 69:
+            raise NotImplementedError("This property doesn't have sufficient SAP movement")

-        # For ECO4 eligibility, the property needs to end at a C if it starts at a D or E, otherwise should end at a
-        # D
-
-        if starting_band <= 38 & ending_band >= 55:
+        if starting_sap <= 38 and ending_sap <= 55:
            # F or G should get to D
            raise NotImplementedError("Implement F or G to D eligibility")

+        self.starting_sap_band = self.get_sap_band(starting_sap)
+        self.ending_sap_band = self.get_sap_band(ending_sap)
+        self.floor_area_band = self.get_floor_area_band(floor_area)
+
        ########################
        # Private
        ########################
@ -513,13 +568,25 @ class Funding2:
        # 2) GBIS

        if self.tenure == "Private":
-            is_eligible = self.eco4_prs_eligibility(
+            is_eco4_eligible = self.eco4_prs_eligibility(
                starting_sap=starting_sap,
                measures=measures,
                mainheat_description=mainheat_description,
                heating_control_description=heating_control_description
            )
-            pass
+
+            # Need to implement
+            # 1) Package has to include an insulation measure
+            # 2) We should use the funding for the measure that has the largest partial project score
+            is_gbis_eligible = ()
+
+            if not is_eco4_eligible:
+                return
+            eco4_abs = self.calculate_full_project_abs()
+            # We estimate rates now
+            eco4_funding = (
+                eco4_abs * self.private_cavity_abs_rate if is_cavity else eco4_abs * self.private_solid_abs_rate
+            )

        ########################
        # Social
--- a/backend/SearchEpc.py
+++ b/backend/SearchEpc.py
@ -160,6 +160,9 @@ class SearchEpc:
        """
        Address lines 1 and postcode are mandatory fields. The other address lines are optional
        but can be used to find the epc for the home, if address1 and postcode are insufficient
+
+        If you wish to run a strict property type search, please run set_strict_property_type_search()
+
        :param address1: string, propery's address line 1
        :param postcode: string, propery's postcode
        :param full_address: string, optional parameter, the full address of the property
@ -189,6 +192,7 @@ class SearchEpc:
        self.older_epcs = None
        self.full_sap_epc = None
        self.metadata = None
+        self.strict_property_type_search = False

        # These are the address and postcode values, which we store in the database
        self.address_clean = None
@ -199,6 +203,14 @@ class SearchEpc:
        self.property_type = property_type
        self.fast = fast

+    def set_strict_property_type_search(self):
+        """
+        Turns on strict property type searching (sets strict_property_type_search to True). While set, and
+        when property_type is truthy, the address search params also carry "property-type" (lower-cased).
+        :return: None
+        """
+        self.strict_property_type_search = True
+
    @staticmethod
    def get_house_number(address: str, postcode=None) -> str | None:
        """
@ -315,6 +327,8 @@ class SearchEpc:
            address_params["address"] = self.address1
        if self.postcode:
            address_params["postcode"] = self.postcode
+        if self.strict_property_type_search and self.property_type:
+            address_params["property-type"] = self.property_type.lower()

        # We attempt the search with uprn params

@ -365,11 +379,16 @@ class SearchEpc:

        unique_property_types = {r["property-type"] for r in rows}

+        is_just_a_house = (len(unique_property_types) == 1) & (
+            ("House" in unique_property_types) | ("Bungalow" in unique_property_types)
+        )
+
        # We allow for variation in property type across flats/maisonettes
        # If we know that we have a flat/maisonette, we allow for both property types
-        if property_type in ["Flat", "Maisonette"]:
-            if ((len(uprns) == 1) and ((len(unique_property_types) == 1)
-            ) or unique_property_types == {"Flat", "Maisonette"}):
+        # Skip this shortcut when the only property type found is House or Bungalow (is_just_a_house)
+        if property_type in ["Flat", "Maisonette"] and not is_just_a_house:
+            if (((len(uprns) == 1) and ((len(unique_property_types) == 1)
+            ) or unique_property_types == {"Flat", "Maisonette"})):
                return rows

        if property_type is not None:
@ -424,6 +443,8 @@ class SearchEpc:

            return rows

+        raise ValueError("property type and address cannot both be None, at least one must be provided")
+
    @staticmethod
    def format_address(newest_epc):
        """
--- a/backend/tests/test_funding.py
+++ b/backend/tests/test_funding.py
@ -0,0 +1,52 @@
+import pytest
+import pandas as pd
+from utils.s3 import read_csv_from_s3
+from backend.Funding import Funding
+
+
+def get_funding_data():
+    """
+    Loads the eco project scores matrix and the warm homes local grant eligible postcodes via read_csv_from_s3
+    :return: tuple of (project_scores_matrix, whlg_eligible_postcodes), each wrapped in a pandas DataFrame
+    """
+    project_scores_matrix = read_csv_from_s3(
+        bucket_name="retrofit-data-dev",
+        filepath="funding/ECO4 Full Project Scores Matrix.csv",
+    )
+    project_scores_matrix = pd.DataFrame(project_scores_matrix)
+    project_scores_matrix.columns = ['Floor Area Segment', 'Starting Band', 'Finishing Band', 'Cost Savings']
+    project_scores_matrix["Cost Savings"] = project_scores_matrix["Cost Savings"].astype(float)
+
+    whlg_eligible_postcodes = read_csv_from_s3(
+        bucket_name="retrofit-data-dev",
+        filepath="funding/whlg eligible postcodes.csv",
+    )
+    whlg_eligible_postcodes = pd.DataFrame(whlg_eligible_postcodes)
+
+    return project_scores_matrix, whlg_eligible_postcodes
+
+
+class TestFunding:
+    # NOTE(review): test_prs has no assertions - it only exercises check_funding; add expected values
+    def test_prs(self):
+        eco_project_scores_matrix, whlg_eligible_postcodes = get_funding_data()
+        funding = Funding(
+            project_scores_matrix=eco_project_scores_matrix,
+            whlg_eligible_postcodes=whlg_eligible_postcodes,
+            social_cavity_abs_rate=13.5,
+            social_solid_abs_rate=17,
+            private_cavity_abs_rate=13.5,
+            private_solid_abs_rate=17,
+            tenure="Private",
+        )
+
+        measures_1 = ["internal_wall_insulation", "solar_pv"]
+        funding.check_funding(
+            measures=measures_1,
+            starting_sap=54,
+            ending_sap=69,
+            floor_area=73,
+            mainheat_description="Boiler and radiators, mains gas",
+            heating_control_description="Programmer, room thermostat and TRVs",
+            is_cavity=True
+        )