don't fetch from find my epc website when the property doesn't have an epc

2026-07-27 23:35:01 +00:00 · 2025-05-23 10:23:38 +01:00 · 2025-05-23 10:23:38 +01:00 · 1e0fbb111d
commit 1e0fbb111d
parent 2e041bfe75
7 changed files with 180 additions and 42 deletions
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@ -7,7 +7,7 @@
      <sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
      <sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
    </content>
-    <orderEntry type="jdk" jdkName="AssetList" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Fastapi-backend" jdkType="Python SDK" />
    <orderEntry type="sourceFolder" forTests="false" />
  </component>
 </module>
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@ -3,7 +3,7 @@
  <component name="Black">
    <option name="sdkName" value="Python 3.10 (backend)" />
  </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="AssetList" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Fastapi-backend" project-jdk-type="Python SDK" />
  <component name="PyCharmProfessionalAdvertiser">
    <option name="shown" value="true" />
  </component>
--- a/asset_list/AssetList.py
+++ b/asset_list/AssetList.py
@ -279,6 +279,7 @@ class AssetList:
    STANDARD_HEATING_SYSTEM = "landlord_heating_system"
    STANDARD_EXISTING_PV = "landlord_existing_pv"
    STANDARD_SAP = "landlord_sap_rating"
+    STANDARD_BLOCK_REFERENCE = "landlord_block_reference"

    DOMNA_PROPERTY_ID = "domna_property_id"

@ -369,6 +370,7 @@ class AssetList:
        landlord_heating_system=None,
        landlord_existing_pv=None,
        landlord_sap=None,
+        landlord_block_reference=None,
        phase=False,
        header=0
    ):
@ -382,7 +384,7 @@ class AssetList:
        self.standardised_asset_list = self.raw_asset_list.copy()
        # Will be used to store aggregated figures against the various work types
        self.work_type_figures = {}
-        self.flat_data = None
+        self.block_analysis_df = None
        self.duplicated_addresses = None
        self.contact_details = None
        self.contact_detail_fields = None
@ -425,6 +427,7 @@ class AssetList:
        self.landlord_heating_system = landlord_heating_system
        self.landlord_existing_pv = landlord_existing_pv
        self.landlord_sap = landlord_sap
+        self.landlord_block_reference = landlord_block_reference

        # parameters for cleaning
        self.full_address_cols_to_concat = full_address_cols_to_concat
@ -671,6 +674,7 @@ class AssetList:
            self.landlord_heating_system,
            self.landlord_existing_pv,
            self.landlord_sap,
+            self.landlord_block_reference,
        ]
        # Keep just non-null variables (e.g. landlord may not provide uprn)
        self.keep_variables = [v for v in variables if v is not None]
@ -688,6 +692,7 @@ class AssetList:
            self.landlord_heating_system: self.STANDARD_HEATING_SYSTEM,
            self.landlord_existing_pv: self.STANDARD_EXISTING_PV,
            self.landlord_sap: self.STANDARD_SAP,
+            self.landlord_block_reference: self.STANDARD_BLOCK_REFERENCE
        }
        self.rename_map = {k: v for k, v in self.rename_map.items() if k is not None}

@ -1905,7 +1910,7 @@ class AssetList:
            for col in ["cavity_reason", "solar_reason"]:
                self.standardised_asset_list[col] = np.where(
                    (
-                        (~pd.isnull(self.standardised_asset_list["ecosurv_status"]))
+                        (~pd.isnull(self.standardised_asset_list["ecosurv_install_status"]))
                    ),
                    None,
                    self.standardised_asset_list[col]
@ -1978,42 +1983,42 @@ class AssetList:
            get_max_status_from_columns, axis=1
        )

-    def flat_analysis(self):
+    def block_analysis(self):

-        # We need to deduce the building name - we strip out the house number
+        if self.landlord_block_reference is None:
+            # This information is not available
+            return

-        # We want to deduce if flats have 50% of the properties below C75
-        # We group by postcode and property type
-        grouped = self.standardised_asset_list.groupby(
-            [self.STANDARD_POSTCODE, self.STANDARD_PROPERTY_TYPE]
-        )
+        # Reverse mapping: label -> enum
+        LABEL_TO_ENUM = {e.label: e for e in hubspot_config.HubspotProcessStatus}

-        flat_data = []
-        for _, group in grouped:
-            if "flat" in group[self.STANDARD_PROPERTY_TYPE].values:
-                num_flats = group[self.STANDARD_PROPERTY_TYPE].shape[0]
-                num_below_c75 = group[
-                    self.EPC_API_DATA_NAMES["current-energy-efficiency"]
-                ].lt(self.FILLED_CAVITY_SAP_THRESHOLD).sum()
-                # Check if any flats are below C69
-                num_flats_below_c69 = group[
-                    self.EPC_API_DATA_NAMES["current-energy-efficiency"]
-                ].lt(69).sum()
+        # Threshold status - anything that is at this stage or beyond is considered surveyed
+        threshold = hubspot_config.HubspotProcessStatus.SURVEYED_COMPLETED_SIGNED_OFF.value

-                flat_data.append(
-                    {
-                        "Postcode": group[self.STANDARD_POSTCODE].iloc[0],
-                        "Property Type": "Flat",
-                        "Number of Flats with EPC": num_flats,
-                        "Number of Flats below C75": num_below_c75,
-                        "Proportion of Flat EPCs below C75": round(100 * num_below_c75 / num_flats),
-                        "Number of Flats Below C69": num_flats_below_c69,
-                    }
-                )
+        block_analysis = []
+        for block_reference, group in self.standardised_asset_list.groupby(self.STANDARD_BLOCK_REFERENCE):
+            cavity_breakdown = group["cavity_reason"].fillna("No Eligibility").value_counts(normalize=True) * 100

-        flat_data = pd.DataFrame(flat_data)
+            if all(cavity_breakdown.index == "No Eligibility"):
+                continue

-        self.flat_data = flat_data
+            works = group["hubspot_status"]
+            above_threshold = works.map(LABEL_TO_ENUM.get).dropna()
+            count_above = (above_threshold >= threshold).sum()
+            proportion = count_above / len(works)
+
+            block_analysis.append(
+                {
+                    "Block Reference": block_reference,
+                    "Proportion of properties suryeyed": proportion,
+                    **cavity_breakdown.to_dict(),
+                }
+            )
+
+        block_analysis = pd.DataFrame(block_analysis)
+        block_analysis = block_analysis.fillna(0)
+
+        self.block_analysis_df = block_analysis

    @staticmethod
    def split_full_name(x):
@ -2403,14 +2408,15 @@ class AssetList:
            self.ecosurv[["Reference", "Status", "Lead Status", "Tags"]].rename(
                columns={
                    "Reference": "ecosurv_reference",
-                    "status": "ecosurv_status",
+                    "Status": "ecosurv_status",
                    "Lead Status": "ecosurv_lead_status",
-                    "Tags": "ecosurv_tags"
+                    "Tags": "ecosurv_tags",
+                    "Installer": "ecosurv_installer"
                }
            ), how="left", on="ecosurv_reference"
        )

-        matched["ecosurv_install_status"] = None
+        matched["ecosurv_install_status"] = hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER

        # This mapping is ordered by process order, where lodgment is the final step so if we have an indication
        # that the property is ready for lodgement, we set the status to that. We then proceed through the other
@ -2772,6 +2778,7 @@ class AssetList:
            )
            measure_mix_col = "MEASURE COMBO"
            installer_notes_col = "INSTALLERS NOTES ; REASONS FOR CANCELLATIONS"
+            installer_col = "INSTALLER"

            logger.info("Matching master data to asset list")
            matched = []
@ -2912,7 +2919,7 @@ class AssetList:

            matched = pd.DataFrame(matched)
            master_to_append = master_data[
-                [scheme_col, "row_id", install_col, submission_col, measure_mix_col, installer_notes_col]
+                [scheme_col, "row_id", install_col, submission_col, measure_mix_col, installer_notes_col, installer_col]
            ].merge(
                matched, how="left", on="row_id"
            ).rename(
@ -2921,7 +2928,8 @@ class AssetList:
                    measure_mix_col: "measure_mix",
                    install_col: "survey_status",
                    submission_col: "submission_date",
-                    installer_notes_col: "submission_installer_notes"
+                    installer_notes_col: "submission_installer_notes",
+                    installer_col: "submission_installer"
                }
            )
            master_to_append["submission_cancelled"] = (
--- a/asset_list/app.py
+++ b/asset_list/app.py
@ -82,6 +82,7 @@ def app():
    landlord_existing_pv = None
    landlord_property_id = "thrive_property_id"
    landlord_sap = "sap_rating_updated"
+    landlord_block_reference = "block_reference"
    outcomes_filename = [
        os.path.join(data_folder, "Thrive - Outcomes - April 24-March25 - Corrected.xlsx")
    ]
@ -119,6 +120,7 @@ def app():
    landlord_existing_pv = "Low Carbon Technology (Solar PV)"
    landlord_property_id = "UPRN"
    landlord_sap = "SAP Score"
+    landlord_block_reference = None
    outcomes_filename = None
    outcomes_sheetname = None
    outcomes_postcode = None
@ -209,6 +211,7 @@ def app():
        landlord_heating_system=landlord_heating_system,
        landlord_existing_pv=landlord_existing_pv,
        landlord_sap=landlord_sap,
+        landlord_block_reference=landlord_block_reference,
        phase=phase
    )
    asset_list.init_standardise()
@ -479,7 +482,7 @@ def app():
    # We now flag the status of the property
    asset_list.label_property_status()

-    asset_list.flat_analysis()
+    asset_list.block_analysis()

    asset_list.load_contact_details(
        local_filepath=os.path.join(data_folder, "Full property list wth D&V report V look up 12.2.25.xlsx"),
@ -526,7 +529,8 @@ def app():

    with pd.ExcelWriter(filename) as writer:
        asset_list.standardised_asset_list.to_excel(writer, sheet_name="Standardised Asset List", index=False)
-        asset_list.flat_data.to_excel(writer, sheet_name="Flat Data", index=False)
+        if asset_list.block_analysis_df is not None:
+            asset_list.block_analysis_df.to_excel(writer, sheet_name="Block Analysis", index=False)
        # If we have outcomes, we add a tab with the outcomes
        if not asset_list.outcomes_for_output.empty:
            asset_list.outcomes_for_output.to_excel(writer, sheet_name="Outcomes", index=False)
--- a/asset_list/hubspot/prepare_for_hubspot.py
+++ b/asset_list/hubspot/prepare_for_hubspot.py
@ -0,0 +1,18 @@
+import pandas as pd
+
+
+def app():
+    """
+    TODO: Operations may have removed some cavity_reason/solar_reason values from the standardised asset list after
+          review. So, we will need to update the hubspot status for these entries and set them to None, if they
+          were previously being set to ready for scheduling. We don't want to just filter on rows where
+          cavity_reason and solar_reason are populated, as if we want to include historical surveys, this will remove
+          them
+
+    :return:
+    """
+
+    filepath = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive "
+                "Programme - reconciled.xlsx")
+
+    standardised_asset_list = pd.read_excel(filepath, sheet_name="Standardised Asset List")
--- a/backend/engine/engine.py
+++ b/backend/engine/engine.py
@ -507,7 +507,7 @@ async def model_engine(body: PlanTriggerRequest):
            )

            # if we have a remote assessment data type, we pull the additional data and include it
-            if body.event_type == "remote_assessment":
+            if (body.event_type == "remote_assessment") and not (epc_searcher.newest_epc["estimated"]):
                logger.info("Retrieving find my epc data")
                try:
                    property_non_invasive_recommendations, patch = RetrieveFindMyEpc.get_from_epc(
--- a/etl/customers/thrive/Project
+++ b/etl/customers/thrive/Project
@ -0,0 +1,108 @@
+"""
+This script will take the standardised asset list and append on the project codes.
+We also review the existing install status, in case anything is wrong
+"""
+import pandas as pd
+import numpy as np
+
+standardised_asset_list = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Asset List - "
+    "Complete - Updated May 2025 - Standardised.xlsx",
+    sheet_name="Standardised Asset List",
+)
+
+project_code_allocations = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Master Tracker (Thrive - "
+    "Warmfront).xlsx",
+    sheet_name="Master Tracker",
+    header=1
+)
+
+programme_codes = project_code_allocations[
+    ["UPRN", "Proposed Progamme", "New Proposed Programme", "Warmfront Survey Notes", ]
+].copy()
+programme_codes["programme_reference"] = programme_codes["New Proposed Programme"].copy()
+programme_codes["programme_reference"] = np.where(
+    pd.isnull(programme_codes["programme_reference"]),
+    programme_codes["Proposed Progamme"],
+    programme_codes["programme_reference"]
+)
+
+PROJECT_CODE_MAP = {
+    'Phase 2': "THRIVE-002",
+    'Phase 3': "THRIVE-003",
+    'Phase 4': "THRIVE-004",
+    'Phase 5': "THRIVE-005",
+    'Phase 6': "THRIVE-006",
+    'Phase 7': "THRIVE-007",
+    'Phase 8': "THRIVE-008",
+    'Phase 9': "THRIVE-009",
+    'Phase 10': "THRIVE-010",
+    "Week1": "THRIVE-WEEK-001",
+    "Week2": "THRIVE-WEEK-002",
+    "Week4": "THRIVE-WEEK-004",
+    "Week7": "THRIVE-WEEK-007",
+}
+programme_codes["project_code"] = programme_codes["programme_reference"].map(PROJECT_CODE_MAP)
+
+thrive_notes = project_code_allocations[["UPRN", "Thrive Notes", "Priority", "Notes Reply (Thrive)"]].copy()
+
+standardised_asset_list = standardised_asset_list.merge(
+    programme_codes[["UPRN", "project_code", "programme_reference"]],
+    how="left",
+    left_on="landlord_property_id",
+    right_on="UPRN",
+).merge(
+    thrive_notes[["UPRN", "Thrive Notes", "Priority", "Notes Reply (Thrive)"]],
+    how="left",
+    on="UPRN",
+)
+
+standardised_asset_list = standardised_asset_list.drop(columns=["UPRN"])
+
+# We fill the project code for historical completions
+standardised_asset_list["project_code"] = np.where(
+    pd.isnull(standardised_asset_list["project_code"]) & (
+        standardised_asset_list["hubspot_status"] != "READY TO BE SCHEDULED"
+    ) & (
+        ~pd.isnull(standardised_asset_list["hubspot_status"])
+    ),
+    "THRIVE-HISTORICAL",
+    standardised_asset_list["project_code"]
+)
+
+# Store as an excel
+filename = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Programme - "
+            "reconciled.xlsx")
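+# Store the data in several tabs: the reconciled asset list first, followed by the other tabs, which are read
+# back unchanged from the standardised workbook (Block Analysis, Outcomes, Unmatched Submissions, Unmatched Ecosurv):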
+block_analysis = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Asset List - "
+    "Complete - Updated May 2025 - Standardised.xlsx",
+    sheet_name="Block Analysis",
+)
+outcomes = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Asset List - "
+    "Complete - Updated May 2025 - Standardised.xlsx",
+    sheet_name="Outcomes",
+)
+unmatched_submissions = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Asset List - "
+    "Complete - Updated May 2025 - Standardised.xlsx",
+    sheet_name="Unmatched Submissions",
+)
+unmatched_ecosurv = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Asset List - "
+    "Complete - Updated May 2025 - Standardised.xlsx",
+    sheet_name="Unmatched Ecosurv",
+)
+
+with pd.ExcelWriter(filename) as writer:
+    standardised_asset_list.to_excel(writer, sheet_name="Standardised Asset List", index=False)
+    block_analysis.to_excel(writer, sheet_name="Block Analysis", index=False)
+    # If we have outcomes, we add a tab with the outcomes
+    outcomes.to_excel(writer, sheet_name="Outcomes", index=False)
+
+    unmatched_submissions.to_excel(writer, sheet_name="Unmatched Submissions", index=False)
+
+    unmatched_ecosurv.to_excel(writer, sheet_name="Unmatched Ecosurv", index=False)