set up hubspot status

2026-07-27 23:35:01 +00:00 · 2025-05-21 11:55:10 +01:00 · 2025-05-21 11:55:10 +01:00 · 2e041bfe75
commit 2e041bfe75
parent c0cf848db2
3 changed files with 231 additions and 72 deletions
--- a/asset_list/AssetList.py
+++ b/asset_list/AssetList.py
@ -4,8 +4,8 @@ import re
 import tiktoken
 from pprint import pprint
 from datetime import datetime
+import asset_list.hubspot.config as hubspot_config

-from numpy.ma.core import masked_not_equal
 from openai import OpenAI
 import numpy as np
 import pandas as pd
@ -292,6 +292,13 @@ class AssetList:
        "Any further surveyor notes", 'Surveyors Name'
    ]

+    NON_INTRUSIVES_NEW_FORMAT_COLNAMES = [
+        "Has the property been re-walled?", "Is the property tile hung?", "Does the property have a render?",
+        "Does the property have cladding?", "Gable Wall Obstructions",
+        "Does the property have foliage that needs removal?",
+        "Potential unsafe environment", "Date of Inspection"
+    ]
+
    NON_INTRUSIVES_ELIGIBILITY_COLUMN = "Eligibility (Red/Yellow/Green)"

    OLD_FORMAT_NON_INTRUSIVE_COLNAMES = ['WFT Findings', 'ECO Eligibility']
@ -400,6 +407,10 @@ class AssetList:

        self.non_intrusives_eligibility = "Eligibility (Red/Yellow/Green)" in self.raw_asset_list.columns

+        self.new_format_non_insturives_present = (
+            "Has the property been re-walled?" in self.standardised_asset_list.columns
+        )
+
        # Names of columns
        self.landlord_property_id = landlord_property_id
        self.address1_colname = address1_colname
@ -687,6 +698,9 @@ class AssetList:
        if self.non_intrusives_eligibility:
            non_intrusive_columns.append(self.NON_INTRUSIVES_ELIGIBILITY_COLUMN)

+        if self.new_format_non_insturives_present:
+            non_intrusive_columns += self.NON_INTRUSIVES_NEW_FORMAT_COLNAMES
+
        if self.old_format_non_intrusives_present:
            # We check if we have the ECO Eligibility column, which we might not have
            non_intrusive_columns = [
@ -931,6 +945,23 @@ class AssetList:
            self.standardised_asset_list[self.STANDARD_LANDLORD_PROPERTY_ID].astype(str)
        )

+        # Clean up the standard SAP column, which can be problematic (non-breaking spaces, blanks), then cast to float
+        if self.landlord_sap is not None:
+            self.standardised_asset_list[self.STANDARD_SAP] = (
+                self.standardised_asset_list[self.STANDARD_SAP]
+                .astype(str)
+                .str.replace('\xa0', ' ', regex=False)
+                .str.strip()
+            )
+            self.standardised_asset_list[self.STANDARD_SAP] = np.where(
+                self.standardised_asset_list[self.STANDARD_SAP] == "",
+                None,
+                self.standardised_asset_list[self.STANDARD_SAP]
+            )
+            self.standardised_asset_list[self.STANDARD_SAP] = (
+                self.standardised_asset_list[self.STANDARD_SAP].astype(float)
+            )
+
    def merge_data(self, df: pd.DataFrame):
        """
        Used to insert data into the standardised asset list, based on the domna property id
@ -1864,7 +1895,7 @@ class AssetList:
            for col in ["cavity_reason", "solar_reason"]:
                self.standardised_asset_list[col] = np.where(
                    (
-                        (~pd.isnull(self.standardised_asset_list["submission_date"]))
+                        (~pd.isnull(self.standardised_asset_list["submission_status"]))
                    ),
                    None,
                    self.standardised_asset_list[col]
@ -1874,7 +1905,7 @@ class AssetList:
            for col in ["cavity_reason", "solar_reason"]:
                self.standardised_asset_list[col] = np.where(
                    (
-                        (~pd.isnull(self.standardised_asset_list["ecosurv_reference"]))
+                        (~pd.isnull(self.standardised_asset_list["ecosurv_status"]))
                    ),
                    None,
                    self.standardised_asset_list[col]
@ -1911,6 +1942,42 @@ class AssetList:
                    self.outcomes[self.DOMNA_PROPERTY_ID].isin(identified_work)
                ]

+    def label_property_status(self):
+        """
+        This function is designed to be run after identify_worktypes() has been run, and will create a "hubspot_status"
+        column, which will note where each property is (to be surveyed, surveyed, installed), using the stages we
+        recognise within hubspot (see hubspot_config.HubspotProcessStatus)
+        :return: None, self.standardised_asset_list is updated in place
+        """
+
+        # For anything that is ready to go (it has a cavity or solar reason), that gets set to ready to be scheduled
+        self.standardised_asset_list["hubspot_status"] = np.where(
+            ~pd.isnull(self.standardised_asset_list["cavity_reason"]) |
+            ~pd.isnull(self.standardised_asset_list["solar_reason"]),
+            hubspot_config.HubspotProcessStatus.READY_TO_BE_SCHEDULED.label,
+            None
+        )
+
+        # We then step through the later stages, which replace the default set above whenever one is recorded
+
+        # We utilise submissions, ecosurv and outcomes to define the hubspot status
+        # We'll take the maximum of these three columns, based on the enum integer value
+        label_to_enum = {e.label: e for e in hubspot_config.HubspotProcessStatus}
+
+        def get_max_status_from_columns(row):
+            status_candidates = []
+            for col in ["submission_status", "ecosurv_install_status", "outcome_status"]:
+                label = row.get(col)
+                if label in label_to_enum:  # values that are not a known stage label are skipped
+                    status_candidates.append(label_to_enum[label])
+            if not status_candidates:
+                return row["hubspot_status"]  # fallback to existing status if no updates
+            return max(status_candidates).label
+
+        self.standardised_asset_list["hubspot_status"] = self.standardised_asset_list.apply(
+            get_max_status_from_columns, axis=1
+        )
+
    def flat_analysis(self):

        # We need to deduce the building name - we strip out the house number
@ -2331,6 +2398,52 @@ class AssetList:
            # It doesn't matter too much which record we take
            matched = matched.drop_duplicates(subset=[self.STANDARD_LANDLORD_PROPERTY_ID])

+        # We merge on the ecosurv status of the property (the rename keys are case sensitive)
+        matched = matched.merge(
+            self.ecosurv[["Reference", "Status", "Lead Status", "Tags"]].rename(
+                columns={
+                    "Reference": "ecosurv_reference",
+                    "Status": "ecosurv_status",
+                    "Lead Status": "ecosurv_lead_status",
+                    "Tags": "ecosurv_tags"
+                }
+            ), how="left", on="ecosurv_reference"
+        )
+
+        matched["ecosurv_install_status"] = None
+
+        # This mapping takes each ecosurv tag we recognise to a hubspot stage. Several tags can match one property,
+        # so we keep the stage with the highest enum value (see get_max_status), meaning the order of the entries
+        # below is for readability only. NOTE(review): cancelled has the highest enum value, so it beats lodged
+        mapping = {
+            "Cancelled": hubspot_config.HubspotProcessStatus.INSTALLER_CANCELLED_FINALIZED,
+            "TrustMark: Lodged": hubspot_config.HubspotProcessStatus.LODGEMENT_COMPLETE,
+            "Retrofit: Complete": hubspot_config.HubspotProcessStatus.INSTALL_COMPLETE,
+            "Retrofit: Awaiting TrustMark": hubspot_config.HubspotProcessStatus.INSTALL_COMPLETE,
+            "Retrofit: Awaiting post checks": hubspot_config.HubspotProcessStatus.INSTALL_COMPLETE,
+            "Installer Notification Sent": hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER,
+            "Submitted to RC": hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER,
+            "COONEY": hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER,
+            "Signed off for install": hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER,
+            "Retrofit: Signed off for install": hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER,
+            "Audit": hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER,
+            "Accepted": hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER,
+            "Sold": hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER
+        }
+
+        def get_max_status(tag_str):
+            if pd.isna(tag_str):
+                return None
+            matched_statuses = []
+            for tag, status in mapping.items():
+                if tag in tag_str:  # presumably a substring match on one tags string - TODO confirm the tags format
+                    matched_statuses.append(status)
+            if not matched_statuses:
+                return None
+            return max(matched_statuses).label
+
+        matched["ecosurv_install_status"] = matched["ecosurv_tags"].apply(get_max_status)
+
        self.standardised_asset_list = self.standardised_asset_list.merge(
            matched,
            how="left",
@ -2380,7 +2493,7 @@ class AssetList:
                # Perform the remap
                outcomes["Outcome"] = outcomes["Notes / Outcomes"].map(remap_dictionary)

-            outcomes["Outcome"] = outcomes["Outcome"].str.lower()
+            outcomes["Outcome"] = outcomes["Outcome"].str.lower().str.strip()

            logger.info("Matching outcomes to asset list")
            # Merge the outcomes onto the asset list - we check we're able to match sufficiently well
@ -2542,12 +2655,13 @@ class AssetList:
            apply(get_latest_note).
            reset_index(drop=True)
        )
-        latest_note = latest_note[["domna_property_id", notes_col]]
+        latest_note = latest_note[["domna_property_id", notes_col, "Outcome"]].rename(
+            columns={notes_col: "latest_outcome_note", "Outcome": "latest_outcome"}  # keyed on notes_col, not "Notes"
+        )

        pivot_df = lookup.groupby(["domna_property_id", "Outcome"]).size().unstack(fill_value=0).reset_index()
-        pivot_df = pivot_df.merge(
-            visit_counts, how="left", on="domna_property_id"
-        )
+        pivot_df = pivot_df.merge(visit_counts, how="left", on="domna_property_id")
+        pivot_df = pivot_df.merge(latest_note, how="left", on="domna_property_id")

        # We want the latest note

@ -2558,15 +2672,32 @@ class AssetList:
        self.outcomes["matched_to_asset_list"] = self.outcomes["row_id"].isin(lookup["row_id"].values)
        self.outcomes = self.outcomes.merge(lookup[["row_id", "domna_property_id"]], how="left", on="row_id")

+        # We flag the outcome status, based on the outcome
+        pivot_df["outcome_status"] = None
+
+        if "surveyed" in pivot_df.columns:
+            pivot_df["outcome_status"] = np.where(
+                pivot_df["surveyed"] > 0, hubspot_config.HubspotProcessStatus.SURVEYED_COMPLETED_SIGNED_OFF.label,
+                pivot_df["outcome_status"]
+            )
+
+        if "installer refusal" in pivot_df.columns:
+            pivot_df["outcome_status"] = np.where(
+                pivot_df["installer refusal"] > 0, hubspot_config.HubspotProcessStatus.NOT_VIABLE.label,
+                pivot_df["outcome_status"]
+            )
+
+        pivot_df["outcome_status"] = np.where(
+            pivot_df["latest_outcome"].isin(["see notes"]) &
+            (pivot_df["outcome_status"] != hubspot_config.HubspotProcessStatus.SURVEYED_COMPLETED_SIGNED_OFF.label),
+            hubspot_config.HubspotProcessStatus.SURVEYED_NO_ACCESS_NEEDS_SIGN_OFF.label,
+            pivot_df["outcome_status"]
+        )
+
        # We merge out pivoted outcomes onto the asset list
        self.standardised_asset_list = self.standardised_asset_list.merge(
            pivot_df, how="left", left_on=self.DOMNA_PROPERTY_ID, right_on="domna_property_id"
        )
-        # Merge the latest note
-        self.standardised_asset_list = self.standardised_asset_list.merge(
-            latest_note.rename(columns={notes_col: "Latest Route March Note"}),
-            how="left", left_on=self.DOMNA_PROPERTY_ID, right_on="domna_property_id"
-        )

        if self.standardised_asset_list[self.DOMNA_PROPERTY_ID].duplicated().sum():
            raise ValueError("Duplicates appreared - something went wrong")
@ -2640,6 +2771,7 @@ class AssetList:
                master_data.columns else "PROPERTY TYPE As per table emailed"
            )
            measure_mix_col = "MEASURE COMBO"
+            installer_notes_col = "INSTALLERS NOTES ; REASONS FOR CANCELLATIONS"

            logger.info("Matching master data to asset list")
            matched = []
@ -2774,19 +2906,30 @@ class AssetList:
            self.standardised_asset_list = self.standardised_asset_list.drop(columns="house_no")

            # We match the "UPRN" which is the landlords ID, onto the master sheet
+
+            if measure_mix_col not in master_data.columns:
+                master_data[measure_mix_col] = "Measure mix not recorded"
+
            matched = pd.DataFrame(matched)
-            master_to_append = master_data[[scheme_col, "row_id", install_col, submission_col, measure_mix_col]].merge(
+            master_to_append = master_data[
+                [scheme_col, "row_id", install_col, submission_col, measure_mix_col, installer_notes_col]
+            ].merge(
                matched, how="left", on="row_id"
            ).rename(
                columns={
                    scheme_col: "funding_scheme",
                    measure_mix_col: "measure_mix",
                    install_col: "survey_status",
-                    submission_col: "submission_date"
+                    submission_col: "submission_date",
+                    installer_notes_col: "submission_installer_notes"
                }
            )
-            master_to_append["cancelled"] = master_to_append["survey_status"].str.lower().str.contains("cancel")
-            master_to_append["installed"] = master_to_append["survey_status"].str.lower().str.contains("installed")
+            master_to_append["submission_cancelled"] = (
+                master_to_append["survey_status"].str.lower().str.contains("cancel")
+            )
+            master_to_append["submission_installed"] = (
+                master_to_append["survey_status"].str.lower().str.contains("installed")
+            )
            master_surveyed.append(master_to_append)
            unmatched_df = master_data[
                master_data["row_id"].isin(unmatched)
@ -2822,7 +2965,21 @@ class AssetList:
        ].astype(str)

        # We de-dupe crudely on landlord property id
-        self.master_surveyed = master_surveyed.drop_duplicates(subset=[self.STANDARD_LANDLORD_PROPERTY_ID])
+        self.master_surveyed = master_surveyed.drop_duplicates(subset=[self.STANDARD_LANDLORD_PROPERTY_ID]).copy()
+
+        # We now add the submission status, based on the hubspot stages
+        self.master_surveyed["submission_status"] = hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER.label
+        self.master_surveyed["submission_status"] = np.where(
+            self.master_surveyed["submission_cancelled"] == True,
+            hubspot_config.HubspotProcessStatus.INSTALLER_CANCELLED_FINALIZED.label,
+            self.master_surveyed["submission_status"]
+        )
+
+        self.master_surveyed["submission_status"] = np.where(
+            self.master_surveyed["submission_installed"] == True,
+            hubspot_config.HubspotProcessStatus.INSTALL_COMPLETE.label,
+            self.master_surveyed["submission_status"]
+        )

        self.standardised_asset_list = self.standardised_asset_list.merge(
            self.master_surveyed, how="left", on=self.STANDARD_LANDLORD_PROPERTY_ID
--- a/asset_list/app.py
+++ b/asset_list/app.py
@ -99,66 +99,36 @@ def app():
    phase = False
    ecosurv_landlords = "thrive"

-    # Medway
-    data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Medway"
-    data_filename = "MEDWAY Asset List.xlsx"
-    sheet_name = "Asset list"
+    # Torus
+    data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Torus/Phase 2"
+    data_filename = "Torus Property Asset List - INSPECTIONS.xlsx"
+    sheet_name = "TORUS"
    postcode_column = 'Postcode'
    fulladdress_column = None
-    address1_column = "House Number"
+    address1_column = "AddressLine1"
    address1_method = None
-    address_cols_to_concat = ["House Number", "Street 1"]
+    address_cols_to_concat = ["AddressLine1", "AddressLine2", "AddressLine3"]
    missing_postcodes_method = None
-    landlord_year_built = "Year Built"
-    landlord_os_uprn = None
-    landlord_property_type = "Property Type - Academy"
-    landlord_built_form = "Property Type - Academy"
-    landlord_wall_construction = None
-    landlord_roof_construction = None
-    landlord_heating_system = None
-    landlord_existing_pv = None
-    landlord_property_id = "Row ID"
-    landlord_sap = None
-    outcomes_filename = []
-    outcomes_sheetname = []
-    outcomes_postcode = []
-    outcomes_houseno = []
-    outcomes_id = []
-    outcomes_address = []
-    master_filepaths = []
-    master_to_asset_list_filepath = None
-    phase = False
-    ecosurv_landlords = None
-
-    # MHS
-    data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MHS"
-    data_filename = "MHS HOMES (Full Asset List) - for programme build.xlsx"
-    sheet_name = "Sheet1"
-    postcode_column = 'Postcode'
-    fulladdress_column = "FullAddress"
-    address1_column = None
-    address1_method = "house_number_extraction"
-    address_cols_to_concat = []
-    missing_postcodes_method = None
-    landlord_year_built = "BuiltInYear"
-    landlord_os_uprn = None
-    landlord_property_type = "AssetType"
-    landlord_built_form = "PropertyType"
-    landlord_wall_construction = None
-    landlord_roof_construction = None
-    landlord_heating_system = None
-    landlord_existing_pv = None
+    landlord_year_built = "Property Age"
+    landlord_os_uprn = "NatUPRN"
+    landlord_property_type = "Property Type"
+    landlord_built_form = "Built Form"
+    landlord_wall_construction = "Wall Construction"
+    landlord_roof_construction = "Roof Construction"
+    landlord_heating_system = "Space Heating Source"
+    landlord_existing_pv = "Low Carbon Technology (Solar PV)"
    landlord_property_id = "UPRN"
-    landlord_sap = None
-    outcomes_filename = []
-    outcomes_sheetname = []
-    outcomes_postcode = []
-    outcomes_houseno = []
-    outcomes_id = []
-    outcomes_address = []
+    landlord_sap = "SAP Score"
+    outcomes_filename = None
+    outcomes_sheetname = None
+    outcomes_postcode = None
+    outcomes_houseno = None
+    outcomes_id = None
+    outcomes_address = None
    master_filepaths = []
    master_to_asset_list_filepath = None
-    phase = False
+    master_id_colnames = []
+    phase = True
    ecosurv_landlords = None

    # Southern Midlands
@ -300,7 +270,8 @@ def app():

    asset_list.flag_survey_master(
        master_filepaths=master_filepaths,
-        master_to_asset_list_filepath=master_to_asset_list_filepath
+        master_to_asset_list_filepath=master_to_asset_list_filepath,
+        master_id_colnames=master_id_colnames,
    )

    asset_list.flag_ecosurv(ecosurv_landlords)
@ -505,6 +476,9 @@ def app():

    pprint(asset_list.work_type_figures)

+    # We now flag the status of the property
+    asset_list.label_property_status()
+
    asset_list.flat_analysis()

    asset_list.load_contact_details(
--- a/asset_list/hubspot/config.py
+++ b/asset_list/hubspot/config.py
@ -0,0 +1,28 @@
+from enum import IntEnum
+
+
+class HubspotProcessStatus(IntEnum):
+    def __new__(cls, value, label):
+        obj = int.__new__(cls, value)
+        obj._value_ = value
+        obj.label = label  # the text label of the stage, kept alongside the integer value
+        return obj
+
+    # the exact numerical values of this enum aren't important, but their relative order is (members are compared)
+
+    # This is the first stage, where a survey is ready to go
+    READY_TO_BE_SCHEDULED = 1, "READY TO BE SCHEDULED"
+    # The property didn't get access and needs sign off
+    SURVEYED_NO_ACCESS_NEEDS_SIGN_OFF = 2, "SURVEYED - NO ACCESS - NEED SIGN OFF"
+    # The survey has been completed. We don't have any update as to whether the property has been installed
+    SURVEYED_COMPLETED_SIGNED_OFF = 3, "SURVEYED - COMPLETED - SIGNED OFF"
+    # The property turned out to be ineligible
+    NOT_VIABLE = 4, "NOT VIABLE"
+    # The property is with the installer. This will likely be the default for historic programmes
+    SUBMITTED_TO_INSTALLER = 5, "SUBMITTED TO INSTALLER"
+    # The property has been installed
+    INSTALL_COMPLETE = 6, "INSTALL COMPLETE"
+    # The install is complete and lodgement is complete
+    LODGEMENT_COMPLETE = 7, "LODGEMENT COMPLETE"
+    # The property has been cancelled
+    INSTALLER_CANCELLED_FINALIZED = 8, "INSTALLER CANCELLED - FINALIZED"