set up hubspot status

This commit is contained in:
Khalim Conn-Kowlessar 2025-05-21 11:55:10 +01:00
parent c0cf848db2
commit 2e041bfe75
3 changed files with 231 additions and 72 deletions

View file

@ -4,8 +4,8 @@ import re
import tiktoken
from pprint import pprint
from datetime import datetime
import asset_list.hubspot.config as hubspot_config
from numpy.ma.core import masked_not_equal
from openai import OpenAI
import numpy as np
import pandas as pd
@ -292,6 +292,13 @@ class AssetList:
"Any further surveyor notes", 'Surveyors Name'
]
NON_INTRUSIVES_NEW_FORMAT_COLNAMES = [
"Has the property been re-walled?", "Is the property tile hung?", "Does the property have a render?",
"Does the property have cladding?", "Gable Wall Obstructions",
"Does the property have foliage that needs removal?",
"Potential unsafe environment", "Date of Inspection"
]
NON_INTRUSIVES_ELIGIBILITY_COLUMN = "Eligibility (Red/Yellow/Green)"
OLD_FORMAT_NON_INTRUSIVE_COLNAMES = ['WFT Findings', 'ECO Eligibility']
@ -400,6 +407,10 @@ class AssetList:
self.non_intrusives_eligibility = "Eligibility (Red/Yellow/Green)" in self.raw_asset_list.columns
self.new_format_non_insturives_present = (
"Has the property been re-walled?" in self.standardised_asset_list.columns
)
# Names of columns
self.landlord_property_id = landlord_property_id
self.address1_colname = address1_colname
@ -687,6 +698,9 @@ class AssetList:
if self.non_intrusives_eligibility:
non_intrusive_columns.append(self.NON_INTRUSIVES_ELIGIBILITY_COLUMN)
if self.new_format_non_insturives_present:
non_intrusive_columns += self.NON_INTRUSIVES_NEW_FORMAT_COLNAMES
if self.old_format_non_intrusives_present:
# We check if we have the ECO Eligibility column, which we might not have
non_intrusive_columns = [
@ -931,6 +945,23 @@ class AssetList:
self.standardised_asset_list[self.STANDARD_LANDLORD_PROPERTY_ID].astype(str)
)
# Clean up the standard SAP column, that can be problematic (non-breaking spaces, blank cells,
# stray whitespace), and cast it to float
if self.landlord_sap is not None:
    raw_sap = self.standardised_asset_list[self.STANDARD_SAP]
    # Stringify so the .str accessors work whatever the incoming dtype, then normalise whitespace
    sap_text = (
        raw_sap
        .astype(str)
        .str.replace('\xa0', ' ', regex=False)
        .str.strip()
    )
    # Blank cells become NaN. Cells that were already missing are also forced to NaN using the
    # pre-stringification column, because astype(str) can turn a missing value into text such as
    # "None", which the float cast below cannot parse.
    is_missing = raw_sap.isna() | (sap_text == "")
    self.standardised_asset_list[self.STANDARD_SAP] = sap_text.mask(is_missing, np.nan).astype(float)
def merge_data(self, df: pd.DataFrame):
"""
Used to insert data into the standardised asset list, based on the domna property id
@ -1864,7 +1895,7 @@ class AssetList:
for col in ["cavity_reason", "solar_reason"]:
self.standardised_asset_list[col] = np.where(
(
(~pd.isnull(self.standardised_asset_list["submission_date"]))
(~pd.isnull(self.standardised_asset_list["submission_status"]))
),
None,
self.standardised_asset_list[col]
@ -1874,7 +1905,7 @@ class AssetList:
for col in ["cavity_reason", "solar_reason"]:
self.standardised_asset_list[col] = np.where(
(
(~pd.isnull(self.standardised_asset_list["ecosurv_reference"]))
(~pd.isnull(self.standardised_asset_list["ecosurv_status"]))
),
None,
self.standardised_asset_list[col]
@ -1911,6 +1942,42 @@ class AssetList:
self.outcomes[self.DOMNA_PROPERTY_ID].isin(identified_work)
]
def label_property_status(self):
    """
    This function is designed to be run after identify_worktypes() has been run. It creates (or overwrites)
    the "hubspot_status" column on the standardised asset list, which notes where each property is in the
    process (to be surveyed, surveyed, installed), using the stage labels we recognise within hubspot.

    Rows with a non-null "cavity_reason" or "solar_reason" start as READY TO BE SCHEDULED; every other row
    starts as None. Where "submission_status", "ecosurv_install_status" or "outcome_status" holds a
    recognised stage label, the highest-ranked of those stages (by enum value) replaces the starting value.
    :return: None - self.standardised_asset_list is updated in place
    """
    status_enum = hubspot_config.HubspotProcessStatus
    asset_list = self.standardised_asset_list

    # For anything that is ready to go, that gets set to ready to be scheduled
    has_identified_work = asset_list["cavity_reason"].notna() | asset_list["solar_reason"].notna()
    asset_list["hubspot_status"] = np.where(
        has_identified_work,
        status_enum.READY_TO_BE_SCHEDULED.label,
        None
    )

    # We utilise submissions, ecosurv and outcomes to define the hubspot status.
    # We take the maximum across these three columns, based on the enum integer value.
    status_by_label = {status.label: status for status in status_enum}
    progress_columns = ("submission_status", "ecosurv_install_status", "outcome_status")

    def get_max_status_from_columns(row):
        reached = []
        for col in progress_columns:
            # row.get tolerates a column that is absent; values that are not a known label are skipped
            status = status_by_label.get(row.get(col))
            if status is not None:
                reached.append(status)
        if not reached:
            # fallback to existing status if no updates
            return row["hubspot_status"]
        return max(reached).label

    asset_list["hubspot_status"] = asset_list.apply(get_max_status_from_columns, axis=1)
def flat_analysis(self):
# We need to deduce the building name - we strip out the house number
@ -2331,6 +2398,52 @@ class AssetList:
# It doesn't matter too much which record we take
matched = matched.drop_duplicates(subset=[self.STANDARD_LANDLORD_PROPERTY_ID])
# We merge on the status of the property
# The ecosurv columns are renamed before merging so that "Reference" lines up with the
# "ecosurv_reference" join key and the remaining columns carry an "ecosurv_" prefix.
matched = matched.merge(
    self.ecosurv[["Reference", "Status", "Lead Status", "Tags"]].rename(
        columns={
            "Reference": "ecosurv_reference",
            # The key must match the selected column's exact casing ("Status"): rename silently
            # ignores keys that match no column, which would leave "ecosurv_status" uncreated
            "Status": "ecosurv_status",
            "Lead Status": "ecosurv_lead_status",
            "Tags": "ecosurv_tags"
        }
    ), how="left", on="ecosurv_reference"
)
matched["ecosurv_install_status"] = None
# This mapping is listed from the latest process stage to the earliest. When several tags match, we keep
# the status with the highest enum value, so a cancellation outranks lodgement, lodgement outranks
# install complete, and install complete outranks submitted to installer
mapping = {
"Cancelled": hubspot_config.HubspotProcessStatus.INSTALLER_CANCELLED_FINALIZED,
"TrustMark: Lodged": hubspot_config.HubspotProcessStatus.LODGEMENT_COMPLETE,
"Retrofit: Complete": hubspot_config.HubspotProcessStatus.INSTALL_COMPLETE,
"Retrofit: Awaiting TrustMark": hubspot_config.HubspotProcessStatus.INSTALL_COMPLETE,
"Retrofit: Awaiting post checks": hubspot_config.HubspotProcessStatus.INSTALL_COMPLETE,
"Installer Notification Sent": hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER,
"Submitted to RC": hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER,
"COONEY": hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER,
"Signed off for install": hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER,
"Retrofit: Signed off for install": hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER,
"Audit": hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER,
"Accepted": hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER,
"Sold": hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER
}
def get_max_status(tag_str):
    """
    Return the label of the highest-ranked status whose tag text occurs in tag_str.

    :param tag_str: the tags value for one row; each key of `mapping` is tested against it by containment
    :return: the `.label` of the maximum matched status, or None when tag_str is null or nothing matches
    """
    if pd.isna(tag_str):
        return None
    hits = [status for tag, status in mapping.items() if tag in tag_str]
    return max(hits).label if hits else None
matched["ecosurv_install_status"] = matched["ecosurv_tags"].apply(get_max_status)
self.standardised_asset_list = self.standardised_asset_list.merge(
matched,
how="left",
@ -2380,7 +2493,7 @@ class AssetList:
# Perform the remap
outcomes["Outcome"] = outcomes["Notes / Outcomes"].map(remap_dictionary)
outcomes["Outcome"] = outcomes["Outcome"].str.lower()
outcomes["Outcome"] = outcomes["Outcome"].str.lower().str.strip()
logger.info("Matching outcomes to asset list")
# Merge the outcomes onto the asset list - we check we're able to match sufficiently well
@ -2542,12 +2655,13 @@ class AssetList:
apply(get_latest_note).
reset_index(drop=True)
)
latest_note = latest_note[["domna_property_id", notes_col]]
latest_note = latest_note[["domna_property_id", notes_col, "Outcome"]].rename(
columns={"Notes": "latest_outcome_note", "Outcome": "latest_outcome"}
)
pivot_df = lookup.groupby(["domna_property_id", "Outcome"]).size().unstack(fill_value=0).reset_index()
pivot_df = pivot_df.merge(
visit_counts, how="left", on="domna_property_id"
)
pivot_df = pivot_df.merge(visit_counts, how="left", on="domna_property_id")
pivot_df = pivot_df.merge(latest_note, how="left", on="domna_property_id")
# We want the latest note
@ -2558,15 +2672,32 @@ class AssetList:
self.outcomes["matched_to_asset_list"] = self.outcomes["row_id"].isin(lookup["row_id"].values)
self.outcomes = self.outcomes.merge(lookup[["row_id", "domna_property_id"]], how="left", on="row_id")
# We flag the outcome status, based on the outcome
pivot_df["outcome_status"] = None
if "surveyed" in pivot_df.columns:
pivot_df["outcome_status"] = np.where(
pivot_df["surveyed"] > 0, hubspot_config.HubspotProcessStatus.SURVEYED_COMPLETED_SIGNED_OFF.label,
pivot_df["outcome_status"]
)
if "installer refusal" in pivot_df.columns:
pivot_df["outcome_status"] = np.where(
pivot_df["installer refusal"] > 0, hubspot_config.HubspotProcessStatus.NOT_VIABLE.label,
pivot_df["outcome_status"]
)
pivot_df["outcome_status"] = np.where(
pivot_df["latest_outcome"].isin(["see notes"]) &
(pivot_df["outcome_status"] != hubspot_config.HubspotProcessStatus.SURVEYED_COMPLETED_SIGNED_OFF.label),
hubspot_config.HubspotProcessStatus.SURVEYED_NO_ACCESS_NEEDS_SIGN_OFF.label,
pivot_df["outcome_status"]
)
# We merge our pivoted outcomes onto the asset list
self.standardised_asset_list = self.standardised_asset_list.merge(
pivot_df, how="left", left_on=self.DOMNA_PROPERTY_ID, right_on="domna_property_id"
)
# Merge the latest note
self.standardised_asset_list = self.standardised_asset_list.merge(
latest_note.rename(columns={notes_col: "Latest Route March Note"}),
how="left", left_on=self.DOMNA_PROPERTY_ID, right_on="domna_property_id"
)
if self.standardised_asset_list[self.DOMNA_PROPERTY_ID].duplicated().sum():
raise ValueError("Duplicates appreared - something went wrong")
@ -2640,6 +2771,7 @@ class AssetList:
master_data.columns else "PROPERTY TYPE As per table emailed"
)
measure_mix_col = "MEASURE COMBO"
installer_notes_col = "INSTALLERS NOTES ; REASONS FOR CANCELLATIONS"
logger.info("Matching master data to asset list")
matched = []
@ -2774,19 +2906,30 @@ class AssetList:
self.standardised_asset_list = self.standardised_asset_list.drop(columns="house_no")
# We match the "UPRN" which is the landlords ID, onto the master sheet
if measure_mix_col not in master_data.columns:
master_data[measure_mix_col] = "Measure mix not recorded"
matched = pd.DataFrame(matched)
master_to_append = master_data[[scheme_col, "row_id", install_col, submission_col, measure_mix_col]].merge(
master_to_append = master_data[
[scheme_col, "row_id", install_col, submission_col, measure_mix_col, installer_notes_col]
].merge(
matched, how="left", on="row_id"
).rename(
columns={
scheme_col: "funding_scheme",
measure_mix_col: "measure_mix",
install_col: "survey_status",
submission_col: "submission_date"
submission_col: "submission_date",
installer_notes_col: "submission_installer_notes"
}
)
master_to_append["cancelled"] = master_to_append["survey_status"].str.lower().str.contains("cancel")
master_to_append["installed"] = master_to_append["survey_status"].str.lower().str.contains("installed")
master_to_append["submission_cancelled"] = (
master_to_append["survey_status"].str.lower().str.contains("cancel")
)
master_to_append["submission_installed"] = (
master_to_append["survey_status"].str.lower().str.contains("installed")
)
master_surveyed.append(master_to_append)
unmatched_df = master_data[
master_data["row_id"].isin(unmatched)
@ -2822,7 +2965,21 @@ class AssetList:
].astype(str)
# We de-dupe crudely on landlord property id
self.master_surveyed = master_surveyed.drop_duplicates(subset=[self.STANDARD_LANDLORD_PROPERTY_ID])
self.master_surveyed = master_surveyed.drop_duplicates(subset=[self.STANDARD_LANDLORD_PROPERTY_ID]).copy()
# We now add the submission status, based on the hubspot stages
self.master_surveyed["submission_status"] = hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER.label
self.master_surveyed["submission_status"] = np.where(
self.master_surveyed["submission_cancelled"] == True,
hubspot_config.HubspotProcessStatus.INSTALLER_CANCELLED_FINALIZED.label,
self.master_surveyed["submission_status"]
)
self.master_surveyed["submission_status"] = np.where(
self.master_surveyed["submission_installed"] == True,
hubspot_config.HubspotProcessStatus.INSTALL_COMPLETE.label,
self.master_surveyed["submission_status"]
)
self.standardised_asset_list = self.standardised_asset_list.merge(
self.master_surveyed, how="left", on=self.STANDARD_LANDLORD_PROPERTY_ID

View file

@ -99,66 +99,36 @@ def app():
phase = False
ecosurv_landlords = "thrive"
# Medway
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Medway"
data_filename = "MEDWAY Asset List.xlsx"
sheet_name = "Asset list"
# Torus
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Torus/Phase 2"
data_filename = "Torus Property Asset List - INSPECTIONS.xlsx"
sheet_name = "TORUS"
postcode_column = 'Postcode'
fulladdress_column = None
address1_column = "House Number"
address1_column = "AddressLine1"
address1_method = None
address_cols_to_concat = ["House Number", "Street 1"]
address_cols_to_concat = ["AddressLine1", "AddressLine2", "AddressLine3"]
missing_postcodes_method = None
landlord_year_built = "Year Built"
landlord_os_uprn = None
landlord_property_type = "Property Type - Academy"
landlord_built_form = "Property Type - Academy"
landlord_wall_construction = None
landlord_roof_construction = None
landlord_heating_system = None
landlord_existing_pv = None
landlord_property_id = "Row ID"
landlord_sap = None
outcomes_filename = []
outcomes_sheetname = []
outcomes_postcode = []
outcomes_houseno = []
outcomes_id = []
outcomes_address = []
master_filepaths = []
master_to_asset_list_filepath = None
phase = False
ecosurv_landlords = None
# MHS
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MHS"
data_filename = "MHS HOMES (Full Asset List) - for programme build.xlsx"
sheet_name = "Sheet1"
postcode_column = 'Postcode'
fulladdress_column = "FullAddress"
address1_column = None
address1_method = "house_number_extraction"
address_cols_to_concat = []
missing_postcodes_method = None
landlord_year_built = "BuiltInYear"
landlord_os_uprn = None
landlord_property_type = "AssetType"
landlord_built_form = "PropertyType"
landlord_wall_construction = None
landlord_roof_construction = None
landlord_heating_system = None
landlord_existing_pv = None
landlord_year_built = "Property Age"
landlord_os_uprn = "NatUPRN"
landlord_property_type = "Property Type"
landlord_built_form = "Built Form"
landlord_wall_construction = "Wall Construction"
landlord_roof_construction = "Roof Construction"
landlord_heating_system = "Space Heating Source"
landlord_existing_pv = "Low Carbon Technology (Solar PV)"
landlord_property_id = "UPRN"
landlord_sap = None
outcomes_filename = []
outcomes_sheetname = []
outcomes_postcode = []
outcomes_houseno = []
outcomes_id = []
outcomes_address = []
landlord_sap = "SAP Score"
outcomes_filename = None
outcomes_sheetname = None
outcomes_postcode = None
outcomes_houseno = None
outcomes_id = None
outcomes_address = None
master_filepaths = []
master_to_asset_list_filepath = None
phase = False
master_id_colnames = []
phase = True
ecosurv_landlords = None
# Southern Midlands
@ -300,7 +270,8 @@ def app():
asset_list.flag_survey_master(
master_filepaths=master_filepaths,
master_to_asset_list_filepath=master_to_asset_list_filepath
master_to_asset_list_filepath=master_to_asset_list_filepath,
master_id_colnames=master_id_colnames,
)
asset_list.flag_ecosurv(ecosurv_landlords)
@ -505,6 +476,9 @@ def app():
pprint(asset_list.work_type_figures)
# We now flag the status of the property
asset_list.label_property_status()
asset_list.flat_analysis()
asset_list.load_contact_details(

View file

@ -0,0 +1,28 @@
from enum import IntEnum
class HubspotProcessStatus(IntEnum):
    """
    The stages of the hubspot process that a property can be in.

    Each member is declared as ``(value, label)``: the member itself is the integer ``value`` and the
    stage name as text is exposed as ``.label``. The absolute numbers carry no meaning of their own,
    but their relative order does - members are compared (e.g. with ``max()``) to decide which stage
    takes precedence when several apply.
    """

    # Stage name as text, set per member in __new__
    label: str

    def __new__(cls, value, label):
        obj = int.__new__(cls, value)
        obj._value_ = value
        obj.label = label
        return obj

    @classmethod
    def from_label(cls, label, default=None):
        """
        Look a member up by its ``.label`` text.

        :param label: the label to search for; compared with ``==`` so non-string values simply never match
        :param default: returned when no member carries that label
        :return: the matching member, or ``default``
        """
        for member in cls:
            if member.label == label:
                return member
        return default

    # This is the first stage, where a survey is ready to go
    READY_TO_BE_SCHEDULED = 1, "READY TO BE SCHEDULED"
    # The property didn't get access and needs sign off
    SURVEYED_NO_ACCESS_NEEDS_SIGN_OFF = 2, "SURVEYED - NO ACCESS - NEED SIGN OFF"
    # The survey has been completed. We don't have any update as to whether the property has been installed
    SURVEYED_COMPLETED_SIGNED_OFF = 3, "SURVEYED - COMPLETED - SIGNED OFF"
    # The property turned out to be ineligible
    NOT_VIABLE = 4, "NOT VIABLE"
    # The property is with the installer. This will likely be the default for historic programmes
    SUBMITTED_TO_INSTALLER = 5, "SUBMITTED TO INSTALLER"
    # The property has been installed
    INSTALL_COMPLETE = 6, "INSTALL COMPLETE"
    # The install has completed and lodgement is complete
    LODGEMENT_COMPLETE = 7, "LODGEMENT COMPLETE"
    # The property has been cancelled
    INSTALLER_CANCELLED_FINALIZED = 8, "INSTALLER CANCELLED - FINALIZED"