mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
set up hubspot status
This commit is contained in:
parent
c0cf848db2
commit
2e041bfe75
3 changed files with 231 additions and 72 deletions
|
|
@ -4,8 +4,8 @@ import re
|
|||
import tiktoken
|
||||
from pprint import pprint
|
||||
from datetime import datetime
|
||||
import asset_list.hubspot.config as hubspot_config
|
||||
|
||||
from numpy.ma.core import masked_not_equal
|
||||
from openai import OpenAI
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
|
@ -292,6 +292,13 @@ class AssetList:
|
|||
"Any further surveyor notes", 'Surveyors Name'
|
||||
]
|
||||
|
||||
NON_INTRUSIVES_NEW_FORMAT_COLNAMES = [
|
||||
"Has the property been re-walled?", "Is the property tile hung?", "Does the property have a render?",
|
||||
"Does the property have cladding?", "Gable Wall Obstructions",
|
||||
"Does the property have foliage that needs removal?",
|
||||
"Potential unsafe environment", "Date of Inspection"
|
||||
]
|
||||
|
||||
NON_INTRUSIVES_ELIGIBILITY_COLUMN = "Eligibility (Red/Yellow/Green)"
|
||||
|
||||
OLD_FORMAT_NON_INTRUSIVE_COLNAMES = ['WFT Findings', 'ECO Eligibility']
|
||||
|
|
@ -400,6 +407,10 @@ class AssetList:
|
|||
|
||||
self.non_intrusives_eligibility = "Eligibility (Red/Yellow/Green)" in self.raw_asset_list.columns
|
||||
|
||||
self.new_format_non_insturives_present = (
|
||||
"Has the property been re-walled?" in self.standardised_asset_list.columns
|
||||
)
|
||||
|
||||
# Names of columns
|
||||
self.landlord_property_id = landlord_property_id
|
||||
self.address1_colname = address1_colname
|
||||
|
|
@ -687,6 +698,9 @@ class AssetList:
|
|||
if self.non_intrusives_eligibility:
|
||||
non_intrusive_columns.append(self.NON_INTRUSIVES_ELIGIBILITY_COLUMN)
|
||||
|
||||
if self.new_format_non_insturives_present:
|
||||
non_intrusive_columns += self.NON_INTRUSIVES_NEW_FORMAT_COLNAMES
|
||||
|
||||
if self.old_format_non_intrusives_present:
|
||||
# We check if we have the ECO Eligibility column, which we might not have
|
||||
non_intrusive_columns = [
|
||||
|
|
@ -931,6 +945,23 @@ class AssetList:
|
|||
self.standardised_asset_list[self.STANDARD_LANDLORD_PROPERTY_ID].astype(str)
|
||||
)
|
||||
|
||||
# Clean up the standard SAP column, which can be problematic
|
||||
if self.landlord_sap is not None:
|
||||
self.standardised_asset_list[self.STANDARD_SAP] = (
|
||||
self.standardised_asset_list[self.STANDARD_SAP]
|
||||
.astype(str)
|
||||
.str.replace('\xa0', ' ', regex=False)
|
||||
.str.strip()
|
||||
)
|
||||
self.standardised_asset_list[self.STANDARD_SAP] = np.where(
|
||||
self.standardised_asset_list[self.STANDARD_SAP] == "",
|
||||
None,
|
||||
self.standardised_asset_list[self.STANDARD_SAP]
|
||||
)
|
||||
self.standardised_asset_list[self.STANDARD_SAP] = (
|
||||
self.standardised_asset_list[self.STANDARD_SAP].astype(float)
|
||||
)
|
||||
|
||||
def merge_data(self, df: pd.DataFrame):
|
||||
"""
|
||||
Used to insert data into the standardised asset list, based on the domna property id
|
||||
|
|
@ -1864,7 +1895,7 @@ class AssetList:
|
|||
for col in ["cavity_reason", "solar_reason"]:
|
||||
self.standardised_asset_list[col] = np.where(
|
||||
(
|
||||
(~pd.isnull(self.standardised_asset_list["submission_date"]))
|
||||
(~pd.isnull(self.standardised_asset_list["submission_status"]))
|
||||
),
|
||||
None,
|
||||
self.standardised_asset_list[col]
|
||||
|
|
@ -1874,7 +1905,7 @@ class AssetList:
|
|||
for col in ["cavity_reason", "solar_reason"]:
|
||||
self.standardised_asset_list[col] = np.where(
|
||||
(
|
||||
(~pd.isnull(self.standardised_asset_list["ecosurv_reference"]))
|
||||
(~pd.isnull(self.standardised_asset_list["ecosurv_status"]))
|
||||
),
|
||||
None,
|
||||
self.standardised_asset_list[col]
|
||||
|
|
@ -1911,6 +1942,42 @@ class AssetList:
|
|||
self.outcomes[self.DOMNA_PROPERTY_ID].isin(identified_work)
|
||||
]
|
||||
|
||||
def label_property_status(self):
    """
    Populate the "hubspot_status" column of the standardised asset list.

    Designed to be run after identify_worktypes(). Every property with a cavity or solar reason starts out
    as READY TO BE SCHEDULED. That starting value is then replaced by the highest-ranked status found in
    the submission, ecosurv and outcome status columns, where rank is the integer value of the matching
    HubspotProcessStatus member. Rows with no recognised status keep their starting value.

    :return: None - self.standardised_asset_list is modified in place
    """
    asset_list = self.standardised_asset_list

    # Anything with identified work is ready to go, so it begins as ready to be scheduled
    has_identified_work = asset_list["cavity_reason"].notna() | asset_list["solar_reason"].notna()
    asset_list["hubspot_status"] = np.where(
        has_identified_work,
        hubspot_config.HubspotProcessStatus.READY_TO_BE_SCHEDULED.label,
        None
    )

    # Submissions, ecosurv and outcomes each report a status label; we keep whichever is ranked highest.
    # Labels are mapped back to enum members so they can be compared on their integer value.
    status_by_label = {status.label: status for status in hubspot_config.HubspotProcessStatus}
    source_columns = ("submission_status", "ecosurv_install_status", "outcome_status")

    def furthest_status(row):
        # row.get() yields None when a source column is absent; unrecognised labels are ignored
        labels = (row.get(column) for column in source_columns)
        recognised = [status_by_label[label] for label in labels if label in status_by_label]
        if recognised:
            return max(recognised).label
        # No source reported a recognised status, so the existing value stands
        return row["hubspot_status"]

    asset_list["hubspot_status"] = asset_list.apply(furthest_status, axis=1)
|
||||
|
||||
def flat_analysis(self):
|
||||
|
||||
# We need to deduce the building name - we strip out the house number
|
||||
|
|
@ -2331,6 +2398,52 @@ class AssetList:
|
|||
# It doesn't matter too much which record we take
|
||||
matched = matched.drop_duplicates(subset=[self.STANDARD_LANDLORD_PROPERTY_ID])
|
||||
|
||||
# We merge on the status of the property
|
||||
matched = matched.merge(
|
||||
self.ecosurv[["Reference", "Status", "Lead Status", "Tags"]].rename(
|
||||
columns={
|
||||
"Reference": "ecosurv_reference",
|
||||
"status": "ecosurv_status",
|
||||
"Lead Status": "ecosurv_lead_status",
|
||||
"Tags": "ecosurv_tags"
|
||||
}
|
||||
), how="left", on="ecosurv_reference"
|
||||
)
|
||||
|
||||
matched["ecosurv_install_status"] = None
|
||||
|
||||
# This mapping is ordered by process order, where lodgment is the final step so if we have an indication
|
||||
# that the property is ready for lodgement, we set the status to that. We then proceed through the other
|
||||
# statuses where the penultimate status is install complete
|
||||
mapping = {
|
||||
"Cancelled": hubspot_config.HubspotProcessStatus.INSTALLER_CANCELLED_FINALIZED,
|
||||
"TrustMark: Lodged": hubspot_config.HubspotProcessStatus.LODGEMENT_COMPLETE,
|
||||
"Retrofit: Complete": hubspot_config.HubspotProcessStatus.INSTALL_COMPLETE,
|
||||
"Retrofit: Awaiting TrustMark": hubspot_config.HubspotProcessStatus.INSTALL_COMPLETE,
|
||||
"Retrofit: Awaiting post checks": hubspot_config.HubspotProcessStatus.INSTALL_COMPLETE,
|
||||
"Installer Notification Sent": hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER,
|
||||
"Submitted to RC": hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER,
|
||||
"COONEY": hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER,
|
||||
"Signed off for install": hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER,
|
||||
"Retrofit: Signed off for install": hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER,
|
||||
"Audit": hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER,
|
||||
"Accepted": hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER,
|
||||
"Sold": hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER
|
||||
}
|
||||
|
||||
def get_max_status(tag_str):
    """
    Return the label of the highest-ranked status whose tag text occurs within tag_str.

    :param tag_str: the tags value for one record; may be missing (NaN/None)
    :return: the status label, or None when tag_str is missing or contains none of the mapped tags
    """
    # A missing tags value carries no status information
    if pd.isna(tag_str):
        return None

    # Tags are matched by substring, so one tags string can hit several mapping entries
    hits = [status for tag, status in mapping.items() if tag in tag_str]

    # max() compares the enum members, so the highest-valued status wins
    return max(hits).label if hits else None
|
||||
|
||||
matched["ecosurv_install_status"] = matched["ecosurv_tags"].apply(get_max_status)
|
||||
|
||||
self.standardised_asset_list = self.standardised_asset_list.merge(
|
||||
matched,
|
||||
how="left",
|
||||
|
|
@ -2380,7 +2493,7 @@ class AssetList:
|
|||
# Perform the remap
|
||||
outcomes["Outcome"] = outcomes["Notes / Outcomes"].map(remap_dictionary)
|
||||
|
||||
outcomes["Outcome"] = outcomes["Outcome"].str.lower()
|
||||
outcomes["Outcome"] = outcomes["Outcome"].str.lower().str.strip()
|
||||
|
||||
logger.info("Matching outcomes to asset list")
|
||||
# Merge the outcomes onto the asset list - we check we're able to match sufficiently well
|
||||
|
|
@ -2542,12 +2655,13 @@ class AssetList:
|
|||
apply(get_latest_note).
|
||||
reset_index(drop=True)
|
||||
)
|
||||
latest_note = latest_note[["domna_property_id", notes_col]]
|
||||
latest_note = latest_note[["domna_property_id", notes_col, "Outcome"]].rename(
|
||||
columns={"Notes": "latest_outcome_note", "Outcome": "latest_outcome"}
|
||||
)
|
||||
|
||||
pivot_df = lookup.groupby(["domna_property_id", "Outcome"]).size().unstack(fill_value=0).reset_index()
|
||||
pivot_df = pivot_df.merge(
|
||||
visit_counts, how="left", on="domna_property_id"
|
||||
)
|
||||
pivot_df = pivot_df.merge(visit_counts, how="left", on="domna_property_id")
|
||||
pivot_df = pivot_df.merge(latest_note, how="left", on="domna_property_id")
|
||||
|
||||
# We want the latest note
|
||||
|
||||
|
|
@ -2558,15 +2672,32 @@ class AssetList:
|
|||
self.outcomes["matched_to_asset_list"] = self.outcomes["row_id"].isin(lookup["row_id"].values)
|
||||
self.outcomes = self.outcomes.merge(lookup[["row_id", "domna_property_id"]], how="left", on="row_id")
|
||||
|
||||
# We flag the outcome status, based on the outcome
|
||||
pivot_df["outcome_status"] = None
|
||||
|
||||
if "surveyed" in pivot_df.columns:
|
||||
pivot_df["outcome_status"] = np.where(
|
||||
pivot_df["surveyed"] > 0, hubspot_config.HubspotProcessStatus.SURVEYED_COMPLETED_SIGNED_OFF.label,
|
||||
pivot_df["outcome_status"]
|
||||
)
|
||||
|
||||
if "installer refusal" in pivot_df.columns:
|
||||
pivot_df["outcome_status"] = np.where(
|
||||
pivot_df["installer refusal"] > 0, hubspot_config.HubspotProcessStatus.NOT_VIABLE.label,
|
||||
pivot_df["outcome_status"]
|
||||
)
|
||||
|
||||
pivot_df["outcome_status"] = np.where(
|
||||
pivot_df["latest_outcome"].isin(["see notes"]) &
|
||||
(pivot_df["outcome_status"] != hubspot_config.HubspotProcessStatus.SURVEYED_COMPLETED_SIGNED_OFF.label),
|
||||
hubspot_config.HubspotProcessStatus.SURVEYED_NO_ACCESS_NEEDS_SIGN_OFF.label,
|
||||
pivot_df["outcome_status"]
|
||||
)
|
||||
|
||||
# We merge our pivoted outcomes onto the asset list
|
||||
self.standardised_asset_list = self.standardised_asset_list.merge(
|
||||
pivot_df, how="left", left_on=self.DOMNA_PROPERTY_ID, right_on="domna_property_id"
|
||||
)
|
||||
# Merge the latest note
|
||||
self.standardised_asset_list = self.standardised_asset_list.merge(
|
||||
latest_note.rename(columns={notes_col: "Latest Route March Note"}),
|
||||
how="left", left_on=self.DOMNA_PROPERTY_ID, right_on="domna_property_id"
|
||||
)
|
||||
|
||||
if self.standardised_asset_list[self.DOMNA_PROPERTY_ID].duplicated().sum():
|
||||
raise ValueError("Duplicates appreared - something went wrong")
|
||||
|
|
@ -2640,6 +2771,7 @@ class AssetList:
|
|||
master_data.columns else "PROPERTY TYPE As per table emailed"
|
||||
)
|
||||
measure_mix_col = "MEASURE COMBO"
|
||||
installer_notes_col = "INSTALLERS NOTES ; REASONS FOR CANCELLATIONS"
|
||||
|
||||
logger.info("Matching master data to asset list")
|
||||
matched = []
|
||||
|
|
@ -2774,19 +2906,30 @@ class AssetList:
|
|||
self.standardised_asset_list = self.standardised_asset_list.drop(columns="house_no")
|
||||
|
||||
# We match the "UPRN" which is the landlords ID, onto the master sheet
|
||||
|
||||
if measure_mix_col not in master_data.columns:
|
||||
master_data[measure_mix_col] = "Measure mix not recorded"
|
||||
|
||||
matched = pd.DataFrame(matched)
|
||||
master_to_append = master_data[[scheme_col, "row_id", install_col, submission_col, measure_mix_col]].merge(
|
||||
master_to_append = master_data[
|
||||
[scheme_col, "row_id", install_col, submission_col, measure_mix_col, installer_notes_col]
|
||||
].merge(
|
||||
matched, how="left", on="row_id"
|
||||
).rename(
|
||||
columns={
|
||||
scheme_col: "funding_scheme",
|
||||
measure_mix_col: "measure_mix",
|
||||
install_col: "survey_status",
|
||||
submission_col: "submission_date"
|
||||
submission_col: "submission_date",
|
||||
installer_notes_col: "submission_installer_notes"
|
||||
}
|
||||
)
|
||||
master_to_append["cancelled"] = master_to_append["survey_status"].str.lower().str.contains("cancel")
|
||||
master_to_append["installed"] = master_to_append["survey_status"].str.lower().str.contains("installed")
|
||||
master_to_append["submission_cancelled"] = (
|
||||
master_to_append["survey_status"].str.lower().str.contains("cancel")
|
||||
)
|
||||
master_to_append["submission_installed"] = (
|
||||
master_to_append["survey_status"].str.lower().str.contains("installed")
|
||||
)
|
||||
master_surveyed.append(master_to_append)
|
||||
unmatched_df = master_data[
|
||||
master_data["row_id"].isin(unmatched)
|
||||
|
|
@ -2822,7 +2965,21 @@ class AssetList:
|
|||
].astype(str)
|
||||
|
||||
# We de-dupe crudely on landlord property id
|
||||
self.master_surveyed = master_surveyed.drop_duplicates(subset=[self.STANDARD_LANDLORD_PROPERTY_ID])
|
||||
self.master_surveyed = master_surveyed.drop_duplicates(subset=[self.STANDARD_LANDLORD_PROPERTY_ID]).copy()
|
||||
|
||||
# We now add the submission status, based on the hubspot stages
|
||||
self.master_surveyed["submission_status"] = hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER.label
|
||||
self.master_surveyed["submission_status"] = np.where(
|
||||
self.master_surveyed["submission_cancelled"] == True,
|
||||
hubspot_config.HubspotProcessStatus.INSTALLER_CANCELLED_FINALIZED.label,
|
||||
self.master_surveyed["submission_status"]
|
||||
)
|
||||
|
||||
self.master_surveyed["submission_status"] = np.where(
|
||||
self.master_surveyed["submission_installed"] == True,
|
||||
hubspot_config.HubspotProcessStatus.INSTALL_COMPLETE.label,
|
||||
self.master_surveyed["submission_status"]
|
||||
)
|
||||
|
||||
self.standardised_asset_list = self.standardised_asset_list.merge(
|
||||
self.master_surveyed, how="left", on=self.STANDARD_LANDLORD_PROPERTY_ID
|
||||
|
|
|
|||
|
|
@ -99,66 +99,36 @@ def app():
|
|||
phase = False
|
||||
ecosurv_landlords = "thrive"
|
||||
|
||||
# Medway
|
||||
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Medway"
|
||||
data_filename = "MEDWAY Asset List.xlsx"
|
||||
sheet_name = "Asset list"
|
||||
# Torus
|
||||
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Torus/Phase 2"
|
||||
data_filename = "Torus Property Asset List - INSPECTIONS.xlsx"
|
||||
sheet_name = "TORUS"
|
||||
postcode_column = 'Postcode'
|
||||
fulladdress_column = None
|
||||
address1_column = "House Number"
|
||||
address1_column = "AddressLine1"
|
||||
address1_method = None
|
||||
address_cols_to_concat = ["House Number", "Street 1"]
|
||||
address_cols_to_concat = ["AddressLine1", "AddressLine2", "AddressLine3"]
|
||||
missing_postcodes_method = None
|
||||
landlord_year_built = "Year Built"
|
||||
landlord_os_uprn = None
|
||||
landlord_property_type = "Property Type - Academy"
|
||||
landlord_built_form = "Property Type - Academy"
|
||||
landlord_wall_construction = None
|
||||
landlord_roof_construction = None
|
||||
landlord_heating_system = None
|
||||
landlord_existing_pv = None
|
||||
landlord_property_id = "Row ID"
|
||||
landlord_sap = None
|
||||
outcomes_filename = []
|
||||
outcomes_sheetname = []
|
||||
outcomes_postcode = []
|
||||
outcomes_houseno = []
|
||||
outcomes_id = []
|
||||
outcomes_address = []
|
||||
master_filepaths = []
|
||||
master_to_asset_list_filepath = None
|
||||
phase = False
|
||||
ecosurv_landlords = None
|
||||
|
||||
# MHS
|
||||
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MHS"
|
||||
data_filename = "MHS HOMES (Full Asset List) - for programme build.xlsx"
|
||||
sheet_name = "Sheet1"
|
||||
postcode_column = 'Postcode'
|
||||
fulladdress_column = "FullAddress"
|
||||
address1_column = None
|
||||
address1_method = "house_number_extraction"
|
||||
address_cols_to_concat = []
|
||||
missing_postcodes_method = None
|
||||
landlord_year_built = "BuiltInYear"
|
||||
landlord_os_uprn = None
|
||||
landlord_property_type = "AssetType"
|
||||
landlord_built_form = "PropertyType"
|
||||
landlord_wall_construction = None
|
||||
landlord_roof_construction = None
|
||||
landlord_heating_system = None
|
||||
landlord_existing_pv = None
|
||||
landlord_year_built = "Property Age"
|
||||
landlord_os_uprn = "NatUPRN"
|
||||
landlord_property_type = "Property Type"
|
||||
landlord_built_form = "Built Form"
|
||||
landlord_wall_construction = "Wall Construction"
|
||||
landlord_roof_construction = "Roof Construction"
|
||||
landlord_heating_system = "Space Heating Source"
|
||||
landlord_existing_pv = "Low Carbon Technology (Solar PV)"
|
||||
landlord_property_id = "UPRN"
|
||||
landlord_sap = None
|
||||
outcomes_filename = []
|
||||
outcomes_sheetname = []
|
||||
outcomes_postcode = []
|
||||
outcomes_houseno = []
|
||||
outcomes_id = []
|
||||
outcomes_address = []
|
||||
landlord_sap = "SAP Score"
|
||||
outcomes_filename = None
|
||||
outcomes_sheetname = None
|
||||
outcomes_postcode = None
|
||||
outcomes_houseno = None
|
||||
outcomes_id = None
|
||||
outcomes_address = None
|
||||
master_filepaths = []
|
||||
master_to_asset_list_filepath = None
|
||||
phase = False
|
||||
master_id_colnames = []
|
||||
phase = True
|
||||
ecosurv_landlords = None
|
||||
|
||||
# Southern Midlands
|
||||
|
|
@ -300,7 +270,8 @@ def app():
|
|||
|
||||
asset_list.flag_survey_master(
|
||||
master_filepaths=master_filepaths,
|
||||
master_to_asset_list_filepath=master_to_asset_list_filepath
|
||||
master_to_asset_list_filepath=master_to_asset_list_filepath,
|
||||
master_id_colnames=master_id_colnames,
|
||||
)
|
||||
|
||||
asset_list.flag_ecosurv(ecosurv_landlords)
|
||||
|
|
@ -505,6 +476,9 @@ def app():
|
|||
|
||||
pprint(asset_list.work_type_figures)
|
||||
|
||||
# We now flag the status of the property
|
||||
asset_list.label_property_status()
|
||||
|
||||
asset_list.flat_analysis()
|
||||
|
||||
asset_list.load_contact_details(
|
||||
|
|
|
|||
28
asset_list/hubspot/config.py
Normal file
28
asset_list/hubspot/config.py
Normal file
|
|
@ -0,0 +1,28 @@
|
|||
from enum import IntEnum
|
||||
|
||||
|
||||
class HubspotProcessStatus(IntEnum):
    """
    Stages a property can be at within the hubspot process.

    Each member is declared as ``(value, label)``. The integer becomes the member's value and the text is
    exposed through the ``label`` attribute. The integers have no meaning of their own, but they define the
    order of the stages: members compare as integers, so max() over several members returns the one with
    the highest value.
    """

    def __new__(cls, value, label):
        # Build the int-backed member from the value, then attach the display label to it
        member = int.__new__(cls, value)
        member._value_ = value
        member.label = label
        return member

    # First stage: a survey is ready to go
    READY_TO_BE_SCHEDULED = 1, "READY TO BE SCHEDULED"

    # The surveyor could not get access and the property needs sign off
    SURVEYED_NO_ACCESS_NEEDS_SIGN_OFF = 2, "SURVEYED - NO ACCESS - NEED SIGN OFF"

    # The survey is complete; there is no update yet on whether the property has been installed
    SURVEYED_COMPLETED_SIGNED_OFF = 3, "SURVEYED - COMPLETED - SIGNED OFF"

    # The property turned out to be ineligible
    NOT_VIABLE = 4, "NOT VIABLE"

    # The property is with the installer - likely the default for historic programmes
    SUBMITTED_TO_INSTALLER = 5, "SUBMITTED TO INSTALLER"

    # The property has been installed
    INSTALL_COMPLETE = 6, "INSTALL COMPLETE"

    # The install has completed and lodgement is complete
    LODGEMENT_COMPLETE = 7, "LODGEMENT COMPLETE"

    # The property has been cancelled
    INSTALLER_CANCELLED_FINALIZED = 8, "INSTALLER CANCELLED - FINALIZED"
|
||||
Loading…
Add table
Reference in a new issue