Model/asset_list/hubspot/prepare_for_hubspot.py
2025-08-06 12:23:26 +01:00

189 lines
7.7 KiB
Python

import os
import pandas as pd
from asset_list.AssetList import AssetList
import re
def normalize_uk_phone(number: str | float | int) -> str | None:
if pd.isna(number):
return None
number = str(number)
number = re.sub(r"[^\d+]", "", number)
# Handle common short inputs: add '0' if likely missing
if re.match(r"^7\d{8,9}$", number) or re.match(r"^1\d{8,9}$", number):
number = "0" + number
# Convert to international format
if number.startswith("0"):
number = "+44" + number[1:]
elif number.startswith("0044"):
number = "+" + number[2:]
# Must be +44 followed by 10 digits (some area codes may vary)
if re.match(r"^\+44\d{9,10}$", number):
return number
return None
def _normalise_phone_column(phone_numbers: pd.Series) -> pd.Series:
    """
    Apply normalize_uk_phone to every value of a phone number column.

    Values are converted to text one at a time instead of casting the whole column to
    "Int64": that cast raises for text values containing spaces or other separators, and
    turns digit strings such as "+447123456789" into integers, dropping the "+" and any
    leading zero.

    :param phone_numbers: Column of raw phone numbers (numeric, text or missing values).
    :return: Column holding the normalised number, or None where it could not be normalised.
    """

    def _as_text(value):
        if pd.isna(value):
            return None
        # Numeric cells next to blanks are read as floats; drop the ".0" before stringifying
        if isinstance(value, float) and value.is_integer():
            return str(int(value))
        return str(value)

    return phone_numbers.apply(lambda value: normalize_uk_phone(_as_text(value)))


def app():
    """
    Build the HubSpot upload file for a standardised asset list.

    Steps:
      1. Load the standardised asset list and attach the contact details.
      2. Run AssetList.prepare_for_crm to populate asset_list.hubspot_data.
      3. Normalise the primary and secondary contact phone numbers.
      4. Check that every row has a project code and a deal stage.
      5. Write the result next to the input asset list as "<asset list name> - Hubspot Upload.csv".

    TODO: Operations may have removed some cavity_reason/solar_reason values from the standardised asset list after
        review. So, we will need to update the hubspot status for these entries and set them to None, if they
        were previously being set to ready for scheduling. We don't want to just filter on rows where
        cavity_reason and solar_reason are populated, as if we want to include historical surveys, this will remove
        them
    TODO: If we wish to upload deals in batches (note left unfinished in the original)

    :raises ValueError: If any row is missing a project code or a deal stage.
    :return: None. The upload file is written to disk.
    """
    # inputs:
    reconcile_programme = False  # If True, the hubspot upload will include all properties with a project code
    customer_domain = "https://shgroup.org.uk"
    installer_name = "SCIS"

    asset_list_filepath = (
        "/Users/khalimconn-kowlessar/Downloads/20250701 Optivo Southern - Standardised.xlsx"
    )
    asset_list_sheet_name = "Solar Route Revised (100)"
    asset_list_header = 0

    contact_details_filepath = (
        "/Users/khalimconn-kowlessar/Downloads/southern_optivo_solar_pv.xlsx"
    )
    contacts_sheet_name = "Sheet1"
    contacts_landlord_property_id = "landlord_property_id"
    contacts_phone_number_column = "Primary phone number"
    contacts_secondary_phone_number_column = "Secondary phone number"
    contacts_secondary_contact_full_name = None
    contacts_email_column = "Email Address"
    contacts_fullname_column = None
    contacts_firstname_column = "Name"
    contacts_lastname_column = None

    # Only referenced by the commented-out "existing programme" filter further down
    existing_programme_filepath = None

    asset_list = AssetList.load_standardised_asset_list(
        asset_list_filepath, asset_list_sheet_name, asset_list_header
    )

    asset_list.load_contact_details(
        local_filepath=contact_details_filepath,
        sheet_name=contacts_sheet_name,
        landlord_property_id=contacts_landlord_property_id,
        phone_number_column=contacts_phone_number_column,
        secondary_phone_number_column=contacts_secondary_phone_number_column,
        secondary_contact_full_name=contacts_secondary_contact_full_name,
        email_column=contacts_email_column,
        fullname_column=contacts_fullname_column,
        firstname_column=contacts_firstname_column,
        lastname_column=contacts_lastname_column
    )

    asset_list.prepare_for_crm(
        company_domain=customer_domain,
        installer_name=installer_name,
        reconcile_programme=reconcile_programme
    )

    # Normalise both contact phone columns to +44 format
    for phone_column in (
        "Phone <CONTACT phone>",
        "Secondary Phone <CONTACT secondary_phone_number>",
    ):
        asset_list.hubspot_data[phone_column] = _normalise_phone_column(
            asset_list.hubspot_data[phone_column]
        )

    # Remove the existing programme
    # existing_programme = pd.read_csv(existing_programme_filepath, encoding="utf-8-sig")
    # asset_list.hubspot_data = asset_list.hubspot_data[
    #     ~asset_list.hubspot_data["Domna Property ID <LISTING domna_property_id>"].isin(
    #         existing_programme['Domna Property ID'].values
    #     )
    # ]

    # Validate before writing anything, so a failed run never leaves a partial upload file behind
    missing_project_codes = int(
        pd.isnull(asset_list.hubspot_data['Project Code <DEAL project_code>']).sum()
    )
    if missing_project_codes:
        raise ValueError(
            f"{missing_project_codes} row(s) are missing a project code. "
            "Every row must have a project code before it can be uploaded to HubSpot."
        )

    if pd.isnull(asset_list.hubspot_data['Deal Stage <DEAL dealstage>']).any():
        raise ValueError("Warning: Some rows have missing deal stage. These will not be uploaded to HubSpot.")

    # Get the filepath and the filename. Append hubspot upload to the filename. We also change the file type to csv
    directory, filename = os.path.split(asset_list_filepath)
    name, _ = os.path.splitext(filename)
    output_filename = f"{name} - Hubspot Upload.csv"
    output_filepath = os.path.join(directory, output_filename)

    # Just store locally
    asset_list.hubspot_data.to_csv(output_filepath, index=False, encoding="utf-8-sig")
# # TODO: Set this up separately, but we associate multiple contacts to the same deal
# contact_details = pd.read_csv(
# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar Programme Hubspot "
# "Upload/Hubspot/contact "
# "details.csv"
# )
#
# # contacts_phone_number_column = "phone_number"
# # contacts_secondary_phone_number_column = "secondary_phone_number"
# # contacts_secondary_contact_full_name = "secondary_contact_full_name"
# # contacts_email_column = "email"
# # contacts_fullname_column = "fullname"
# # contacts_firstname_column = "First Name"
# # contacts_lastname_column = "Last Name"
# contact_details["phone_number"] = contact_details["Mobile Phone"].copy()
# # If phone number is NaN, we will use the landline number
# contact_details["phone_number"] = contact_details["phone_number"].fillna(contact_details["Landline"])
# contact_details["secondary_phone_number"] = contact_details["Landline"].copy()
# # If secondary phone number is the same as primary, we remove it
# import numpy as np
# contact_details["secondary_phone_number"] = np.where(
# contact_details["secondary_phone_number"] == contact_details["phone_number"],
# np.nan,
# contact_details["secondary_phone_number"]
# )
# contact_details = contact_details[
# ['Property Reference Number (Main Address) (Property)', "Email Address", "phone_number",
# "secondary_phone_number", "First Name", "Last Name"]].copy().rename(
# columns={"Property Reference Number (Main Address) (Property)": "landlord_proprty_id"}
# )
# contact_details["fullname"] = contact_details["First Name"] + " " + contact_details["Last Name"]
# # Format the phone numbers
#
# contact_details["phone_number"] = contact_details["phone_number"].astype(int).astype(str).apply(
# normalize_uk_phone)
# contact_details["secondary_phone_number"] = contact_details["secondary_phone_number"].astype("Int64").astype(
# str).apply(
# normalize_uk_phone)
#
# # Add in the Hubspot deal data
# hubspot_data = pd.read_csv(
# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar Programme Hubspot Upload/Hubspot/"
# "property-status.csv",
# encoding="utf-8-sig"
# )
# # Merge on contact details
# contact_details = hubspot_data[["Landlord Property ID", "Deal ID"]].merge(
# contact_details,
# how="left",
# right_on="landlord_proprty_id",
# left_on="Landlord Property ID"
# )
#
# contact_details = contact_details.drop(columns=["landlord_proprty_id"])
#
# # Store as csv
# contact_details.to_csv(
# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar "
# "Programme Hubspot Upload/Hubspot/"
# "contact_details.csv",
# index=False, encoding="utf-8-sig"
# )