Model/asset_list/hubspot/prepare_for_hubspot.py

import os
import pandas as pd
from asset_list.AssetList import AssetList

import re


def normalize_uk_phone(number: str | float | int) -> str | None:
    if pd.isna(number):
        return None

    number = str(number)
    number = re.sub(r"[^\d+]", "", number)

    # Handle common short inputs: add '0' if likely missing
    if re.match(r"^7\d{8,9}$", number) or re.match(r"^1\d{8,9}$", number):
        number = "0" + number

    # Convert to international format
    if number.startswith("0"):
        number = "+44" + number[1:]
    elif number.startswith("0044"):
        number = "+" + number[2:]

    # Must be +44 followed by 10 digits (some area codes may vary)
    if re.match(r"^\+44\d{9,10}$", number):
        return number

    return None


def app():
    """
    TODO: Operations may have removed some cavity_reason/solar_reason values from the standardised asset list after
          review. So, we will need to update the hubspot status for these entries and set them to None, if they
          were previously being set to ready for scheduling. We don't want to just filter on rows where
          cavity_reason and solar_reason are populated, as if we want to include historical surveys, this will remove
          them


    TODO: If we wish to upload deals in batches

    :return:
    """

    # inputs:
    reconcile_programme = False  # If True, the hubspot upload will include all properties with a project code
    customer_domain = "https://shgroup.org.uk"
    installer_name = "SCIS"
    asset_list_filepath = (
        "/Users/khalimconn-kowlessar/Downloads/20250701 Optivo Southern - Standardised.xlsx"
    )
    asset_list_sheet_name = "Solar Route Revised (100)"
    asset_list_header = 0

    contact_details_filepath = (
        "/Users/khalimconn-kowlessar/Downloads/southern_optivo_solar_pv.xlsx"
    )
    contacts_sheet_name = "Sheet1"
    contacts_landlord_property_id = "landlord_property_id"
    contacts_phone_number_column = "Primary phone number"
    contacts_secondary_phone_number_column = "Secondary phone number"
    contacts_secondary_contact_full_name = None
    contacts_email_column = "Email Address"
    contacts_fullname_column = None
    contacts_firstname_column = "Name"
    contacts_lastname_column = None

    existing_programme_filepath = None

    asset_list = AssetList.load_standardised_asset_list(
        asset_list_filepath, asset_list_sheet_name, asset_list_header
    )
    asset_list.load_contact_details(
        local_filepath=contact_details_filepath,
        sheet_name=contacts_sheet_name,
        landlord_property_id=contacts_landlord_property_id,
        phone_number_column=contacts_phone_number_column,
        secondary_phone_number_column=contacts_secondary_phone_number_column,
        secondary_contact_full_name=contacts_secondary_contact_full_name,
        email_column=contacts_email_column,
        fullname_column=contacts_fullname_column,
        firstname_column=contacts_firstname_column,
        lastname_column=contacts_lastname_column
    )

    asset_list.prepare_for_crm(
        company_domain=customer_domain,
        installer_name=installer_name,
        reconcile_programme=reconcile_programme
    )

    for x in asset_list.hubspot_data["Phone <CONTACT phone>"].values:
        normalize_uk_phone(x)

    asset_list.hubspot_data["Phone <CONTACT phone>"] = (
        asset_list.hubspot_data["Phone <CONTACT phone>"].astype("Int64").astype(str).apply(normalize_uk_phone)
    )
    asset_list.hubspot_data["Secondary Phone <CONTACT secondary_phone_number>"] = asset_list.hubspot_data[
        "Secondary Phone <CONTACT secondary_phone_number>"].astype(
        "Int64").astype(
        str).apply(
        normalize_uk_phone)

    # Remove the existing programme
    # existing_programme = pd.read_csv(existing_programme_filepath, encoding="utf-8-sig")
    # asset_list.hubspot_data = asset_list.hubspot_data[
    #     ~asset_list.hubspot_data["Domna Property ID <LISTING domna_property_id>"].isin(
    #         existing_programme['Domna Property ID'].values
    #     )
    # ]

    # Get the filepath and the filename. Append hubspot upload to the filename. We also change the file type to csv
    directory, filename = os.path.split(asset_list_filepath)
    name, ext = os.path.splitext(filename)
    output_filename = f"{name} - Hubspot Upload.csv"
    output_filepath = os.path.join(directory, output_filename)

    if pd.isnull(asset_list.hubspot_data['Project Code <DEAL project_code>']).sum():
        raise ValueError("FIX MEEE")

    if pd.isnull(asset_list.hubspot_data['Deal Stage <DEAL dealstage>']).any():
        raise ValueError("Warning: Some rows have missing deal stage. These will not be uploaded to HubSpot.")

    # Just store locally
    asset_list.hubspot_data.to_csv(output_filepath, index=False, encoding="utf-8-sig")

    # # TODO: Set this up separately, but we associate multiple contacts to the same deal
    # contact_details = pd.read_csv(
    #     "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar Programme Hubspot "
    #     "Upload/Hubspot/contact "
    #     "details.csv"
    # )
    #
    # # contacts_phone_number_column = "phone_number"
    # #     contacts_secondary_phone_number_column = "secondary_phone_number"
    # #     contacts_secondary_contact_full_name = "secondary_contact_full_name"
    # #     contacts_email_column = "email"
    # #     contacts_fullname_column = "fullname"
    # #     contacts_firstname_column = "First Name"
    # #     contacts_lastname_column = "Last Name"
    # contact_details["phone_number"] = contact_details["Mobile Phone"].copy()
    # # If phone number is NaN, we will use the landline number
    # contact_details["phone_number"] = contact_details["phone_number"].fillna(contact_details["Landline"])
    # contact_details["secondary_phone_number"] = contact_details["Landline"].copy()
    # # If secondary phone number is the same as primary, we remove it
    # import numpy as np
    # contact_details["secondary_phone_number"] = np.where(
    #     contact_details["secondary_phone_number"] == contact_details["phone_number"],
    #     np.nan,
    #     contact_details["secondary_phone_number"]
    # )
    # contact_details = contact_details[
    #     ['Property Reference Number (Main Address) (Property)', "Email Address", "phone_number",
    #      "secondary_phone_number", "First Name", "Last Name"]].copy().rename(
    #     columns={"Property Reference Number (Main Address) (Property)": "landlord_proprty_id"}
    # )
    # contact_details["fullname"] = contact_details["First Name"] + " " + contact_details["Last Name"]
    # # Format the phone numbers
    #
    # contact_details["phone_number"] = contact_details["phone_number"].astype(int).astype(str).apply(
    # normalize_uk_phone)
    # contact_details["secondary_phone_number"] = contact_details["secondary_phone_number"].astype("Int64").astype(
    #     str).apply(
    #     normalize_uk_phone)
    #
    # # Add in the Hubspot deal data
    # hubspot_data = pd.read_csv(
    #     "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar Programme Hubspot Upload/Hubspot/"
    #     "property-status.csv",
    #     encoding="utf-8-sig"
    # )
    # # Merge on contact details
    # contact_details = hubspot_data[["Landlord Property ID", "Deal ID"]].merge(
    #     contact_details,
    #     how="left",
    #     right_on="landlord_proprty_id",
    #     left_on="Landlord Property ID"
    # )
    #
    # contact_details = contact_details.drop(columns=["landlord_proprty_id"])
    #
    # # Store as csv
    # contact_details.to_csv(
    #     "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Solar "
    #     "Programme Hubspot Upload/Hubspot/"
    #     "contact_details.csv",
    #     index=False, encoding="utf-8-sig"
    # )