Model/etl/customers/thrive/Project codes.py

"""
THis script will take the standardised asset list and append on the project codes.
We also, review the existing install status, in case anything is wrong
"""
import pandas as pd
import numpy as np

standardised_asset_list = pd.read_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Asset List - "
    "Complete - Updated May 2025 - Standardised.xlsx",
    sheet_name="Standardised Asset List",
)

project_code_allocations = pd.read_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Master Tracker (Thrive - "
    "Warmfront).xlsx",
    sheet_name="Master Tracker",
    header=1
)

programme_codes = project_code_allocations[
    ["UPRN", "Proposed Progamme", "New Proposed Programme", "Warmfront Survey Notes", ]
].copy()
programme_codes["programme_reference"] = programme_codes["New Proposed Programme"].copy()
programme_codes["programme_reference"] = np.where(
    pd.isnull(programme_codes["programme_reference"]),
    programme_codes["Proposed Progamme"],
    programme_codes["programme_reference"]
)

PROJECT_CODE_MAP = {
    'Phase 2': "THRIVE-002",
    'Phase 3': "THRIVE-003",
    'Phase 4': "THRIVE-004",
    'Phase 5': "THRIVE-005",
    'Phase 6': "THRIVE-006",
    'Phase 7': "THRIVE-007",
    'Phase 8': "THRIVE-008",
    'Phase 9': "THRIVE-009",
    'Phase 10': "THRIVE-010",
    "Week 1": "THRIVE-WEEK-001",
    "Week 2": "THRIVE-WEEK-002",
    "Week 4": "THRIVE-WEEK-004",
    "Week 7": "THRIVE-WEEK-007",
}
programme_codes["project_code"] = programme_codes["programme_reference"].map(PROJECT_CODE_MAP)

thrive_notes = project_code_allocations[["UPRN", "Thrive Notes", "Priority", "Notes Reply (Thrive)"]].copy()

standardised_asset_list = standardised_asset_list.merge(
    programme_codes[["UPRN", "project_code", "programme_reference"]],
    how="left",
    left_on="landlord_property_id",
    right_on="UPRN",
).merge(
    thrive_notes[["UPRN", "Thrive Notes", "Priority", "Notes Reply (Thrive)"]],
    how="left",
    on="UPRN",
)

standardised_asset_list = standardised_asset_list.drop(columns=["UPRN"])

# We fill the project code for historical completions
standardised_asset_list["project_code"] = np.where(
    pd.isnull(standardised_asset_list["project_code"]) & (
        standardised_asset_list["hubspot_status"] != "READY TO BE SCHEDULED"
    ) & (
        ~pd.isnull(standardised_asset_list["hubspot_status"])
    ),
    "THRIVE-HISTORICAL",
    standardised_asset_list["project_code"]
)

# Store as an excel
filename = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Programme - "
            "reconciled.xlsx")
# Store the data in two tabs. One for the asset list with the EPC data and the second with the flat data
# Other tabs:
block_analysis = pd.read_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Asset List - "
    "Complete - Updated May 2025 - Standardised.xlsx",
    sheet_name="Block Analysis",
)
outcomes = pd.read_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Asset List - "
    "Complete - Updated May 2025 - Standardised.xlsx",
    sheet_name="Outcomes",
)
unmatched_submissions = pd.read_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Asset List - "
    "Complete - Updated May 2025 - Standardised.xlsx",
    sheet_name="Unmatched Submissions",
)
unmatched_ecosurv = pd.read_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Asset List - "
    "Complete - Updated May 2025 - Standardised.xlsx",
    sheet_name="Unmatched Ecosurv",
)

with pd.ExcelWriter(filename) as writer:
    standardised_asset_list.to_excel(writer, sheet_name="Standardised Asset List", index=False)
    block_analysis.to_excel(writer, sheet_name="Block Analysis", index=False)
    # If we have outcomes, we add a tab with the outcomes
    outcomes.to_excel(writer, sheet_name="Outcomes", index=False)
    unmatched_submissions.to_excel(writer, sheet_name="Unmatched Submissions", index=False)
    unmatched_ecosurv.to_excel(writer, sheet_name="Unmatched Ecosurv", index=False)

# A check, just comparing against the master tracker to make sure I have all of the installs
asset_list = pd.read_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Asset List - "
    "Complete - Updated May 2025 - Standardised.xlsx",
    sheet_name="Standardised Asset List",
)

master_tracker = pd.read_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Master Tracker (Thrive - "
    "Warmfront).xlsx",
    sheet_name="Master Tracker",
    header=1
)

df = asset_list[["landlord_property_id", "hubspot_status"]].merge(
    master_tracker[~pd.isnull(master_tracker['Date Completed'])][["UPRN", "Date Completed"]],
    how="inner",
    left_on="landlord_property_id",
    right_on="UPRN"
)

df["hubspot_status"].value_counts()
df[df["hubspot_status"] == "SUBMITTED TO INSTALLER"]