diff --git a/asset_list/AssetList.py b/asset_list/AssetList.py index 199b175c..e68ee6dd 100644 --- a/asset_list/AssetList.py +++ b/asset_list/AssetList.py @@ -4,8 +4,8 @@ import re import tiktoken from pprint import pprint from datetime import datetime +import asset_list.hubspot.config as hubspot_config -from numpy.ma.core import masked_not_equal from openai import OpenAI import numpy as np import pandas as pd @@ -292,6 +292,13 @@ class AssetList: "Any further surveyor notes", 'Surveyors Name' ] + NON_INTRUSIVES_NEW_FORMAT_COLNAMES = [ + "Has the property been re-walled?", "Is the property tile hung?", "Does the property have a render?", + "Does the property have cladding?", "Gable Wall Obstructions", + "Does the property have foliage that needs removal?", + "Potential unsafe environment", "Date of Inspection" + ] + NON_INTRUSIVES_ELIGIBILITY_COLUMN = "Eligibility (Red/Yellow/Green)" OLD_FORMAT_NON_INTRUSIVE_COLNAMES = ['WFT Findings', 'ECO Eligibility'] @@ -400,6 +407,10 @@ class AssetList: self.non_intrusives_eligibility = "Eligibility (Red/Yellow/Green)" in self.raw_asset_list.columns + self.new_format_non_insturives_present = ( + "Has the property been re-walled?" 
in self.standardised_asset_list.columns + ) + # Names of columns self.landlord_property_id = landlord_property_id self.address1_colname = address1_colname @@ -687,6 +698,9 @@ class AssetList: if self.non_intrusives_eligibility: non_intrusive_columns.append(self.NON_INTRUSIVES_ELIGIBILITY_COLUMN) + if self.new_format_non_insturives_present: + non_intrusive_columns += self.NON_INTRUSIVES_NEW_FORMAT_COLNAMES + if self.old_format_non_intrusives_present: # We check if we have the ECO Eligibility column, which we might not have non_intrusive_columns = [ @@ -931,6 +945,23 @@ class AssetList: self.standardised_asset_list[self.STANDARD_LANDLORD_PROPERTY_ID].astype(str) ) + # CLean up the standard SAP column, that can be problematic + if self.landlord_sap is not None: + self.standardised_asset_list[self.STANDARD_SAP] = ( + self.standardised_asset_list[self.STANDARD_SAP] + .astype(str) + .str.replace('\xa0', ' ', regex=False) + .str.strip() + ) + self.standardised_asset_list[self.STANDARD_SAP] = np.where( + self.standardised_asset_list[self.STANDARD_SAP] == "", + None, + self.standardised_asset_list[self.STANDARD_SAP] + ) + self.standardised_asset_list[self.STANDARD_SAP] = ( + self.standardised_asset_list[self.STANDARD_SAP].astype(float) + ) + def merge_data(self, df: pd.DataFrame): """ Used to insert data into the standardised asset list, based on the domna property id @@ -1864,7 +1895,7 @@ class AssetList: for col in ["cavity_reason", "solar_reason"]: self.standardised_asset_list[col] = np.where( ( - (~pd.isnull(self.standardised_asset_list["submission_date"])) + (~pd.isnull(self.standardised_asset_list["submission_status"])) ), None, self.standardised_asset_list[col] @@ -1874,7 +1905,7 @@ class AssetList: for col in ["cavity_reason", "solar_reason"]: self.standardised_asset_list[col] = np.where( ( - (~pd.isnull(self.standardised_asset_list["ecosurv_reference"])) + (~pd.isnull(self.standardised_asset_list["ecosurv_status"])) ), None, self.standardised_asset_list[col] @@ 
def label_property_status(self):
    """
    Label every property with the furthest HubSpot process stage it is known to have reached.

    The result is written to a "hubspot_status" column on ``self.standardised_asset_list`` in two steps:

    1. Any property with a non-null "cavity_reason" or "solar_reason" starts as READY TO BE SCHEDULED;
       every other property starts as None.
    2. The "submission_status", "ecosurv_install_status" and "outcome_status" columns (whichever of them
       exist on the asset list) are read as HubspotProcessStatus labels. Where at least one of them holds
       a recognised label, the stage with the highest enum value replaces the value from step 1. Values
       that are not recognised labels (including nulls) are ignored.

    This function is designed to be run after identify_worktypes() has been run - presumably that is the
    step that populates "cavity_reason" and "solar_reason"; both columns must exist or a KeyError is raised.

    :return: None - the standardised asset list is updated in place
    """
    status_enum = hubspot_config.HubspotProcessStatus
    asset_list = self.standardised_asset_list

    # For anything that is ready to go, that gets set to ready to be scheduled
    has_identified_work = (
        asset_list["cavity_reason"].notna() | asset_list["solar_reason"].notna()
    )
    asset_list["hubspot_status"] = np.where(
        has_identified_work,
        status_enum.READY_TO_BE_SCHEDULED.label,
        None
    )

    # We utilise submissions, ecosurv and outcomes to define the hubspot status, taking the maximum of
    # whatever they report, based on the enum integer value
    label_to_enum = {status.label: status for status in status_enum}

    # Not every run has all three sources merged on, so we only read the columns that are present
    status_columns = [
        col for col in ("submission_status", "ecosurv_install_status", "outcome_status")
        if col in asset_list.columns
    ]

    # A single pass over the columns replaces a row-wise DataFrame.apply: it avoids building a Series per
    # row, and it still yields a plain list (of length zero) when the asset list is empty
    resolved_statuses = []
    for fallback, *labels in zip(
        asset_list["hubspot_status"], *(asset_list[col] for col in status_columns)
    ):
        candidates = [label_to_enum[label] for label in labels if label in label_to_enum]
        # Fall back to the step 1 status if none of the sources has an update
        resolved_statuses.append(max(candidates).label if candidates else fallback)

    asset_list["hubspot_status"] = resolved_statuses
"Reference": "ecosurv_reference", + "status": "ecosurv_status", + "Lead Status": "ecosurv_lead_status", + "Tags": "ecosurv_tags" + } + ), how="left", on="ecosurv_reference" + ) + + matched["ecosurv_install_status"] = None + + # This mapping is ordered by process order, where lodgment is the final step so if we have an indication + # that the property is ready for lodgement, we set the status to that. We then proceed through the other + # statuses where the penultimate status is install complete + mapping = { + "Cancelled": hubspot_config.HubspotProcessStatus.INSTALLER_CANCELLED_FINALIZED, + "TrustMark: Lodged": hubspot_config.HubspotProcessStatus.LODGEMENT_COMPLETE, + "Retrofit: Complete": hubspot_config.HubspotProcessStatus.INSTALL_COMPLETE, + "Retrofit: Awaiting TrustMark": hubspot_config.HubspotProcessStatus.INSTALL_COMPLETE, + "Retrofit: Awaiting post checks": hubspot_config.HubspotProcessStatus.INSTALL_COMPLETE, + "Installer Notification Sent": hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER, + "Submitted to RC": hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER, + "COONEY": hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER, + "Signed off for install": hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER, + "Retrofit: Signed off for install": hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER, + "Audit": hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER, + "Accepted": hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER, + "Sold": hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER + } + + def get_max_status(tag_str): + if pd.isna(tag_str): + return None + matched_statuses = [] + for tag, status in mapping.items(): + if tag in tag_str: + matched_statuses.append(status) + if not matched_statuses: + return None + return max(matched_statuses).label + + matched["ecosurv_install_status"] = matched["ecosurv_tags"].apply(get_max_status) + self.standardised_asset_list = 
self.standardised_asset_list.merge( matched, how="left", @@ -2380,7 +2493,7 @@ class AssetList: # Perform the remap outcomes["Outcome"] = outcomes["Notes / Outcomes"].map(remap_dictionary) - outcomes["Outcome"] = outcomes["Outcome"].str.lower() + outcomes["Outcome"] = outcomes["Outcome"].str.lower().str.strip() logger.info("Matching outcomes to asset list") # Merge the outcomes onto the asset list - we check we're able to match sufficiently well @@ -2542,12 +2655,13 @@ class AssetList: apply(get_latest_note). reset_index(drop=True) ) - latest_note = latest_note[["domna_property_id", notes_col]] + latest_note = latest_note[["domna_property_id", notes_col, "Outcome"]].rename( + columns={"Notes": "latest_outcome_note", "Outcome": "latest_outcome"} + ) pivot_df = lookup.groupby(["domna_property_id", "Outcome"]).size().unstack(fill_value=0).reset_index() - pivot_df = pivot_df.merge( - visit_counts, how="left", on="domna_property_id" - ) + pivot_df = pivot_df.merge(visit_counts, how="left", on="domna_property_id") + pivot_df = pivot_df.merge(latest_note, how="left", on="domna_property_id") # We want the latest note @@ -2558,15 +2672,32 @@ class AssetList: self.outcomes["matched_to_asset_list"] = self.outcomes["row_id"].isin(lookup["row_id"].values) self.outcomes = self.outcomes.merge(lookup[["row_id", "domna_property_id"]], how="left", on="row_id") + # We flag the outcome status, based on the outcome + pivot_df["outcome_status"] = None + + if "surveyed" in pivot_df.columns: + pivot_df["outcome_status"] = np.where( + pivot_df["surveyed"] > 0, hubspot_config.HubspotProcessStatus.SURVEYED_COMPLETED_SIGNED_OFF.label, + pivot_df["outcome_status"] + ) + + if "installer refusal" in pivot_df.columns: + pivot_df["outcome_status"] = np.where( + pivot_df["installer refusal"] > 0, hubspot_config.HubspotProcessStatus.NOT_VIABLE.label, + pivot_df["outcome_status"] + ) + + pivot_df["outcome_status"] = np.where( + pivot_df["latest_outcome"].isin(["see notes"]) & + 
(pivot_df["outcome_status"] != hubspot_config.HubspotProcessStatus.SURVEYED_COMPLETED_SIGNED_OFF.label), + hubspot_config.HubspotProcessStatus.SURVEYED_NO_ACCESS_NEEDS_SIGN_OFF.label, + pivot_df["outcome_status"] + ) + # We merge out pivoted outcomes onto the asset list self.standardised_asset_list = self.standardised_asset_list.merge( pivot_df, how="left", left_on=self.DOMNA_PROPERTY_ID, right_on="domna_property_id" ) - # Merge the latest note - self.standardised_asset_list = self.standardised_asset_list.merge( - latest_note.rename(columns={notes_col: "Latest Route March Note"}), - how="left", left_on=self.DOMNA_PROPERTY_ID, right_on="domna_property_id" - ) if self.standardised_asset_list[self.DOMNA_PROPERTY_ID].duplicated().sum(): raise ValueError("Duplicates appreared - something went wrong") @@ -2640,6 +2771,7 @@ class AssetList: master_data.columns else "PROPERTY TYPE As per table emailed" ) measure_mix_col = "MEASURE COMBO" + installer_notes_col = "INSTALLERS NOTES ; REASONS FOR CANCELLATIONS" logger.info("Matching master data to asset list") matched = [] @@ -2774,19 +2906,30 @@ class AssetList: self.standardised_asset_list = self.standardised_asset_list.drop(columns="house_no") # We match the "UPRN" which is the landlords ID, onto the master sheet + + if measure_mix_col not in master_data.columns: + master_data[measure_mix_col] = "Measure mix not recorded" + matched = pd.DataFrame(matched) - master_to_append = master_data[[scheme_col, "row_id", install_col, submission_col, measure_mix_col]].merge( + master_to_append = master_data[ + [scheme_col, "row_id", install_col, submission_col, measure_mix_col, installer_notes_col] + ].merge( matched, how="left", on="row_id" ).rename( columns={ scheme_col: "funding_scheme", measure_mix_col: "measure_mix", install_col: "survey_status", - submission_col: "submission_date" + submission_col: "submission_date", + installer_notes_col: "submission_installer_notes" } ) - master_to_append["cancelled"] = 
master_to_append["survey_status"].str.lower().str.contains("cancel") - master_to_append["installed"] = master_to_append["survey_status"].str.lower().str.contains("installed") + master_to_append["submission_cancelled"] = ( + master_to_append["survey_status"].str.lower().str.contains("cancel") + ) + master_to_append["submission_installed"] = ( + master_to_append["survey_status"].str.lower().str.contains("installed") + ) master_surveyed.append(master_to_append) unmatched_df = master_data[ master_data["row_id"].isin(unmatched) @@ -2822,7 +2965,21 @@ class AssetList: ].astype(str) # We de-dupe crudely on landlord property id - self.master_surveyed = master_surveyed.drop_duplicates(subset=[self.STANDARD_LANDLORD_PROPERTY_ID]) + self.master_surveyed = master_surveyed.drop_duplicates(subset=[self.STANDARD_LANDLORD_PROPERTY_ID]).copy() + + # We now add the submission status, based on the hubspot stages + self.master_surveyed["submission_status"] = hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER.label + self.master_surveyed["submission_status"] = np.where( + self.master_surveyed["submission_cancelled"] == True, + hubspot_config.HubspotProcessStatus.INSTALLER_CANCELLED_FINALIZED.label, + self.master_surveyed["submission_status"] + ) + + self.master_surveyed["submission_status"] = np.where( + self.master_surveyed["submission_installed"] == True, + hubspot_config.HubspotProcessStatus.INSTALL_COMPLETE.label, + self.master_surveyed["submission_status"] + ) self.standardised_asset_list = self.standardised_asset_list.merge( self.master_surveyed, how="left", on=self.STANDARD_LANDLORD_PROPERTY_ID diff --git a/asset_list/app.py b/asset_list/app.py index 3441e5de..31c404e5 100644 --- a/asset_list/app.py +++ b/asset_list/app.py @@ -99,66 +99,36 @@ def app(): phase = False ecosurv_landlords = "thrive" - # Medway - data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Medway" - data_filename = "MEDWAY Asset List.xlsx" - sheet_name = "Asset list" + # Torus + 
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Torus/Phase 2" + data_filename = "Torus Property Asset List - INSPECTIONS.xlsx" + sheet_name = "TORUS" postcode_column = 'Postcode' fulladdress_column = None - address1_column = "House Number" + address1_column = "AddressLine1" address1_method = None - address_cols_to_concat = ["House Number", "Street 1"] + address_cols_to_concat = ["AddressLine1", "AddressLine2", "AddressLine3"] missing_postcodes_method = None - landlord_year_built = "Year Built" - landlord_os_uprn = None - landlord_property_type = "Property Type - Academy" - landlord_built_form = "Property Type - Academy" - landlord_wall_construction = None - landlord_roof_construction = None - landlord_heating_system = None - landlord_existing_pv = None - landlord_property_id = "Row ID" - landlord_sap = None - outcomes_filename = [] - outcomes_sheetname = [] - outcomes_postcode = [] - outcomes_houseno = [] - outcomes_id = [] - outcomes_address = [] - master_filepaths = [] - master_to_asset_list_filepath = None - phase = False - ecosurv_landlords = None - - # MHS - data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MHS" - data_filename = "MHS HOMES (Full Asset List) - for programme build.xlsx" - sheet_name = "Sheet1" - postcode_column = 'Postcode' - fulladdress_column = "FullAddress" - address1_column = None - address1_method = "house_number_extraction" - address_cols_to_concat = [] - missing_postcodes_method = None - landlord_year_built = "BuiltInYear" - landlord_os_uprn = None - landlord_property_type = "AssetType" - landlord_built_form = "PropertyType" - landlord_wall_construction = None - landlord_roof_construction = None - landlord_heating_system = None - landlord_existing_pv = None + landlord_year_built = "Property Age" + landlord_os_uprn = "NatUPRN" + landlord_property_type = "Property Type" + landlord_built_form = "Built Form" + landlord_wall_construction = "Wall Construction" + landlord_roof_construction = "Roof 
class HubspotProcessStatus(IntEnum):
    """
    Process stages a property can be in, each carrying a human-readable label.

    Members are declared as ``(value, label)``. The numerical values aren't important in themselves, but
    they define the order of the stages: members compare as integers, so taking the ``max()`` of several
    members yields the one that is furthest along. The label is available as ``member.label``.
    """

    # Human-readable name of the stage, set per member in __new__
    label: str

    def __new__(cls, value, label):
        # Build the member from the integer only, then attach the label as a plain attribute so that it
        # takes no part in comparisons or in lookups by value
        member = int.__new__(cls, value)
        member._value_ = value
        member.label = label
        return member

    # First stage: a survey is ready to go
    READY_TO_BE_SCHEDULED = 1, "READY TO BE SCHEDULED"

    # The surveyor could not get access to the property, and this needs sign off
    SURVEYED_NO_ACCESS_NEEDS_SIGN_OFF = 2, "SURVEYED - NO ACCESS - NEED SIGN OFF"

    # The survey has been completed. We have no update yet as to whether the property has been installed
    SURVEYED_COMPLETED_SIGNED_OFF = 3, "SURVEYED - COMPLETED - SIGNED OFF"

    # The property turned out to be ineligible
    NOT_VIABLE = 4, "NOT VIABLE"

    # The property is with the installer. This will likely be the default for historic programmes
    SUBMITTED_TO_INSTALLER = 5, "SUBMITTED TO INSTALLER"

    # The property has been installed
    INSTALL_COMPLETE = 6, "INSTALL COMPLETE"

    # The install has completed and lodgement is complete
    LODGEMENT_COMPLETE = 7, "LODGEMENT COMPLETE"

    # The property has been cancelled
    INSTALLER_CANCELLED_FINALIZED = 8, "INSTALLER CANCELLED - FINALIZED"