diff --git a/.idea/Model.iml b/.idea/Model.iml index 09f2e496..c6561970 100644 --- a/.idea/Model.iml +++ b/.idea/Model.iml @@ -7,7 +7,7 @@ - + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml index fb10c6b0..50cad4ca 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -3,7 +3,7 @@ - + diff --git a/asset_list/AssetList.py b/asset_list/AssetList.py index e68ee6dd..fea0f59e 100644 --- a/asset_list/AssetList.py +++ b/asset_list/AssetList.py @@ -279,6 +279,7 @@ class AssetList: STANDARD_HEATING_SYSTEM = "landlord_heating_system" STANDARD_EXISTING_PV = "landlord_existing_pv" STANDARD_SAP = "landlord_sap_rating" + STANDARD_BLOCK_REFERENCE = "landlord_block_reference" DOMNA_PROPERTY_ID = "domna_property_id" @@ -369,6 +370,7 @@ class AssetList: landlord_heating_system=None, landlord_existing_pv=None, landlord_sap=None, + landlord_block_reference=None, phase=False, header=0 ): @@ -382,7 +384,7 @@ class AssetList: self.standardised_asset_list = self.raw_asset_list.copy() # Will be used to store aggregated figures against the various work types self.work_type_figures = {} - self.flat_data = None + self.block_analysis_df = None self.duplicated_addresses = None self.contact_details = None self.contact_detail_fields = None @@ -425,6 +427,7 @@ class AssetList: self.landlord_heating_system = landlord_heating_system self.landlord_existing_pv = landlord_existing_pv self.landlord_sap = landlord_sap + self.landlord_block_reference = landlord_block_reference # parameters for cleaning self.full_address_cols_to_concat = full_address_cols_to_concat @@ -671,6 +674,7 @@ class AssetList: self.landlord_heating_system, self.landlord_existing_pv, self.landlord_sap, + self.landlord_block_reference, ] # Keep just non-null variables (e.g landlord may not provide uprn self.keep_variables = [v for v in variables if v is not None] @@ -688,6 +692,7 @@ class AssetList: self.landlord_heating_system: self.STANDARD_HEATING_SYSTEM, self.landlord_existing_pv: self.STANDARD_EXISTING_PV, self.landlord_sap: self.STANDARD_SAP, + self.landlord_block_reference: self.STANDARD_BLOCK_REFERENCE } self.rename_map = {k: v for k, v in self.rename_map.items() if k is not None} @@ -1905,7 +1910,7 @@ class AssetList: for col in ["cavity_reason", "solar_reason"]: self.standardised_asset_list[col] = np.where( ( - (~pd.isnull(self.standardised_asset_list["ecosurv_status"])) + (~pd.isnull(self.standardised_asset_list["ecosurv_install_status"])) ), None, self.standardised_asset_list[col] @@ -1978,42 +1983,42 @@ class AssetList: get_max_status_from_columns, axis=1 ) - def flat_analysis(self): + def block_analysis(self): - # We need to deduce the building name - we strip out the house number + if self.landlord_block_reference is None: + # This information is not available + return - # We want to deduce if flats have 50% of the properties below C75 - # We group by postcode and property type - grouped = self.standardised_asset_list.groupby( - [self.STANDARD_POSTCODE, self.STANDARD_PROPERTY_TYPE] - ) + # Reverse mapping: label -> enum + LABEL_TO_ENUM = {e.label: e for e in hubspot_config.HubspotProcessStatus} - flat_data = [] - for _, group in grouped: - if "flat" in group[self.STANDARD_PROPERTY_TYPE].values: - num_flats = group[self.STANDARD_PROPERTY_TYPE].shape[0] - num_below_c75 = group[ - self.EPC_API_DATA_NAMES["current-energy-efficiency"] - ].lt(self.FILLED_CAVITY_SAP_THRESHOLD).sum() - # Check if any flats are below C69 - num_flats_below_c69 = group[ - self.EPC_API_DATA_NAMES["current-energy-efficiency"] - ].lt(69).sum() + # Threshold status - anythign that is at this stage or beyond is considered surveyed + threshold = hubspot_config.HubspotProcessStatus.SURVEYED_COMPLETED_SIGNED_OFF.value - flat_data.append( - { - "Postcode": group[self.STANDARD_POSTCODE].iloc[0], - "Property Type": "Flat", - "Number of Flats with EPC": num_flats, - "Number of Flats below C75": num_below_c75, - "Proportion of Flat EPCs below C75": round(100 * num_below_c75 / num_flats), - "Number of Flats Below C69": num_flats_below_c69, - } - ) + block_analysis = [] + for block_reference, group in self.standardised_asset_list.groupby(self.STANDARD_BLOCK_REFERENCE): + cavity_breakdown = group["cavity_reason"].fillna("No Eligibility").value_counts(normalize=True) * 100 - flat_data = pd.DataFrame(flat_data) + if all(cavity_breakdown.index == "No Eligibility"): + continue - self.flat_data = flat_data + works = group["hubspot_status"] + above_threshold = works.map(LABEL_TO_ENUM.get).dropna() + count_above = (above_threshold >= threshold).sum() + proportion = count_above / len(works) + + block_analysis.append( + { + "Block Reference": block_reference, + "Proportion of properties suryeyed": proportion, + **cavity_breakdown.to_dict(), + } + ) + + block_analysis = pd.DataFrame(block_analysis) + block_analysis = block_analysis.fillna(0) + + self.block_analysis_df = block_analysis @staticmethod def split_full_name(x): @@ -2403,14 +2408,15 @@ class AssetList: self.ecosurv[["Reference", "Status", "Lead Status", "Tags"]].rename( columns={ "Reference": "ecosurv_reference", - "status": "ecosurv_status", + "Status": "ecosurv_status", "Lead Status": "ecosurv_lead_status", - "Tags": "ecosurv_tags" + "Tags": "ecosurv_tags", + "Installer": "ecosurv_installer" } ), how="left", on="ecosurv_reference" ) - matched["ecosurv_install_status"] = None + matched["ecosurv_install_status"] = hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER # This mapping is ordered by process order, where lodgment is the final step so if we have an indication # that the property is ready for lodgement, we set the status to that. We then proceed through the other @@ -2772,6 +2778,7 @@ class AssetList: ) measure_mix_col = "MEASURE COMBO" installer_notes_col = "INSTALLERS NOTES ; REASONS FOR CANCELLATIONS" + installer_col = "INSTALLER" logger.info("Matching master data to asset list") matched = [] @@ -2912,7 +2919,7 @@ class AssetList: matched = pd.DataFrame(matched) master_to_append = master_data[ - [scheme_col, "row_id", install_col, submission_col, measure_mix_col, installer_notes_col] + [scheme_col, "row_id", install_col, submission_col, measure_mix_col, installer_notes_col, installer_col] ].merge( matched, how="left", on="row_id" ).rename( @@ -2921,7 +2928,8 @@ class AssetList: measure_mix_col: "measure_mix", install_col: "survey_status", submission_col: "submission_date", - installer_notes_col: "submission_installer_notes" + installer_notes_col: "submission_installer_notes", + installer_col: "submission_installer" } ) master_to_append["submission_cancelled"] = ( diff --git a/asset_list/app.py b/asset_list/app.py index 31c404e5..41623880 100644 --- a/asset_list/app.py +++ b/asset_list/app.py @@ -82,6 +82,7 @@ def app(): landlord_existing_pv = None landlord_property_id = "thrive_property_id" landlord_sap = "sap_rating_updated" + landlord_block_reference = "block_reference" outcomes_filename = [ os.path.join(data_folder, "Thrive - Outcomes - April 24-March25 - Corrected.xlsx") ] @@ -119,6 +120,7 @@ def app(): landlord_existing_pv = "Low Carbon Technology (Solar PV)" landlord_property_id = "UPRN" landlord_sap = "SAP Score" + landlord_block_reference = None outcomes_filename = None outcomes_sheetname = None outcomes_postcode = None @@ -209,6 +211,7 @@ def app(): landlord_heating_system=landlord_heating_system, landlord_existing_pv=landlord_existing_pv, landlord_sap=landlord_sap, + landlord_block_reference=landlord_block_reference, phase=phase ) asset_list.init_standardise() @@ -479,7 +482,7 @@ def app(): # We now flag the status of the property asset_list.label_property_status() - asset_list.flat_analysis() + asset_list.block_analysis() asset_list.load_contact_details( local_filepath=os.path.join(data_folder, "Full property list wth D&V report V look up 12.2.25.xlsx"), @@ -526,7 +529,8 @@ def app(): with pd.ExcelWriter(filename) as writer: asset_list.standardised_asset_list.to_excel(writer, sheet_name="Standardised Asset List", index=False) - asset_list.flat_data.to_excel(writer, sheet_name="Flat Data", index=False) + if asset_list.block_analysis_df is not None: + asset_list.block_analysis_df.to_excel(writer, sheet_name="Block Analysis", index=False) # If we have outcomes, we add a tab with the outcomes if not asset_list.outcomes_for_output.empty: asset_list.outcomes_for_output.to_excel(writer, sheet_name="Outcomes", index=False) diff --git a/asset_list/hubspot/prepare_for_hubspot.py b/asset_list/hubspot/prepare_for_hubspot.py new file mode 100644 index 00000000..302d2673 --- /dev/null +++ b/asset_list/hubspot/prepare_for_hubspot.py @@ -0,0 +1,18 @@ +import pandas as pd + + +def app(): + """ + TODO: Operations may have removed some cavity_reason/solar_reason values from the standardised asset list after + review. So, we will need to update the hubspot status for these entries and set them to None, if they + were previously being set to ready for scheduling. We don't want to just filter on rows where + cavity_reason and solar_reason are populated, as if we want to include historical surveys, this will remove + them + + :return: + """ + + filepath = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive " + "Programme - reconciled.xlsx") + + standardised_asset_list = pd.read_excel(filepath, sheet_name="Standardised Asset List") diff --git a/backend/engine/engine.py b/backend/engine/engine.py index 58c3dc8e..5316fd03 100644 --- a/backend/engine/engine.py +++ b/backend/engine/engine.py @@ -507,7 +507,7 @@ async def model_engine(body: PlanTriggerRequest): ) # if we have a remote assment data type, we pull the additional data and include it - if body.event_type == "remote_assessment": + if (body.event_type == "remote_assessment") and not (epc_searcher.newest_epc["estimated"]): logger.info("Retrieving find my epc data") try: property_non_invasive_recommendations, patch = RetrieveFindMyEpc.get_from_epc( diff --git a/etl/customers/thrive/Project codes.py b/etl/customers/thrive/Project codes.py new file mode 100644 index 00000000..6235ebed --- /dev/null +++ b/etl/customers/thrive/Project codes.py @@ -0,0 +1,108 @@ +""" +THis script will take the standardised asset list and append on the project codes. +We also, review the existing install status, in case anything is wrong +""" +import pandas as pd +import numpy as np + +standardised_asset_list = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Asset List - " + "Complete - Updated May 2025 - Standardised.xlsx", + sheet_name="Standardised Asset List", +) + +project_code_allocations = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Master Tracker (Thrive - " + "Warmfront).xlsx", + sheet_name="Master Tracker", + header=1 +) + +programme_codes = project_code_allocations[ + ["UPRN", "Proposed Progamme", "New Proposed Programme", "Warmfront Survey Notes", ] +].copy() +programme_codes["programme_reference"] = programme_codes["New Proposed Programme"].copy() +programme_codes["programme_reference"] = np.where( + pd.isnull(programme_codes["programme_reference"]), + programme_codes["Proposed Progamme"], + programme_codes["programme_reference"] +) + +PROJECT_CODE_MAP = { + 'Phase 2': "THRIVE-002", + 'Phase 3': "THRIVE-003", + 'Phase 4': "THRIVE-004", + 'Phase 5': "THRIVE-005", + 'Phase 6': "THRIVE-006", + 'Phase 7': "THRIVE-007", + 'Phase 8': "THRIVE-008", + 'Phase 9': "THRIVE-009", + 'Phase 10': "THRIVE-010", + "Week1": "THRIVE-WEEK-001", + "Week2": "THRIVE-WEEK-002", + "Week4": "THRIVE-WEEK-004", + "Week7": "THRIVE-WEEK-007", +} +programme_codes["project_code"] = programme_codes["programme_reference"].map(PROJECT_CODE_MAP) + +thrive_notes = project_code_allocations[["UPRN", "Thrive Notes", "Priority", "Notes Reply (Thrive)"]].copy() + +standardised_asset_list = standardised_asset_list.merge( + programme_codes[["UPRN", "project_code", "programme_reference"]], + how="left", + left_on="landlord_property_id", + right_on="UPRN", +).merge( + thrive_notes[["UPRN", "Thrive Notes", "Priority", "Notes Reply (Thrive)"]], + how="left", + on="UPRN", +) + +standardised_asset_list = standardised_asset_list.drop(columns=["UPRN"]) + +# We fill the project code for historical completions +standardised_asset_list["project_code"] = np.where( + pd.isnull(standardised_asset_list["project_code"]) & ( + standardised_asset_list["hubspot_status"] != "READY TO BE SCHEDULED" + ) & ( + ~pd.isnull(standardised_asset_list["hubspot_status"]) + ), + "THRIVE-HISTORICAL", + standardised_asset_list["project_code"] +) + +# Store as an excel +filename = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Programme - " + "reconciled.xlsx") +# Store the data in two tabs. One for the asset list with the EPC data and the second with the flat data +# Other tabs: +block_analysis = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Asset List - " + "Complete - Updated May 2025 - Standardised.xlsx", + sheet_name="Block Analysis", +) +outcomes = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Asset List - " + "Complete - Updated May 2025 - Standardised.xlsx", + sheet_name="Outcomes", +) +unmatched_submissions = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Asset List - " + "Complete - Updated May 2025 - Standardised.xlsx", + sheet_name="Unmatched Submissions", +) +unmatched_ecosurv = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Asset List - " + "Complete - Updated May 2025 - Standardised.xlsx", + sheet_name="Unmatched Ecosurv", +) + +with pd.ExcelWriter(filename) as writer: + standardised_asset_list.to_excel(writer, sheet_name="Standardised Asset List", index=False) + block_analysis.to_excel(writer, sheet_name="Block Analysis", index=False) + # If we have outcomes, we add a tab with the outcomes + outcomes.to_excel(writer, sheet_name="Outcomes", index=False) + + unmatched_submissions.to_excel(writer, sheet_name="Unmatched Submissions", index=False) + + unmatched_ecosurv.to_excel(writer, sheet_name="Unmatched Ecosurv", index=False)