Don't fetch from the Find My EPC website when the property doesn't have an EPC

This commit is contained in:
Khalim Conn-Kowlessar 2025-05-23 10:23:38 +01:00
parent 2e041bfe75
commit 1e0fbb111d
7 changed files with 180 additions and 42 deletions

2
.idea/Model.iml generated
View file

@ -7,7 +7,7 @@
<sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
</content>
<orderEntry type="jdk" jdkName="AssetList" jdkType="Python SDK" />
<orderEntry type="jdk" jdkName="Fastapi-backend" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>

2
.idea/misc.xml generated
View file

@ -3,7 +3,7 @@
<component name="Black">
<option name="sdkName" value="Python 3.10 (backend)" />
</component>
<component name="ProjectRootManager" version="2" project-jdk-name="AssetList" project-jdk-type="Python SDK" />
<component name="ProjectRootManager" version="2" project-jdk-name="Fastapi-backend" project-jdk-type="Python SDK" />
<component name="PyCharmProfessionalAdvertiser">
<option name="shown" value="true" />
</component>

View file

@ -279,6 +279,7 @@ class AssetList:
STANDARD_HEATING_SYSTEM = "landlord_heating_system"
STANDARD_EXISTING_PV = "landlord_existing_pv"
STANDARD_SAP = "landlord_sap_rating"
STANDARD_BLOCK_REFERENCE = "landlord_block_reference"
DOMNA_PROPERTY_ID = "domna_property_id"
@ -369,6 +370,7 @@ class AssetList:
landlord_heating_system=None,
landlord_existing_pv=None,
landlord_sap=None,
landlord_block_reference=None,
phase=False,
header=0
):
@ -382,7 +384,7 @@ class AssetList:
self.standardised_asset_list = self.raw_asset_list.copy()
# Will be used to store aggregated figures against the various work types
self.work_type_figures = {}
self.flat_data = None
self.block_analysis_df = None
self.duplicated_addresses = None
self.contact_details = None
self.contact_detail_fields = None
@ -425,6 +427,7 @@ class AssetList:
self.landlord_heating_system = landlord_heating_system
self.landlord_existing_pv = landlord_existing_pv
self.landlord_sap = landlord_sap
self.landlord_block_reference = landlord_block_reference
# parameters for cleaning
self.full_address_cols_to_concat = full_address_cols_to_concat
@ -671,6 +674,7 @@ class AssetList:
self.landlord_heating_system,
self.landlord_existing_pv,
self.landlord_sap,
self.landlord_block_reference,
]
# Keep just the non-null variables (e.g. the landlord may not provide a uprn)
self.keep_variables = [v for v in variables if v is not None]
@ -688,6 +692,7 @@ class AssetList:
self.landlord_heating_system: self.STANDARD_HEATING_SYSTEM,
self.landlord_existing_pv: self.STANDARD_EXISTING_PV,
self.landlord_sap: self.STANDARD_SAP,
self.landlord_block_reference: self.STANDARD_BLOCK_REFERENCE
}
self.rename_map = {k: v for k, v in self.rename_map.items() if k is not None}
@ -1905,7 +1910,7 @@ class AssetList:
for col in ["cavity_reason", "solar_reason"]:
self.standardised_asset_list[col] = np.where(
(
(~pd.isnull(self.standardised_asset_list["ecosurv_status"]))
(~pd.isnull(self.standardised_asset_list["ecosurv_install_status"]))
),
None,
self.standardised_asset_list[col]
@ -1978,42 +1983,42 @@ class AssetList:
get_max_status_from_columns, axis=1
)
def flat_analysis(self):
def block_analysis(self):
# We need to deduce the building name - we strip out the house number
if self.landlord_block_reference is None:
# This information is not available
return
# We want to deduce if flats have 50% of the properties below C75
# We group by postcode and property type
grouped = self.standardised_asset_list.groupby(
[self.STANDARD_POSTCODE, self.STANDARD_PROPERTY_TYPE]
)
# Reverse mapping: label -> enum
LABEL_TO_ENUM = {e.label: e for e in hubspot_config.HubspotProcessStatus}
flat_data = []
for _, group in grouped:
if "flat" in group[self.STANDARD_PROPERTY_TYPE].values:
num_flats = group[self.STANDARD_PROPERTY_TYPE].shape[0]
num_below_c75 = group[
self.EPC_API_DATA_NAMES["current-energy-efficiency"]
].lt(self.FILLED_CAVITY_SAP_THRESHOLD).sum()
# Check if any flats are below C69
num_flats_below_c69 = group[
self.EPC_API_DATA_NAMES["current-energy-efficiency"]
].lt(69).sum()
# Threshold status - anything that is at this stage or beyond is considered surveyed
threshold = hubspot_config.HubspotProcessStatus.SURVEYED_COMPLETED_SIGNED_OFF.value
flat_data.append(
{
"Postcode": group[self.STANDARD_POSTCODE].iloc[0],
"Property Type": "Flat",
"Number of Flats with EPC": num_flats,
"Number of Flats below C75": num_below_c75,
"Proportion of Flat EPCs below C75": round(100 * num_below_c75 / num_flats),
"Number of Flats Below C69": num_flats_below_c69,
}
)
block_analysis = []
for block_reference, group in self.standardised_asset_list.groupby(self.STANDARD_BLOCK_REFERENCE):
cavity_breakdown = group["cavity_reason"].fillna("No Eligibility").value_counts(normalize=True) * 100
flat_data = pd.DataFrame(flat_data)
if all(cavity_breakdown.index == "No Eligibility"):
continue
self.flat_data = flat_data
works = group["hubspot_status"]
above_threshold = works.map(LABEL_TO_ENUM.get).dropna()
count_above = (above_threshold >= threshold).sum()
proportion = count_above / len(works)
block_analysis.append(
{
"Block Reference": block_reference,
"Proportion of properties suryeyed": proportion,
**cavity_breakdown.to_dict(),
}
)
block_analysis = pd.DataFrame(block_analysis)
block_analysis = block_analysis.fillna(0)
self.block_analysis_df = block_analysis
@staticmethod
def split_full_name(x):
@ -2403,14 +2408,15 @@ class AssetList:
self.ecosurv[["Reference", "Status", "Lead Status", "Tags"]].rename(
columns={
"Reference": "ecosurv_reference",
"status": "ecosurv_status",
"Status": "ecosurv_status",
"Lead Status": "ecosurv_lead_status",
"Tags": "ecosurv_tags"
"Tags": "ecosurv_tags",
"Installer": "ecosurv_installer"
}
), how="left", on="ecosurv_reference"
)
matched["ecosurv_install_status"] = None
matched["ecosurv_install_status"] = hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER
# This mapping is ordered by process order, where lodgment is the final step so if we have an indication
# that the property is ready for lodgement, we set the status to that. We then proceed through the other
@ -2772,6 +2778,7 @@ class AssetList:
)
measure_mix_col = "MEASURE COMBO"
installer_notes_col = "INSTALLERS NOTES ; REASONS FOR CANCELLATIONS"
installer_col = "INSTALLER"
logger.info("Matching master data to asset list")
matched = []
@ -2912,7 +2919,7 @@ class AssetList:
matched = pd.DataFrame(matched)
master_to_append = master_data[
[scheme_col, "row_id", install_col, submission_col, measure_mix_col, installer_notes_col]
[scheme_col, "row_id", install_col, submission_col, measure_mix_col, installer_notes_col, installer_col]
].merge(
matched, how="left", on="row_id"
).rename(
@ -2921,7 +2928,8 @@ class AssetList:
measure_mix_col: "measure_mix",
install_col: "survey_status",
submission_col: "submission_date",
installer_notes_col: "submission_installer_notes"
installer_notes_col: "submission_installer_notes",
installer_col: "submission_installer"
}
)
master_to_append["submission_cancelled"] = (

View file

@ -82,6 +82,7 @@ def app():
landlord_existing_pv = None
landlord_property_id = "thrive_property_id"
landlord_sap = "sap_rating_updated"
landlord_block_reference = "block_reference"
outcomes_filename = [
os.path.join(data_folder, "Thrive - Outcomes - April 24-March25 - Corrected.xlsx")
]
@ -119,6 +120,7 @@ def app():
landlord_existing_pv = "Low Carbon Technology (Solar PV)"
landlord_property_id = "UPRN"
landlord_sap = "SAP Score"
landlord_block_reference = None
outcomes_filename = None
outcomes_sheetname = None
outcomes_postcode = None
@ -209,6 +211,7 @@ def app():
landlord_heating_system=landlord_heating_system,
landlord_existing_pv=landlord_existing_pv,
landlord_sap=landlord_sap,
landlord_block_reference=landlord_block_reference,
phase=phase
)
asset_list.init_standardise()
@ -479,7 +482,7 @@ def app():
# We now flag the status of the property
asset_list.label_property_status()
asset_list.flat_analysis()
asset_list.block_analysis()
asset_list.load_contact_details(
local_filepath=os.path.join(data_folder, "Full property list wth D&V report V look up 12.2.25.xlsx"),
@ -526,7 +529,8 @@ def app():
with pd.ExcelWriter(filename) as writer:
asset_list.standardised_asset_list.to_excel(writer, sheet_name="Standardised Asset List", index=False)
asset_list.flat_data.to_excel(writer, sheet_name="Flat Data", index=False)
if asset_list.block_analysis_df is not None:
asset_list.block_analysis_df.to_excel(writer, sheet_name="Block Analysis", index=False)
# If we have outcomes, we add a tab with the outcomes
if not asset_list.outcomes_for_output.empty:
asset_list.outcomes_for_output.to_excel(writer, sheet_name="Outcomes", index=False)

View file

@ -0,0 +1,18 @@
import pandas as pd
def app():
    """
    Load the "Standardised Asset List" tab of the reconciled Thrive programme workbook.

    TODO: After review, Operations may have cleared some cavity_reason/solar_reason values in the
          standardised asset list. For those rows the hubspot status needs to be reset to None if it was
          previously being set to ready for scheduling. Simply filtering to rows where cavity_reason and
          solar_reason are populated is not an option: if historical surveys are to be included, that filter
          would drop them.
    :return: None
    """
    workbook_path = (
        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/"
        "Thrive Programme - reconciled.xlsx"
    )
    # The frame is loaded but not used yet - the status update described in the TODO is still to be written.
    reconciled_assets = pd.read_excel(workbook_path, sheet_name="Standardised Asset List")

View file

@ -507,7 +507,7 @@ async def model_engine(body: PlanTriggerRequest):
)
# If we have a remote assessment event type, we pull the additional data and include it
if body.event_type == "remote_assessment":
if (body.event_type == "remote_assessment") and not (epc_searcher.newest_epc["estimated"]):
logger.info("Retrieving find my epc data")
try:
property_non_invasive_recommendations, patch = RetrieveFindMyEpc.get_from_epc(

View file

@ -0,0 +1,108 @@
"""
This script takes the standardised asset list and appends the project codes.
We also review the existing install status, in case anything is wrong.
"""
import pandas as pd
import numpy as np
# ---------------------------------------------------------------------------
# Workbook locations. Every input and the output live in the same folder, so
# the folder is spelled out once and each workbook is named relative to it.
# ---------------------------------------------------------------------------
RECONCILIATION_DIR = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/"
SOURCE_WORKBOOK = f"{RECONCILIATION_DIR}Thrive Asset List - Complete - Updated May 2025 - Standardised.xlsx"
MASTER_TRACKER_WORKBOOK = f"{RECONCILIATION_DIR}Master Tracker (Thrive - Warmfront).xlsx"
# Output workbook
filename = f"{RECONCILIATION_DIR}Thrive Programme - reconciled.xlsx"

ASSET_LIST_SHEET = "Standardised Asset List"
# Tabs that are copied from the source workbook to the output workbook unchanged
PASS_THROUGH_SHEETS = ["Block Analysis", "Outcomes", "Unmatched Submissions", "Unmatched Ecosurv"]

# Tracker columns that are carried onto the asset list as-is
TRACKER_NOTE_COLUMNS = ["Thrive Notes", "Priority", "Notes Reply (Thrive)"]

# Programme reference (as written in the master tracker) -> project code.
# References that are not listed here end up with a null project code.
PROJECT_CODE_MAP = {
    "Phase 2": "THRIVE-002",
    "Phase 3": "THRIVE-003",
    "Phase 4": "THRIVE-004",
    "Phase 5": "THRIVE-005",
    "Phase 6": "THRIVE-006",
    "Phase 7": "THRIVE-007",
    "Phase 8": "THRIVE-008",
    "Phase 9": "THRIVE-009",
    "Phase 10": "THRIVE-010",
    "Week1": "THRIVE-WEEK-001",
    "Week2": "THRIVE-WEEK-002",
    "Week4": "THRIVE-WEEK-004",
    "Week7": "THRIVE-WEEK-007",
}

# Read every tab we need from the source workbook in a single pass. Passing a list of sheet names makes
# read_excel return a dict of DataFrames keyed by sheet name, so the workbook is only opened once.
source_sheets = pd.read_excel(SOURCE_WORKBOOK, sheet_name=[ASSET_LIST_SHEET, *PASS_THROUGH_SHEETS])
standardised_asset_list = source_sheets[ASSET_LIST_SHEET]

project_code_allocations = pd.read_excel(
    MASTER_TRACKER_WORKBOOK,
    sheet_name="Master Tracker",
    header=1,
)

# Build one lookup, keyed on UPRN, holding everything we want from the tracker.
# Tracker rows without a UPRN are dropped first: pandas matches null merge keys against each other, so a
# blank-UPRN tracker row would otherwise be attached to asset rows that have no property id / no match.
tracker_lookup = project_code_allocations.dropna(subset=["UPRN"])[
    ["UPRN", "Proposed Progamme", "New Proposed Programme", *TRACKER_NOTE_COLUMNS]
].copy()

# The new proposed programme wins; fall back to the original proposal where no new one was given
tracker_lookup["programme_reference"] = np.where(
    pd.isnull(tracker_lookup["New Proposed Programme"]),
    tracker_lookup["Proposed Progamme"],
    tracker_lookup["New Proposed Programme"],
)
tracker_lookup["project_code"] = tracker_lookup["programme_reference"].map(PROJECT_CODE_MAP)

# A single left merge brings the codes and the notes across together. Merging the notes in a second step on
# the UPRN column produced by the first merge would re-match unmatched (null UPRN) rows and would multiply
# rows for any UPRN that appears more than once in the tracker.
standardised_asset_list = standardised_asset_list.merge(
    tracker_lookup[["UPRN", "project_code", "programme_reference", *TRACKER_NOTE_COLUMNS]],
    how="left",
    left_on="landlord_property_id",
    right_on="UPRN",
).drop(columns=["UPRN"])

# We fill the project code for historical completions: rows that have a hubspot status other than
# "READY TO BE SCHEDULED" but were not allocated a project code by the tracker
is_historical = (
    pd.isnull(standardised_asset_list["project_code"])
    & (standardised_asset_list["hubspot_status"] != "READY TO BE SCHEDULED")
    & (~pd.isnull(standardised_asset_list["hubspot_status"]))
)
standardised_asset_list["project_code"] = np.where(
    is_historical,
    "THRIVE-HISTORICAL",
    standardised_asset_list["project_code"],
)

# Store as an excel: the updated asset list first, followed by the tabs carried over from the source workbook
with pd.ExcelWriter(filename) as writer:
    standardised_asset_list.to_excel(writer, sheet_name=ASSET_LIST_SHEET, index=False)
    for sheet_name in PASS_THROUGH_SHEETS:
        source_sheets[sheet_name].to_excel(writer, sheet_name=sheet_name, index=False)