debugging calico epc search to handle the strict blocks

This commit is contained in:
Khalim Conn-Kowlessar 2025-06-19 22:48:25 +01:00
parent 383a4852e2
commit 980f439f49
7 changed files with 236 additions and 56 deletions

View file

@ -298,7 +298,7 @@ class AssetList:
"Has the property been re-walled?", "Is the property tile hung?", "Does the property have a render?",
"Does the property have cladding?", "Gable Wall Obstructions",
"Does the property have foliage that needs removal?",
"Potential unsafe environment", "Date of Inspection"
"Potential unsafe environment", "Date of Inspection", "Borescoped?"
]
NON_INTRUSIVES_ELIGIBILITY_COLUMN = "Eligibility (Red/Yellow/Green)"
@ -354,6 +354,7 @@ class AssetList:
# Work type prefixes:
# Empties
EMPTY_CAVITY_NON_INTRUSIVE = "Non-Intrusive Data Shows Empty Cavity"
EMPTY_CAVITY_NON_INTRUSIVE_YEAR = 'Non-Intrusive Data Shows Empty Cavity, built after 2002'
EPC_EMPTY_INSPECTIONS_RETRO_DRILLED = "EPC Shows Empty Cavity, inspections show retro drilled"
EPC_EMPTY_INSPECTIONS_FILLED = "EPC Shows Empty Cavity, inspections show filled or other"
EPC_EMPTY_INSPECTIONS_FILLED_AT_BUILD = "EPC Shows Empty Cavity, inspections show filled at build"
@ -1280,7 +1281,8 @@ class AssetList:
)
self.standardised_asset_list["SAP Category"] = np.where(
pd.isnull(self.standardised_asset_list[self.STANDARD_SAP]),
pd.isnull(self.standardised_asset_list[self.STANDARD_SAP]) &
pd.isnull(self.standardised_asset_list[self.EPC_API_DATA_NAMES["current-energy-efficiency"]]),
"SAP Unknown",
self.standardised_asset_list["SAP Category"]
)
@ -1745,8 +1747,6 @@ class AssetList:
self.standardised_asset_list["solar_epc_loft_needs_topup"]
)
z = self.standardised_asset_list[self.standardised_asset_list["landlord_property_id"] == "DW150120029"]
self.standardised_asset_list["solar_eligible"] = (
# Property isn't a flat
not_a_flat &
@ -2035,14 +2035,15 @@ class AssetList:
self.standardised_asset_list["cavity_reason"] = np.where(
(self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] == "flat") &
(self.standardised_asset_list["SAP Category"] == "SAP Rating 76 or more"),
None,
self.standardised_asset_list["cavity_reason"] + " - (unlikely to quality)",
self.standardised_asset_list["cavity_reason"]
)
# Split cavity_reason on the colon and check if the first part is equal to one of the two options above
# that indicates empties
self.standardised_asset_list["identified_empty_cavity"] = (
self.standardised_asset_list["cavity_reason"].str.split(":").str[0].isin(
[self.EMPTY_CAVITY_NON_INTRUSIVE, self.EPC_EMPTY]
[self.EMPTY_CAVITY_NON_INTRUSIVE, self.EMPTY_CAVITY_NON_INTRUSIVE_YEAR, self.EPC_EMPTY]
)
)
@ -2078,6 +2079,7 @@ class AssetList:
NUM_RE = re.compile(r'\b\d+[A-Za-z]?\b') # captures 12, 12A, etc.
expanded_rows = []
for _, row in blocks.iterrows():
addr = str(row[self.STANDARD_ADDRESS_1])
@ -2194,16 +2196,9 @@ class AssetList:
# if we have any blocks, where work is eligible, we flag them now
# These blocks may be referenced via the landlord_block_reference field, or by property types being
# blocks of flats
has_landlord_block_reference = self.landlord_block_reference is not None
has_landlord_block_reference = sum(~pd.isnull(self.standardised_asset_list[self.STANDARD_BLOCK_REFERENCE]))
if has_landlord_block_reference:
# # If we blocks of flats, without a landlord block reference, we create this
# self.fill_landlord_block_reference(has_blocks_of_flats)
#
# # If we have blocks of flats, we split these out into individual units
# self.split_blocks()
# For blocks that have a 50% allocation, we create project codes
self.block_analysis()
# Find any block refs with more than 50% empties
@ -2265,13 +2260,18 @@ class AssetList:
block_analysis = []
for block_reference, group in self.standardised_asset_list.groupby(self.STANDARD_BLOCK_REFERENCE):
cavity_breakdown = group["cavity_reason"].fillna("No Eligibility").value_counts(normalize=True) * 100
if all(cavity_breakdown.index == "No Eligibility"):
continue
# We check the % of empty vs not empty as right now, we're focused on empty
n_empties = ((group["identified_empty_cavity"] == True) & (~pd.isnull(group["cavity_reason"]))).sum()
n_empties = (
(group["identified_empty_cavity"] == True) &
(~pd.isnull(group["cavity_reason"])) &
(~group["cavity_reason"].str.contains("(unlikely to quality)", case=False, na=False, regex=False))
).sum()
works = group["hubspot_status"]
above_threshold = works.map(LABEL_TO_ENUM.get).dropna()
@ -2293,6 +2293,36 @@ class AssetList:
block_analysis = block_analysis.fillna(0)
# We flag which properties are eligible for works. We need at least 50%
block_analysis["Eligible for Works"] = (
block_analysis["Percentage of Empties"] >= 0.50
)
block_analysis = block_analysis.sort_values("Percentage of Empties", ascending=False)
# For properties that are NOT eligible, we should update the cavity reason
ineligible_blocks = block_analysis[
~block_analysis["Eligible for Works"]
]["Block Reference"].values
eligible_blocks = block_analysis[
block_analysis["Eligible for Works"]
]["Block Reference"].values
self.standardised_asset_list["cavity_reason"] = np.where(
self.standardised_asset_list[self.STANDARD_BLOCK_REFERENCE].isin(ineligible_blocks),
self.standardised_asset_list["cavity_reason"] + " (Flat in block with less than 50% eligible)",
self.standardised_asset_list["cavity_reason"]
)
# If the property is in a block of flats that is eligible, but the property itself is not eligible, we flag this.
# The criterion is:
# - The property should be in a block of flats
self.standardised_asset_list["cavity_reason"] = np.where(
self.standardised_asset_list[self.STANDARD_BLOCK_REFERENCE].isin(eligible_blocks),
self.standardised_asset_list["cavity_reason"]
+ " " + "(Flat in block with more than 50% eligible, but not eligible itself)",
self.standardised_asset_list["cavity_reason"]
)
self.block_analysis_df = block_analysis
@ -2434,13 +2464,13 @@ class AssetList:
)
# Format the two date columns
programme_data["survey_week"] = pd.to_datetime(programme_data["survey_week"], errors="coerce")
programme_data["survey_date"] = pd.to_datetime(programme_data["survey_date"], errors="coerce")
programme_data[self.EPC_API_DATA_NAMES["inspection-date"]] = pd.to_datetime(
programme_data[self.EPC_API_DATA_NAMES["inspection-date"]],
errors="coerce"
)
# Convert to dd/mm/yyyy format
programme_data["survey_week"] = programme_data["survey_week"].dt.strftime("%d/%m/%Y")
programme_data["survey_date"] = programme_data["survey_date"].dt.strftime("%d/%m/%Y")
programme_data[self.EPC_API_DATA_NAMES["inspection-date"]] = (
programme_data[self.EPC_API_DATA_NAMES["inspection-date"]].dt.strftime("%d/%m/%Y")
)
@ -2457,12 +2487,14 @@ class AssetList:
ready_to_be_scheduled = (
(
programme_data["hubspot_status"] == hubspot_config.HubspotProcessStatus.READY_TO_BE_SCHEDULED.label
) & (~pd.isnull(programme_data["survey_week"]) & ~pd.isnull(programme_data["surveyor"]))
) & (~pd.isnull(programme_data["survey_date"]))
)
completed_works = (
programme_data["hubspot_status"] != hubspot_config.HubspotProcessStatus.READY_TO_BE_SCHEDULED.label
)
programme_data = programme_data[ready_to_be_scheduled | completed_works]
# completed_works = (
# (programme_data["hubspot_status"] !=
# hubspot_config.HubspotProcessStatus.READY_TO_BE_SCHEDULED.label) &
# (~pd.isnull(programme_data["hubspot_status"]))
# )
programme_data = programme_data[ready_to_be_scheduled]
# Merge on the contact details
programme_data = programme_data.merge(
@ -2505,11 +2537,13 @@ class AssetList:
self.CRM_HISTORICAL_CAVITY_PRODUCT["name"]
)
else:
# We shouldn't have any missing products
programme_data = programme_data[
~pd.isnull(programme_data["domna_product"]) &
~pd.isnull(programme_data["surveyor"]) &
~pd.isnull(programme_data["survey_week"])
]
~pd.isnull(programme_data["survey_date"])
]
if pd.isnull(programme_data["domna_product"]).sum():
raise ValueError("Missing products")
programme_data = programme_data.drop(columns=["solar_product", "cavity_product"])
product_df = (
@ -2686,7 +2720,7 @@ class AssetList:
'Last EPC: Room Height <LISTING last_epc__room_height>': self.EPC_API_DATA_NAMES["floor-height"],
'Last EPC: Age Band <LISTING last_epc__age_band>': self.EPC_API_DATA_NAMES["construction-age-band"],
'Pipeline <DEAL pipeline>': 'Pipeline <DEAL pipeline>',
'Expected Commencement Date <DEAL expected_commencement_date>': "survey_week",
'Expected Commencement Date <DEAL expected_commencement_date>': "survey_date",
'Deal Name <DEAL dealname>': "dealname", # Need to create this,
'Product ID <LINE_ITEM hs_product_id>': 'Product ID <LINE_ITEM hs_product_id>',
'Name <LINE_ITEM name>': 'Name <LINE_ITEM name>',
@ -2724,7 +2758,11 @@ class AssetList:
# The listing owner email is the same as the surveyor email (deal owner), so they can see the listing
programme_data['Listing Owner Email <LISTING hubspot_owner_id>'] = programme_data['Deal Owner']
programme_data['Amount <DEAL amount>'] = 0
programme_data["Deal Owner"] = programme_data["Deal Owner"].str.lower()
programme_data["Deal Owner"] = np.where(
~pd.isnull(programme_data["Deal Owner"]),
programme_data["Deal Owner"].astype(str).str.lower(),
programme_data["Deal Owner"]
)
# We make sure we have all of the columns that we need
missed_columns = [c for c in hubspot_config.CRM_UPLOAD_COLUMNS if c not in programme_data.columns]

View file

@ -553,13 +553,9 @@ def app():
)
asset_list.merge_data(epc_df)
asset_list.extract_attributes()
asset_list.identify_worktypes()
pprint(asset_list.work_type_figures)
# We now flag the status of the property
asset_list.label_property_status()
asset_list.analyse_geographies()

View file

@ -19,19 +19,19 @@ def app():
# inputs:
reconcile_programme = False # If True, the hubspot upload will include all properties with a project code
customer_domain = "https://sandwell.gov.uk"
customer_domain = "https://livewest.co.uk"
installer_name = "J & J CRUMP"
asset_list_filepath = (
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Sandwell/Hubspot/Sandwell BC - Full Asset List MAIN - "
"Standardised.xlsx"
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Livewest/Hubspot/Livewest South-West - Standardised "
"V2.xlsx"
)
asset_list_sheet_name = "Proposed Program"
asset_list_header = 1
asset_list_sheet_name = "Standardised Asset List"
asset_list_header = 0
contact_details_filepath = (
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Sandwell/Hubspot/Sandwell Contact Details.xlsx"
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Livewest/Hubspot/23.06 Livewest Contacts.xlsx"
)
contacts_sheet_name = "Sheet1"
contacts_sheet_name = "Contact Information"
contacts_landlord_property_id = "landlord_property_id"
contacts_phone_number_column = "phone_number"
contacts_secondary_phone_number_column = "secondary_phone_number"

View file

@ -79,7 +79,13 @@ def get_data(
uprn=uprn
)
# Force the skipping of estimating the EPC
searcher.ordnance_survey_client.property_type = None
# We check if the property was split
if home["is_expended_block"]:
searcher.ordnance_survey_client.property_type = "Flat"
searcher.property_type = "Flat"
searcher.set_strict_property_type_search()
else:
searcher.ordnance_survey_client.property_type = None
searcher.ordnance_survey_client.built_form = None
searcher.find_property(skip_os=True)

View file

@ -5,7 +5,7 @@ from typing import List
from backend.app.plan.schemas import HousingType
class Funding:
class FundingOld:
"""
Given a property, this class identifies if the home is possibly eligible for funding under
the various funding schemes. It will also calculate the expected amount of funding available
@ -413,13 +413,32 @@ class Funding:
self.whlg()
class Funding2:
class Funding:
"""
New class to handle funding calculation
"""
def __init__(self, tenure: HousingType):
def __init__(
self,
tenure: HousingType,
social_cavity_abs_rate: float,
social_solid_abs_rate: float,
private_cavity_abs_rate: float,
private_solid_abs_rate: float,
project_scores_matrix,
whlg_eligible_postcodes
):
self.tenure = tenure
self.social_cavity_abs_rate = social_cavity_abs_rate
self.social_solid_abs_rate = social_solid_abs_rate
self.private_cavity_abs_rate = private_cavity_abs_rate
self.private_solid_abs_rate = private_solid_abs_rate
self.starting_sap_band = None
self.ending_sap_band = None
self.floor_area_band = None
self.project_scores_matrix = project_scores_matrix
self.whlg_eligible_postcodes = whlg_eligible_postcodes
@staticmethod
def get_sap_band(sap_score_number):
@ -446,8 +465,22 @@ class Funding2:
return None
@staticmethod
def get_floor_area_band(floor_area):
if floor_area <= 72:
return "0-72"
if floor_area <= 97:
return "73-97"
if floor_area <= 199:
return "98-199"
return "200"
@staticmethod
def eco4_prs_eligibility(
self, starting_sap: int, measures: List, mainheat_description: str, heating_control_description: str
starting_sap: int, measures: List, mainheat_description: str, heating_control_description: str
):
"""
Handles the eligibility criteria for private rental properties under eco
@ -481,31 +514,53 @@ class Funding2:
return False
def calculate_full_project_abs(self):
    """
    Look up the full project score for the bands currently stored on the instance.

    The project scores matrix is filtered on self.floor_area_band, self.starting_sap_band and
    self.ending_sap_band, so those attributes must be populated before this method is called.
    :return: the "Cost Savings" value of the first matching row of the matrix
    :raises ValueError: if no row of the matrix matches the three bands
    """
    matrix = self.project_scores_matrix
    # Filter the project scores matrix
    data = matrix[
        (matrix["Floor Area Segment"] == self.floor_area_band) &
        (matrix["Starting Band"] == self.starting_sap_band) &
        (matrix["Finishing Band"] == self.ending_sap_band)
    ]

    # DataFrame.empty is the correct attribute; the previous spelling ("emtpy") raised an
    # AttributeError on every call, so neither the lookup nor the intended ValueError was reachable
    if data.empty:
        raise ValueError("Missing abs rate, check the project scores matrix")

    return data["Cost Savings"].values[0]
def check_funding(
self, measures: List,
starting_sap: int,
ending_sap: int,
floor_area: float,
mainheat_description: str,
heating_control_description: str
heating_control_description: str,
is_cavity: bool
):
"""
Given a list of measures, this function will check if the package of measures is fundable
:param measures:
:param starting_sap:
:param ending_sap:
:param floor_area:
:param mainheat_description:
:param heating_control_description:
:param is_cavity: Indicates if the property has cavity wall insulation
:return:
"""
starting_band = self.get_sap_band(starting_sap)
ending_band = self.get_sap_band(ending_sap)
# If it's an E or D, should get to an EPC C
if starting_sap >= 55 and ending_sap < 69:
raise NotImplementedError("This property doesn't have sufficient SAP movement")
# For ECO4 eligibility, the property needs to end at a C if it starts at a D or E, otherwise should end at a
# D
if starting_band <= 38 & ending_band >= 55:
if starting_sap <= 38 & ending_sap <= 55:
# F or G should get to D
raise NotImplementedError("Implement F or G to D eligibility")
self.starting_sap_band = self.get_sap_band(starting_sap)
self.ending_sap_band = self.get_sap_band(ending_sap)
self.floor_area_band = self.get_floor_area_band(floor_area)
########################
# Private
########################
@ -513,13 +568,25 @@ class Funding2:
# 2) GBIS
if self.tenure == "Private":
is_eligible = self.eco4_prs_eligibility(
is_eco4_eligible = self.eco4_prs_eligibility(
starting_sap=starting_sap,
measures=measures,
mainheat_description=mainheat_description,
heating_control_description=heating_control_description
)
pass
# Need to implement
# 1) Package has to include an insulation measure
# 2) We should use the funding for the measure that has the largest partial project score
is_gbis_eligible = ()
if not is_eco4_eligible:
return
eco4_abs = self.calculate_full_project_abs()
# We estimate rates now
eco4_funding = (
eco4_abs * self.private_cavity_abs_rate if is_cavity else eco4_abs & self.private_solid_abs_rate
)
########################
# Social

View file

@ -160,6 +160,9 @@ class SearchEpc:
"""
Address lines 1 and postcode are mandatory fields. The other address lines are optional
but can be used to find the epc for the home, if address1 and postcode are insufficient
If you wish to run a strict property type search, please run set_strict_property_type_search()
:param address1: string, property's address line 1
:param postcode: string, property's postcode
:param full_address: string, optional parameter, the full address of the property
@ -189,6 +192,7 @@ class SearchEpc:
self.older_epcs = None
self.full_sap_epc = None
self.metadata = None
self.strict_property_type_search = False
# These are the address and postcode values, which we store in the database
self.address_clean = None
@ -199,6 +203,14 @@ class SearchEpc:
self.property_type = property_type
self.fast = fast
def set_strict_property_type_search(self) -> None:
    """
    Switch this searcher into strict property type mode.

    The only effect of this method is to raise the strict_property_type_search flag; the search code
    reads that flag to decide whether results should be restricted to the specified property type.
    :return: None
    """
    strict_mode_enabled = True
    self.strict_property_type_search = strict_mode_enabled
@staticmethod
def get_house_number(address: str, postcode=None) -> str | None:
"""
@ -315,6 +327,8 @@ class SearchEpc:
address_params["address"] = self.address1
if self.postcode:
address_params["postcode"] = self.postcode
if self.strict_property_type_search and self.property_type:
address_params["property-type"] = self.property_type.lower()
# We attempt the search with uprn params
@ -365,11 +379,16 @@ class SearchEpc:
unique_property_types = {r["property-type"] for r in rows}
is_just_a_house = (len(unique_property_types) == 1) & (
("House" in unique_property_types) | ("Bungalow" in unique_property_types)
)
# We allow for variation in property type across flats/maisonettes
# If we know that we have a flat/maisonette, we allow for both property types
if property_type in ["Flat", "Maisonette"]:
if ((len(uprns) == 1) and ((len(unique_property_types) == 1)
) or unique_property_types == {"Flat", "Maisonette"}):
# Make sure we have not JUST a house, or not JUST a flat/maisonette
if property_type in ["Flat", "Maisonette"] and not is_just_a_house:
if (((len(uprns) == 1) and ((len(unique_property_types) == 1)
) or unique_property_types == {"Flat", "Maisonette"})):
return rows
if property_type is not None:
@ -424,6 +443,8 @@ class SearchEpc:
return rows
raise ValueError("property type and address cannot both be None, at least one must be provided")
@staticmethod
def format_address(newest_epc):
"""

View file

@ -0,0 +1,52 @@
import pytest
import pandas as pd
from utils.s3 import read_csv_from_s3
from backend.Funding import Funding
def get_funding_data():
    """
    Load the two reference datasets used by the funding tests from S3.

    Both files are read from the "retrofit-data-dev" bucket. The ECO4 project scores matrix has its
    columns renamed and its "Cost Savings" column cast to float; the WHLG postcode data is returned as read.
    :return: tuple of (project scores matrix DataFrame, WHLG eligible postcodes DataFrame)
    """
    bucket = "retrofit-data-dev"

    # ECO4 full project scores, keyed by floor area segment and starting/finishing band
    scores = pd.DataFrame(
        read_csv_from_s3(
            bucket_name=bucket,
            filepath="funding/ECO4 Full Project Scores Matrix.csv",
        )
    )
    scores.columns = ['Floor Area Segment', 'Starting Band', 'Finishing Band', 'Cost Savings']
    scores["Cost Savings"] = scores["Cost Savings"].astype(float)

    # Warm homes local grant eligible postcodes
    postcodes = pd.DataFrame(
        read_csv_from_s3(
            bucket_name=bucket,
            filepath="funding/whlg eligible postcodes.csv",
        )
    )

    return scores, postcodes
class TestFunding:
    """Tests for the Funding class."""

    def test_prs(self):
        """Run the funding check for a "Private" tenure property with a two-measure package."""
        scores_matrix, eligible_postcodes = get_funding_data()

        abs_rates = {
            "social_cavity_abs_rate": 13.5,
            "social_solid_abs_rate": 17,
            "private_cavity_abs_rate": 13.5,
            "private_solid_abs_rate": 17,
        }
        funding = Funding(
            tenure="Private",
            project_scores_matrix=scores_matrix,
            whlg_eligible_postcodes=eligible_postcodes,
            **abs_rates,
        )

        # NOTE(review): there are no assertions here, so this only checks that the call does not raise
        funding.check_funding(
            measures=["internal_wall_insulation", "solar_pv"],
            starting_sap=54,
            ending_sap=69,
            floor_area=73,
            mainheat_description="Boiler and radiators, mains gas",
            heating_control_description="Programmer, room thermostat and TRVs",
            is_cavity=True,
        )