Making EPC searching more specific

This commit is contained in:
Khalim Conn-Kowlessar 2025-12-01 09:35:07 +00:00
parent f75c630dd1
commit 9e7ed1efd5
2 changed files with 48 additions and 9 deletions

View file

@ -447,11 +447,19 @@ class SearchEpc:
]
elif best_match1[1] > best_match2[1]:
# Get all of the scores
rows_filtered = [r for r in rows if ", ".join([r["address"], r["posttown"]]) == best_match1[0]]
# Get all of the scores - make sure we keep uprn
rows_filtered = [
r for r in rows if
(
(", ".join([r["address"], r["posttown"]]) == best_match1[0]) or
(str(r["uprn"]) == str(self.uprn))
)
]
else:
# Get all of the scores
rows_filtered = [r for r in rows if r["address"] == best_match2[0]]
rows_filtered = [
r for r in rows if (r["address"] == best_match2[0]) or (str(r["uprn"]) == str(self.uprn))
]
# If we have multiple, we filter on newest lodgment date
if len(rows_filtered) > 1:

View file

@ -22,7 +22,9 @@ class RetrieveFindMyEpc:
'Chrome/111.0.0.0 Safari/537.36'
}
def __init__(self, address: str, postcode: str, rrn: str = None, address_postal_town: str = ""):
def __init__(
self, address: str, postcode: str, rrn: str = None, address_postal_town: str = "", sap_rating: int = None
):
"""
This class is tasked with retrieving the latest EPC data from the find my epc website
:param address: The address of the property
@ -40,6 +42,8 @@ class RetrieveFindMyEpc:
if self.address_postal_town:
self.address_postal_town = self.address_postal_town.replace(",", "").replace(" ", "").lower()
self.sap_rating = sap_rating
@staticmethod
def extract_low_carbon_sources(soup):
# Find the section header
@ -351,7 +355,7 @@ class RetrieveFindMyEpc:
postcode_res = BeautifulSoup(postcode_response.text, features="html.parser")
rows = postcode_res.find_all('tr', class_='govuk-table__row')
extracted_table = []
extracted_table, backup_flat = [], []
for row in rows:
# Extract the address and URL
address_tag = row.find('a', class_='govuk-link')
@ -373,6 +377,17 @@ class RetrieveFindMyEpc:
)
if no_primary_match and no_backup_match:
if self.address_cleaned.startswith("flat"):
# We have a flat address, so we can try and match without the flat number
flat_removed_address = self.address_cleaned[4:]
if extracted_address_cleaned.startswith(flat_removed_address):
# We have a backup match
backup_flat.append(
{
"extracted_address": extracted_address,
"extracted_address_url": extracted_address_url,
}
)
continue
# If the address is a match, we can extract the data
@ -391,9 +406,12 @@ class RetrieveFindMyEpc:
}
)
if not extracted_table:
if not extracted_table and not backup_flat:
raise ValueError("No EPC found")
if not extracted_table:
extracted_table = deepcopy(backup_flat)
if len(extracted_table) > 1:
# We take the one with the most recent expiry date
extracted_table = sorted(extracted_table, key=lambda x: x['expiry_date'], reverse=True)
@ -439,6 +457,12 @@ class RetrieveFindMyEpc:
potential_rating = ratings.split(".")[1]
current_sap = int(current_rating.split(' ')[-1])
if current_sap != self.sap_rating:
raise ValueError(
f"SAP rating mismatch: expected {self.sap_rating}, got {current_sap} for address {self.address}, "
f"postcode {self.postcode}"
)
# Retrieve the energy consumption
bills = address_res.find('div', {'id': 'bills-affected'})
bills_list = bills.find_all('li')
@ -736,12 +760,15 @@ class RetrieveFindMyEpc:
return formatted_recommendations
@classmethod
def get_from_epc(cls, epc, epc_page_source=None, rrn=None, address_postal_town=None):
def get_from_epc(cls, epc, epc_page_source=None, rrn=None, address_postal_town=None, sap_rating=None):
if epc_page_source is not None and rrn is None:
raise ValueError("rrn must be provided if epc_page_source is provided")
searcher = cls(address=epc["address"], postcode=epc["postcode"], address_postal_town=address_postal_town)
searcher = cls(
address=epc["address"], postcode=epc["postcode"], address_postal_town=address_postal_town,
sap_rating=sap_rating
)
find_epc_data = searcher.retrieve_newest_find_my_epc_data(epc_page_source=epc_page_source, rrn=rrn)
non_invasive_recommendations = {
@ -797,11 +824,15 @@ class RetrieveFindMyEpc:
modified[k] = config_address
attempts.append(modified)
sap_rating = float(epc["current-energy-efficiency"])
# Iterate attempts
last_error = None
for idx, attempt in enumerate(attempts, start=1):
try:
return cls.get_from_epc(attempt, epc_page, rrn=rrn, address_postal_town=address_postal_town)
return cls.get_from_epc(
attempt, epc_page, rrn=rrn, address_postal_town=address_postal_town, sap_rating=sap_rating
)
except Exception as e:
last_error = e
logger.error(f"Attempt {idx} failed: {e}")