mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
Making EPC searching more specific
This commit is contained in:
parent
f75c630dd1
commit
9e7ed1efd5
2 changed files with 48 additions and 9 deletions
|
|
@ -447,11 +447,19 @@ class SearchEpc:
|
|||
]
|
||||
|
||||
elif best_match1[1] > best_match2[1]:
|
||||
# Get all of the scores
|
||||
rows_filtered = [r for r in rows if ", ".join([r["address"], r["posttown"]]) == best_match1[0]]
|
||||
# Get all of the scores - make sure we keep uprn
|
||||
rows_filtered = [
|
||||
r for r in rows if
|
||||
(
|
||||
(", ".join([r["address"], r["posttown"]]) == best_match1[0]) or
|
||||
(str(r["uprn"]) == str(self.uprn))
|
||||
)
|
||||
]
|
||||
else:
|
||||
# Get all of the scores
|
||||
rows_filtered = [r for r in rows if r["address"] == best_match2[0]]
|
||||
rows_filtered = [
|
||||
r for r in rows if (r["address"] == best_match2[0]) or (str(r["uprn"]) == str(self.uprn))
|
||||
]
|
||||
|
||||
# If we have multiple, we filter on newest lodgment date
|
||||
if len(rows_filtered) > 1:
|
||||
|
|
|
|||
|
|
@ -22,7 +22,9 @@ class RetrieveFindMyEpc:
|
|||
'Chrome/111.0.0.0 Safari/537.36'
|
||||
}
|
||||
|
||||
def __init__(self, address: str, postcode: str, rrn: str = None, address_postal_town: str = ""):
|
||||
def __init__(
|
||||
self, address: str, postcode: str, rrn: str = None, address_postal_town: str = "", sap_rating: int = None
|
||||
):
|
||||
"""
|
||||
This class is tasked with retrieving the latest EPC data from the find my epc website
|
||||
:param address: The address of the property
|
||||
|
|
@ -40,6 +42,8 @@ class RetrieveFindMyEpc:
|
|||
if self.address_postal_town:
|
||||
self.address_postal_town = self.address_postal_town.replace(",", "").replace(" ", "").lower()
|
||||
|
||||
self.sap_rating = sap_rating
|
||||
|
||||
@staticmethod
|
||||
def extract_low_carbon_sources(soup):
|
||||
# Find the section header
|
||||
|
|
@ -351,7 +355,7 @@ class RetrieveFindMyEpc:
|
|||
postcode_res = BeautifulSoup(postcode_response.text, features="html.parser")
|
||||
rows = postcode_res.find_all('tr', class_='govuk-table__row')
|
||||
|
||||
extracted_table = []
|
||||
extracted_table, backup_flat = [], []
|
||||
for row in rows:
|
||||
# Extract the address and URL
|
||||
address_tag = row.find('a', class_='govuk-link')
|
||||
|
|
@ -373,6 +377,17 @@ class RetrieveFindMyEpc:
|
|||
)
|
||||
|
||||
if no_primary_match and no_backup_match:
|
||||
if self.address_cleaned.startswith("flat"):
|
||||
# We have a flat address, so we can try and match without the flat number
|
||||
flat_removed_address = self.address_cleaned[4:]
|
||||
if extracted_address_cleaned.startswith(flat_removed_address):
|
||||
# We have a backup match
|
||||
backup_flat.append(
|
||||
{
|
||||
"extracted_address": extracted_address,
|
||||
"extracted_address_url": extracted_address_url,
|
||||
}
|
||||
)
|
||||
continue
|
||||
|
||||
# If the address is a match, we can extract the data
|
||||
|
|
@ -391,9 +406,12 @@ class RetrieveFindMyEpc:
|
|||
}
|
||||
)
|
||||
|
||||
if not extracted_table:
|
||||
if not extracted_table and not backup_flat:
|
||||
raise ValueError("No EPC found")
|
||||
|
||||
if not extracted_table:
|
||||
extracted_table = deepcopy(backup_flat)
|
||||
|
||||
if len(extracted_table) > 1:
|
||||
# We take the one with the most recent expiry date
|
||||
extracted_table = sorted(extracted_table, key=lambda x: x['expiry_date'], reverse=True)
|
||||
|
|
@ -439,6 +457,12 @@ class RetrieveFindMyEpc:
|
|||
potential_rating = ratings.split(".")[1]
|
||||
current_sap = int(current_rating.split(' ')[-1])
|
||||
|
||||
if current_sap != self.sap_rating:
|
||||
raise ValueError(
|
||||
f"SAP rating mismatch: expected {self.sap_rating}, got {current_sap} for address {self.address}, "
|
||||
f"postcode {self.postcode}"
|
||||
)
|
||||
|
||||
# Retrieve the energy consumption
|
||||
bills = address_res.find('div', {'id': 'bills-affected'})
|
||||
bills_list = bills.find_all('li')
|
||||
|
|
@ -736,12 +760,15 @@ class RetrieveFindMyEpc:
|
|||
return formatted_recommendations
|
||||
|
||||
@classmethod
|
||||
def get_from_epc(cls, epc, epc_page_source=None, rrn=None, address_postal_town=None):
|
||||
def get_from_epc(cls, epc, epc_page_source=None, rrn=None, address_postal_town=None, sap_rating=None):
|
||||
|
||||
if epc_page_source is not None and rrn is None:
|
||||
raise ValueError("rrn must be provided if epc_page_source is provided")
|
||||
|
||||
searcher = cls(address=epc["address"], postcode=epc["postcode"], address_postal_town=address_postal_town)
|
||||
searcher = cls(
|
||||
address=epc["address"], postcode=epc["postcode"], address_postal_town=address_postal_town,
|
||||
sap_rating=sap_rating
|
||||
)
|
||||
find_epc_data = searcher.retrieve_newest_find_my_epc_data(epc_page_source=epc_page_source, rrn=rrn)
|
||||
|
||||
non_invasive_recommendations = {
|
||||
|
|
@ -797,11 +824,15 @@ class RetrieveFindMyEpc:
|
|||
modified[k] = config_address
|
||||
attempts.append(modified)
|
||||
|
||||
sap_rating = float(epc["current-energy-efficiency"])
|
||||
|
||||
# Iterate attempts
|
||||
last_error = None
|
||||
for idx, attempt in enumerate(attempts, start=1):
|
||||
try:
|
||||
return cls.get_from_epc(attempt, epc_page, rrn=rrn, address_postal_town=address_postal_town)
|
||||
return cls.get_from_epc(
|
||||
attempt, epc_page, rrn=rrn, address_postal_town=address_postal_town, sap_rating=sap_rating
|
||||
)
|
||||
except Exception as e:
|
||||
last_error = e
|
||||
logger.error(f"Attempt {idx} failed: {e}")
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue