mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
897 lines
40 KiB
Python
897 lines
40 KiB
Python
import time
|
||
import re
|
||
import requests
|
||
import pandas as pd
|
||
from copy import deepcopy
|
||
from bs4 import BeautifulSoup
|
||
from datetime import datetime
|
||
|
||
from utils.logger import setup_logger
|
||
|
||
logger = setup_logger()
|
||
|
||
|
||
class RetrieveFindMyEpc:
    # Root of the certificate service; search and certificate paths are appended to it.
    BASE_ENERGY_URL = "https://find-energy-certificate.service.gov.uk"

    # Postcode search endpoint; ``postcode_input`` is filled in with str.format.
    SEARCH_POSTCODE_URL = (
        BASE_ENERGY_URL + "/find-a-certificate/search-by-postcode?postcode={postcode_input}"
    )

    # Browser-like User-Agent sent with every request made by this class.
    HEADERS = {
        'User-Agent': (
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/111.0.0.0 Safari/537.36'
        )
    }
|
||
|
||
def __init__(
|
||
self, address: str, postcode: str, rrn: str = None, address_postal_town: str = "", sap_rating: int = None
|
||
):
|
||
"""
|
||
This class is tasked with retrieving the latest EPC data from the find my epc website
|
||
:param address: The address of the property
|
||
:param postcode: The postcode of the property
|
||
:param rrn: The RRN of the EPC (if known)
|
||
"""
|
||
self.address = address
|
||
self.postcode = postcode
|
||
self.rrn = rrn
|
||
|
||
self.address_cleaned = self.address.replace(",", "").replace(" ", "").lower()
|
||
|
||
# Containers for the extracted components
|
||
self.walls = []
|
||
|
||
self.address_postal_town = address_postal_town
|
||
if self.address_postal_town:
|
||
self.address_postal_town = self.address_postal_town.replace(",", "").replace(" ", "").lower()
|
||
|
||
self.sap_rating = sap_rating
|
||
|
||
@staticmethod
|
||
def extract_low_carbon_sources(soup):
|
||
# Find the section header
|
||
section_header = soup.find("h3", string="Low and zero carbon energy sources")
|
||
if not section_header:
|
||
return {}
|
||
|
||
# Locate the list following the header
|
||
energy_list = section_header.find_next("ul")
|
||
|
||
# Extract the list items
|
||
sources = {item.get_text(strip=True): True for item in energy_list.find_all("li")}
|
||
return sources
|
||
|
||
@staticmethod
|
||
def get_text(elem):
|
||
return elem.get_text(strip=True) if elem else None
|
||
|
||
def extract_epc_data(self, soup):
    """
    Pull the headline EPC fields out of a parsed certificate page

    Any value that cannot be located on the page is returned as None rather than raising.

    :param soup: Parsed certificate page
    :return: Dictionary keyed by EPC field name (floor area, component descriptions and ratings,
        primary energy use and CO2 emissions)
    """

    results = {}

    # 1. Total floor area
    # We have some instances of very old EPCs where the total floor area is not available; on those
    # the value text starts with "Not"
    tfa_label = soup.find("dt", string="Total floor area")
    tfa_text = self.get_text(tfa_label.find_next_sibling("dd")) if tfa_label else None
    tfa = tfa_text.split(" ")[0] if tfa_text else "Not"
    results['total-floor-area'] = int(tfa) if tfa != "Not" else None

    # Table with features
    rows = soup.select("table.govuk-table tbody tr")

    # Ratings whose capitalisation is normalised; any other rating passes through unchanged
    rating_map = {
        "Very poor": "Very Poor",
        "Very good": "Very Good"
    }

    def get_feature_row_text(feature_name, index=0):
        matches = [row for row in rows if row.find("th") and feature_name in row.find("th").text]
        # A commonly seen case is when feature_name is Main heating and we want to make sure we get
        # main heating and not main heating control. The narrowing has to happen before the bounds
        # check, otherwise a page with only a "Main heating control" row indexes an empty list
        if feature_name == "Main heating":
            matches = [row for row in matches if row.find("th").text.strip() == "Main heating"]
        if len(matches) <= index:
            return None, None

        cells = matches[index].find_all("td")
        description = self.get_text(cells[0]) if cells else None
        rating = self.get_text(cells[1]) if len(cells) > 1 else None
        return description, rating_map.get(rating, rating)

    def first_number(element):
        # First run of digits/dots in the element text as a float, or None when unavailable
        if element is None:
            return None
        found = re.search(r"([\d.]+)", element.text)
        return float(found.group(1)) if found else None

    # 2-3. First wall description and rating
    results['walls-description'], results['walls-energy-eff'] = get_feature_row_text("Wall", 0)

    # 4-5. First roof description and rating
    results['roof-description'], results['roof-energy-eff'] = get_feature_row_text("Roof", 0)

    # 6-7. Windows description and rating
    results['windows-description'], results['windows-energy-eff'] = get_feature_row_text("Window")

    # 8-9. Main heating description and rating
    results['mainheat-description'], results['mainheat-energy-eff'] = get_feature_row_text("Main heating")

    # 10-11. Main heating control description and rating
    # NOTE(review): the key spellings 'mainheatc-energy-eff' and 'hot-water-energy-ef' below are kept
    # exactly as they were - presumably consumers rely on them; confirm before renaming
    results['mainheatcont-description'], results['mainheatc-energy-eff'] = get_feature_row_text(
        "Main heating control"
    )

    # 12-13. Hot water description and rating
    results['hotwater-description'], results['hot-water-energy-ef'] = get_feature_row_text("Hot water")

    # 14-15. Lighting description and rating
    results['lighting-description'], results['lighting-energy-eff'] = get_feature_row_text("Lighting")

    # 16. Floor description
    results['floor-description'], _ = get_feature_row_text("Floor")

    # 17. Secondary heating description
    results['secondheat-description'], _ = get_feature_row_text("Secondary heating")

    # 18. Primary energy use
    # We should always have this, but a missing sentence gives None instead of a TypeError
    p_energy = soup.find(string=lambda t: "primary energy use for this property per year" in t.lower())
    match = re.search(r"(\d+)\s+kilowatt", p_energy) if p_energy else None
    results['energy-consumption-current'] = int(match.group(1)) if match else None

    # 19. Current CO2 emissions
    results['co2-emissions-current'] = first_number(soup.find("dd", id="eir-property-produces"))
    # Need co2-emiss-curr-per-floor-area

    # 20. Potential CO2 emissions
    results['co2-emissions-potential'] = first_number(soup.find("dd", id="eir-potential-production"))

    return results
|
||
|
||
def _extract_epc_from_soup(self, soup, epc_certificate, sap_2012_date=None):
    """
    Parse one certificate page into the dictionary of values used downstream

    Side effect: ``self.walls`` is replaced with the wall descriptions found on this page, because
    ``format_recommendations`` reads it.

    :param soup: Parsed certificate page
    :param epc_certificate: Certificate number recorded against the extracted data
    :param sap_2012_date: Passed through to ``format_recommendations``
    :return: Dictionary with the ratings, bill texts, formatted recommendations, ``epc_data``, the
        assessment details and one ``True`` entry per low/zero carbon energy source
    :raises ValueError: If one of the expected assessment details is missing from the page
    """

    # The SVG description holds the current rating sentence followed by the potential rating sentence.
    # In each, the last word is the SAP score and the sixth word from the end is the band letter
    ratings = soup.find('desc', {'id': 'svg-desc'}).text
    current_rating = ratings.split(".")[0]
    potential_rating = ratings.split(".")[1]
    current_sap = int(current_rating.split(' ')[-1])
    current_band = current_rating.split(' ')[-6]

    # Retrieve the energy consumption
    bills_list = soup.find('div', {'id': 'bills-affected'}).find_all('li')
    if not bills_list:
        # If this is the case, it's usually because the EPC was very old. Early EPCs did not have this
        # information
        heating_text = None
        hot_water_text = None
    else:
        heating_text = bills_list[0].text
        hot_water_text = bills_list[1].text

    # Retrieve the recommendations and SAP points
    recommendations = []
    recommendations_div = soup.find('div', class_='epb-recommended-improvements')
    if recommendations_div:
        # Find all h3 headers for each step and extract their related information
        step_headers = recommendations_div.find_all('h3', class_='govuk-heading-m')
        previous_sap_score = current_sap
        previous_epc = current_band
        for step_num, step_header in enumerate(step_headers, start=1):
            # Extract the step title (the measure)
            measure_title = step_header.text.strip().replace(f"Step {step_num}: ", "")

            # Find the div containing the potential rating within the same section
            potential_rating_div = step_header.find_next(
                'div', class_='epb-recommended-improvements__potential-rating'
            )

            # Extract the rating text within the SVG text element, when there is one
            extracted_rating_text = None
            if potential_rating_div:
                extracted_rating_text = potential_rating_div.find('text', class_='govuk-!-font-weight-bold')

            if extracted_rating_text is not None:
                rating_text = extracted_rating_text.text.strip()
            else:
                # No rating published for this step (or no rating block at all): carry the previous
                # rating forward so the step contributes zero SAP points instead of leaving the
                # rating variables unset
                rating_text = " ".join([str(previous_sap_score), previous_epc])

            # Parse the rating text to separate the numeric rating and EPC letter
            new_rating = int(rating_text.split()[0])
            new_epc = rating_text.split()[1]

            # Append the information as a dictionary to the recommendations list
            recommendations.append({
                "step": step_num,
                "measure": measure_title,
                "new_rating": new_rating,
                "new_epc": new_epc,
                "sap_points": new_rating - previous_sap_score
            })
            previous_sap_score = new_rating
            previous_epc = new_epc

    # Search for the assessment information and parse it row by row
    assessment_information = soup.find('div', {'id': 'information'})
    rows = assessment_information.find_all('div', class_='govuk-summary-list__row')
    assessment_data = {}
    for row in rows:
        key = row.find('dt').text.strip()
        if key == "Type of assessment":
            # We don't reliably extract this
            continue
        value_tag = row.find('dd')

        # Check if value contains a link (email)
        if value_tag.find('a'):
            value = value_tag.find('a').text.strip()
        elif value_tag.find('summary'):
            value = value_tag.find('span').text.strip()
        else:
            value = value_tag.text.strip()

        # These are keys that we have for both the surveyor and the accreditation scheme. The first
        # occurrence is stored as the assessor's, the second as the accreditation scheme's
        if key in ["Telephone", "Email"]:
            if "Assessor's " + key not in assessment_data:
                assessment_data["Assessor's " + key] = value
            else:
                assessment_data["Accreditation Scheme's " + key] = value
            continue

        assessment_data[key] = value

    expected_keys = [
        'Assessor’s name',
        "Assessor's Telephone",
        "Assessor's Email",
        'Assessor’s ID',
        'Accreditation scheme',
        'Assessor’s declaration',
        "Accreditation Scheme's Telephone",
        "Accreditation Scheme's Email",
        'Date of assessment',
        'Date of certificate'
    ]
    # Check we have all the expected keys
    for key in expected_keys:
        if key not in assessment_data:
            raise ValueError(f"Missing key: {key}")

    # The wall types of the property
    property_features_table = soup.find("tbody", class_="govuk-table__body").find_all("tr")
    property_components = self.extract_property_components(property_features_table)

    # Extract walls (must happen before format_recommendations, which reads self.walls)
    self.walls = [x["description"] for x in property_components if x["component_name"] == "Wall"]

    # Finally, we format the recommendations
    recommendations = self.format_recommendations(recommendations, assessment_data, sap_2012_date)

    # 4) Low and zero carbon energy sources
    low_carbon_energy_sources = self.extract_low_carbon_sources(soup)

    # 5) Pull out the EPC data
    epc_data = self.extract_epc_data(soup)

    resulting_data = {
        'epc_certificate': epc_certificate,
        'current_epc_rating': current_band,
        'current_epc_efficiency': current_sap,
        'potential_epc_rating': potential_rating.split(' ')[-6],
        "potential_epc_efficiency": int(potential_rating.split(' ')[-1]),
        "heating_text": heating_text,
        "hot_water_text": hot_water_text,
        "recommendations": recommendations,
        "epc_data": epc_data,
        **assessment_data,
        **low_carbon_energy_sources,
    }

    return resulting_data
|
||
|
||
def retrieve_all_find_my_epc_data(self, sap_2012_date=None):

    """
    This is a quick function to retrieve all the data from the find my epc website for a given postcode and address.
    Using this to fulfill a short term need to retrieve all history for a property
    :param sap_2012_date: Passed through to ``_extract_epc_from_soup``
    :return: List of extracted certificates, the selected certificate first and then every certificate
        linked from its "other certificates" section, each labelled with its own certificate number.
        Empty when no certificate can be found for the address
    """

    if self.rrn:
        # We build the URL directly
        epc_certificate = self.rrn
        chosen_epc = f"{self.BASE_ENERGY_URL}/energy-certificate/{epc_certificate}"
    else:
        chosen_epc, epc_certificate = self._find_epc_page()
        if chosen_epc is None:
            # The search found nothing for this address, so there is no page to download
            logger.info("No EPC found for address %s, postcode %s", self.address, self.postcode)
            return []

    address_response = requests.get(chosen_epc, headers=self.HEADERS)
    address_res = BeautifulSoup(address_response.text, features="html.parser")

    # Always include the currently selected EPC first
    pages = [(epc_certificate, address_res)]

    # We check the section on "Other certificates for this property" and follow each link in it.
    # The section is not guaranteed to be on the page, in which case there is no history to add
    other_cert_section = address_res.find('div', id='other_certificates_and_reports')
    other_cert_links = []
    if other_cert_section is not None:
        # Certificate number rows are anchor tags within a govuk-summary-list
        other_cert_links = other_cert_section.select('dd.govuk-summary-list__value a')

    # Add additional historic certificates, keeping each page paired with the number shown on its link
    for link in other_cert_links:
        cert_number = link.text.strip()
        cert_url = f"{self.BASE_ENERGY_URL}{link['href'].strip()}"
        response = requests.get(cert_url, headers=self.HEADERS)
        # Short pause between successive downloads
        time.sleep(0.3)
        pages.append((cert_number, BeautifulSoup(response.text, features="html.parser")))

    return [
        self._extract_epc_from_soup(soup, cert_number, sap_2012_date)
        for cert_number, soup in pages
    ]
|
||
|
||
def _find_epc_page(self):
    """
    This function is used to find the EPC page source for a given address and postcode.
    It is done by fetching the page, associating to the postcode and then matching the
    addresses on the page to the address we have been given.

    Rows match when their normalised address starts with ``self.address_cleaned`` or with
    ``self.address_postal_town``. When nothing matches and our address starts with "flat", rows that
    match the address with that leading "flat" removed are used as a fallback. Among the candidates
    the one with the latest expiry date wins; rows without a readable expiry date rank last.

    :return: Tuple of (certificate URL, certificate number), or (None, None) when nothing matches
    """

    postcode_input = self.postcode.replace(" ", "+")
    postcode_search = self.SEARCH_POSTCODE_URL.format(postcode_input=postcode_input)
    postcode_response = requests.get(postcode_search, headers=self.HEADERS)

    postcode_res = BeautifulSoup(postcode_response.text, features="html.parser")
    rows = postcode_res.find_all('tr', class_='govuk-table__row')

    def read_expiry_date(row):
        # The expiry date text lives in the first span of the row that has a date cell
        expiry_date_tag = row.find('td', class_='govuk-table__cell date')
        expiry_span = expiry_date_tag.parent.find('span') if expiry_date_tag is not None else None
        if expiry_span is None:
            return None
        try:
            return datetime.strptime(expiry_span.text.strip(), '%d %B %Y')
        except ValueError:
            # Unreadable date: keep the row, it will simply rank last
            return None

    extracted_table, backup_flat = [], []
    for row in rows:
        # Extract the address and URL; rows without a link (e.g. header rows) are skipped
        address_tag = row.find('a', class_='govuk-link')
        if address_tag is None:
            continue

        extracted_address = address_tag.text.strip()
        extracted_address_cleaned = extracted_address.replace(",", "").replace(" ", "").lower()
        # Fallback rows carry the expiry date too, so they can be ranked like primary matches
        candidate = {
            "extracted_address": extracted_address,
            "extracted_address_url": address_tag['href'],
            "expiry_date": read_expiry_date(row),
        }

        primary_match = extracted_address_cleaned.startswith(self.address_cleaned)
        backup_match = bool(self.address_postal_town) and extracted_address_cleaned.startswith(
            self.address_postal_town
        )
        if primary_match or backup_match:
            extracted_table.append(candidate)
            continue

        if self.address_cleaned.startswith("flat"):
            # We have a flat address, so we can try and match without the leading "flat"
            flat_removed_address = self.address_cleaned[4:]
            if extracted_address_cleaned.startswith(flat_removed_address):
                backup_flat.append(candidate)

    # Direct matches take priority; the flat fallback is only used when there are none
    candidates = extracted_table or backup_flat
    if not candidates:
        # This is a relatively new change, as of November 2025, but we see cases where properties do not
        # have data appearing on the find my EPC website, particularly for older EPCs. In this case, we
        # allow for us to not find any information and return nothing
        return None, None

    # We take the one with the most recent expiry date (first one wins on ties)
    best = max(candidates, key=lambda x: x['expiry_date'] or datetime.min)

    chosen_epc = self.BASE_ENERGY_URL + best['extracted_address_url']
    epc_certificate = chosen_epc.split('/')[-1]

    return chosen_epc, epc_certificate
|
||
|
||
@staticmethod
|
||
def extract_property_components(property_features_table: list):
|
||
"""
|
||
Function to pull out a table for property components, marking their appearance index
|
||
:param property_features_table: The table of property features, as extracted by BeautifulSoup
|
||
:return: List of property components with appearance index
|
||
"""
|
||
property_components = []
|
||
for row in property_features_table:
|
||
cells = row.find_all("td")
|
||
component_name = row.find("th").text.strip()
|
||
property_components.append(
|
||
{
|
||
"component_name": component_name,
|
||
"description": cells[0].text.strip(),
|
||
"efficiency": cells[1].text.strip(),
|
||
}
|
||
)
|
||
# Add an appearance index, which will indicate if the component appears multiple times, so this
|
||
# becomes a reference for the building part the component is associated to (main, extensions, etc)
|
||
# We want to inject this appearance index into the component dictionaries
|
||
component_count = {}
|
||
for component in property_components:
|
||
name = component['component_name']
|
||
if name not in component_count:
|
||
component_count[name] = 0
|
||
component['appearance_index'] = component_count[name]
|
||
component_count[name] += 1
|
||
|
||
return property_components
|
||
|
||
def retrieve_newest_find_my_epc_data(
    self, sap_2012_date=None, return_page=False, epc_page_source=None, rrn=None
):
    """
    For a post code and address, we pull out all the required data from the find my epc website

    The certificate page comes from one of three places:
      1) ``epc_page_source`` when it is supplied (parsed as-is, nothing is downloaded)
      2) the certificate URL built from the RRN when only an RRN is supplied
      3) a postcode/address search when neither is supplied

    Side effect: ``self.walls`` is replaced with the wall descriptions found on the page.

    :param sap_2012_date: Passed through to ``format_recommendations``
    :param return_page: When True, the full result is returned as ``(data, page_source)``
    :param epc_page_source: Previously downloaded certificate HTML
    :param rrn: Certificate number for ``epc_page_source``, or the certificate to download
    :return: Dictionary of extracted data. ``{}`` when the search finds no certificate, and a dictionary
        with ``epc_certificate`` and ``page_source`` set to None when a certificate found by search does
        not have the expected SAP rating. Both of these early results are plain dictionaries even when
        ``return_page`` is True
    :raises ValueError: If ``epc_page_source`` is given without any RRN, or an expected assessment
        detail is missing from the page
    """

    # Only a certificate picked by the address search can be the wrong property, so we remember
    # whether that is how we got the page
    found_by_search = False

    if epc_page_source is not None:
        # We already hold the page, so parse it rather than downloading it again
        if rrn is None and not self.rrn:
            raise ValueError("rrn must be provided if epc_page_source is provided")
        if rrn is None:
            rrn = self.rrn
        address_res = BeautifulSoup(epc_page_source, features="html.parser")
    elif rrn is None:
        chosen_epc, rrn = self._find_epc_page()
        if chosen_epc is None:
            # We have no resulting data
            logger.info("No EPC found for address %s, postcode %s", self.address, self.postcode)
            return {}
        found_by_search = True

        address_response = requests.get(chosen_epc, headers=self.HEADERS)
        epc_page_source = address_response.text
        address_res = BeautifulSoup(address_response.text, features="html.parser")
    else:
        # The RRN held on the instance takes priority over the argument; report the one we used
        rrn = self.rrn if self.rrn else rrn
        chosen_epc = f"{self.BASE_ENERGY_URL}/energy-certificate/{rrn}"
        address_response = requests.get(chosen_epc, headers=self.HEADERS)
        epc_page_source = address_response.text
        address_res = BeautifulSoup(address_response.text, features="html.parser")

    # Key data we want to retrieve:
    # 1) Rating
    # 2) Bills estimates
    # 3) Recommendations and SAP points
    # 4) Low and zero carbon energy sources
    # 5) The wall types of the property - used for determining if we have an extension wall insulation
    # recommendation

    # The SVG description holds the current rating sentence followed by the potential rating sentence.
    # In each, the last word is the SAP score and the sixth word from the end is the band letter
    ratings = address_res.find('desc', {'id': 'svg-desc'}).text
    current_rating = ratings.split(".")[0]
    potential_rating = ratings.split(".")[1]
    current_sap = int(current_rating.split(' ')[-1])
    current_band = current_rating.split(' ')[-6]

    if self.sap_rating and found_by_search and current_sap != self.sap_rating:
        # This means we likely have the wrong data. If we are in this scenario, we return nothing
        return {
            "epc_certificate": None,
            "page_source": None,
        }

    # Retrieve the energy consumption
    bills_list = address_res.find('div', {'id': 'bills-affected'}).find_all('li')
    if not bills_list:
        # If this is the case, it's usually because the EPC was very old. Early EPCs did not have this
        # information
        heating_text = None
        hot_water_text = None
    else:
        heating_text = bills_list[0].text
        hot_water_text = bills_list[1].text

    # Retrieve the recommendations and SAP points
    recommendations = []
    recommendations_div = address_res.find('div', class_='epb-recommended-improvements')
    if recommendations_div:
        # Find all h3 headers for each step and extract their related information
        step_headers = recommendations_div.find_all('h3', class_='govuk-heading-m')
        previous_sap_score = current_sap
        previous_epc = current_band
        for step_num, step_header in enumerate(step_headers, start=1):
            # Extract the step title (the measure)
            measure_title = step_header.text.strip().replace(f"Step {step_num}: ", "")

            # Find the div containing the potential rating within the same section
            potential_rating_div = step_header.find_next(
                'div', class_='epb-recommended-improvements__potential-rating'
            )

            # Extract the rating text within the SVG text element, when there is one
            extracted_rating_text = None
            if potential_rating_div:
                extracted_rating_text = potential_rating_div.find('text', class_='govuk-!-font-weight-bold')

            if extracted_rating_text is not None:
                rating_text = extracted_rating_text.text.strip()
            else:
                # No rating published for this step (or no rating block at all): carry the previous
                # rating forward so the step contributes zero SAP points instead of leaving the
                # rating variables unset
                rating_text = " ".join([str(previous_sap_score), previous_epc])

            # Parse the rating text to separate the numeric rating and EPC letter
            new_rating = int(rating_text.split()[0])
            new_epc = rating_text.split()[1]

            # Append the information as a dictionary to the recommendations list
            recommendations.append({
                "step": step_num,
                "measure": measure_title,
                "new_rating": new_rating,
                "new_epc": new_epc,
                "sap_points": new_rating - previous_sap_score
            })
            previous_sap_score = new_rating
            previous_epc = new_epc

    # Search for the assessment information and parse it row by row
    assessment_information = address_res.find('div', {'id': 'information'})
    rows = assessment_information.find_all('div', class_='govuk-summary-list__row')
    assessment_data = {}
    for row in rows:
        key = row.find('dt').text.strip()
        if key == "Type of assessment":
            # We don't reliably extract this
            continue
        value_tag = row.find('dd')

        # Check if value contains a link (email)
        if value_tag.find('a'):
            value = value_tag.find('a').text.strip()
        elif value_tag.find('summary'):
            value = value_tag.find('span').text.strip()
        else:
            value = value_tag.text.strip()

        # These are keys that we have for both the surveyor and the accreditation scheme. The first
        # occurrence is stored as the assessor's, the second as the accreditation scheme's
        if key in ["Telephone", "Email"]:
            if "Assessor's " + key not in assessment_data:
                assessment_data["Assessor's " + key] = value
            else:
                assessment_data["Accreditation Scheme's " + key] = value
            continue

        assessment_data[key] = value

    expected_keys = [
        'Assessor’s name',
        "Assessor's Telephone",
        "Assessor's Email",
        'Assessor’s ID',
        'Accreditation scheme',
        'Assessor’s declaration',
        "Accreditation Scheme's Telephone",
        "Accreditation Scheme's Email",
        'Date of assessment',
        'Date of certificate'
    ]
    # Check we have all the expected keys
    for key in expected_keys:
        if key not in assessment_data:
            raise ValueError(f"Missing key: {key}")

    # The wall types of the property
    property_features_table = address_res.find("tbody", class_="govuk-table__body").find_all("tr")
    property_components = self.extract_property_components(property_features_table)

    # Extract walls (must happen before format_recommendations, which reads self.walls)
    self.walls = [x["description"] for x in property_components if x["component_name"] == "Wall"]

    # Finally, we format the recommendations
    recommendations = self.format_recommendations(recommendations, assessment_data, sap_2012_date)

    # 4) Low and zero carbon energy sources
    low_carbon_energy_sources = self.extract_low_carbon_sources(address_res)

    # 5) Pull out the EPC data
    epc_data = self.extract_epc_data(address_res)

    # Pull out the address information which can be found in the box with the class "epc-address"
    # We split it up on break tags: first line, optional second line, and the last line as postcode
    addr = address_res.find("p", class_="epc-address").get_text(separator="\n").strip()
    lines = addr.split("\n")
    address1 = lines[0]
    address2 = lines[1] if len(lines) > 2 else ""
    postcode = lines[-1]

    resulting_data = {
        'epc_certificate': rrn,
        'current_epc_rating': current_band,
        'current_epc_efficiency': current_sap,
        'potential_epc_rating': potential_rating.split(' ')[-6],
        "potential_epc_efficiency": int(potential_rating.split(' ')[-1]),
        "heating_text": heating_text,
        "hot_water_text": hot_water_text,
        "recommendations": recommendations,
        "property_components": property_components,
        "epc_data": epc_data,
        **assessment_data,
        **low_carbon_energy_sources,
        "page_source": epc_page_source,
        # Add in address and postcode from the page - covers use cases where we are given RRN
        "address1": address1,
        "address2": address2,
        "postcode": postcode,
    }

    if return_page:
        # We return the page text as well, which can be parsed again later
        return resulting_data, epc_page_source

    return resulting_data
|
||
|
||
def format_recommendations(self, recommendations, assessment_data, sap_2012_date=None):
    """
    This function converts the recommendations to a format that we can use in the engine as a non-intrusive survey
    :param recommendations: The recommendations from the EPC; each needs ``measure`` and ``sap_points``
    :param assessment_data: The assessment data from the EPC; ``Date of certificate`` is read when
        ``sap_2012_date`` is given
    :param sap_2012_date: The date of the SAP 2012 update; certificates dated before it are flagged
        with ``survey`` False
    :return: List of dictionaries with ``type``, ``sap_points`` and ``survey`` (plus ``suitable`` for
        solar PV), one per engine measure the EPC recommendation maps to
    :raises KeyError: If a recommendation title is not in the measure map
    """

    heating_controls = ["roomstat_programmer_trvs", "time_temperature_zone_control"]

    # EPC recommendation title -> engine measure(s). An empty list means the title is recognised but
    # produces no recommendation
    measure_map = {
        "Internal or external wall insulation": ["internal_wall_insulation", "external_wall_insulation"],
        "Hot water cylinder insulation": ["hot_water_tank_insulation"],
        "Hot water cylinder thermostat": ["cylinder_thermostat"],
        "High performance external doors": ["insulated_doors"],
        "Floor insulation (solid floor)": ["solid_floor_insulation"],
        "Floor insulation (suspended floor)": ["suspended_floor_insulation"],
        "Double glazed windows": ["double_glazing"],
        "Cavity wall insulation": ["cavity_wall_insulation"],
        "Replace boiler with new condensing boiler": ["boiler_upgrade"],
        "Floor insulation": ["floor_insulation"],  # Recommendation typically associated to older EPCs
        "Heating controls (programmer, room thermostat and TRVs)": heating_controls,
        "Low energy lighting": ["low_energy_lighting"],
        "Increase loft insulation to 270 mm": ["loft_insulation"],
        "Heating controls (thermostatic radiator valves)": heating_controls,
        "Solar water heating": ["solar_water_heating"],
        "Solar photovoltaic panels, 2.5 kWp": ["solar_pv"],
        "Heating controls (room thermostat and TRVs)": heating_controls,
        "Change heating to gas condensing boiler": ["boiler_upgrade"],
        "Fan assisted storage heaters and dual immersion cylinder": ["high_heat_retention_storage_heaters"],
        "Flat roof or sloping ceiling insulation": ["flat_roof_insulation", "sloping_ceiling_insulation"],
        "Heating controls (room thermostat)": heating_controls,
        "Band A condensing boiler": ["boiler_upgrade"],
        "Double glazing": ["double_glazing"],
        "Flue gas heat recovery device in conjunction with boiler": ["flue_gas_heat_recovery"],
        "Wind turbine": ["wind_turbine"],
        "Loft insulation": ["loft_insulation"],
        "Solar photovoltaic (PV) panels": ["solar_pv"],
        "Party wall insulation": ["party_wall_insulation"],
        'Draught proofing': ["draught_proofing"],
        "Roof insulation recommendation": [],
        "Cavity wall insulation recommendation": [],
        "Windows draught proofing": [],
        "Low energy lighting for all fixed outlets": ["low_energy_lighting"],
        "Cylinder thermostat recommendation": [],
        "Heating controls recommendation": [],
        "Replace boiler with Band A condensing boiler": ["boiler_upgrade"],
        "Band A condensing gas boiler": ["boiler_upgrade"],
        "Install Band A condensing heating unit": ["boiler_upgrade"],
        "Solar panel recommendation": [],
        "Double glazing recommendation": [],
        "Solid wall insulation recommendation": [],
        "Fuel change recommendation": [],
        "PV Cells recommendation": [],
        "Replacement glazing units": ["double_glazing"],
        "Heating controls (time and temperature zone control)": ["time_temperature_zone_control"],
        "High heat retention storage heaters": ["high_heat_retention_storage_heaters"],
        "Gas condensing boiler": ["boiler_upgrade"],
        "Change room heaters to condensing boiler": ["boiler_upgrade"],
        "Cylinder thermostat": ["cylinder_thermostat"],
        "Heat recovery system for mixer showers": ["heat_recovery_shower"],
        "Room-in-roof insulation": ["room_in_roof_insulation"],
        "Fan assisted storage heaters": [],
        "Fan-assisted storage heaters": [],
        "Step 1:": [],
        "Step 2:": [],
        'Step 3:': [],
        'Step 4:': [],
        'Step 5:': [],
        "Biomass stove with boiler": [],
        "Replace boiler with biomass boiler": [],
        "Heating controls (room thermostat and thermostatic radiator valves)": heating_controls,
        "Heating controls (programmer, and thermostatic radiator valves)": heating_controls,
        "Heating controls (programmer and TRVs)": heating_controls,
        "Heating controls (programmer and room thermostat)": heating_controls,
        "Replacement warm air unit": [],
        "Secondary glazing": ["secondary_glazing"],
        "Condensing heating unit": ["boiler_upgrade"],
        '???': [],
        'Solar photovoltaic panels, 2.5kWp': ["solar_pv"],
        'Heating controls (programmer, room thermostat and thermostatic radiator valves)': heating_controls,
        'Translation missing: en.improvement_code.41.title': [],
        "Condensing boiler (separate from the range cooker)": ["boiler_upgrade"],
        "Heating controls (programmer and thermostatic radiator valves)": heating_controls,
        'Heating controls (programmer room thermostat and thermostatic radiator valves)': heating_controls,
        "Internal wall insulation": ["internal_wall_insulation"],
        "High heat retention storage heaters and dual immersion cylinder and dual rate meter": [
            "high_heat_retention_storage_heaters"
        ],
        "High heat retention storage heaters and dual rate meter": [
            "high_heat_retention_storage_heaters"
        ],
        "Increase loft insulation to 250mm": ["loft_insulation"],
        "Solar photovoltaics panels, 25% of roof area": ["solar_pv"],
        'Air or ground source heat pump': ["air_source_heat_pump"],
        "Add PV Battery": ["solar_pv_battery"],
        "Add PV diverter": ["solar_pv_diverter"],  # Don't have a recommendation yet
        "Draughtproof single-glazed windows": ["double_glazing"],
        "Upgrade heating controls": heating_controls,
        "Low energy lighting recommendation": ["low_energy_lighting"],
        "Install cavity wall insulation": ["cavity_wall_insulation"],
        "Install solar water heating": ["solar_water_heating"],
        'Install photovoltaics, 25% of roof area': ["solar_pv"],
    }

    # Certificates issued before the SAP 2012 date are flagged with survey False
    survey = True
    if sap_2012_date is not None:
        certificate_date = datetime.strptime(assessment_data["Date of certificate"], "%d %B %Y")
        if certificate_date < pd.to_datetime(sap_2012_date):
            survey = False

    # The first wall listed decides whether a cavity recommendation refers to an extension. A page
    # without any wall rows leaves self.walls empty, so there is nothing to compare against
    first_wall = self.walls[0].lower() if self.walls else ""

    formatted_recommendations = []
    for rec in recommendations:
        for measure in measure_map[rec["measure"]]:
            if measure == "cavity_wall_insulation" and "solid brick" in first_wall:
                # The first wall is solid brick, so the cavity being recommended is recorded as an
                # extension cavity
                measure = "extension_cavity_wall_insulation"
            to_append = {
                "type": measure,
                "sap_points": rec["sap_points"],
                "survey": survey,
            }
            if measure == "solar_pv":
                to_append["suitable"] = True
            formatted_recommendations.append(to_append)

    return formatted_recommendations
|
||
|
||
@classmethod
def get_from_epc(cls, epc, epc_page_source=None, rrn=None, address_postal_town=None, sap_rating=None):
    """
    Build a searcher for the given EPC record, retrieve the newest find-my-epc data and
    repackage it into the four structures returned to the caller.

    :param epc: EPC record; its "address", "postcode" and "uprn" entries are read
    :param epc_page_source: Optional page source forwarded to retrieve_newest_find_my_epc_data
    :param rrn: RRN forwarded to retrieve_newest_find_my_epc_data; required when epc_page_source is given
    :param address_postal_town: Forwarded to the class constructor
    :param sap_rating: Forwarded to the class constructor
    :return: Tuple of (non_invasive_recommendations, patch, page_source, property_components)
    :raises ValueError: If epc_page_source is provided without an rrn
    """
    # A supplied page source cannot be used without knowing which certificate it belongs to
    if rrn is None and epc_page_source is not None:
        raise ValueError("rrn must be provided if epc_page_source is provided")

    finder = cls(
        address=epc["address"],
        postcode=epc["postcode"],
        address_postal_town=address_postal_town,
        sap_rating=sap_rating,
    )
    scraped = finder.retrieve_newest_find_my_epc_data(epc_page_source=epc_page_source, rrn=rrn)

    non_invasive_recommendations = {
        "uprn": epc["uprn"],
        "address": epc["address"],
        "postcode": epc["postcode"],
        "recommendations": scraped.get("recommendations", []),
    }

    # The certificate date is parsed from "%d %B %Y" text and stored as the string form of the
    # resulting datetime; a null value is passed through untouched
    raw_date = scraped.get("Date of certificate", None)
    lodgment_date = (
        raw_date if pd.isnull(raw_date) else str(datetime.strptime(str(raw_date), "%d %B %Y"))
    )

    # We need to add the patch information. Entries from "epc_data" may override the four
    # rating fields, while the lodgement date always takes the value computed above.
    patch = {
        "current-energy-rating": scraped.get("current_epc_rating"),
        "current-energy-efficiency": scraped.get("current_epc_efficiency"),
        "potential-energy-rating": scraped.get("potential_epc_rating"),
        "potential-energy-efficiency": scraped.get("potential_epc_efficiency"),
    }
    patch.update(scraped.get("epc_data", {}))
    patch["lodgement-date"] = lodgment_date

    page_source = {
        "rrn": scraped.get("epc_certificate"),
        "page_source": scraped.get("page_source"),
    }

    return (
        non_invasive_recommendations,
        patch,
        page_source,
        scraped.get("property_components", []),
    )
|
||
|
||
@classmethod
def get_from_epc_with_fallback(
    cls, epc, epc_page, rrn, cleaned_address=None, config_address=None, address_postal_town=None
):
    """
    Attempt get_from_epc with:
    1) Original EPC
    2) EPC with cleaned address
    3) EPC with configured address
    in that order, returning the result of the first attempt that does not raise.

    :param epc: EPC record; "current-energy-efficiency" is read here and converted with float()
        before any attempt is made, so a missing or non-numeric value raises immediately
    :param epc_page: Passed to get_from_epc as its epc_page_source argument
    :param rrn: Passed through to get_from_epc
    :param cleaned_address: If truthy, a copy of epc with "address" and "address1" set to this
        value is tried as the second attempt
    :param config_address: If truthy, a copy of epc with "address" and "address1" set to this
        value is tried after the cleaned-address attempt
    :param address_postal_town: Passed through to get_from_epc
    :return: Whatever get_from_epc returns for the first successful attempt
    :raises RuntimeError: If every attempt raised; chained to the last underlying exception
    """

    # The data we'll use to attempt retrieval, starting with the original record
    attempts = [epc]

    # Each alternative address gets its own deep copy so the caller's epc is never mutated
    for alternative_address in (cleaned_address, config_address):
        if not alternative_address:
            continue
        modified = deepcopy(epc)
        for k in ("address", "address1"):
            modified[k] = alternative_address
        attempts.append(modified)

    sap_rating = float(epc["current-energy-efficiency"])

    # Iterate attempts
    last_error = None
    for idx, attempt in enumerate(attempts, start=1):
        try:
            return cls.get_from_epc(
                attempt, epc_page, rrn=rrn, address_postal_town=address_postal_town, sap_rating=sap_rating
            )
        except Exception as e:
            # Deliberately broad: any failure of one attempt should fall through to the next
            last_error = e
            logger.error(f"Attempt {idx} failed: {e}")

    # This raise happens outside the except block, so without "from" the underlying exception
    # and its traceback would not be attached to the RuntimeError at all
    raise RuntimeError(f"All EPC retrieval attempts failed: {last_error}") from last_error
|