Model/etl/find_my_epc/RetrieveFindMyEpc.py
Khalim Conn-Kowlessar 025b18a29c hot fix
2026-04-09 20:51:04 +01:00

897 lines
40 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import time
import re
import requests
import pandas as pd
from copy import deepcopy
from bs4 import BeautifulSoup
from datetime import datetime
from utils.logger import setup_logger
logger = setup_logger()
class RetrieveFindMyEpc:
    """
    Scraper for the GOV.UK "find an energy certificate" website.

    An instance is bound to one property (address + postcode, optionally a known certificate number). It can
    locate the certificate page for that property, download it and parse ratings, bill texts, recommendations,
    assessor details, property components and headline EPC figures into plain dictionaries.
    """

    SEARCH_POSTCODE_URL = (
        "https://find-energy-certificate.service.gov.uk/find-a-certificate/search-by-postcode?postcode={postcode_input}"
    )
    BASE_ENERGY_URL = "https://find-energy-certificate.service.gov.uk"
    HEADERS = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/111.0.0.0 Safari/537.36'
    }
    # Seconds to wait for the remote site. Without a timeout a stalled connection would block forever.
    REQUEST_TIMEOUT = 30
    # Pause (seconds) after each historic certificate download
    REQUEST_PAUSE_SECONDS = 0.3

    # The website capitalises only the first word of two-word ratings; we store them title-cased
    RATING_MAP = {
        "Very poor": "Very Poor",
        "Very good": "Very Good"
    }

    # Labels that must be present in the "information" section of a certificate page
    EXPECTED_ASSESSMENT_KEYS = (
        'Assessors name',
        "Assessor's Telephone",
        "Assessor's Email",
        'Assessors ID',
        'Accreditation scheme',
        'Assessors declaration',
        "Accreditation Scheme's Telephone",
        "Accreditation Scheme's Email",
        'Date of assessment',
        'Date of certificate'
    )

    # Every flavour of "heating controls" recommendation maps to the same pair of engine measures
    _HEATING_CONTROLS = ["roomstat_programmer_trvs", "time_temperature_zone_control"]

    # Maps the recommendation titles shown on the certificate page to the measure names used by the engine.
    # An empty list means the recommendation is recognised but deliberately produces no measure.
    MEASURE_MAP = {
        "Internal or external wall insulation": ["internal_wall_insulation", "external_wall_insulation"],
        "Hot water cylinder insulation": ["hot_water_tank_insulation"],
        "Hot water cylinder thermostat": ["cylinder_thermostat"],
        "High performance external doors": ["insulated_doors"],
        "Floor insulation (solid floor)": ["solid_floor_insulation"],
        "Floor insulation (suspended floor)": ["suspended_floor_insulation"],
        "Double glazed windows": ["double_glazing"],
        "Cavity wall insulation": ["cavity_wall_insulation"],
        "Replace boiler with new condensing boiler": ["boiler_upgrade"],
        "Floor insulation": ["floor_insulation"],  # Recommendation typically associated to older EPCs
        "Heating controls (programmer, room thermostat and TRVs)": _HEATING_CONTROLS,
        "Low energy lighting": ["low_energy_lighting"],
        "Increase loft insulation to 270 mm": ["loft_insulation"],
        "Heating controls (thermostatic radiator valves)": _HEATING_CONTROLS,
        "Solar water heating": ["solar_water_heating"],
        "Solar photovoltaic panels, 2.5 kWp": ["solar_pv"],
        "Heating controls (room thermostat and TRVs)": _HEATING_CONTROLS,
        "Change heating to gas condensing boiler": ["boiler_upgrade"],
        "Fan assisted storage heaters and dual immersion cylinder": ["high_heat_retention_storage_heaters"],
        "Flat roof or sloping ceiling insulation": ["flat_roof_insulation", "sloping_ceiling_insulation"],
        "Heating controls (room thermostat)": _HEATING_CONTROLS,
        "Band A condensing boiler": ["boiler_upgrade"],
        "Double glazing": ["double_glazing"],
        "Flue gas heat recovery device in conjunction with boiler": ["flue_gas_heat_recovery"],
        "Wind turbine": ["wind_turbine"],
        "Loft insulation": ["loft_insulation"],
        "Solar photovoltaic (PV) panels": ["solar_pv"],
        "Party wall insulation": ["party_wall_insulation"],
        'Draught proofing': ["draught_proofing"],
        "Roof insulation recommendation": [],
        "Cavity wall insulation recommendation": [],
        "Windows draught proofing": [],
        "Low energy lighting for all fixed outlets": ["low_energy_lighting"],
        "Cylinder thermostat recommendation": [],
        "Heating controls recommendation": [],
        "Replace boiler with Band A condensing boiler": ["boiler_upgrade"],
        "Band A condensing gas boiler": ["boiler_upgrade"],
        "Install Band A condensing heating unit": ["boiler_upgrade"],
        "Solar panel recommendation": [],
        "Double glazing recommendation": [],
        "Solid wall insulation recommendation": [],
        "Fuel change recommendation": [],
        "PV Cells recommendation": [],
        "Replacement glazing units": ["double_glazing"],
        "Heating controls (time and temperature zone control)": ["time_temperature_zone_control"],
        "High heat retention storage heaters": ["high_heat_retention_storage_heaters"],
        "Gas condensing boiler": ["boiler_upgrade"],
        "Change room heaters to condensing boiler": ["boiler_upgrade"],
        "Cylinder thermostat": ["cylinder_thermostat"],
        "Heat recovery system for mixer showers": ["heat_recovery_shower"],
        "Room-in-roof insulation": ["room_in_roof_insulation"],
        "Fan assisted storage heaters": [],
        "Fan-assisted storage heaters": [],
        "Step 1:": [],
        "Step 2:": [],
        'Step 3:': [],
        'Step 4:': [],
        'Step 5:': [],
        "Biomass stove with boiler": [],
        "Replace boiler with biomass boiler": [],
        "Heating controls (room thermostat and thermostatic radiator valves)": _HEATING_CONTROLS,
        "Heating controls (programmer, and thermostatic radiator valves)": _HEATING_CONTROLS,
        "Heating controls (programmer and TRVs)": _HEATING_CONTROLS,
        "Heating controls (programmer and room thermostat)": _HEATING_CONTROLS,
        "Replacement warm air unit": [],
        "Secondary glazing": ["secondary_glazing"],
        "Condensing heating unit": ["boiler_upgrade"],
        '???': [],
        'Solar photovoltaic panels, 2.5kWp': ["solar_pv"],
        'Heating controls (programmer, room thermostat and thermostatic radiator valves)': _HEATING_CONTROLS,
        'Translation missing: en.improvement_code.41.title': [],
        "Condensing boiler (separate from the range cooker)": ["boiler_upgrade"],
        "Heating controls (programmer and thermostatic radiator valves)": _HEATING_CONTROLS,
        'Heating controls (programmer room thermostat and thermostatic radiator valves)': _HEATING_CONTROLS,
        "Internal wall insulation": ["internal_wall_insulation"],
        "High heat retention storage heaters and dual immersion cylinder and dual rate meter": [
            "high_heat_retention_storage_heaters"
        ],
        "High heat retention storage heaters and dual rate meter": [
            "high_heat_retention_storage_heaters"
        ],
        "Increase loft insulation to 250mm": ["loft_insulation"],
        "Solar photovoltaics panels, 25% of roof area": ["solar_pv"],
        'Air or ground source heat pump': ["air_source_heat_pump"],
        "Add PV Battery": ["solar_pv_battery"],
        "Add PV diverter": ["solar_pv_diverter"],  # Don't have a recommendation yet
        "Draughtproof single-glazed windows": ["double_glazing"],
        "Upgrade heating controls": _HEATING_CONTROLS,
        "Low energy lighting recommendation": ["low_energy_lighting"],
        "Install cavity wall insulation": ["cavity_wall_insulation"],
        "Install solar water heating": ["solar_water_heating"],
        'Install photovoltaics, 25% of roof area': ["solar_pv"],
    }

    def __init__(
        self, address: str, postcode: str, rrn: str = None, address_postal_town: str = "", sap_rating: int = None
    ):
        """
        This class is tasked with retrieving the latest EPC data from the find my epc website
        :param address: The address of the property
        :param postcode: The postcode of the property
        :param rrn: The RRN of the EPC (if known)
        :param address_postal_town: Optional alternative address string. Search results that start with it
            (after normalisation) are accepted as a match as well
        :param sap_rating: Optional SAP score the certificate is expected to have (used as a sanity check)
        """
        self.address = address
        self.postcode = postcode
        self.rrn = rrn
        self.address_cleaned = self._normalise_address(self.address)
        # Wall descriptions of the most recently parsed certificate page
        self.walls = []
        self.address_postal_town = address_postal_town
        if self.address_postal_town:
            self.address_postal_town = self._normalise_address(self.address_postal_town)
        self.sap_rating = sap_rating

    # ------------------------------------------------------------------ small helpers

    @staticmethod
    def _normalise_address(text):
        """Strip commas and spaces and lower-case ``text`` so addresses can be compared by prefix."""
        return text.replace(",", "").replace(" ", "").lower()

    def _fetch_page(self, url):
        """Download ``url`` using the class headers and timeout and return the response body as text."""
        response = requests.get(url, headers=self.HEADERS, timeout=self.REQUEST_TIMEOUT)
        return response.text

    @staticmethod
    def get_text(elem):
        """Return the stripped text of ``elem``, or None when ``elem`` is missing."""
        return elem.get_text(strip=True) if elem else None

    @staticmethod
    def extract_low_carbon_sources(soup):
        """
        Read the "Low and zero carbon energy sources" list of a certificate page
        :param soup: The parsed certificate page
        :return: Dictionary mapping every listed source to True; empty when the section (or its list) is absent
        """
        section_header = soup.find("h3", string="Low and zero carbon energy sources")
        if section_header is None:
            return {}
        # The sources are the items of the first list following the header
        energy_list = section_header.find_next("ul")
        if energy_list is None:
            return {}
        return {item.get_text(strip=True): True for item in energy_list.find_all("li")}

    # ------------------------------------------------------------------ headline EPC figures

    @staticmethod
    def _parse_floor_area(text):
        """
        Convert the floor area cell text (its first space separated token) to an integer
        :param text: The text of the floor area cell, or None
        :return: The floor area, or None when the text is missing or not numeric. Very old EPCs do not carry a
            floor area, in which case the cell does not start with a number
        """
        if not text:
            return None
        first_token = text.split(" ")[0].replace(",", "")
        try:
            return int(float(first_token))
        except (ValueError, OverflowError):
            return None

    @staticmethod
    def _first_float(tag):
        """Return the first run of digits/dots in ``tag.text`` as a float, or None if there is no tag or match."""
        if tag is None:
            return None
        match = re.search(r"([\d.]+)", tag.text)
        return float(match.group(1)) if match else None

    def _feature_row_text(self, rows, feature_name, index=0):
        """
        Look up a row of the property features table
        :param rows: The table rows
        :param feature_name: Text the row header must contain. "Main heating" is matched exactly, so that it
            is not confused with "Main heating control"
        :param index: Which of the matching rows to use
        :return: Tuple (description, rating); both None when there is no such row
        """
        matches = []
        for row in rows:
            header = row.find("th")
            if header is None:
                continue
            if feature_name == "Main heating":
                is_match = header.text.strip() == "Main heating"
            else:
                is_match = feature_name in header.text
            if is_match:
                matches.append(row)
        # The length check happens after all filtering, so the index below is always valid
        if len(matches) <= index:
            return None, None
        cells = matches[index].find_all("td")
        description = self.get_text(cells[0]) if len(cells) > 0 else None
        rating = self.get_text(cells[1]) if len(cells) > 1 else None
        return description, self.RATING_MAP.get(rating, rating)

    def extract_epc_data(self, soup):
        """
        Pull the headline EPC figures out of a certificate page
        :param soup: The parsed certificate page
        :return: Dictionary with the floor area, the description and rating of the main building elements,
            the primary energy use and the current and potential CO2 emissions. Values that cannot be found on
            the page are None
        """
        results = {}
        # 1. Total floor area
        floor_area_label = soup.find("dt", string="Total floor area")
        floor_area_text = (
            self.get_text(floor_area_label.find_next_sibling("dd")) if floor_area_label is not None else None
        )
        results['total-floor-area'] = self._parse_floor_area(floor_area_text)

        # Table with features
        rows = soup.select("table.govuk-table tbody tr")
        # 2-3. First wall description and rating
        results['walls-description'], results['walls-energy-eff'] = self._feature_row_text(rows, "Wall", 0)
        # 4-5. First roof description and rating
        results['roof-description'], results['roof-energy-eff'] = self._feature_row_text(rows, "Roof", 0)
        # 6-7. Windows description and rating
        results['windows-description'], results['windows-energy-eff'] = self._feature_row_text(rows, "Window")
        # 8-9. Main heating description and rating
        results['mainheat-description'], results['mainheat-energy-eff'] = self._feature_row_text(
            rows, "Main heating"
        )
        # 10-11. Main heating control description and rating
        results['mainheatcont-description'], results['mainheatc-energy-eff'] = self._feature_row_text(
            rows, "Main heating control"
        )
        # 12-13. Hot water description and rating
        # NOTE(review): this key lacks the trailing "f" the other "*-energy-eff" keys have. It is left
        # unchanged because consumers of this dictionary may rely on it - confirm
        results['hotwater-description'], results['hot-water-energy-ef'] = self._feature_row_text(rows, "Hot water")
        # 14-15. Lighting description and rating
        results['lighting-description'], results['lighting-energy-eff'] = self._feature_row_text(rows, "Lighting")
        # 16. Floor description
        results['floor-description'], _ = self._feature_row_text(rows, "Floor")
        # 17. Secondary heating description
        results['secondheat-description'], _ = self._feature_row_text(rows, "Secondary heating")

        # 18. Primary energy use
        p_energy = soup.find(
            string=lambda t: bool(t) and "primary energy use for this property per year" in t.lower()
        )
        match = re.search(r"(\d+)\s+kilowatt", p_energy) if p_energy else None
        results['energy-consumption-current'] = int(match.group(1)) if match else None
        # 19. Current CO2 emissions
        results['co2-emissions-current'] = self._first_float(soup.find("dd", id="eir-property-produces"))
        # 20. Potential CO2 emissions
        results['co2-emissions-potential'] = self._first_float(soup.find("dd", id="eir-potential-production"))
        return results

    # ------------------------------------------------------------------ certificate page parsing

    @staticmethod
    def _rating_sentences(soup):
        """Return the (current, potential) rating sentences from the description of the rating graphic."""
        ratings = soup.find('desc', {'id': 'svg-desc'}).text
        sentences = ratings.split(".")
        return sentences[0], sentences[1]

    @staticmethod
    def _rating_score(sentence):
        """Return the SAP score of a rating sentence, which is its last space separated token."""
        return int(sentence.split(' ')[-1])

    @staticmethod
    def _rating_letter(sentence):
        """Return the EPC letter of a rating sentence, which is its sixth token counted from the end."""
        return sentence.split(' ')[-6]

    @staticmethod
    def _parse_bills(soup):
        """
        Return (heating_text, hot_water_text) from the "bills-affected" section.
        Early EPCs did not have this information, so either value is None when its list item is absent.
        """
        bills = soup.find('div', {'id': 'bills-affected'})
        bills_list = bills.find_all('li') if bills is not None else []
        heating_text = bills_list[0].text if len(bills_list) > 0 else None
        hot_water_text = bills_list[1].text if len(bills_list) > 1 else None
        return heating_text, hot_water_text

    def _parse_recommendations(self, soup, current_sap, current_sentence):
        """
        Read the recommended improvement steps and the SAP points each one adds
        :param soup: The parsed certificate page
        :param current_sap: The current SAP score of the property
        :param current_sentence: The current rating sentence (provides the starting EPC letter)
        :return: List of dictionaries with the keys step, measure, new_rating, new_epc and sap_points
        """
        recommendations = []
        recommendations_div = soup.find('div', class_='epb-recommended-improvements')
        if recommendations_div is None:
            return recommendations
        previous_sap_score = current_sap
        previous_epc = self._rating_letter(current_sentence)
        # Every step has a h3 header; the rating reached after the step follows it
        step_headers = recommendations_div.find_all('h3', class_='govuk-heading-m')
        for step_num, step_header in enumerate(step_headers, start=1):
            measure_title = step_header.text.strip().replace(f"Step {step_num}: ", "")
            rating_text = None
            potential_rating_div = step_header.find_next(
                'div', class_='epb-recommended-improvements__potential-rating'
            )
            if potential_rating_div is not None:
                # The rating is the bold text element of the graphic
                extracted_rating_text = potential_rating_div.find('text', class_='govuk-!-font-weight-bold')
                if extracted_rating_text is not None:
                    rating_text = extracted_rating_text.text.strip()
            if rating_text is None:
                # No rating published for this step: it leaves the rating where the previous step put it
                rating_text = " ".join([str(previous_sap_score), previous_epc])
            # The rating text is "<score> <letter>"
            rating_parts = rating_text.split()
            new_rating = int(rating_parts[0])
            new_epc = rating_parts[1]
            recommendations.append({
                "step": step_num,
                "measure": measure_title,
                "new_rating": new_rating,
                "new_epc": new_epc,
                "sap_points": new_rating - previous_sap_score
            })
            previous_sap_score = new_rating
            previous_epc = new_epc
        return recommendations

    @staticmethod
    def _label_key(label):
        """Return ``label`` without straight or typographic apostrophes, for apostrophe-insensitive lookups."""
        return label.replace("'", "").replace("\u2019", "")

    @classmethod
    def _check_assessment_keys(cls, assessment_data):
        """
        Make sure every expected label was found on the page
        :param assessment_data: The parsed assessment information
        :raises ValueError: If an expected label is missing
        """
        # Apostrophes are ignored so the check does not depend on whether a label is written with a
        # straight or a typographic apostrophe (NOTE(review): confirm which one the page uses)
        available = {cls._label_key(label) for label in assessment_data}
        for key in cls.EXPECTED_ASSESSMENT_KEYS:
            if cls._label_key(key) not in available:
                raise ValueError(f"Missing key: {key}")

    def _parse_assessment_data(self, soup):
        """
        Parse the assessor / accreditation scheme details of the "information" section
        :param soup: The parsed certificate page
        :return: Dictionary of label -> value, with the labels exactly as they appear on the page
        :raises ValueError: If an expected label is missing
        """
        assessment_information = soup.find('div', {'id': 'information'})
        rows = assessment_information.find_all('div', class_='govuk-summary-list__row')
        assessment_data = {}
        for row in rows:
            key = row.find('dt').text.strip()
            if key == "Type of assessment":
                # We dont reliably extract this
                continue
            value_tag = row.find('dd')
            link = value_tag.find('a')
            if link is not None:
                # The value is a link (email)
                value = link.text.strip()
            elif value_tag.find('summary') is not None:
                value = value_tag.find('span').text.strip()
            else:
                value = value_tag.text.strip()
            # Telephone and Email appear for both the assessor and the accreditation scheme. The first
            # occurrence is the assessor's, the second one the scheme's
            if key in ["Telephone", "Email"]:
                if "Assessor's " + key not in assessment_data:
                    assessment_data["Assessor's " + key] = value
                else:
                    assessment_data["Accreditation Scheme's " + key] = value
                continue
            assessment_data[key] = value
        self._check_assessment_keys(assessment_data)
        return assessment_data

    def _parse_property_components(self, soup):
        """Extract the property components of the page and remember the wall descriptions in ``self.walls``."""
        table_body = soup.find("tbody", class_="govuk-table__body")
        table_rows = table_body.find_all("tr") if table_body is not None else []
        property_components = self.extract_property_components(table_rows)
        self.walls = [x["description"] for x in property_components if x["component_name"] == "Wall"]
        return property_components

    def _parse_certificate(
        self, soup, epc_certificate, sap_2012_date, current_sentence, potential_sentence, current_sap,
        include_components
    ):
        """
        Parse everything both public retrieval methods need from a certificate page
        :param soup: The parsed certificate page
        :param epc_certificate: The certificate number to report
        :param sap_2012_date: The date of the SAP 2012 update (see format_recommendations)
        :param current_sentence: The current rating sentence
        :param potential_sentence: The potential rating sentence
        :param current_sap: The current SAP score
        :param include_components: Whether to add the "property_components" entry
        :return: The result dictionary
        """
        heating_text, hot_water_text = self._parse_bills(soup)
        raw_recommendations = self._parse_recommendations(soup, current_sap, current_sentence)
        assessment_data = self._parse_assessment_data(soup)
        # Sets self.walls, which format_recommendations reads
        property_components = self._parse_property_components(soup)
        recommendations = self.format_recommendations(raw_recommendations, assessment_data, sap_2012_date)
        low_carbon_energy_sources = self.extract_low_carbon_sources(soup)
        epc_data = self.extract_epc_data(soup)
        resulting_data = {
            'epc_certificate': epc_certificate,
            'current_epc_rating': self._rating_letter(current_sentence),
            'current_epc_efficiency': current_sap,
            'potential_epc_rating': self._rating_letter(potential_sentence),
            "potential_epc_efficiency": self._rating_score(potential_sentence),
            "heating_text": heating_text,
            "hot_water_text": hot_water_text,
            "recommendations": recommendations,
        }
        if include_components:
            resulting_data["property_components"] = property_components
        resulting_data["epc_data"] = epc_data
        resulting_data.update(assessment_data)
        resulting_data.update(low_carbon_energy_sources)
        return resulting_data

    def _extract_epc_from_soup(self, soup, epc_certificate, sap_2012_date=None):
        """
        Parse one certificate page
        :param soup: The parsed certificate page
        :param epc_certificate: The certificate number the page belongs to
        :param sap_2012_date: The date of the SAP 2012 update (see format_recommendations)
        :return: The result dictionary (without property components, page source or address)
        """
        current_sentence, potential_sentence = self._rating_sentences(soup)
        current_sap = self._rating_score(current_sentence)
        return self._parse_certificate(
            soup, epc_certificate, sap_2012_date, current_sentence, potential_sentence, current_sap,
            include_components=False
        )

    def retrieve_all_find_my_epc_data(self, sap_2012_date=None):
        """
        This is a quick function to retrieve all the data from the find my epc website for a given postcode and
        address. Using this to fulfill a short term need to retrieve all history for a property
        :param sap_2012_date: The date of the SAP 2012 update (see format_recommendations)
        :return: List of result dictionaries, the selected certificate first and then the certificates listed
            in its "other certificates" section, each carrying its own certificate number. Empty when no
            certificate is found for the address
        """
        if self.rrn:
            # We build the URL directly
            epc_certificate = self.rrn
            chosen_epc = f"{self.BASE_ENERGY_URL}/energy-certificate/{epc_certificate}"
        else:
            chosen_epc, epc_certificate = self._find_epc_page()
            if chosen_epc is None:
                logger.info("No EPC found for address %s, postcode %s", self.address, self.postcode)
                return []
        address_res = BeautifulSoup(self._fetch_page(chosen_epc), features="html.parser")
        # Always include the currently selected EPC first
        pages = [(epc_certificate, address_res)]

        # The section on other certificates for this property links to the historic certificates
        other_cert_section = address_res.find('div', id='other_certificates_and_reports')
        other_cert_links = []
        if other_cert_section is not None:
            other_cert_links = other_cert_section.select('dd.govuk-summary-list__value a')
        for link in other_cert_links:
            cert_path = link['href'].strip()
            # The certificate number is the last segment of the link, as it is for the selected certificate
            cert_number = cert_path.rstrip('/').split('/')[-1]
            page_text = self._fetch_page(f"{self.BASE_ENERGY_URL}{cert_path}")
            time.sleep(self.REQUEST_PAUSE_SECONDS)
            pages.append((cert_number, BeautifulSoup(page_text, features="html.parser")))

        return [self._extract_epc_from_soup(soup, cert_number, sap_2012_date) for cert_number, soup in pages]

    # ------------------------------------------------------------------ locating the certificate page

    def _match_kind(self, extracted_address_cleaned):
        """
        Classify a normalised address from the search results against the address of this instance
        :param extracted_address_cleaned: The normalised address of a search result
        :return: "primary" when it starts with our address or postal town address, "backup" when our address
            starts with "flat" and the result starts with our address without that word, otherwise None
        """
        if extracted_address_cleaned.startswith(self.address_cleaned):
            return "primary"
        if self.address_postal_town and extracted_address_cleaned.startswith(self.address_postal_town):
            return "primary"
        if self.address_cleaned.startswith("flat"):
            # We have a flat address, so we can try and match without the leading "flat"
            if extracted_address_cleaned.startswith(self.address_cleaned[4:]):
                return "backup"
        return None

    @staticmethod
    def _row_expiry_date(row):
        """
        Return the expiry date of a search result row. Rows without a readable date get ``datetime.min`` so
        they sort behind every dated row instead of aborting the search.
        """
        expiry_date_tag = row.find('td', class_='govuk-table__cell date')
        if expiry_date_tag is None:
            return datetime.min
        expiry_span = expiry_date_tag.parent.find('span')
        if expiry_span is None:
            return datetime.min
        try:
            return datetime.strptime(expiry_span.text.strip(), '%d %B %Y')
        except ValueError:
            return datetime.min

    @staticmethod
    def _pick_latest_candidate(extracted_table, backup_flat):
        """
        Choose the search result to use
        :param extracted_table: The primary matches
        :param backup_flat: The flat fallback matches, only considered when there is no primary match
        :return: The candidate with the most recent expiry date (the first one on ties), or None when both
            lists are empty
        """
        candidates = extracted_table if extracted_table else backup_flat
        if not candidates:
            return None
        return max(candidates, key=lambda candidate: candidate["expiry_date"])

    def _find_epc_page(self):
        """
        This function is used to find the EPC page source for a given address and postcode.
        It is done by fetching the page, associating to the postcode and then matching the
        addresses on the page to the address we have been given.
        :return: Tuple (certificate url, certificate number), or (None, None) when no address matches
        """
        postcode_input = self.postcode.replace(" ", "+")
        postcode_search = self.SEARCH_POSTCODE_URL.format(postcode_input=postcode_input)
        postcode_res = BeautifulSoup(self._fetch_page(postcode_search), features="html.parser")

        extracted_table, backup_flat = [], []
        for row in postcode_res.find_all('tr', class_='govuk-table__row'):
            address_tag = row.find('a', class_='govuk-link')
            if address_tag is None:
                continue
            extracted_address = address_tag.text.strip()
            kind = self._match_kind(self._normalise_address(extracted_address))
            if kind is None:
                continue
            candidate = {
                "extracted_address": extracted_address,
                "extracted_address_url": address_tag['href'],
                "expiry_date": self._row_expiry_date(row),
            }
            if kind == "primary":
                extracted_table.append(candidate)
            else:
                backup_flat.append(candidate)

        chosen = self._pick_latest_candidate(extracted_table, backup_flat)
        if chosen is None:
            # This is a relatively new change, as of November 2025, but we see cases where properties do not
            # have data appearing on the find my EPC website, particularly for older EPCs. In this case, we
            # allow for us to not find any information and return nothing
            return None, None
        chosen_epc = self.BASE_ENERGY_URL + chosen['extracted_address_url']
        epc_certificate = chosen_epc.split('/')[-1]
        return chosen_epc, epc_certificate

    @staticmethod
    def extract_property_components(property_features_table: list):
        """
        Function to pull out a table for property components, marking their appearance index
        :param property_features_table: The rows of the property features table, as extracted by BeautifulSoup
        :return: List of property components with appearance index. Rows without a header cell or with fewer
            than two data cells are skipped
        """
        property_components = []
        # The appearance index counts how often a component name was seen before. It becomes a reference for
        # the building part the component is associated to (main, extensions, etc)
        component_count = {}
        for row in property_features_table:
            header = row.find("th")
            cells = row.find_all("td")
            if header is None or len(cells) < 2:
                continue
            component_name = header.text.strip()
            appearance_index = component_count.get(component_name, 0)
            component_count[component_name] = appearance_index + 1
            property_components.append(
                {
                    "component_name": component_name,
                    "description": cells[0].text.strip(),
                    "efficiency": cells[1].text.strip(),
                    "appearance_index": appearance_index,
                }
            )
        return property_components

    def retrieve_newest_find_my_epc_data(
        self, sap_2012_date=None, return_page=False, epc_page_source=None, rrn=None
    ):
        """
        For a post code and address, we pull out all the required data from the find my epc website
        :param sap_2012_date: The date of the SAP 2012 update (see format_recommendations)
        :param return_page: If True, a tuple (data, page source) is returned instead of only the data
        :param epc_page_source: Previously downloaded page source of the certificate
        :param rrn: The certificate number
        :return: The result dictionary. It is empty when no certificate is found for the address, and holds
            only empty "epc_certificate" and "page_source" entries when the SAP sanity check fails. With
            return_page the page source (None in those two cases) is returned alongside
        :raises ValueError: If a page source is given but no certificate number is known
        """
        if epc_page_source is None and rrn is None:
            # NOTE(review): self.rrn is not consulted here, the page is searched for by address - confirm
            chosen_epc, rrn = self._find_epc_page()
            if chosen_epc is None:
                # We have no resulting data
                logger.info("No EPC found for address %s, postcode %s", self.address, self.postcode)
                return ({}, None) if return_page else {}
            epc_certificate = rrn
            epc_page_source = self._fetch_page(chosen_epc)
        elif self.rrn or rrn:
            # NOTE(review): this branch also runs when epc_page_source is given, so a supplied page source
            # is replaced by a fresh download - confirm this is intended
            epc_certificate = self.rrn if self.rrn else rrn
            chosen_epc = f"{self.BASE_ENERGY_URL}/energy-certificate/{epc_certificate}"
            epc_page_source = self._fetch_page(chosen_epc)
        else:
            if rrn is None:
                raise ValueError("rrn must be provided if epc_page_source is provided")
            epc_certificate = rrn
        address_res = BeautifulSoup(epc_page_source, features="html.parser")

        # Key data we want to retrieve:
        # 1) Rating
        # 2) Bills estimates
        # 3) Recommendations and SAP points
        # 4) Low and zero carbon energy sources
        # 5) The wall types of the property - used for determining if we have an extension wall insulation
        # recommendation
        current_sentence, potential_sentence = self._rating_sentences(address_res)
        current_sap = self._rating_score(current_sentence)
        if self.sap_rating:
            # NOTE(review): rrn has been overwritten by the address search above, so this check can only
            # fire when the page was fetched through self.rrn - confirm this is intended
            if current_sap != self.sap_rating and not rrn:
                # This means we likely have the wrong data. If we are in this scenario, we return nothing
                mismatch = {
                    "epc_certificate": None,
                    "page_source": None,
                }
                return (mismatch, None) if return_page else mismatch

        # The reported certificate number is the one of the page that was actually parsed
        resulting_data = self._parse_certificate(
            address_res, epc_certificate, sap_2012_date, current_sentence, potential_sentence, current_sap,
            include_components=True
        )

        # Pull out the address information which can be found in the box with the class "epc-address"
        # We split it up on break tags
        addr = address_res.find("p", class_="epc-address").get_text(separator="\n").strip()
        lines = addr.split("\n")
        resulting_data["page_source"] = epc_page_source
        # Add in address a postcode from the page - covers use cases where we are given RRN
        resulting_data["address1"] = lines[0]
        resulting_data["address2"] = lines[1] if len(lines) > 2 else ""
        resulting_data["postcode"] = lines[-1]

        if return_page:
            # We return the page text as well, which can be parsed again later
            return resulting_data, epc_page_source
        return resulting_data

    def format_recommendations(self, recommendations, assessment_data, sap_2012_date=None):
        """
        This function converts the recommendations to a format that we can use in the engine as a non-intrusive
        survey
        :param recommendations: The recommendations from the EPC
        :param assessment_data: The assessment data from the EPC
        :param sap_2012_date: The date of the SAP 2012 update. Recommendations of certificates issued before
            it are flagged with survey=False
        :return: List of dictionaries with the keys type, sap_points and survey (plus suitable for solar pv)
        :raises KeyError: If a recommendation title has no entry in MEASURE_MAP
        """
        survey = True
        if sap_2012_date is not None:
            certificate_date = datetime.strptime(assessment_data["Date of certificate"], "%d %B %Y")
            if certificate_date < pd.to_datetime(sap_2012_date):
                survey = False

        formatted_recommendations = []
        for rec in recommendations:
            for measure in self.MEASURE_MAP[rec["measure"]]:
                # Cavity wall insulation on a property whose first wall is solid brick can only concern an
                # extension. Without any parsed wall the recommendation is kept as it is
                if measure == "cavity_wall_insulation" and self.walls and "solid brick" in self.walls[0].lower():
                    measure = "extension_cavity_wall_insulation"
                to_append = {
                    "type": measure,
                    "sap_points": rec["sap_points"],
                    "survey": survey,
                }
                if measure == "solar_pv":
                    to_append["suitable"] = True
                formatted_recommendations.append(to_append)
        return formatted_recommendations

    @classmethod
    def get_from_epc(cls, epc, epc_page_source=None, rrn=None, address_postal_town=None, sap_rating=None):
        """
        Retrieve the website data for an EPC record and split it into the parts the callers work with
        :param epc: The EPC record; its address, postcode and uprn entries are read
        :param epc_page_source: Previously downloaded page source of the certificate
        :param rrn: The certificate number, required when epc_page_source is given
        :param address_postal_town: Alternative address to match against the search results
        :param sap_rating: The SAP score the certificate is expected to have
        :return: Tuple (non invasive recommendations, patch, page source, property components)
        :raises ValueError: If epc_page_source is given without rrn
        """
        if epc_page_source is not None and rrn is None:
            raise ValueError("rrn must be provided if epc_page_source is provided")
        searcher = cls(
            address=epc["address"], postcode=epc["postcode"], address_postal_town=address_postal_town,
            sap_rating=sap_rating
        )
        find_epc_data = searcher.retrieve_newest_find_my_epc_data(epc_page_source=epc_page_source, rrn=rrn)

        non_invasive_recommendations = {
            "uprn": epc["uprn"],
            "address": epc["address"],
            "postcode": epc["postcode"],
            "recommendations": find_epc_data.get("recommendations", []),
        }

        lodgment_date = find_epc_data.get("Date of certificate", None)
        if not pd.isnull(lodgment_date):
            lodgment_date = str(datetime.strptime(str(lodgment_date), "%d %B %Y"))
        # We need to add the patch information
        patch = {
            "current-energy-rating": find_epc_data.get("current_epc_rating"),
            "current-energy-efficiency": find_epc_data.get("current_epc_efficiency"),
            "potential-energy-rating": find_epc_data.get("potential_epc_rating"),
            "potential-energy-efficiency": find_epc_data.get("potential_epc_efficiency"),
            **find_epc_data.get("epc_data", {}),
            "lodgement-date": lodgment_date
        }
        page_source = {
            "rrn": find_epc_data.get("epc_certificate"),
            "page_source": find_epc_data.get("page_source")
        }
        property_components = find_epc_data.get("property_components", [])
        return non_invasive_recommendations, patch, page_source, property_components

    @classmethod
    def get_from_epc_with_fallback(
        cls, epc, epc_page, rrn, cleaned_address=None, config_address=None, address_postal_town=None
    ):
        """
        Attempt get_from_epc with:
        1) Original EPC
        2) EPC with cleaned address
        3) EPC with configured address
        in that order.
        :param epc: The EPC record
        :param epc_page: Previously downloaded page source of the certificate (may be None)
        :param rrn: The certificate number
        :param cleaned_address: Address to use in the second attempt
        :param config_address: Address to use in the third attempt
        :param address_postal_town: Alternative address to match against the search results
        :return: The result of the first attempt that succeeds
        :raises RuntimeError: If every attempt fails; the last error is attached as the cause
        """
        # The data we'll use to attempt retrieval, starting with the original record
        attempts = [epc]
        for alternative_address in (cleaned_address, config_address):
            if alternative_address:
                modified = deepcopy(epc)
                for k in ["address", "address1"]:
                    modified[k] = alternative_address
                attempts.append(modified)

        # The score is only used as a sanity check, so a record without a usable score still gets its attempts
        try:
            sap_rating = float(epc["current-energy-efficiency"])
        except (KeyError, TypeError, ValueError):
            sap_rating = None

        last_error = None
        for idx, attempt in enumerate(attempts, start=1):
            try:
                return cls.get_from_epc(
                    attempt, epc_page, rrn=rrn, address_postal_town=address_postal_town, sap_rating=sap_rating
                )
            except Exception as e:
                last_error = e
                logger.error("Attempt %s failed: %s", idx, e)
        raise RuntimeError(f"All EPC retrieval attempts failed: {last_error}") from last_error