"""Scraper for the GOV.UK "Find an energy certificate" service."""
import re
import time
from copy import deepcopy
from datetime import datetime
from typing import Optional

import pandas as pd
import requests
from bs4 import BeautifulSoup

from utils.logger import setup_logger

logger = setup_logger()


class RetrieveFindMyEpc:
    """
    Retrieves EPC data for a property from the find-energy-certificate website, either by searching the
    postcode and matching the address, or directly from a known certificate number (RRN).
    """

    SEARCH_POSTCODE_URL = (
        "https://find-energy-certificate.service.gov.uk/find-a-certificate/search-by-postcode?postcode={postcode_input}"
    )
    BASE_ENERGY_URL = "https://find-energy-certificate.service.gov.uk"
    HEADERS = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/111.0.0.0 Safari/537.36'
    }
    # Seconds to wait for the website before giving up; without a timeout requests can block forever
    REQUEST_TIMEOUT = 30

    # The website uses sentence case for ratings, these are the title-cased equivalents we emit
    RATING_MAP = {
        "Very poor": "Very Poor",
        "Very good": "Very Good",
    }

    # Keys that must be present in the assessor details of a certificate page
    EXPECTED_ASSESSMENT_KEYS = (
        'Assessor’s name',
        "Assessor's Telephone",
        "Assessor's Email",
        'Assessor’s ID',
        'Accreditation scheme',
        'Assessor’s declaration',
        "Accreditation Scheme's Telephone",
        "Accreditation Scheme's Email",
        'Date of assessment',
        'Date of certificate',
    )

    _HEATING_CONTROL_MEASURES = ("roomstat_programmer_trvs", "time_temperature_zone_control")

    # Maps the recommendation titles shown on the website to the measure types we emit. An empty
    # tuple means the title is recognised but produces no recommendation.
    MEASURE_MAP = {
        "Internal or external wall insulation": ("internal_wall_insulation", "external_wall_insulation"),
        "Hot water cylinder insulation": ("hot_water_tank_insulation",),
        "Hot water cylinder thermostat": ("cylinder_thermostat",),
        "High performance external doors": ("insulated_doors",),
        "Floor insulation (solid floor)": ("solid_floor_insulation",),
        "Floor insulation (suspended floor)": ("suspended_floor_insulation",),
        "Double glazed windows": ("double_glazing",),
        "Cavity wall insulation": ("cavity_wall_insulation",),
        "Replace boiler with new condensing boiler": ("boiler_upgrade",),
        "Floor insulation": ("floor_insulation",),  # Recommendation typically associated to older EPCs
        "Heating controls (programmer, room thermostat and TRVs)": _HEATING_CONTROL_MEASURES,
        "Low energy lighting": ("low_energy_lighting",),
        "Increase loft insulation to 270 mm": ("loft_insulation",),
        "Heating controls (thermostatic radiator valves)": _HEATING_CONTROL_MEASURES,
        "Solar water heating": ("solar_water_heating",),
        "Solar photovoltaic panels, 2.5 kWp": ("solar_pv",),
        "Heating controls (room thermostat and TRVs)": _HEATING_CONTROL_MEASURES,
        "Change heating to gas condensing boiler": ("boiler_upgrade",),
        "Fan assisted storage heaters and dual immersion cylinder": ("high_heat_retention_storage_heaters",),
        "Flat roof or sloping ceiling insulation": ("flat_roof_insulation", "sloping_ceiling_insulation"),
        "Heating controls (room thermostat)": _HEATING_CONTROL_MEASURES,
        "Band A condensing boiler": ("boiler_upgrade",),
        "Double glazing": ("double_glazing",),
        "Flue gas heat recovery device in conjunction with boiler": ("flue_gas_heat_recovery",),
        "Wind turbine": ("wind_turbine",),
        "Loft insulation": ("loft_insulation",),
        "Solar photovoltaic (PV) panels": ("solar_pv",),
        "Party wall insulation": ("party_wall_insulation",),
        'Draught proofing': ("draught_proofing",),
        "Roof insulation recommendation": (),
        "Cavity wall insulation recommendation": (),
        "Windows draught proofing": (),
        "Low energy lighting for all fixed outlets": ("low_energy_lighting",),
        "Cylinder thermostat recommendation": (),
        "Heating controls recommendation": (),
        "Replace boiler with Band A condensing boiler": ("boiler_upgrade",),
        "Band A condensing gas boiler": ("boiler_upgrade",),
        "Install Band A condensing heating unit": ("boiler_upgrade",),
        "Solar panel recommendation": (),
        "Double glazing recommendation": (),
        "Solid wall insulation recommendation": (),
        "Fuel change recommendation": (),
        "PV Cells recommendation": (),
        "Replacement glazing units": ("double_glazing",),
        "Heating controls (time and temperature zone control)": ("time_temperature_zone_control",),
        "High heat retention storage heaters": ("high_heat_retention_storage_heaters",),
        "Gas condensing boiler": ("boiler_upgrade",),
        "Change room heaters to condensing boiler": ("boiler_upgrade",),
        "Cylinder thermostat": ("cylinder_thermostat",),
        "Heat recovery system for mixer showers": ("heat_recovery_shower",),
        "Room-in-roof insulation": ("room_in_roof_insulation",),
        "Fan assisted storage heaters": (),
        "Fan-assisted storage heaters": (),
        "Step 1:": (),
        "Step 2:": (),
        'Step 3:': (),
        'Step 4:': (),
        'Step 5:': (),
        "Biomass stove with boiler": (),
        "Replace boiler with biomass boiler": (),
        "Heating controls (room thermostat and thermostatic radiator valves)": _HEATING_CONTROL_MEASURES,
        "Heating controls (programmer, and thermostatic radiator valves)": _HEATING_CONTROL_MEASURES,
        "Heating controls (programmer and TRVs)": _HEATING_CONTROL_MEASURES,
        "Heating controls (programmer and room thermostat)": _HEATING_CONTROL_MEASURES,
        "Replacement warm air unit": (),
        "Secondary glazing": ("secondary_glazing",),
        "Condensing heating unit": ("boiler_upgrade",),
        '???': (),
        'Solar photovoltaic panels, 2.5kWp': ("solar_pv",),
        'Heating controls (programmer, room thermostat and thermostatic radiator valves)': (
            _HEATING_CONTROL_MEASURES
        ),
        'Translation missing: en.improvement_code.41.title': (),
        "Condensing boiler (separate from the range cooker)": ("boiler_upgrade",),
        "Heating controls (programmer and thermostatic radiator valves)": _HEATING_CONTROL_MEASURES,
        'Heating controls (programmer room thermostat and thermostatic radiator valves)': (
            _HEATING_CONTROL_MEASURES
        ),
        "Internal wall insulation": ("internal_wall_insulation",),
        "High heat retention storage heaters and dual immersion cylinder and dual rate meter": (
            "high_heat_retention_storage_heaters",
        ),
        "High heat retention storage heaters and dual rate meter": (
            "high_heat_retention_storage_heaters",
        ),
        "Increase loft insulation to 250mm": ("loft_insulation",),
        "Solar photovoltaics panels, 25% of roof area": ("solar_pv",),
        'Air or ground source heat pump': ("air_source_heat_pump",),
        "Add PV Battery": ("solar_pv_battery",),
        "Add PV diverter": ("solar_pv_diverter",),  # Don't have a recommendation yet
        "Draughtproof single-glazed windows": ("double_glazing",),
        "Upgrade heating controls": _HEATING_CONTROL_MEASURES,
        "Low energy lighting recommendation": ("low_energy_lighting",),
        "Install cavity wall insulation": ("cavity_wall_insulation",),
        "Install solar water heating": ("solar_water_heating",),
        'Install photovoltaics, 25% of roof area': ("solar_pv",),
    }

    def __init__(
        self,
        address: str,
        postcode: str,
        rrn: Optional[str] = None,
        address_postal_town: Optional[str] = "",
        sap_rating: Optional[float] = None,
    ):
        """
        This class is tasked with retrieving the latest EPC data from the find my epc website

        :param address: The address of the property
        :param postcode: The postcode of the property
        :param rrn: The RRN of the EPC (if known)
        :param address_postal_town: Alternative address string that is also accepted when matching search
            results
        :param sap_rating: The SAP score we expect the EPC to have; used to reject an address-matched
            certificate whose score differs
        """
        self.address = address
        self.postcode = postcode
        self.rrn = rrn
        self.address_cleaned = self._normalise(self.address)

        # Containers for the extracted components
        self.walls = []

        self.address_postal_town = address_postal_town
        if self.address_postal_town:
            self.address_postal_town = self._normalise(self.address_postal_town)

        self.sap_rating = sap_rating

    @staticmethod
    def _normalise(text):
        """Strip commas and spaces and lower-case the text so that addresses can be prefix-compared."""
        return text.replace(",", "").replace(" ", "").lower()

    def _fetch_page(self, url):
        """
        Download a page from the website

        :param url: The URL to fetch
        :return: Tuple of (parsed BeautifulSoup document, raw page text)
        """
        response = requests.get(url, headers=self.HEADERS, timeout=self.REQUEST_TIMEOUT)
        return BeautifulSoup(response.text, features="html.parser"), response.text

    @staticmethod
    def extract_low_carbon_sources(soup):
        """
        Pull out the "Low and zero carbon energy sources" list of a certificate page

        :param soup: The parsed certificate page
        :return: Dictionary of {source name: True}; empty if the page has no such section
        """
        # Find the section header
        section_header = soup.find("h3", string="Low and zero carbon energy sources")
        if not section_header:
            return {}

        # Locate the list following the header
        energy_list = section_header.find_next("ul")
        if energy_list is None:
            return {}

        # Extract the list items
        return {item.get_text(strip=True): True for item in energy_list.find_all("li")}

    @staticmethod
    def get_text(elem):
        """Return the stripped text of a BeautifulSoup element, or None when the element is missing."""
        return elem.get_text(strip=True) if elem else None

    @staticmethod
    def _first_float(tag):
        """Return the first run of digits/dots in the tag's text as a float, or None if there is none."""
        if tag is None:
            return None
        match = re.search(r"([\d.]+)", tag.text)
        return float(match.group(1)) if match else None

    def extract_epc_data(self, soup):
        """
        Extract the property feature descriptions/ratings, floor area, energy use and CO2 figures from a
        certificate page. Values that cannot be found on the page are returned as None.

        :param soup: The parsed certificate page
        :return: Dictionary of extracted values
        """
        results = {}

        # 1. Total floor area
        # We have some instances of very old EPCs where the total floor area is not available, in which
        # case the first word is not a number
        tfa_label = soup.find("dt", string="Total floor area")
        tfa_text = self.get_text(tfa_label.find_next_sibling("dd")) if tfa_label else None
        tfa = tfa_text.split(" ")[0] if tfa_text else ""
        results['total-floor-area'] = int(tfa) if tfa.isdigit() else None

        # Table with features
        rows = soup.select("table.govuk-table tbody tr")

        def get_feature_row_text(feature_name, index=0):
            matches = []
            for row in rows:
                header = row.find("th")
                if header is None:
                    continue
                if feature_name == "Main heating":
                    # A commonly seen case is when feature_name is Main heating and we want to make sure
                    # we get main heating and not main heating control
                    is_match = header.text.strip() == "Main heating"
                else:
                    is_match = feature_name in header.text
                if is_match:
                    matches.append(row)

            # The length check must happen after the narrowing above, otherwise a page with only a
            # "Main heating control" row would index into an empty list
            if len(matches) <= index:
                return None, None

            cells = matches[index].find_all("td")
            if len(cells) < 2:
                return None, None

            description = self.get_text(cells[0])
            rating = self.get_text(cells[1])
            return description, self.RATING_MAP.get(rating, rating)

        # 2-3. First wall description and rating
        results['walls-description'], results['walls-energy-eff'] = get_feature_row_text("Wall", 0)
        # 4-5. First roof description and rating
        results['roof-description'], results['roof-energy-eff'] = get_feature_row_text("Roof", 0)
        # 6-7. Windows description and rating
        results['windows-description'], results['windows-energy-eff'] = get_feature_row_text("Window")
        # 8-9. Main heating description and rating
        results['mainheat-description'], results['mainheat-energy-eff'] = get_feature_row_text("Main heating")
        # 10-11. Main heating control description and rating
        results['mainheatcont-description'], results['mainheatc-energy-eff'] = get_feature_row_text(
            "Main heating control"
        )
        # 12-13. Hot water description and rating
        # NOTE(review): 'hot-water-energy-ef' looks like a typo of 'hot-water-energy-eff'. It is kept as-is
        # because consumers of this dictionary may depend on it - confirm before renaming
        results['hotwater-description'], results['hot-water-energy-ef'] = get_feature_row_text("Hot water")
        # 14-15. Lighting description and rating
        results['lighting-description'], results['lighting-energy-eff'] = get_feature_row_text("Lighting")
        # 16. Floor description
        results['floor-description'], _ = get_feature_row_text("Floor")
        # 17. Secondary heating description
        results['secondheat-description'], _ = get_feature_row_text("Secondary heating")

        # 18. Primary energy use
        p_energy = soup.find(
            string=lambda t: bool(t) and "primary energy use for this property per year" in t.lower()
        )
        # We should always have this, but we don't want a missing sentence to fail the whole extraction
        match = re.search(r"(\d+)\s+kilowatt", p_energy) if p_energy else None
        results['energy-consumption-current'] = int(match.group(1)) if match else None

        # 19. Current CO2 emissions
        results['co2-emissions-current'] = self._first_float(soup.find("dd", id="eir-property-produces"))

        # Need co2-emiss-curr-per-floor-area

        # 20. Potential CO2 emissions
        results['co2-emissions-potential'] = self._first_float(soup.find("dd", id="eir-potential-production"))

        return results

    @staticmethod
    def _parse_ratings(soup):
        """
        Parse the rating summary of a certificate page. The summary holds two sentences (current, then
        potential), each ending in "<letter> with a score of <number>".

        :param soup: The parsed certificate page
        :return: Tuple of (current letter, current score, potential letter, potential score)
        """
        ratings = soup.find('desc', {'id': 'svg-desc'}).text
        sentences = ratings.split(".")
        current_words = sentences[0].split(' ')
        potential_words = sentences[1].split(' ')
        return current_words[-6], int(current_words[-1]), potential_words[-6], int(potential_words[-1])

    @staticmethod
    def _parse_bills(soup):
        """
        Retrieve the heating and hot water texts of the bills section

        :param soup: The parsed certificate page
        :return: Tuple of (heating text, hot water text); None for anything not on the page
        """
        bills = soup.find('div', {'id': 'bills-affected'})
        bills_list = bills.find_all('li') if bills is not None else []
        # If the list is empty, it's usually because the EPC was very old. Early EPCs did not have this
        # information
        heating_text = bills_list[0].text if bills_list else None
        hot_water_text = bills_list[1].text if len(bills_list) > 1 else None
        return heating_text, hot_water_text

    @staticmethod
    def _parse_recommendation_steps(soup, current_sap, current_epc):
        """
        Retrieve the recommended improvement steps and the SAP points each one adds

        :param soup: The parsed certificate page
        :param current_sap: The current SAP score, the baseline for the first step
        :param current_epc: The current EPC letter, the baseline for the first step
        :return: List of dictionaries with keys step, measure, new_rating, new_epc and sap_points
        """
        recommendations = []
        recommendations_div = soup.find('div', class_='epb-recommended-improvements')
        if recommendations_div is None:
            return recommendations

        # Find all h3 headers for each step and extract their related information
        step_headers = recommendations_div.find_all('h3', class_='govuk-heading-m')
        previous_sap_score = current_sap
        previous_epc = current_epc

        for step_num, step_header in enumerate(step_headers, start=1):
            # Extract the step title (the measure)
            measure_title = step_header.text.strip().replace(f"Step {step_num}: ", "")

            # Find the div containing the potential rating within the same section
            potential_rating_div = step_header.find_next(
                'div', class_='epb-recommended-improvements__potential-rating'
            )
            if potential_rating_div is None:
                continue

            # Extract the rating text within the SVG text element. If there is none, we carry the
            # previous rating forward so the step contributes zero points
            extracted_rating_text = potential_rating_div.find('text', class_='govuk-!-font-weight-bold')
            if extracted_rating_text is not None:
                rating_text = extracted_rating_text.text.strip()
            else:
                rating_text = " ".join([str(previous_sap_score), previous_epc])

            # Parse the rating text to separate the numeric rating and EPC letter
            rating_parts = rating_text.split()
            new_rating = int(rating_parts[0])
            new_epc = rating_parts[1]

            recommendations.append({
                "step": step_num,
                "measure": measure_title,
                "new_rating": new_rating,
                "new_epc": new_epc,
                "sap_points": new_rating - previous_sap_score
            })
            previous_sap_score = new_rating
            previous_epc = new_epc

        return recommendations

    def _parse_assessment_data(self, soup):
        """
        Parse the assessor and accreditation scheme details of a certificate page

        :param soup: The parsed certificate page
        :return: Dictionary of the assessment details
        :raises ValueError: If any of EXPECTED_ASSESSMENT_KEYS is missing from the page
        """
        # Search for the assessment information
        assessment_information = soup.find('div', {'id': 'information'})
        rows = []
        if assessment_information is not None:
            rows = assessment_information.find_all('div', class_='govuk-summary-list__row')

        assessment_data = {}
        for row in rows:
            key_tag = row.find('dt')
            value_tag = row.find('dd')
            if key_tag is None or value_tag is None:
                continue

            key = key_tag.text.strip()
            if key == "Type of assessment":
                # We don't reliably extract this
                continue

            # Check if value contains a link (email)
            link = value_tag.find('a')
            if link is not None:
                value = link.text.strip()
            elif value_tag.find('summary') is not None and value_tag.find('span') is not None:
                value = value_tag.find('span').text.strip()
            else:
                value = value_tag.text.strip()

            # These are keys that we have for both the surveyor and the accreditation scheme. The
            # surveyor's details appear first, so the first occurrence is theirs and the second is the
            # scheme's
            if key in ["Telephone", "Email"]:
                if "Assessor's " + key not in assessment_data:
                    assessment_data["Assessor's " + key] = value
                else:
                    assessment_data["Accreditation Scheme's " + key] = value
                continue

            assessment_data[key] = value

        # Check we have all the expected keys
        for key in self.EXPECTED_ASSESSMENT_KEYS:
            if key not in assessment_data:
                raise ValueError(f"Missing key: {key}")

        return assessment_data

    def _parse_certificate(self, soup, current_sap, current_epc, sap_2012_date=None):
        """
        Parse everything on a certificate page other than the headline ratings. As a side effect this
        sets self.walls, which format_recommendations relies on.

        :param soup: The parsed certificate page
        :param current_sap: The current SAP score of the certificate
        :param current_epc: The current EPC letter of the certificate
        :param sap_2012_date: The date of the SAP 2012 update
        :return: Dictionary with keys heating_text, hot_water_text, recommendations, property_components,
            epc_data, assessment_data and low_carbon_energy_sources
        """
        heating_text, hot_water_text = self._parse_bills(soup)
        raw_recommendations = self._parse_recommendation_steps(soup, current_sap, current_epc)
        assessment_data = self._parse_assessment_data(soup)

        # The wall types of the property
        features_body = soup.find("tbody", class_="govuk-table__body")
        feature_rows = features_body.find_all("tr") if features_body is not None else []
        property_components = self.extract_property_components(feature_rows)
        self.walls = [x["description"] for x in property_components if x["component_name"] == "Wall"]

        # The walls must be set before we format the recommendations
        recommendations = self.format_recommendations(raw_recommendations, assessment_data, sap_2012_date)

        return {
            "heating_text": heating_text,
            "hot_water_text": hot_water_text,
            "recommendations": recommendations,
            "property_components": property_components,
            "epc_data": self.extract_epc_data(soup),
            "assessment_data": assessment_data,
            "low_carbon_energy_sources": self.extract_low_carbon_sources(soup),
        }

    def _extract_epc_from_soup(self, soup, epc_certificate, sap_2012_date=None):
        """
        Build the result dictionary for a single certificate page

        :param soup: The parsed certificate page
        :param epc_certificate: The certificate number (RRN) that the page belongs to
        :param sap_2012_date: The date of the SAP 2012 update
        :return: Dictionary of ratings, bills texts, recommendations, EPC data, assessor details and
            low carbon energy sources
        """
        current_epc, current_sap, potential_epc, potential_sap = self._parse_ratings(soup)
        parsed = self._parse_certificate(soup, current_sap, current_epc, sap_2012_date)

        return {
            'epc_certificate': epc_certificate,
            'current_epc_rating': current_epc,
            'current_epc_efficiency': current_sap,
            'potential_epc_rating': potential_epc,
            "potential_epc_efficiency": potential_sap,
            "heating_text": parsed["heating_text"],
            "hot_water_text": parsed["hot_water_text"],
            "recommendations": parsed["recommendations"],
            "epc_data": parsed["epc_data"],
            **parsed["assessment_data"],
            **parsed["low_carbon_energy_sources"],
        }

    def retrieve_all_find_my_epc_data(self, sap_2012_date=None):
        """
        This is a quick function to retrieve all the data from the find my epc website for a given
        postcode and address. Using this to fulfill a short term need to retrieve all history for a
        property

        :param sap_2012_date: The date of the SAP 2012 update
        :return: List of result dictionaries, the selected certificate first followed by the other
            certificates listed on its page. Empty when no certificate can be found for the address
        """
        if self.rrn:
            # We build the URL directly
            epc_certificate = self.rrn
            chosen_epc = f"{self.BASE_ENERGY_URL}/energy-certificate/{epc_certificate}"
        else:
            chosen_epc, epc_certificate = self._find_epc_page()
            if chosen_epc is None:
                logger.info("No EPC found for address %s, postcode %s", self.address, self.postcode)
                return []

        address_res, _ = self._fetch_page(chosen_epc)

        # Always include the currently selected EPC first
        certificates = [(epc_certificate, address_res)]

        # We check the section on "Other certificates for this property", which is not on every page
        other_cert_section = address_res.find('div', id='other_certificates_and_reports')
        other_cert_links = []
        if other_cert_section is not None:
            # Certificate number rows are anchor tags within a govuk-summary-list
            other_cert_links = other_cert_section.select('dd.govuk-summary-list__value a')

        # Add additional historic certificates, each under its own certificate number
        for link in other_cert_links:
            cert_path = link['href'].strip()
            cert_number = link.text.strip() or cert_path.rstrip('/').split('/')[-1]
            cert_soup, _ = self._fetch_page(f"{self.BASE_ENERGY_URL}{cert_path}")
            # Pause between requests so we don't hammer the website
            time.sleep(0.3)
            certificates.append((cert_number, cert_soup))

        return [
            self._extract_epc_from_soup(cert_soup, cert_number, sap_2012_date)
            for cert_number, cert_soup in certificates
        ]

    @staticmethod
    def _parse_expiry_date(row):
        """Return the expiry date shown in a search result row, or None if absent or unparseable."""
        expiry_date_tag = row.find('td', class_='govuk-table__cell date')
        if expiry_date_tag is None:
            return None
        date_span = expiry_date_tag.parent.find('span')
        if date_span is None:
            return None
        try:
            return datetime.strptime(date_span.text.strip(), '%d %B %Y')
        except ValueError:
            return None

    def _find_epc_page(self):
        """
        This function is used to find the EPC page for a given address and postcode. It is done by
        fetching the search results for the postcode and then matching the addresses on the page to the
        address we have been given. If the address starts with "flat" and nothing matches, results
        matching the address without the leading "flat" are used instead.

        :return: Tuple of (certificate URL, certificate number), or (None, None) if nothing matches
        """
        postcode_input = self.postcode.replace(" ", "+")
        postcode_search = self.SEARCH_POSTCODE_URL.format(postcode_input=postcode_input)
        postcode_res, _ = self._fetch_page(postcode_search)

        extracted_table, backup_flat = [], []
        for row in postcode_res.find_all('tr', class_='govuk-table__row'):
            # Extract the address and URL
            address_tag = row.find('a', class_='govuk-link')
            if address_tag is None:
                continue

            extracted_address = address_tag.text.strip()
            extracted_address_cleaned = self._normalise(extracted_address)
            candidate = {
                "extracted_address": extracted_address,
                "extracted_address_url": address_tag['href'],
                # May be None - the sort below copes with that
                "expiry_date": self._parse_expiry_date(row),
            }

            primary_match = extracted_address_cleaned.startswith(self.address_cleaned)
            backup_match = bool(self.address_postal_town) and (
                extracted_address_cleaned.startswith(self.address_postal_town)
            )
            if primary_match or backup_match:
                extracted_table.append(candidate)
            elif self.address_cleaned.startswith("flat"):
                # We have a flat address, so we can try and match without the leading "flat"
                if extracted_address_cleaned.startswith(self.address_cleaned[4:]):
                    backup_flat.append(candidate)

        if not extracted_table and not backup_flat:
            # This is a relatively new change, as of November 2025, but we see cases where properties do
            # not have data appearing on the find my EPC website, particularly for older EPCs. In this
            # case, we allow for us to not find any information and return nothing
            return None, None

        if not extracted_table:
            extracted_table = backup_flat

        # We take the one with the most recent expiry date; entries without a date sort last
        extracted_table = sorted(
            extracted_table, key=lambda x: x['expiry_date'] or datetime.min, reverse=True
        )

        chosen_epc = self.BASE_ENERGY_URL + extracted_table[0]['extracted_address_url']
        epc_certificate = chosen_epc.split('/')[-1]
        return chosen_epc, epc_certificate

    @staticmethod
    def extract_property_components(property_features_table: list):
        """
        Function to pull out a table for property components, marking their appearance index

        :param property_features_table: The rows of the property features table, as extracted by
            BeautifulSoup
        :return: List of property components with appearance index
        """
        property_components = []
        # The appearance index indicates if the component appears multiple times, so this becomes a
        # reference for the building part the component is associated to (main, extensions, etc)
        component_count = {}
        for row in property_features_table:
            header = row.find("th")
            cells = row.find_all("td")
            if header is None or len(cells) < 2:
                # Not a component row
                continue

            component_name = header.text.strip()
            appearance_index = component_count.get(component_name, 0)
            component_count[component_name] = appearance_index + 1

            property_components.append(
                {
                    "component_name": component_name,
                    "description": cells[0].text.strip(),
                    "efficiency": cells[1].text.strip(),
                    "appearance_index": appearance_index,
                }
            )

        return property_components

    def retrieve_newest_find_my_epc_data(
            self, sap_2012_date=None, return_page=False, epc_page_source=None, rrn=None
    ):
        """
        For a post code and address, we pull out all the required data from the find my epc website.
        The page is taken from, in order of preference: the given page source, the certificate for the
        known RRN, or the certificate found by searching the postcode and matching the address.

        :param sap_2012_date: The date of the SAP 2012 update
        :param return_page: If True, the page text is returned alongside the data on success
        :param epc_page_source: Previously downloaded page text to parse instead of fetching the page
        :param rrn: The RRN of the certificate; required when epc_page_source is given
        :return: The result dictionary (and the page text if return_page is set). An empty dictionary if
            no EPC is found, or a dictionary with None for epc_certificate and page_source if the
            address-matched EPC does not have the expected SAP rating
        :raises ValueError: If epc_page_source is given without rrn
        """
        # Record this before searching: the SAP sanity check below only applies to certificates that
        # we had to find by address matching
        rrn_supplied = bool(rrn or self.rrn)

        if epc_page_source is not None:
            if rrn is None:
                raise ValueError("rrn must be provided if epc_page_source is provided")
            epc_certificate = rrn
            address_res = BeautifulSoup(epc_page_source, features="html.parser")
        elif rrn_supplied:
            epc_certificate = self.rrn if self.rrn else rrn
            chosen_epc = f"{self.BASE_ENERGY_URL}/energy-certificate/{epc_certificate}"
            address_res, epc_page_source = self._fetch_page(chosen_epc)
        else:
            chosen_epc, epc_certificate = self._find_epc_page()
            if chosen_epc is None:
                # We have no resulting data
                logger.info("No EPC found for address %s, postcode %s", self.address, self.postcode)
                return {}
            address_res, epc_page_source = self._fetch_page(chosen_epc)

        # Key data we want to retrieve:
        # 1) Rating
        # 2) Bills estimates
        # 3) Recommendations and SAP points
        # 4) Low and zero carbon energy sources
        # 5) The wall types of the property - used for determining if we have an extension wall
        # insulation recommendation
        current_epc, current_sap, potential_epc, potential_sap = self._parse_ratings(address_res)

        if self.sap_rating and current_sap != self.sap_rating and not rrn_supplied:
            # This means we likely have the wrong data. If we are in this scenario, we return nothing
            return {
                "epc_certificate": None,
                "page_source": None,
            }

        parsed = self._parse_certificate(address_res, current_sap, current_epc, sap_2012_date)

        # Pull out the address information which can be found in the box with the class "epc-address"
        # We split it up on break tags
        address_tag = address_res.find("p", class_="epc-address")
        lines = address_tag.get_text(separator="\n").strip().split("\n") if address_tag is not None else [""]
        address1 = lines[0]
        address2 = lines[1] if len(lines) > 2 else ""
        postcode = lines[-1]

        resulting_data = {
            'epc_certificate': epc_certificate,
            'current_epc_rating': current_epc,
            'current_epc_efficiency': current_sap,
            'potential_epc_rating': potential_epc,
            "potential_epc_efficiency": potential_sap,
            "heating_text": parsed["heating_text"],
            "hot_water_text": parsed["hot_water_text"],
            "recommendations": parsed["recommendations"],
            "property_components": parsed["property_components"],
            "epc_data": parsed["epc_data"],
            **parsed["assessment_data"],
            **parsed["low_carbon_energy_sources"],
            "page_source": epc_page_source,
            # Add in address and postcode from the page - covers use cases where we are given RRN
            "address1": address1,
            "address2": address2,
            "postcode": postcode,
        }

        if return_page:
            # We return the page text as well, which can be parsed again later
            return resulting_data, epc_page_source

        return resulting_data

    def format_recommendations(self, recommendations, assessment_data, sap_2012_date=None):
        """
        This function converts the recommendations to a format that we can use in the engine as a
        non-intrusive survey

        :param recommendations: The recommendations from the EPC
        :param assessment_data: The assessment data from the EPC
        :param sap_2012_date: The date of the SAP 2012 update
        :return: List of dictionaries with keys type, sap_points, survey and, for solar PV, suitable
        :raises KeyError: If a recommendation title is not in MEASURE_MAP
        """
        # Recommendations from certificates issued before the SAP 2012 update are flagged survey=False
        survey = True
        if sap_2012_date is not None:
            certificate_date = datetime.strptime(assessment_data["Date of certificate"], "%d %B %Y")
            if certificate_date < pd.to_datetime(sap_2012_date):
                survey = False

        # Only the first wall is checked; there may be no walls at all if none were parsed
        first_wall_is_solid_brick = bool(self.walls) and "solid brick" in self.walls[0].lower()

        formatted_recommendations = []
        for rec in recommendations:
            for measure in self.MEASURE_MAP[rec["measure"]]:
                if measure == "cavity_wall_insulation" and first_wall_is_solid_brick:
                    # A cavity wall recommendation on a solid brick property is emitted as an
                    # extension measure instead
                    measure = "extension_cavity_wall_insulation"

                to_append = {
                    "type": measure,
                    "sap_points": rec["sap_points"],
                    "survey": survey,
                }
                if measure == "solar_pv":
                    to_append["suitable"] = True
                formatted_recommendations.append(to_append)

        return formatted_recommendations

    @classmethod
    def get_from_epc(cls, epc, epc_page_source=None, rrn=None, address_postal_town=None, sap_rating=None):
        """
        Retrieve the find my epc data for an EPC record and split it into the pieces we consume

        :param epc: EPC record; the keys address, postcode and uprn are read
        :param epc_page_source: Previously downloaded page text to parse instead of fetching the page
        :param rrn: The RRN of the certificate; required when epc_page_source is given
        :param address_postal_town: Alternative address string accepted when matching search results
        :param sap_rating: The SAP score the certificate is expected to have
        :return: Tuple of (non invasive recommendations, patch, page source, property components)
        :raises ValueError: If epc_page_source is given without rrn
        """
        if epc_page_source is not None and rrn is None:
            raise ValueError("rrn must be provided if epc_page_source is provided")

        searcher = cls(
            address=epc["address"],
            postcode=epc["postcode"],
            address_postal_town=address_postal_town,
            sap_rating=sap_rating
        )
        find_epc_data = searcher.retrieve_newest_find_my_epc_data(epc_page_source=epc_page_source, rrn=rrn)

        non_invasive_recommendations = {
            "uprn": epc["uprn"],
            "address": epc["address"],
            "postcode": epc["postcode"],
            "recommendations": find_epc_data.get("recommendations", []),
        }

        lodgment_date = find_epc_data.get("Date of certificate", None)
        if not pd.isnull(lodgment_date):
            lodgment_date = str(datetime.strptime(str(lodgment_date), "%d %B %Y"))

        # We need to add the patch information
        patch = {
            "current-energy-rating": find_epc_data.get("current_epc_rating"),
            "current-energy-efficiency": find_epc_data.get("current_epc_efficiency"),
            "potential-energy-rating": find_epc_data.get("potential_epc_rating"),
            "potential-energy-efficiency": find_epc_data.get("potential_epc_efficiency"),
            **find_epc_data.get("epc_data", {}),
            "lodgement-date": lodgment_date
        }

        page_source = {
            "rrn": find_epc_data.get("epc_certificate"),
            "page_source": find_epc_data.get("page_source")
        }

        property_components = find_epc_data.get("property_components", [])

        return non_invasive_recommendations, patch, page_source, property_components

    @classmethod
    def get_from_epc_with_fallback(
            cls, epc, epc_page, rrn, cleaned_address=None, config_address=None, address_postal_town=None
    ):
        """
        Attempt get_from_epc with:
        1) Original EPC
        2) EPC with cleaned address
        3) EPC with configured address
        in that order.

        :param epc: EPC record; current-energy-efficiency is read as the expected SAP rating
        :param epc_page: Previously downloaded page text, if any
        :param rrn: The RRN of the certificate, if known
        :param cleaned_address: Address to use for the second attempt
        :param config_address: Address to use for the third attempt
        :param address_postal_town: Alternative address string accepted when matching search results
        :return: The result of the first get_from_epc attempt that does not raise
        :raises RuntimeError: If every attempt raises; chained to the last error
        """
        # The data we'll use to attempt retrieval
        # 1) Original
        attempts = [epc]

        # 2) Cleaned, then 3) Config address fallback
        for replacement_address in (cleaned_address, config_address):
            if replacement_address:
                modified = deepcopy(epc)
                for k in ["address", "address1"]:
                    modified[k] = replacement_address
                attempts.append(modified)

        sap_rating = float(epc["current-energy-efficiency"])

        # Iterate attempts
        last_error = None
        for idx, attempt in enumerate(attempts, start=1):
            try:
                return cls.get_from_epc(
                    attempt, epc_page, rrn=rrn, address_postal_town=address_postal_town, sap_rating=sap_rating
                )
            except Exception as e:  # Retry boundary: any failure moves us on to the next address
                last_error = e
                logger.error("Attempt %s failed: %s", idx, e)

        raise RuntimeError(f"All EPC retrieval attempts failed: {last_error}") from last_error