mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
357 lines
16 KiB
Python
357 lines
16 KiB
Python
import pandas as pd
|
||
import requests
|
||
from bs4 import BeautifulSoup
|
||
from datetime import datetime
|
||
|
||
|
||
class RetrieveFindMyEpc:
    """
    Scrapes the newest EPC certificate for a property from the find-energy-certificate website.

    The postcode search page is used to locate certificates whose address starts with the supplied
    address, the one with the latest expiry date is chosen, and its certificate page is parsed for
    ratings, bill texts, recommendations, assessment details, wall types and low carbon sources.
    """

    SEARCH_POSTCODE_URL = (
        "https://find-energy-certificate.service.gov.uk/find-a-certificate/search-by-postcode?postcode={postcode_input}"
    )
    BASE_ENERGY_URL = "https://find-energy-certificate.service.gov.uk"

    HEADERS = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/111.0.0.0 Safari/537.36'
    }

    # Seconds to wait for the website before giving up. Without a timeout, requests can block forever.
    REQUEST_TIMEOUT = 30

    # Keys that must be present in the parsed assessment information. Note that the keys scraped from
    # the page use a typographic apostrophe, while the ones we build ourselves use a plain one.
    EXPECTED_ASSESSMENT_KEYS = (
        'Assessor’s name',
        "Assessor's Telephone",
        "Assessor's Email",
        'Assessor’s ID',
        'Accreditation scheme',
        'Assessor’s declaration',
        "Accreditation Scheme's Telephone",
        "Accreditation Scheme's Email",
        'Date of assessment',
        'Date of certificate',
    )

    # Maps the recommendation title shown on the certificate to the measure types used by the engine.
    # An empty list means the recommendation is recognised but produces no measure.
    MEASURE_MAP = {
        "Internal or external wall insulation": ["internal_wall_insulation", "external_wall_insulation"],
        "Hot water cylinder insulation": ["hot_water_tank_insulation"],
        "Hot water cylinder thermostat": ["cylinder_thermostat"],
        "High performance external doors": ["insulated_doors"],
        "Floor insulation (solid floor)": ["solid_floor_insulation"],
        "Floor insulation (suspended floor)": ["suspended_floor_insulation"],
        "Double glazed windows": ["double_glazing"],
        "Cavity wall insulation": ["cavity_wall_insulation"],
        "Replace boiler with new condensing boiler": ["boiler_upgrade"],
        "Floor insulation": ["floor_insulation"],  # Recommendation typically associated to older EPCs
        "Heating controls (programmer, room thermostat and TRVs)": [
            "roomstat_programmer_trvs", "time_temperature_zone_control"
        ],
        "Low energy lighting": ["low_energy_lighting"],
        "Increase loft insulation to 270 mm": ["loft_insulation"],
        "Heating controls (thermostatic radiator valves)": [
            "roomstat_programmer_trvs", "time_temperature_zone_control"
        ],
        "Solar water heating": ["solar_water_heating"],
        "Solar photovoltaic panels, 2.5 kWp": ["solar_pv"],
        "Heating controls (room thermostat and TRVs)": [
            "roomstat_programmer_trvs", "time_temperature_zone_control"
        ],
        "Change heating to gas condensing boiler": ["boiler_upgrade"],
        "Fan assisted storage heaters and dual immersion cylinder": ["high_heat_retention_storage_heater"],
        "Flat roof or sloping ceiling insulation": ["flat_roof_insulation"],
        "Heating controls (room thermostat)": [
            "roomstat_programmer_trvs", "time_temperature_zone_control"
        ],
        "Band A condensing boiler": ["boiler_upgrade"],
        "Double glazing": ["double_glazing"],
        "Flue gas heat recovery device in conjunction with boiler": ["flue_gas_heat_recovery"],
        "Wind turbine": ["wind_turbine"],
        "Loft insulation": ["loft_insulation"],
        "Solar photovoltaic (PV) panels": ["solar_pv"],
        "Party wall insulation": ["party_wall_insulation"],
        'Draught proofing': ["draught_proofing"],
        "Roof insulation recommendation": [],
        "Cavity wall insulation recommendation": [],
        "Windows draught proofing": [],
        "Low energy lighting for all fixed outlets": ["low_energy_lighting"],
        "Cylinder thermostat recommendation": [],
        "Heating controls recommendation": [],
        "Replace boiler with Band A condensing boiler": ["boiler_upgrade"],
        "Band A condensing gas boiler": ["boiler_upgrade"],
        "Solar panel recommendation": [],
        "Double glazing recommendation": [],
        "Solid wall insulation recommendation": [],
        "Fuel change recommendation": [],
        "PV Cells recommendation": [],
        "Replacement glazing units": ["double_glazing"],
        "Heating controls (time and temperature zone control)": ["time_temperature_zone_control"],
        "High heat retention storage heaters": ["high_heat_retention_storage_heater"],
        "Gas condensing boiler": ["boiler_upgrade"],
        "Change room heaters to condensing boiler": ["boiler_upgrade"],
        "Cylinder thermostat": ["cylinder_thermostat"],
        "Heat recovery system for mixer showers": ["heat_recovery_shower"],
        "Room-in-roof insulation": ["room_in_roof_insulation"],
        "Fan assisted storage heaters": [],
        "Fan-assisted storage heaters": [],
        # A step header without a title keeps its bare "Step N:" text after the prefix is stripped
        "Step 1:": [],
        "Step 2:": [],
        'Step 3:': [],
        "Biomass stove with boiler": [],
        "Replace boiler with biomass boiler": [],
        "Heating controls (room thermostat and thermostatic radiator valves)": [
            "roomstat_programmer_trvs", "time_temperature_zone_control"
        ],
        "Heating controls (programmer, and thermostatic radiator valves)": [
            "roomstat_programmer_trvs", "time_temperature_zone_control"
        ],
        "Heating controls (programmer and TRVs)": [
            "roomstat_programmer_trvs", "time_temperature_zone_control"
        ],
        "Heating controls (programmer and room thermostat)": [
            "roomstat_programmer_trvs", "time_temperature_zone_control"
        ],
        "Replacement warm air unit": [],
        "Secondary glazing": ["secondary_glazing"]
    }

    def __init__(self, address: str, postcode: str):
        """
        This class is tasked with retrieving the latest EPC data from the find my epc website
        :param address: The address of the property
        :param postcode: The postcode of the property
        """
        self.address = address
        self.postcode = postcode

        # Commas, spaces and case are dropped so the address can be prefix-matched against search results
        self.address_cleaned = self.address.replace(",", "").replace(" ", "").lower()
        # Wall descriptions of the property, filled in by retrieve_newest_find_my_epc_data
        self.walls = []

    @staticmethod
    def extract_low_carbon_sources(soup):
        """
        Extracts the entries listed under the "Low and zero carbon energy sources" heading
        :param soup: The parsed certificate page
        :return: A dictionary mapping each listed source to True; empty if the section is not present
        """
        # Find the section header
        section_header = soup.find("h3", string="Low and zero carbon energy sources")
        if not section_header:
            return {}

        # Locate the list following the header; a header without a list yields no sources
        energy_list = section_header.find_next("ul")
        if energy_list is None:
            return {}

        # Extract the list items
        return {item.get_text(strip=True): True for item in energy_list.find_all("li")}

    def _find_latest_certificate_url(self):
        """
        Searches by postcode and returns the full URL of the matching certificate with the latest expiry date
        :raises ValueError: If no search result starts with the cleaned address ("No EPC found")
        """
        postcode_input = self.postcode.replace(" ", "+")
        postcode_search = self.SEARCH_POSTCODE_URL.format(postcode_input=postcode_input)
        postcode_response = requests.get(postcode_search, headers=self.HEADERS, timeout=self.REQUEST_TIMEOUT)
        postcode_res = BeautifulSoup(postcode_response.text, features="html.parser")

        candidates = []
        for row in postcode_res.find_all('tr', class_='govuk-table__row'):
            # Rows without an address link (e.g. the table header) carry no certificate
            address_tag = row.find('a', class_='govuk-link')
            if address_tag is None:
                continue

            extracted_address = address_tag.text.strip()
            extracted_address_cleaned = extracted_address.replace(",", "").replace(" ", "").lower()
            if not extracted_address_cleaned.startswith(self.address_cleaned):
                continue

            # Rows without a readable expiry date get the earliest possible date, so that any dated
            # certificate is preferred over them instead of the lookup crashing
            expiry_date = datetime.min
            expiry_date_tag = row.find('td', class_='govuk-table__cell date')
            if expiry_date_tag is not None:
                expiry_span = expiry_date_tag.parent.find('span')
                if expiry_span is not None:
                    expiry_date = datetime.strptime(expiry_span.text.strip(), '%d %B %Y')

            candidates.append(
                {
                    "extracted_address": extracted_address,
                    "extracted_address_url": address_tag['href'],
                    "expiry_date": expiry_date,
                }
            )

        if not candidates:
            raise ValueError("No EPC found")

        # We take the one with the most recent expiry date (the first one listed wins on a tie)
        newest = max(candidates, key=lambda candidate: candidate['expiry_date'])
        return self.BASE_ENERGY_URL + newest['extracted_address_url']

    @staticmethod
    def _parse_bills(soup):
        """
        Returns the (heating_text, hot_water_text) pair from the "bills-affected" section, None where missing
        """
        bills = soup.find('div', {'id': 'bills-affected'})
        # If there is nothing here, it's usually because the EPC was very old. Early EPCs did not have
        # this information
        bills_list = bills.find_all('li') if bills is not None else []
        heating_text = bills_list[0].text if len(bills_list) > 0 else None
        hot_water_text = bills_list[1].text if len(bills_list) > 1 else None
        return heating_text, hot_water_text

    @staticmethod
    def _parse_recommendations(soup, current_sap, current_epc):
        """
        Returns one dictionary per recommended step with its measure, new rating/EPC letter and SAP points gained
        :param soup: The parsed certificate page
        :param current_sap: The current SAP score, used as the baseline for the first step
        :param current_epc: The current EPC letter, used as the baseline for the first step
        """
        recommendations = []
        recommendations_div = soup.find('div', class_='epb-recommended-improvements')
        if recommendations_div is None:
            return recommendations

        previous_sap_score = current_sap
        previous_epc = current_epc
        # Find all h3 headers for each step and extract their related information
        step_headers = recommendations_div.find_all('h3', class_='govuk-heading-m')
        for step_num, step_header in enumerate(step_headers, start=1):
            # Extract the step title (the measure)
            measure_title = step_header.text.strip().replace(f"Step {step_num}: ", "")

            # Find the next div containing a potential rating after this header
            potential_rating_div = step_header.find_next(
                'div', class_='epb-recommended-improvements__potential-rating'
            )
            extracted_rating_text = None
            if potential_rating_div is not None:
                # Extract the rating text within the SVG text element
                extracted_rating_text = potential_rating_div.find('text', class_='govuk-!-font-weight-bold')

            if extracted_rating_text is not None:
                rating_text = extracted_rating_text.text.strip()
            else:
                # No rating shown for this step: carry the previous rating forward, i.e. zero SAP points
                rating_text = " ".join([str(previous_sap_score), previous_epc])

            # Parse the rating text to separate the numeric rating and EPC letter
            rating_parts = rating_text.split()
            new_rating = int(rating_parts[0])
            new_epc = rating_parts[1]

            recommendations.append({
                "step": step_num,
                "measure": measure_title,
                "new_rating": new_rating,
                "new_epc": new_epc,
                "sap_points": new_rating - previous_sap_score
            })
            previous_sap_score = new_rating
            previous_epc = new_epc

        return recommendations

    @classmethod
    def _parse_assessment_information(cls, soup):
        """
        Parses the summary list in the "information" section into a dictionary
        :raises ValueError: If any of EXPECTED_ASSESSMENT_KEYS is missing ("Missing key: ...")
        """
        assessment_information = soup.find('div', {'id': 'information'})
        rows = assessment_information.find_all('div', class_='govuk-summary-list__row')

        assessment_data = {}
        for row in rows:
            key = row.find('dt').text.strip()
            if key == "Type of assessment":
                # We dont reliably extract this
                continue
            value_tag = row.find('dd')

            # Check if value contains a link (email)
            link_tag = value_tag.find('a')
            if link_tag is not None:
                value = link_tag.text.strip()
            elif value_tag.find('summary') is not None:
                value = value_tag.find('span').text.strip()
            else:
                value = value_tag.text.strip()

            # These are keys that we have for both the surveyor and the accreditation scheme. The first
            # occurrence is stored as the assessor's, any later one as the accreditation scheme's
            if key in ["Telephone", "Email"]:
                if "Assessor's " + key not in assessment_data:
                    assessment_data["Assessor's " + key] = value
                else:
                    assessment_data["Accreditation Scheme's " + key] = value
                continue

            assessment_data[key] = value

        # Check we have all the expected keys
        for key in cls.EXPECTED_ASSESSMENT_KEYS:
            if key not in assessment_data:
                raise ValueError(f"Missing key: {key}")

        return assessment_data

    @staticmethod
    def _parse_walls(soup):
        """
        Returns the first cell text of every row headed "Wall" in the first govuk table body of the page
        """
        property_features_table = soup.find("tbody", class_="govuk-table__body")
        if property_features_table is None:
            return []

        walls = []
        for row in property_features_table.find_all("tr"):
            header = row.find("th")
            cells = row.find_all("td")
            # Rows without a header cell or without data cells cannot describe a wall
            if header is not None and cells and header.text.strip() == "Wall":
                walls.append(cells[0].text.strip())
        return walls

    def retrieve_newest_find_my_epc_data(self, sap_2012_date=None):
        """
        For a post code and address, we pull out all the required data from the find my epc website
        :param sap_2012_date: Passed through to format_recommendations
        :return: A dictionary with the certificate id, current/potential ratings, bill texts and formatted
            recommendations, merged with the assessment information and the low carbon energy sources
        :raises ValueError: If no EPC matches the address or expected assessment information is missing
        """
        chosen_epc = self._find_latest_certificate_url()
        epc_certificate = chosen_epc.split('/')[-1]

        address_response = requests.get(chosen_epc, headers=self.HEADERS, timeout=self.REQUEST_TIMEOUT)
        address_res = BeautifulSoup(address_response.text, features="html.parser")

        # Key data we want to retrieve:
        # 1) Rating
        # 2) Bills estimates
        # 3) Recommendations and SAP points
        # 4) Low and zero carbon energy sources
        # 5) The wall types of the property - used for determining if we have an extension wall insulation
        # recommendation

        # The description holds two sentences: the current rating, then the potential rating. In each,
        # the last word is the score and the sixth word from the end is the EPC letter
        ratings = address_res.find('desc', {'id': 'svg-desc'}).text
        rating_sentences = ratings.split(".")
        current_rating = rating_sentences[0]
        potential_rating = rating_sentences[1]
        current_sap = int(current_rating.split(' ')[-1])
        current_epc = current_rating.split(' ')[-6]

        # Retrieve the energy consumption
        heating_text, hot_water_text = self._parse_bills(address_res)

        # Retrieve the recommendations and SAP points
        recommendations = self._parse_recommendations(address_res, current_sap, current_epc)

        # Search for the assessment information
        assessment_data = self._parse_assessment_information(address_res)

        # The wall types of the property
        self.walls = self._parse_walls(address_res)

        # Finally, we format the recommendations
        recommendations = self.format_recommendations(recommendations, assessment_data, sap_2012_date)

        # 4) Low and zero carbon energy sources
        low_carbon_energy_sources = self.extract_low_carbon_sources(address_res)

        return {
            'epc_certificate': epc_certificate,
            'current_epc_rating': current_epc,
            'current_epc_efficiency': current_sap,
            'potential_epc_rating': potential_rating.split(' ')[-6],
            "potential_epc_efficiency": int(potential_rating.split(' ')[-1]),
            "heating_text": heating_text,
            "hot_water_text": hot_water_text,
            "recommendations": recommendations,
            **assessment_data,
            **low_carbon_energy_sources
        }

    def format_recommendations(self, recommendations, assessment_data, sap_2012_date=None):
        """
        This function converts the recommendations to a format that we can use in the engine as a non-intrusive survey
        :param recommendations: The recommendations from the EPC
        :param assessment_data: The assessment data from the EPC
        :param sap_2012_date: The date of the SAP 2012 update
        :return: A list with one dictionary per mapped measure, holding its type, SAP points and survey flag
        :raises KeyError: If a recommendation's measure is not in MEASURE_MAP
        """
        # Recommendations only count as a survey if the certificate is not older than the SAP 2012 date
        survey = True
        if sap_2012_date is not None:
            certificate_date = datetime.strptime(assessment_data["Date of certificate"], "%d %B %Y")
            if certificate_date < pd.to_datetime(sap_2012_date):
                survey = False

        formatted_recommendations = []
        for rec in recommendations:
            for measure in self.MEASURE_MAP[rec["measure"]]:
                # A cavity wall recommendation on a property whose first wall is solid brick is treated as
                # applying to an extension. With no known walls we keep the plain cavity wall measure
                if (
                    measure == "cavity_wall_insulation"
                    and self.walls
                    and "solid brick" in self.walls[0].lower()
                ):
                    measure = "extension_cavity_wall_insulation"

                to_append = {
                    "type": measure,
                    "sap_points": rec["sap_points"],
                    "survey": survey,
                }
                if measure == "solar_pv":
                    to_append["suitable"] = True
                formatted_recommendations.append(to_append)

        return formatted_recommendations
|