mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
added a patch method to scraping epc data
This commit is contained in:
parent
fd2600b9ba
commit
2d71ad25ef
14 changed files with 564 additions and 621 deletions
2
.idea/Model.iml
generated
2
.idea/Model.iml
generated
|
|
@ -7,7 +7,7 @@
|
|||
<sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
|
||||
</content>
|
||||
<orderEntry type="jdk" jdkName="AssetList" jdkType="Python SDK" />
|
||||
<orderEntry type="jdk" jdkName="Fastapi-backend" jdkType="Python SDK" />
|
||||
<orderEntry type="sourceFolder" forTests="false" />
|
||||
</component>
|
||||
<component name="PyNamespacePackagesService">
|
||||
|
|
|
|||
2
.idea/misc.xml
generated
2
.idea/misc.xml
generated
|
|
@ -3,7 +3,7 @@
|
|||
<component name="Black">
|
||||
<option name="sdkName" value="Python 3.10 (backend)" />
|
||||
</component>
|
||||
<component name="ProjectRootManager" version="2" project-jdk-name="AssetList" project-jdk-type="Python SDK" />
|
||||
<component name="ProjectRootManager" version="2" project-jdk-name="Fastapi-backend" project-jdk-type="Python SDK" />
|
||||
<component name="PyCharmProfessionalAdvertiser">
|
||||
<option name="shown" value="true" />
|
||||
</component>
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -10,6 +10,7 @@ from asset_list.mappings.built_form import BUILT_FORM_MAPPINGS
|
|||
from asset_list.mappings.walls import WALL_CONSTRUCTION_MAPPINGS
|
||||
from asset_list.mappings.heating_systems import HEATING_MAPPINGS
|
||||
from asset_list.mappings.exising_pv import EXISTING_PV_MAPPINGS
|
||||
from asset_list.mappings.roof import ROOF_CONSTRUCTION_MAPPINGS
|
||||
from asset_list.utils import get_data
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
|
@ -88,6 +89,63 @@ def app():
|
|||
# - We want: fully insulated property (all wall types), EPC D or below (floors should be solid)
|
||||
# - Or the insulation required is loft/cavity (floors should be solid)
|
||||
|
||||
# Torus
|
||||
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Torus/Phase 1"
|
||||
data_filename = "Torus Property Asset List - Phase 1.xlsx"
|
||||
sheet_name = "TORUS"
|
||||
postcode_column = 'Postcode'
|
||||
fulladdress_column = None
|
||||
address1_column = "AddressLine1"
|
||||
address1_method = None
|
||||
address_cols_to_concat = ["AddressLine1", "AddressLine2", "AddressLine3"]
|
||||
missing_postcodes_method = None
|
||||
landlord_year_built = "Property Age"
|
||||
landlord_os_uprn = "NatUPRN"
|
||||
landlord_property_type = "Property Type"
|
||||
landlord_built_form = "Built Form"
|
||||
landlord_wall_construction = "Wall Construction"
|
||||
landlord_roof_construction = "Roof Construction"
|
||||
landlord_heating_system = "Space Heating Source"
|
||||
landlord_existing_pv = "Low Carbon Technology (Solar PV)"
|
||||
landlord_property_id = "UPRN"
|
||||
landlord_sap = "SAP Score"
|
||||
outcomes_filename = None
|
||||
outcomes_sheetname = None
|
||||
outcomes_postcode = None
|
||||
outcomes_houseno = None
|
||||
outcomes_id = None
|
||||
outcomes_address = None
|
||||
master_filepaths = []
|
||||
master_to_asset_list_filepath = None
|
||||
phase = True
|
||||
|
||||
# Ealing - houses
|
||||
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Ealing"
|
||||
data_filename = "Ealing_rechecked_cleaned_05042025.csv"
|
||||
sheet_name = None
|
||||
postcode_column = 'Postcode'
|
||||
fulladdress_column = "Address"
|
||||
address1_column = None
|
||||
address1_method = "house_number_extraction"
|
||||
address_cols_to_concat = []
|
||||
missing_postcodes_method = None
|
||||
landlord_year_built = "Year Built"
|
||||
landlord_os_uprn = None
|
||||
landlord_property_type = "Property Type Code"
|
||||
landlord_built_form = None
|
||||
landlord_wall_construction = None
|
||||
landlord_heating_system = None
|
||||
landlord_existing_pv = None
|
||||
landlord_property_id = "Property ref"
|
||||
outcomes_filename = None
|
||||
outcomes_sheetname = None
|
||||
outcomes_postcode = None
|
||||
outcomes_houseno = None
|
||||
outcomes_id = None
|
||||
outcomes_address = None
|
||||
master_filepaths = []
|
||||
master_to_asset_list_filepath = None
|
||||
|
||||
# Southern Midlands
|
||||
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Southern/Midlands Properties - Apr 2025"
|
||||
data_filename = "Southern Housing Midlands Property List - combined.xlsx"
|
||||
|
|
@ -446,8 +504,11 @@ def app():
|
|||
landlord_property_type=landlord_property_type,
|
||||
landlord_built_form=landlord_built_form,
|
||||
landlord_wall_construction=landlord_wall_construction,
|
||||
landlord_roof_construction=landlord_roof_construction,
|
||||
landlord_heating_system=landlord_heating_system,
|
||||
landlord_existing_pv=landlord_existing_pv
|
||||
landlord_existing_pv=landlord_existing_pv,
|
||||
landlord_sap=landlord_sap,
|
||||
phase=phase
|
||||
)
|
||||
asset_list.init_standardise()
|
||||
|
||||
|
|
@ -486,6 +547,13 @@ def app():
|
|||
).items()
|
||||
if k not in EXISTING_PV_MAPPINGS
|
||||
}
|
||||
new_roof_construction_map = {
|
||||
k: v for k, v in (
|
||||
asset_list.variable_mappings[asset_list.landlord_roof_construction] if
|
||||
asset_list.landlord_roof_construction else {}
|
||||
).items()
|
||||
if k not in ROOF_CONSTRUCTION_MAPPINGS
|
||||
}
|
||||
|
||||
asset_list.apply_standardiation()
|
||||
|
||||
|
|
@ -511,7 +579,7 @@ def app():
|
|||
epc_api_only = False
|
||||
force_retrieve_data = False
|
||||
skip = None # Used to skip already completed chunks
|
||||
chunk_size = 5000
|
||||
chunk_size = 1000
|
||||
filename = "Chunk {i}.csv"
|
||||
download_folder = os.path.join(data_folder, "Chunks")
|
||||
if not os.path.exists(download_folder):
|
||||
|
|
@ -529,8 +597,6 @@ def app():
|
|||
if any(x in folder_contents for x in downloaded_files):
|
||||
skip = max([i for i in chunk_indexes if filename.format(i=i) in folder_contents])
|
||||
|
||||
# folder_contents = [f for f in folder_contents if "nodata" not in f and f.endswith(".csv")]
|
||||
|
||||
for i in range(0, len(asset_list.standardised_asset_list), chunk_size):
|
||||
print(f"Processing chunk {i} to {i + chunk_size}")
|
||||
if skip is not None and not force_retrieve_data:
|
||||
|
|
|
|||
|
|
@ -80,5 +80,32 @@ BUILT_FORM_MAPPINGS = {
|
|||
'House: MidTerrace': 'mid-terrace',
|
||||
'House: EndTerrace': 'end-terrace',
|
||||
'Bungalow: EndTerrace': 'end-terrace',
|
||||
'Bungalow: MidTerrace': 'mid-terrace'
|
||||
'Bungalow: MidTerrace': 'mid-terrace',
|
||||
'Flat: Semi Detached: Mid Floor': 'semi-detached',
|
||||
'Maisonette: Mid Terrace: Top Floor': 'mid-terrace',
|
||||
'Flat: Enclosed Mid Terrace: Mid Floor': 'mid-terrace',
|
||||
'Flat: Enclosed Mid Terrace: Ground Floor': 'mid-terrace',
|
||||
'Flat: Detached: Ground Floor': 'detached',
|
||||
'Flat: Detached: Mid Floor': 'detached',
|
||||
'Flat: Detached: Top Floor': 'detached',
|
||||
'Flat: Enclosed End Terrace: Mid Floor': 'end-terrace',
|
||||
'Bungalow: Detached': 'detached',
|
||||
'Maisonette: End Terrace: Mid Floor': 'end-terrace',
|
||||
'Maisonette: Detached: Top Floor': 'detached',
|
||||
'Flat: Enclosed End Terrace: Ground Floor': 'end-terrace',
|
||||
'Flat: Enclosed Mid Terrace: Top Floor': 'mid-terrace',
|
||||
'House: EnclosedEndTerrace': 'end-terrace',
|
||||
'3 Ext. Wall Flat': 'semi-detached',
|
||||
'Bungalow Detached': 'detached',
|
||||
'Bungalow End Terrace': 'end-terrace',
|
||||
'Bungalow Mid Terrace': 'mid-terrace',
|
||||
'Bungalow Semi Detached': 'detached',
|
||||
'Maisonette 2 Ext. Wall': 'mid-terrace',
|
||||
'Maisonette 3 Ext. Wall': 'semi-detached',
|
||||
'End-terrace': 'end-terrace',
|
||||
'Mid-terrace': 'mid-terrace',
|
||||
'Semi-detached': 'semi-detached',
|
||||
'Detached': 'detached',
|
||||
'Flat / maisonette': 'unknown',
|
||||
'2014 onwards': 'unknown'
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,3 +1,5 @@
|
|||
import numpy as np
|
||||
|
||||
# Standard labels that the existing-PV mappings in this module resolve to.
STANDARD_EXISTING_PV = {
    "already has PV",
    "no PV",
    "unknown",
}
|
||||
|
|
@ -9,4 +11,10 @@ EXISTING_PV_MAPPINGS = {
|
|||
"yes": "already has PV",
|
||||
True: "already has PV",
|
||||
False: "no PV",
|
||||
np.nan: 'unknown',
|
||||
'PV: 2kWp array': 'already has PV',
|
||||
'PV: 25% roof area, PV: 3.6kWp array': 'already has PV',
|
||||
'PV: 10% roof area, PV: 2kWp array': 'already has PV',
|
||||
'PV: 50% roof area': 'already has PV',
|
||||
'Solar PV': 'already has PV'
|
||||
}
|
||||
|
|
|
|||
|
|
@ -21,7 +21,9 @@ STANDARD_HEATING_SYSTEMS = {
|
|||
'oil fuel',
|
||||
'solid fuel',
|
||||
'gas combi boiler',
|
||||
'unknown'
|
||||
'unknown',
|
||||
"electric ceiling",
|
||||
"electric underfloor"
|
||||
}
|
||||
|
||||
HEATING_MAPPINGS = {
|
||||
|
|
@ -143,5 +145,30 @@ HEATING_MAPPINGS = {
|
|||
'Boiler: A rated Regular Boiler Electricity: Electricity': 'electric boiler',
|
||||
'Community Heating Systems: Community boilers only (RdSAP) Gas: Mains Gas (Community)': 'communal gas boiler',
|
||||
'Boiler: A rated Combi Gas: Mains Gas': 'gas condensing combi',
|
||||
'Boiler: A rated CPSU Electricity: Electricity': 'electric boiler'
|
||||
'Boiler: A rated CPSU Electricity: Electricity': 'electric boiler',
|
||||
'Heat Pump: Electric Heat pumps: Ground source heat pump with flow temperature <= 35°C': 'ground source heat pump',
|
||||
'Heat Pump: Electric Heat pumps: Ground source heat pump in other cases': 'ground source heat pump',
|
||||
'Electric Storage Systems: High heat retention storage heaters': 'high heat retention storage heaters',
|
||||
'Heat Pump: Electric Heat pumps: Air source heat pump with flow temperature <= 35°C': 'air source heat pump',
|
||||
'Electric (direct acting) room heaters: Panel, convector or radiant heaters': 'room heaters',
|
||||
'Boiler: C rated Combi': 'gas combi boiler',
|
||||
'Boiler: B rated Regular Boiler': 'gas condensing boiler',
|
||||
'Boiler: E rated Combi': 'gas combi boiler',
|
||||
'Boiler: A rated Combi': 'gas combi boiler',
|
||||
'Boiler: E rated Regular Boiler': 'gas condensing boiler',
|
||||
'Community Heating Systems: Community boilers only (RdSAP)': 'district heating',
|
||||
'Boiler: C rated Regular Boiler': 'gas condensing boiler',
|
||||
'Boiler: A rated Regular Boiler': 'gas condensing boiler',
|
||||
'Electric Storage Systems: Fan storage heaters': 'electric storage heaters',
|
||||
'Boiler: F rated Combi': 'gas combi boiler',
|
||||
|
||||
'Room heaters': 'room heaters',
|
||||
'Room Heaters': 'room heaters',
|
||||
'Boiler': 'gas condensing boiler',
|
||||
'Heat Pump (Wet)': 'air source heat pump',
|
||||
'Community Heating': 'district heating',
|
||||
'Heat pump (wet)': 'air source heat pump',
|
||||
'Electric ceiling heating': 'electric ceiling',
|
||||
'Electric under floor heating': 'electric underfloor',
|
||||
'Community heating': 'district heating'
|
||||
}
|
||||
|
|
|
|||
|
|
@ -136,5 +136,20 @@ PROPERTY_MAPPING = {
|
|||
'Flat: Semi Detached: Top Floor': 'flat',
|
||||
'Flat: Mid Terrace: Ground Floor': 'flat',
|
||||
'Bungalow: MidTerrace': 'bungalow',
|
||||
'Flat: Enclosed End Terrace: Top Floor': 'flat'
|
||||
'Flat: Enclosed End Terrace: Top Floor': 'flat',
|
||||
'Flat: Semi Detached: Mid Floor': 'flat',
|
||||
'Maisonette: Mid Terrace: Top Floor': 'maisonette',
|
||||
'House: EnclosedEndTerrace': 'house',
|
||||
'Flat: Detached: Ground Floor': 'flat',
|
||||
'Flat: Detached: Mid Floor': 'flat',
|
||||
'Flat: Detached: Top Floor': 'flat',
|
||||
'Bungalow: Detached': 'bungalow',
|
||||
'Maisonette: End Terrace: Mid Floor': 'maisonette',
|
||||
'Maisonette: Detached: Top Floor': 'maisonette',
|
||||
'Flat: Enclosed Mid Terrace: Mid Floor': 'flat',
|
||||
'Flat: Enclosed Mid Terrace: Ground Floor': 'flat',
|
||||
'Flat: Enclosed End Terrace: Mid Floor': 'flat',
|
||||
'Flat: Enclosed End Terrace: Ground Floor': 'flat',
|
||||
'Flat: Enclosed Mid Terrace: Top Floor': 'flat',
|
||||
'2013 onwards': 'unknown'
|
||||
}
|
||||
|
|
|
|||
26
asset_list/mappings/roof.py
Normal file
26
asset_list/mappings/roof.py
Normal file
|
|
@ -0,0 +1,26 @@
|
|||
import numpy as np
|
||||
|
||||
# Standard roof-construction labels. Every value of ROOF_CONSTRUCTION_MAPPINGS
# is one of these.
# NOTE(review): "pitched unknown insulation" was previously spelled
# "piched unknown insulation"; confirm nothing elsewhere depends on the
# misspelled label before relying on this fix.
STANDARD_ROOF_CONSTRUCTIONS = {
    "pitched access to loft",
    "pitched no access to loft",
    "pitched unknown access to loft",
    "pitched unknown insulation",
    "pitched insulated",
    "another dwelling above",
    "flat unknown insulation",
    "unknown insulated",
    "unknown",
}
|
||||
|
||||
# Lookup from the raw roof-construction values seen in asset lists to the
# standard roof-construction labels.
ROOF_CONSTRUCTION_MAPPINGS = {
    # Roof shape / loft access
    "Flat": "flat unknown insulation",
    "Pitched (access to loft)": "pitched access to loft",
    "Pitched (no access to loft)": "pitched no access to loft",
    # Something other than a roof sits above the dwelling
    "Another dwelling above": "another dwelling above",
    "Same dwelling above": "another dwelling above",
    # Values that carry no usable roof information
    "As-built": "unknown",
    "ND (inferred)": "unknown",
    "2018 onwards": "unknown",
    "Pitched (vaulted ceiling)": "pitched insulated",
    # Missing cell
    np.nan: "unknown",
}
|
||||
|
|
@ -147,5 +147,15 @@ WALL_CONSTRUCTION_MAPPINGS = {
|
|||
'Cavity: AsBuilt (1983-1995), Cavity: FilledCavity': 'filled cavity',
|
||||
'SolidBrick: AsBuilt': 'solid brick unknown insulation',
|
||||
'Cavity: FilledCavity': 'filled cavity',
|
||||
'SolidBrick: Internal': 'insulated solid brick'
|
||||
'SolidBrick: Internal': 'insulated solid brick',
|
||||
'Cavity: External': 'filled cavity',
|
||||
'Sandstone: Internal': 'sandstone or limestone',
|
||||
'Cavity: AsBuilt (Pre 1976)': 'cavity unknown insulation',
|
||||
'System build': 'system built',
|
||||
'Solid brick': 'solid brick unknown insulation',
|
||||
'Stone': 'sandstone or limestone',
|
||||
'Timber frame': 'timber frame unknown insulation',
|
||||
'2017 onwards': 'new build - average thermal transmittance',
|
||||
'ND (inferred)': 'unknown',
|
||||
'Flat / maisonette': 'other'
|
||||
}
|
||||
|
|
|
|||
|
|
@ -755,6 +755,10 @@ class SearchEpc:
|
|||
"photo-supply"]
|
||||
)
|
||||
|
||||
estimated_epc["co2-emiss-curr-per-floor-area"] = (
|
||||
estimated_epc["co2-emissions-current"] / estimated_epc["total-floor-area"]
|
||||
)
|
||||
|
||||
estimated_epc["postcode"] = self.postcode
|
||||
if not self.uprn:
|
||||
# Update self.uprn too
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@ from dotenv import load_dotenv
|
|||
from utils.s3 import save_csv_to_s3
|
||||
from etl.find_my_epc.AssetListEpcData import AssetListEpcData
|
||||
|
||||
PORTFOLIO_ID = 138
|
||||
PORTFOLIO_ID = 140
|
||||
USER_ID = 8
|
||||
|
||||
load_dotenv(dotenv_path="backend/.env")
|
||||
|
|
@ -19,14 +19,17 @@ def app():
|
|||
|
||||
asset_list = [
|
||||
{
|
||||
"address": "42 Rippolson Road",
|
||||
"postcode": "SE18 1NS",
|
||||
"uprn": 100020999275,
|
||||
"address": "Brow Cottage",
|
||||
"postcode": "YO18 7PZ",
|
||||
"uprn": 10007630752,
|
||||
"property_type": "House",
|
||||
"built_form": "Semi-Detached",
|
||||
"patch": True
|
||||
},
|
||||
{
|
||||
"address": "66 Riverdale Road",
|
||||
"postcode": "DA8 1PX",
|
||||
"uprn": 100020235516
|
||||
"address": "Wyburn",
|
||||
"postcode": "DT1 2LL",
|
||||
"uprn": 100040630290
|
||||
},
|
||||
]
|
||||
asset_list = pd.DataFrame(asset_list)
|
||||
|
|
@ -46,6 +49,7 @@ def app():
|
|||
)
|
||||
asset_list_epc_client.get_data()
|
||||
asset_list_epc_client.get_non_invasive_recommendations()
|
||||
asset_list_epc_client.get_patch()
|
||||
|
||||
# Store non-invasive recommendations in S3
|
||||
non_invasive_recommendations_filename = f"{USER_ID}/{PORTFOLIO_ID}/non_invasive_recommendations.csv"
|
||||
|
|
@ -55,14 +59,24 @@ def app():
|
|||
file_name=non_invasive_recommendations_filename
|
||||
)
|
||||
|
||||
# Store patches in S3
|
||||
patches_filename = ""
|
||||
if asset_list_epc_client.patches:
|
||||
patches_filename = f"{USER_ID}/{PORTFOLIO_ID}/patches.csv"
|
||||
save_csv_to_s3(
|
||||
dataframe=pd.DataFrame(asset_list_epc_client.patches),
|
||||
bucket_name="retrofit-plan-inputs-dev",
|
||||
file_name=patches_filename
|
||||
)
|
||||
|
||||
valuation_data = [
|
||||
{
|
||||
"valuation": 469_000,
|
||||
"uprn": 100020999275,
|
||||
"uprn": 10007630752,
|
||||
},
|
||||
{
|
||||
"valuation": 382_000,
|
||||
"uprn": 100020235516
|
||||
"valuation": 373_000,
|
||||
"uprn": 100040630290
|
||||
},
|
||||
]
|
||||
# Store valuation data to s3
|
||||
|
|
@ -80,7 +94,7 @@ def app():
|
|||
"goal_value": "C",
|
||||
"trigger_file_path": filename,
|
||||
"already_installed_file_path": "",
|
||||
"patches_file_path": "",
|
||||
"patches_file_path": patches_filename,
|
||||
"non_invasive_recommendations_file_path": non_invasive_recommendations_filename,
|
||||
"valuation_file_path": valuation_filename,
|
||||
"scenario_name": "Full package remote assessment",
|
||||
|
|
|
|||
|
|
@ -26,6 +26,7 @@ class AssetListEpcData:
|
|||
|
||||
self.extracted_data = None
|
||||
self.non_invasive_recommendations = None
|
||||
self.patches = None
|
||||
|
||||
@staticmethod
|
||||
def check_asset_list(asset_list):
|
||||
|
|
@ -52,6 +53,21 @@ class AssetListEpcData:
|
|||
} for r in self.extracted_data
|
||||
]
|
||||
|
||||
def get_patch(self):
    """
    Build ``self.patches`` from the records held in ``self.extracted_data``.

    Each record with a truthy ``"patch"`` entry contributes one dict made of
    the record's ``"uprn"`` followed by the key/value pairs of its patch (a
    ``"uprn"`` key inside the patch takes precedence). Records without a
    patch are ignored.

    :return: None - the result is stored on ``self.patches``
    :raises ValueError: if ``self.extracted_data`` is None
    """
    if self.extracted_data is None:
        raise ValueError("extracted data is missing - run get_data first")

    collected = []
    for record in self.extracted_data:
        patch = record.get("patch")
        if not patch:
            continue
        collected.append({"uprn": record.get("uprn"), **patch})

    self.patches = collected
|
||||
|
||||
def get_data(self):
|
||||
|
||||
logger.info("Retrieving data for given asset list")
|
||||
|
|
@ -67,11 +83,18 @@ class AssetListEpcData:
|
|||
postcode=pc,
|
||||
uprn=home.get("uprn"),
|
||||
auth_token=self.epc_auth_token,
|
||||
os_api_key=""
|
||||
os_api_key="",
|
||||
)
|
||||
epc_searcher.ordnance_survey_client.property_type = home.get("property_type")
|
||||
epc_searcher.ordnance_survey_client.built_form = home.get("built_form")
|
||||
epc_searcher.find_property(skip_os=True)
|
||||
|
||||
if epc_searcher.newest_epc is None:
|
||||
continue
|
||||
|
||||
if not pd.isnull(home.get("patch")):
|
||||
epc_searcher.newest_epc["address1"] = add1
|
||||
|
||||
# Attempt both methods:
|
||||
try:
|
||||
find_epc_searcher = RetrieveFindMyEpc(
|
||||
|
|
@ -89,14 +112,22 @@ class AssetListEpcData:
|
|||
time.sleep(0.5)
|
||||
# We need uprn
|
||||
|
||||
extracted_data.append(
|
||||
{
|
||||
"uprn": home.get("uprn"),
|
||||
"address": home["address"],
|
||||
"postcode": home["postcode"],
|
||||
**find_epc_data,
|
||||
to_append = {
|
||||
"uprn": home.get("uprn"),
|
||||
"address": home["address"],
|
||||
"postcode": home["postcode"],
|
||||
**find_epc_data,
|
||||
}
|
||||
if not pd.isnull(home.get("patch")):
|
||||
to_append["patch"] = {
|
||||
"current-energy-rating": find_epc_data["current_epc_rating"],
|
||||
"current-energy-efficiency": find_epc_data["current_epc_efficiency"],
|
||||
"potential-energy-rating": find_epc_data["potential_epc_rating"],
|
||||
"potential-energy-efficiency": find_epc_data["potential_epc_efficiency"],
|
||||
**find_epc_data["epc_data"]
|
||||
}
|
||||
)
|
||||
|
||||
extracted_data.append(to_append)
|
||||
|
||||
self.extracted_data = extracted_data
|
||||
logger.info("Data Extrction complete")
|
||||
|
|
|
|||
|
|
@ -1,3 +1,4 @@
|
|||
import re
|
||||
import pandas as pd
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
|
|
@ -45,6 +46,85 @@ class RetrieveFindMyEpc:
|
|||
sources = {item.get_text(strip=True): True for item in energy_list.find_all("li")}
|
||||
return sources
|
||||
|
||||
@staticmethod
|
||||
def get_text(elem):
|
||||
return elem.get_text(strip=True) if elem else None
|
||||
|
||||
def extract_epc_data(self, soup):
    """
    Extract the headline EPC fields from a parsed certificate page.

    Anything that cannot be located on the page is returned as None instead
    of raising, in line with the ``... if match else None`` fallbacks this
    method already used for the numeric fields.

    :param soup: parsed certificate page; it must support ``find`` and
        ``select`` the way a BeautifulSoup document does
    :return: dict holding the total floor area, the description/rating pairs
        read from the features table, the primary energy use and the current
        and potential CO2 emissions
    """
    results = {}

    def parse_number(pattern, text, cast):
        # First capture group of `pattern` in `text`, thousands separators
        # removed and converted with `cast`; None when there is nothing to parse.
        found = re.search(pattern, text) if text else None
        return cast(found.group(1).replace(",", "")) if found else None

    # 1. Total floor area - the number is the first space-separated token
    floor_label = soup.find("dt", string="Total floor area")
    floor_value = floor_label.find_next_sibling("dd") if floor_label is not None else None
    floor_text = self.get_text(floor_value)
    results['total-floor-area'] = (
        int(floor_text.split(" ")[0].replace(",", "")) if floor_text else None
    )

    # Table with features
    rows = soup.select("table.govuk-table tbody tr")

    rating_map = {
        "Very poor": "Very Poor",
        "Very good": "Very Good"
    }

    def get_feature_row_text(feature_name, index=0):
        exact, partial = [], []
        for row in rows:
            header = row.find("th")
            if not header:
                continue
            if header.text.strip() == feature_name:
                exact.append(row)
            if feature_name in header.text:
                partial.append(row)
        # Prefer an exact header match so that "Main heating" can never pick
        # up a "Main heating control" row; fall back to the substring match.
        matches = exact or partial
        if len(matches) > index:
            cells = matches[index].find_all("td")
            description = self.get_text(cells[0]) if len(cells) > 0 else None
            rating = self.get_text(cells[1]) if len(cells) > 1 else None
            return description, rating_map.get(rating, rating)
        return None, None

    # 2-3. First wall description and rating
    results['walls-description'], results['walls-energy-eff'] = get_feature_row_text("Wall", 0)

    # 4-5. First roof description and rating
    results['roof-description'], results['roof-energy-eff'] = get_feature_row_text("Roof", 0)

    # 6-7. Windows description and rating
    results['windows-description'], results['windows-energy-eff'] = get_feature_row_text("Window")

    # 8-9. Main heating description and rating
    results['mainheat-description'], results['mainheat-energy-eff'] = get_feature_row_text("Main heating")

    # 10-11. Main heating control description and rating
    results['mainheatcont-description'], results['mainheatc-energy-eff'] = get_feature_row_text(
        "Main heating control"
    )

    # 12-13. Hot water description and rating
    # Key was previously misspelled 'hot-water-energy-ef'; every sibling
    # rating key uses the '-energy-eff' suffix.
    results['hotwater-description'], results['hot-water-energy-eff'] = get_feature_row_text("Hot water")

    # 14-15. Lighting description and rating
    results['lighting-description'], results['lighting-energy-eff'] = get_feature_row_text("Lighting")

    # 16. Floor description
    results['floor-description'], _ = get_feature_row_text("Floor")

    # 17. Secondary heating description
    results['secondheat-description'], _ = get_feature_row_text("Secondary heating")

    # 18. Primary energy use
    p_energy = soup.find(
        string=lambda t: t is not None and "primary energy use for this property per year" in t.lower()
    )
    results['energy-consumption-current'] = parse_number(r"(\d[\d,]*)\s+kilowatt", p_energy, int)

    # 19. Current CO2 emissions
    co2_now = soup.find("dd", id="eir-property-produces")
    results['co2-emissions-current'] = parse_number(
        r"([\d.]+)", co2_now.text if co2_now is not None else None, float
    )
    # Need co2-emiss-curr-per-floor-area

    # 20. Potential CO2 emissions
    co2_pot = soup.find("dd", id="eir-potential-production")
    results['co2-emissions-potential'] = parse_number(
        r"([\d.]+)", co2_pot.text if co2_pot is not None else None, float
    )

    return results
|
||||
|
||||
def retrieve_newest_find_my_epc_data(self, sap_2012_date=None):
|
||||
"""
|
||||
For a post code and address, we pull out all the required data from the find my epc website
|
||||
|
|
@ -115,6 +195,9 @@ class RetrieveFindMyEpc:
|
|||
potential_rating = ratings.split(".")[1]
|
||||
current_sap = int(current_rating.split(' ')[-1])
|
||||
|
||||
# Floor area
|
||||
address_res.find()
|
||||
|
||||
# Retrieve the energy consumption
|
||||
bills = address_res.find('div', {'id': 'bills-affected'})
|
||||
bills_list = bills.find_all('li')
|
||||
|
|
@ -232,6 +315,9 @@ class RetrieveFindMyEpc:
|
|||
# 4) Low and zero carbon energy sources
|
||||
low_carbon_energy_sources = self.extract_low_carbon_sources(address_res)
|
||||
|
||||
# 5) Pull out the EPC data
|
||||
epc_data = self.extract_epc_data(address_res)
|
||||
|
||||
resulting_data = {
|
||||
'epc_certificate': epc_certificate,
|
||||
'current_epc_rating': current_rating.split(' ')[-6],
|
||||
|
|
@ -241,8 +327,9 @@ class RetrieveFindMyEpc:
|
|||
"heating_text": heating_text,
|
||||
"hot_water_text": hot_water_text,
|
||||
"recommendations": recommendations,
|
||||
"epc_data": epc_data,
|
||||
**assessment_data,
|
||||
**low_carbon_energy_sources
|
||||
**low_carbon_energy_sources,
|
||||
}
|
||||
|
||||
return resulting_data
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue