Merge pull request #512 from Hestia-Homes/eco-eligiblity-bug

Allow no valuation data
This commit is contained in:
KhalimCK 2025-10-27 16:26:12 +00:00 committed by GitHub
commit d7ca1ea2bb
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
12 changed files with 431 additions and 54 deletions

View file

@ -309,6 +309,17 @@ class AssetList:
'NAME OF SURVEYOR'
]
# Solar non-intrusive fields
# Column headers that make up the solar (PV) non-intrusive survey section of an asset list.
# NOTE(review): the 'AREA (m²) OF ROOF WHERE PV WILL BE SITUATED ' entry below ends with a
# trailing space, whereas the `solar_non_intrusives_present` check looks for the same header
# without the trailing space - confirm which spelling the raw sheet actually uses.
NON_INTRUSIVES_SOLAR_COLNAMES = [
'PV, ACCESS ISSUE, SEE NOTES', 'ROOF ORIENTATION',
'AREA (m²) OF ROOF WHERE PV WILL BE SITUATED ', 'SHADING',
'Roof Tiles - CONCRETE/SLATE/ROSEMARY',
'NO. OF PANELS (Typical size of 420W panel is 1mx1.7m and need 30cm all the way around panels)',
'SCAFFOLD REQUIRED? IF YES, ARE THERE ANY SURROUNDING ACCESS ISSUES - PLEASE DESCRIBE',
'IF PANELS ARE GOING ON REAR PLEASE CHECK FOR SPACE FOR SCAFFOLDING - DESCRIBE ANY ISSUES BELOW',
'DATE', 'NAME OF SURVEYOR'
]
NON_INTRUSIVES_ELIGIBILITY_COLUMN = "Eligibility (Red/Yellow/Green)"
OLD_FORMAT_NON_INTRUSIVE_COLNAMES = ['WFT Findings', 'ECO Eligibility']
@ -461,6 +472,8 @@ class AssetList:
self.new_format_non_insturives_present_v2 = 'TILE HUNG' in self.raw_asset_list.columns
self.solar_non_intrusives_present = "AREA (m²) OF ROOF WHERE PV WILL BE SITUATED" in self.raw_asset_list.columns
# Names of columns
self.landlord_property_id = landlord_property_id
self.address1_colname = address1_colname
@ -774,6 +787,9 @@ class AssetList:
if self.new_format_non_insturives_present_v2:
non_intrusive_columns += self.NON_INTRUSIVES_NEW_FORMAT_COLNAMES_V2
if self.solar_non_intrusives_present:
non_intrusive_columns += self.NON_INTRUSIVES_SOLAR_COLNAMES
if self.old_format_non_intrusives_present:
# We check if we have the ECO Eligibility column, which we might not have
non_intrusive_columns = [
@ -946,7 +962,7 @@ class AssetList:
if self.phase:
# We filter on just the properties that have had an inspection
if self.new_format_non_insturives_present_v2:
if self.new_format_non_insturives_present_v2 or self.solar_non_intrusives_present:
self.standardised_asset_list = self.standardised_asset_list[
~self.standardised_asset_list['NAME OF SURVEYOR'].isin(
["YET TO BE SURVEYED", "", None]
@ -1341,10 +1357,10 @@ class AssetList:
# for identifying cavity jobs
if self.non_intrusives_present and not self.old_format_non_intrusives_present:
if self.new_format_non_insturives_present_v2:
if self.new_format_non_insturives_present_v2 or self.solar_non_intrusives_present:
existing_solar_non_intrusives_check = (
self.standardised_asset_list["non-intrusives: ROOF ORIENTATION"].str.strip().isin(
["ALREADY HAS SOLAR PV"]
["ALREADY HAS SOLAR PV", "ALREADY HAS PV"]
)
)
else:

View file

@ -59,6 +59,176 @@ def app():
Property UPRN
"""
#
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Cambridge/"
data_filename = "22.10_Cambridge_west addresses.xlsx"
sheet_name = "Asset List"
postcode_column = 'Postcode'
address1_column = None
address1_method = "house_number_extraction"
fulladdress_column = "Full Address"
address_cols_to_concat = []
missing_postcodes_method = None
landlord_year_built = None
landlord_os_uprn = None
landlord_property_type = None
landlord_built_form = None
landlord_wall_construction = None
landlord_roof_construction = None
landlord_heating_system = None
landlord_existing_pv = None
landlord_property_id = "id"
landlord_sap = None
outcomes_filename = None
outcomes_sheetname = None
outcomes_postcode = None
outcomes_houseno = None
outcomes_id = None
outcomes_address = None
master_filepaths = []
master_id_colnames = []
master_to_asset_list_filepath = None
phase = False
ecosurv_landlords = None
asset_list_header = 0
landlord_block_reference = None
# Property Box
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/NRLA/Property Box"
data_filename = "Property Box Finance Portfolio.xlsx"
sheet_name = "Sheet1"
postcode_column = 'Postcode'
address1_column = None
address1_method = "house_number_extraction"
fulladdress_column = "Address 1"
address_cols_to_concat = []
missing_postcodes_method = None
landlord_year_built = None
landlord_os_uprn = None
landlord_property_type = None
landlord_built_form = None
landlord_wall_construction = None
landlord_roof_construction = None
landlord_heating_system = None
landlord_existing_pv = None
landlord_property_id = "row_id"
landlord_sap = None
outcomes_filename = None
outcomes_sheetname = None
outcomes_postcode = None
outcomes_houseno = None
outcomes_id = None
outcomes_address = None
master_filepaths = []
master_id_colnames = []
master_to_asset_list_filepath = None
phase = False
ecosurv_landlords = None
asset_list_header = 0
landlord_block_reference = "block_id"
# CDS - able-to-pay
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/CDS/Able to pay"
data_filename = "CDS_ASSET LIST_(2314).xlsx"
sheet_name = "Sheet1"
postcode_column = 'Property Address - Postcode'
address1_column = "Property Address - Line 1"
address1_method = None
fulladdress_column = "Property Address - Line 1"
address_cols_to_concat = []
missing_postcodes_method = None
landlord_year_built = None
landlord_os_uprn = None
landlord_property_type = None
landlord_built_form = None
landlord_wall_construction = None
landlord_roof_construction = None
landlord_heating_system = None
landlord_existing_pv = None
landlord_property_id = "row_id"
landlord_sap = None
outcomes_filename = None
outcomes_sheetname = None
outcomes_postcode = None
outcomes_houseno = None
outcomes_id = None
outcomes_address = None
master_filepaths = []
master_id_colnames = []
master_to_asset_list_filepath = None
phase = False
ecosurv_landlords = None
asset_list_header = 0
landlord_block_reference = None
# Hyde - solar
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Hyde/Solar"
data_filename = "Domna Property Analysis HYDE (Chichester Removed)V2-Completed.xlsx"
sheet_name = "Electric Property Inspections"
postcode_column = 'Postcode'
address1_column = None # Is only patchily populated so we create it
address1_method = 'house_number_extraction'
fulladdress_column = "Address"
address_cols_to_concat = []
missing_postcodes_method = None
landlord_year_built = None
landlord_os_uprn = None
landlord_property_type = "Property Type"
landlord_built_form = "Property Type"
landlord_wall_construction = "Walls "
landlord_roof_construction = "Roofs"
landlord_heating_system = "Heating"
landlord_existing_pv = None
landlord_property_id = "Address ID"
landlord_sap = "SAP"
outcomes_filename = None
outcomes_sheetname = None
outcomes_postcode = None
outcomes_houseno = None
outcomes_id = None
outcomes_address = None
master_filepaths = []
master_id_colnames = []
master_to_asset_list_filepath = None
phase = False
ecosurv_landlords = None
asset_list_header = 0
landlord_block_reference = None
# Hyde cavity
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Hyde/Cavity"
data_filename = "Domna Property Analysis HYDE (Chichester Removed)V2-Completed.xlsx"
sheet_name = "Cavity Inspections"
postcode_column = 'Postcode'
address1_column = None # Is only patchily populated so we create it
address1_method = 'house_number_extraction'
fulladdress_column = "Address"
address_cols_to_concat = []
missing_postcodes_method = None
landlord_year_built = None
landlord_os_uprn = None
landlord_property_type = "Property Type"
landlord_built_form = "Property Type"
landlord_wall_construction = "Walls "
landlord_roof_construction = "Roofs"
landlord_heating_system = "Heating"
landlord_existing_pv = None
landlord_property_id = "Address ID"
landlord_sap = "SAP"
outcomes_filename = None
outcomes_sheetname = None
outcomes_postcode = None
outcomes_houseno = None
outcomes_id = None
outcomes_address = None
master_filepaths = []
master_id_colnames = []
master_to_asset_list_filepath = None
phase = False
ecosurv_landlords = None
asset_list_header = 0
landlord_block_reference = None
# CDS - Sept 2025
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/CDS/September 2025 Programme"
data_filename = "Founder Estates CDS.xlsx"

View file

@ -439,5 +439,23 @@ BUILT_FORM_MAPPINGS = {
'Chalet - Wheelchair': 'unknown',
'Studio Flat': 'unknown',
'Bungalow - Attached': 'semi-detached',
'ND': 'unknown'
'ND': 'unknown',
'Maisonette: Mid Terrace: Mid Floor': 'mid-floor',
'Maisonette: Semi Detached: Ground Floor': 'semi-detached',
'Maisonette: Enclosed Mid Terrace: Ground Floor': 'enclosed mid-terrace',
'Maisonette: Enclosed End Terrace: Ground Floor': 'end-terrace',
'Maisonette: Mid Terrace: Ground Floor': 'mid-terrace',
'Flat: Semi Detached: Basement': 'semi-detached',
'Maisonette: Semi Detached: Top Floor': 'semi-detached',
'Maisonette: Enclosed Mid Terrace: Mid Floor': 'enclosed mid-terrace',
'Flat: Detached: Basement': 'detached',
'Maisonette: Enclosed Mid Terrace: Top Floor': 'enclosed mid-terrace',
'Maisonette: End Terrace: Top Floor': 'top-floor',
'House: Mid Terrace: Ground Floor': 'ground floor',
'Maisonette: Semi Detached: Mid Floor': 'detached',
'Maisonette: Detached: Mid Floor': 'detached',
'Bungalow: EnclosedMidTerrace': 'enclosed mid-terrace'
}

View file

@ -477,6 +477,23 @@ HEATING_MAPPINGS = {
'Heat networks Heat networks (mains gas)': 'communal heating',
'ND Oil': 'oil fuel',
'Boiler Biofuel': 'boiler - other fuel'
'Boiler Biofuel': 'boiler - other fuel',
'Electric (direct acting) room heaters: Water- or oil-filled radiators': 'room heaters',
'Other: Electric ceiling heating': 'electric ceiling',
'Heat Pump: Electric Heat pumps: Air source heat pump with flow temperature <= 35°C': 'air source heat pump',
'Oil room heaters: Room heater, 2000 or later': 'room heaters',
'Electric Underfloor Heating: In screed above insulation (standard or off peak)': 'electric underfloor',
'Heat Pump: Electric Heat pumps: Air source heat pump in other cases': 'air source heat pump',
'Electric Storage Systems: Old (large volume) storage heaters': 'electric storage heaters',
'Gas (including LPG) room heaters: Condensing gas fire': 'room heaters',
'Solid fuel room heaters: Open fire in grate': 'solid fuel',
'Solid fuel room heaters: Open fire with back boiler (no radiators)': 'solid fuel',
'Community Heating Systems: Community heat pump (RdSAP)': 'communal heating',
'Gas (including LPG) room heaters: Gas fire, open flue, 1980 or later (open fronted), sitting proud of, '
'and sealed to, fireplace opening': 'room heaters',
'Boiler: A rated Regular Boiler, System 2: Boiler: C rated Regular Boiler': 'boiler - other fuel',
'Boiler: G rated Combi': 'gas condensing combi'
}

View file

@ -343,5 +343,23 @@ PROPERTY_MAPPING = {
'bungalow': 'bungalow',
'flat': 'flat',
'FLA': 'flat',
'HOU': 'house'
'HOU': 'house',
'Maisonette: Mid Terrace: Mid Floor': 'maisonette',
'Maisonette: Semi Detached: Ground Floor': 'maisonette',
'Maisonette: Enclosed Mid Terrace: Ground Floor': 'maisonette',
'Maisonette: Enclosed End Terrace: Ground Floor': 'maisonette',
'Maisonette: Mid Terrace: Ground Floor': 'maisonette',
'Flat: Semi Detached: Basement': 'flat',
'Maisonette: Semi Detached: Top Floor': 'maisonette',
'Maisonette: Enclosed Mid Terrace: Mid Floor': 'maisonette',
'Flat: Detached: Basement': 'flat',
'Maisonette: Enclosed Mid Terrace: Top Floor': 'maisonette',
'Maisonette: End Terrace: Top Floor': 'maisonette',
'House: Mid Terrace: Ground Floor': 'house',
'Bungalow: EnclosedMidTerrace': 'bungalow',
'Maisonette: Semi Detached: Mid Floor': 'maisonette',
'Maisonette: Detached: Mid Floor': 'maisonette'
}

View file

@ -275,5 +275,30 @@ ROOF_CONSTRUCTION_MAPPINGS = {
'Pitched (vaulted ceiling) Non-joist': 'pitched unknown insulation',
'ND (inferred) ND (inferred)': 'unknown',
'Flat Non-joist': 'flat insulated',
'Same dwelling above N/A': 'another dwelling above'
'Same dwelling above N/A': 'another dwelling above',
'Flat: As Built, PitchedNormalLoftAccess: Unknown': 'flat unknown insulation',
'PitchedNormalLoftAccess: Unknown, PitchedNormalNoLoftAccess: Unknown': 'pitched unknown insulation',
'PitchedNormalLoftAccess: 400mm+': 'pitched insulated',
'AnotherDwellingAbove: 150mm': 'another dwelling above',
'Flat: 150mm': 'flat insulated',
'AnotherDwellingAbove: 50mm': 'another dwelling above',
'PitchedNormalNoLoftAccess: As Built': 'pitched no access to loft',
'PitchedNormalLoftAccess: 250mm, PitchedWithSlopingCeiling: As Built': 'pitched insulated',
'PitchedNormalLoftAccess: 200mm, PitchedWithSlopingCeiling: As Built': 'pitched insulated',
'PitchedNormalLoftAccess: 350mm': 'pitched insulated',
'PitchedNormalNoLoftAccess: 270mm': 'pitched no access to loft',
'AnotherDwellingAbove: 100mm': 'another dwelling above',
'PitchedWithSlopingCeiling: Unknown': 'piched unknown insulation',
'AnotherDwellingAbove: Unknown, Flat: As Built': 'another dwelling above',
'Flat: Unknown, PitchedNormalLoftAccess: 25mm': 'flat unknown insulation',
'SameDwellingAbove: Unknown': 'another dwelling above',
'Flat: Unknown': 'flat unknown insulation',
'Flat: 50mm, PitchedNormalLoftAccess: 100mm': 'flat insulated',
'Flat: As Built, PitchedNormalLoftAccess: 250mm, PitchedWithSlopingCeiling: As Built': 'flat unknown insulation',
'Flat: As Built, PitchedNormalLoftAccess: 400mm+': 'flat unknown insulation',
'PitchedWithSlopingCeiling: As Built': 'pitched insulated',
'PitchedNormalLoftAccess: As Built': 'pitched unknown insulation',
}

View file

@ -353,4 +353,7 @@ WALL_CONSTRUCTION_MAPPINGS = {
'System built As-built': "uninsulated system built",
'System built Internal': 'insulated system built',
'Cavity: AsBuilt (1976-1982), TimberFrame: AsBuilt': 'cavity unknown insulation',
'Cavity: FilledCavityPlusExternal': 'filled cavity'
}

View file

@ -347,7 +347,8 @@ class SearchEpc:
# We update the data with the correct uprn
if self.uprn:
for x in api_response["response"]["rows"]:
x["uprn"] = self.uprn
if pd.isnull(x["uprn"]):
x["uprn"] = self.uprn
data["rows"].extend(api_response["response"]["rows"])
@ -357,6 +358,8 @@ class SearchEpc:
row for row in data["rows"]
if row["lmk-key"] not in seen and not seen.add(row["lmk-key"])
]
# Overwrite the data
self.data = data
if data["rows"]:
api_response["msg"] = self.SUCCESS

View file

@ -145,14 +145,17 @@ def extract_portfolio_aggregation_data(
cost = sum([r["total"] for r in default_recommendations])
sap_point_improvement = sum([r["sap_points"] for r in default_recommendations])
lower_bound_valuation_uplift = (
property_value_increase_ranges[p.id]["lower_bound_increased_value"] -
property_value_increase_ranges[p.id]["current_value"]
)
upper_bound_valuation_uplift = (
property_value_increase_ranges[p.id]["upper_bound_increased_value"] -
property_value_increase_ranges[p.id]["current_value"]
)
if not pd.isnull(property_value_increase_ranges[p.id]["current_value"]):
lower_bound_valuation_uplift = (
property_value_increase_ranges[p.id]["lower_bound_increased_value"] -
property_value_increase_ranges[p.id]["current_value"]
)
upper_bound_valuation_uplift = (
property_value_increase_ranges[p.id]["upper_bound_increased_value"] -
property_value_increase_ranges[p.id]["current_value"]
)
else:
lower_bound_valuation_uplift, upper_bound_valuation_uplift = 0, 0
agg_data.append({
"pre_retrofit_epc": p.data["current-energy-rating"],
@ -523,6 +526,7 @@ async def model_engine(body: PlanTriggerRequest):
plan_input["built_form"] = plan_input["built_form"].map(built_form_map)
plan_input = plan_input.to_dict("records")
else:
raise ValueError("Other formats not yet supported")
@ -549,6 +553,13 @@ async def model_engine(body: PlanTriggerRequest):
# If we have patches or overrides, we should read them in here
patches, already_installed, non_invasive_recommendations, valuation_data = get_request_property_data(body)
if body.file_type == "xlsx" and body.file_format == "domna_asset_list":
# We check if we have valuation data
if not valuation_data and body.valuation_file_path in [None, ""]:
# We check plan_input
if "domna_valuation" in plan_input[0]:
valuation_data = [{"uprn": x["uprn"], "valuation": x["domna_valuation"]} for x in plan_input]
cleaning_data = read_dataframe_from_s3_parquet(
bucket_name=get_settings().DATA_BUCKET, file_key="sap_change_model/cleaning_dataset.parquet",
)
@ -563,12 +574,22 @@ async def model_engine(body: PlanTriggerRequest):
if uprn:
uprn = int(float(uprn))
address1 = config.get("address", None)
# Handle domna address list format
if pd.isnull(address1) and body.file_format == "domna_asset_list":
address1 = config.get("domna_full_address", None)
address1 = str(int(address1)) if isinstance(address1, float) else str(address1)
full_address = config["domna_full_address"] if body.file_format == "domna_asset_list" else None
epc_searcher = SearchEpc(
address1=str(config["address"]),
address1=address1,
postcode=config["postcode"],
uprn=uprn,
auth_token=get_settings().EPC_AUTH_TOKEN,
os_api_key="",
full_address=full_address
)
epc_searcher.ordnance_survey_client.built_form = config.get("built_form", None)
epc_searcher.ordnance_survey_client.property_type = config.get("property_type", None)
@ -1176,9 +1197,10 @@ async def model_engine(body: PlanTriggerRequest):
upload_funding(session, p, new_plan_id, recommendations_to_upload)
property_valuation_increases.append(
valuations["average_increased_value"] - valuations["current_value"]
)
if valuations["current_value"] > 0:
property_valuation_increases.append(
valuations["average_increased_value"] - valuations["current_value"]
)
# Commit the session after each batch
session.commit()

View file

@ -219,12 +219,19 @@ class PropertyValuation:
current_epc = property_instance.data["current-energy-rating"]
if not current_value:
# In this case, we return a % improvement rather than an absolute
relative_improvement = cls.estimate_valuation_improvement(
current_value=1,
current_epc=current_epc,
target_epc=target_epc,
total_cost=1
)
return {
"current_value": 0,
"lower_bound_increased_value": 0,
"upper_bound_increased_value": 0,
"average_increased_value": 0,
"average_increase": 0
"lower_bound_increased_value": relative_improvement["lower_bound_increased_value"] - 1,
"upper_bound_increased_value": relative_improvement["upper_bound_increased_value"] - 1,
"average_increased_value": relative_improvement["average_increased_value"] - 1,
"average_increase": relative_improvement["average_increase"]
}
return cls.estimate_valuation_improvement(current_value, current_epc, target_epc, total_cost)

View file

@ -1,38 +1,111 @@
# Initial Code
from seleniumbase import SB
from bs4 import BeautifulSoup
import pandas as pd
import time
from stealth_requests import StealthSession
import random
from multiprocessing import Pool
from tqdm import tqdm
uprns = [
100071297618,
100080893397,
100060778033,
200004793081,
100071265143,
100071297618,
100080893397,
100060778033,
200004793081,
100071265143,
]
ENGINES = ["safari", "chrome"]
estimate_list = []
for uprn in uprns:
def scrape_all_estimates(session, url):
    """Fetch *url* and return the sale-estimate blocks found on the page.

    Args:
        session: Object whose ``get(url, impersonate=...)`` returns a
            response exposing the page HTML as ``.text``.
        url: Address of the page to fetch.

    Returns:
        A ``(estimates, is_blocked)`` tuple. ``estimates`` is the list of
        ``div`` elements whose ``data-testid`` is ``"sale-estimate"``;
        ``is_blocked`` is ``True`` when no such element was found.
    """
    # Rotate impersonation per request. random.choice keeps working if
    # ENGINES gains or loses entries, unlike a hard-coded 0..1 index.
    resp = session.get(url, impersonate=random.choice(ENGINES))
    page_source = BeautifulSoup(resp.text, "html.parser")
    estimates = page_source.find_all("div", {"data-testid": "sale-estimate"})
    # A page without any estimate block is treated as a blocked request.
    is_blocked = len(estimates) == 0
    return estimates, is_blocked
# Probably can change the timings here
time.sleep(5)
with SB(uc=True) as sb:
sb.uc_open_with_reconnect(
f"https://www.zoopla.co.uk/property/uprn/{uprn}/",
3,
def parallel_task(url):
    """Scrape the low, middle and high sale estimates for a single URL.

    A fresh ``StealthSession`` is opened for the call. While the page yields
    no estimate block, the request is repeated after a random pause of up to
    one second.

    Args:
        url: Page to scrape.

    Returns:
        A dict with the keys ``"URL"``, ``"Low Estimate"``,
        ``"Middle Estimate"`` and ``"High Estimate"``; the estimates are the
        raw text of the matching elements in the first estimate block.
    """
    # The session itself is created without an impersonate argument.
    with StealthSession() as session:
        while True:
            blocks, blocked = scrape_all_estimates(session, url)
            if not blocked:
                break
            # NOTE(review): there is no cap on retries, so a page that never
            # shows an estimate block keeps this worker looping - confirm
            # whether a retry limit is wanted.
            print(f"Blocked by Zoopla for URL: {url}")
            time.sleep(random.uniform(0, 1))

        first_block = blocks[0]
        return {
            "URL": url,
            "Low Estimate": first_block.find("span", {"data-testid": "low-estimate-blurred"}).text,
            "Middle Estimate": first_block.find("p", {"data-testid": "estimate-blurred"}).text,
            "High Estimate": first_block.find("span", {"data-testid": "high-estimate-blurred"}).text,
        }
def parse_price(p):
    """Convert a price string such as ``"£250k"`` into a number of pounds.

    The pound sign, surrounding whitespace and thousands separators are
    ignored, and a trailing ``k`` (thousands) or ``m`` (millions) suffix is
    applied case-insensitively.

    Args:
        p: Price text, e.g. ``"£250k"``, ``"£1.5m"`` or ``"£1,250,000"``.

    Returns:
        The price as a float.

    Raises:
        ValueError: If the remaining text is not a valid number.
    """
    # Strip thousands separators as well as the currency symbol; float()
    # rejects "1,250,000".
    text = p.replace("£", "").replace(",", "").strip().lower()
    multipliers = {"k": 1000, "m": 1_000_000}
    if text and text[-1] in multipliers:
        return float(text[:-1]) * multipliers[text[-1]]
    return float(text)
# def parallel_task(url):
# with StealthSession(impersonate=ENGINES[random.randint(0, 1)]) as session:
# estimates, is_blocked = scrape_all_estimates(session, url)
#
# while is_blocked:
# # Will need to wait and retry if blocked by Zoopla
# print(f"Blocked by Zoopla for URL: {url}")
# sleep_factor = random.uniform(0, 1) # Random delay to avoid detection
# time.sleep(sleep_factor * 1)
# estimates, is_blocked = scrape_all_estimates(session, url)
#
# low_estimate = (
# estimates[0].find("span", {"data-testid": "low-estimate-blurred"}).text
# ) # Find all span elements with data-testid="low-estimate"
# middle_estimate = (
# estimates[0].find("p", {"data-testid": "estimate-blurred"}).text
# ) # Find all span elements with data-testid="middle-estimate"
# high_estimate = (
# estimates[0].find("span", {"data-testid": "high-estimate-blurred"}).text
# ) # Find all span elements with data-testid="high-estimate-blurred"
#
# return {
# "URL": url,
# "Low Estimate": low_estimate,
# "Middle Estimate": middle_estimate,
# "High Estimate": high_estimate,
# }
if __name__ == "__main__":
# Get a SAL
asset_list = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/NRLA/Property Box/Property Box Finance Portfolio - "
"Standardised.xlsx",
sheet_name="Standardised Asset List"
)
asset_list["epc_os_uprn"] = asset_list["epc_os_uprn"].astype(int).astype(str)
uprns = asset_list["epc_os_uprn"].tolist()
urls = [f"https://www.zoopla.co.uk/property/uprn/{uprn}/" for uprn in uprns]
with Pool(processes=5) as pool:
estimates_list = list(
tqdm(
pool.imap(parallel_task, urls),
total=len(urls),
)
)
soup = sb.get_beautiful_soup()
df = pd.DataFrame(estimates_list)
# Extract UPRN from URL
df["uprn"] = df["URL"].str.extract(r"uprn/(\d+)/")
df["valuation"] = df["Middle Estimate"].apply(parse_price)
df.to_csv("zoopla_estimates.csv", index=False)
estimates = soup.find_all("div", {"data-testid": "sale-estimate"})
# Can change the way we extract the text here
estimate_text = (
estimates[-1].find_all("p")[-1].find_all("span")[-1]["aria-label"]
)
estimate_list.append(estimate_text)
df["uprn"] = df["uprn"].astype(int).astype(str)
asset_list.merge(df[["uprn", "valuation"]], left_on="epc_os_uprn", right_on="uprn", how="left").to_excel(
"Property Box Finance Portfolio - Standardised - with valuations.xlsx", index=False
)

View file

@ -0,0 +1,5 @@
beautifulsoup4>=4.12.0
pandas>=2.0.0
stealth-requests>=1.0.7
tqdm>=4.65.0
openpyxl