Model/etl/webscrape/Zoopla.py
2024-08-20 16:28:39 +01:00

38 lines
831 B
Python

# Initial Code
import logging
import time

from seleniumbase import SB
# UPRNs whose Zoopla property pages are fetched by the loop below.
# NOTE(review): entries 6-10 repeat entries 1-5, so each property is
# requested twice per run -- confirm whether that is intentional.
uprns = [
    100071297618,
    100080893397,
    100060778033,
    200004793081,
    100071265143,
    100071297618,
    100080893397,
    100060778033,
    200004793081,
    100071265143,
]
# One entry per element of ``uprns``, in the same order: the estimate's
# ``aria-label`` text, or ``None`` when the page carried no usable estimate.
estimate_list = []
logger = logging.getLogger(__name__)


def _extract_estimate(soup):
    """Return the sale-estimate label from a parsed property page, or None.

    Walks from the last ``div`` marked ``data-testid="sale-estimate"`` to the
    last ``p`` inside it, then to the last ``span`` inside that, and reads the
    span's ``aria-label`` attribute. Returns ``None`` when any step of that
    chain is missing instead of raising.
    """
    estimates = soup.find_all("div", {"data-testid": "sale-estimate"})
    # Can change the way we extract the text here
    try:
        last_span = estimates[-1].find_all("p")[-1].find_all("span")[-1]
        return last_span["aria-label"]
    except (IndexError, KeyError):
        # IndexError: one of the find_all() calls matched nothing.
        # KeyError: the span exists but has no aria-label attribute.
        return None


for uprn in uprns:
    # Probably can change the timings here
    time.sleep(5)
    # A fresh browser session is opened (and closed) for every UPRN.
    with SB(uc=True) as sb:
        sb.uc_open_with_reconnect(
            f"https://www.zoopla.co.uk/property/uprn/{uprn}/",
            # NOTE(review): presumably the reconnect delay in seconds --
            # confirm against the SeleniumBase UC Mode documentation.
            3,
        )
        soup = sb.get_beautiful_soup()
        estimate_text = _extract_estimate(soup)
        if estimate_text is None:
            # Keep going: one page without an estimate should not discard
            # the results already collected for the other UPRNs.
            logger.warning("No sale estimate found for UPRN %s", uprn)
        estimate_list.append(estimate_text)