Merge branch 'main' of https://github.com/Hestia-Homes/Model into kwh-bills-model-fixes

This commit is contained in:
Khalim Conn-Kowlessar 2024-08-20 17:45:29 +01:00
commit 061ecb2c4a

38
etl/webscrape/Zoopla.py Normal file
View file

@@ -0,0 +1,38 @@
# Initial Code
from seleniumbase import SB
import time
# UPRNs of the properties whose Zoopla sale estimate is scraped below.
# NOTE(review): the same five UPRNs are listed twice, so every page is
# fetched twice - confirm whether the repetition is intentional before
# deduplicating (doing so would change the length of `estimate_list`).
uprns = [
    100071297618,
    100080893397,
    100060778033,
    200004793081,
    100071265143,
    100071297618,
    100080893397,
    100060778033,
    200004793081,
    100071265143,
]

# One entry per element of `uprns`, in the same order: the "aria-label" text
# of the sale-estimate element, or None when the page did not contain one.
estimate_list = []

for uprn in uprns:
    # Pause before every page load. Probably can change the timings here
    time.sleep(5)
    # A new SB session is opened for each UPRN and released when the `with`
    # block exits.
    with SB(uc=True) as sb:
        sb.uc_open_with_reconnect(
            f"https://www.zoopla.co.uk/property/uprn/{uprn}/",
            3,
        )
        soup = sb.get_beautiful_soup()
        estimates = soup.find_all("div", {"data-testid": "sale-estimate"})
        # Can change the way we extract the text here
        try:
            # Last sale-estimate div -> its last <p> -> that <p>'s last
            # <span>, whose "aria-label" attribute holds the estimate text.
            estimate_text = (
                estimates[-1].find_all("p")[-1].find_all("span")[-1]["aria-label"]
            )
        except (IndexError, KeyError):
            # The page has no sale-estimate block, or its markup differs from
            # what is expected (no <p>/<span>, or no "aria-label" attribute).
            # Record the gap instead of aborting the run, so the estimates
            # already collected are kept and the list stays aligned with
            # `uprns`.
            estimate_text = None
        estimate_list.append(estimate_text)