mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
Merge branch 'main' of https://github.com/Hestia-Homes/Model into kwh-bills-model-fixes
This commit is contained in:
commit
061ecb2c4a
1 changed file with 38 additions and 0 deletions
38
etl/webscrape/Zoopla.py
Normal file
38
etl/webscrape/Zoopla.py
Normal file
|
|
@ -0,0 +1,38 @@
|
|||
# Initial Code
|
||||
|
||||
from seleniumbase import SB
|
||||
import time
|
||||
|
||||
# UPRNs whose Zoopla property pages are scraped, in request order.
# NOTE(review): the same five UPRNs are listed twice. They are kept as-is so
# that estimate_list still has exactly one entry per listed UPRN — confirm
# whether the repetition is intentional before de-duplicating.
uprns = [
    100071297618,
    100080893397,
    100060778033,
    200004793081,
    100071265143,
    100071297618,
    100080893397,
    100060778033,
    200004793081,
    100071265143,
]


def _extract_estimate(soup):
    """Return the sale-estimate text found in a parsed property page.

    The text is the ``aria-label`` of the last ``<span>`` inside the last
    ``<p>`` of the last ``<div data-testid="sale-estimate">`` in *soup*.

    Returns ``None`` when any of those elements, or the attribute itself, is
    missing, so that one page without an estimate cannot abort the whole run.
    """
    containers = soup.find_all("div", {"data-testid": "sale-estimate"})
    if not containers:
        return None

    paragraphs = containers[-1].find_all("p")
    if not paragraphs:
        return None

    spans = paragraphs[-1].find_all("span")
    if not spans:
        return None

    # .get() yields None instead of raising KeyError if the span carries no
    # aria-label attribute.
    return spans[-1].get("aria-label")


# One entry per element of `uprns`, in the same order; None marks a page on
# which no estimate could be located.
estimate_list = []

for uprn in uprns:
    # Probably can change the timings here
    time.sleep(5)

    # A fresh browser session is opened for every UPRN and closed as soon as
    # the page has been captured.
    with SB(uc=True) as sb:
        sb.uc_open_with_reconnect(
            f"https://www.zoopla.co.uk/property/uprn/{uprn}/",
            3,
        )
        soup = sb.get_beautiful_soup()

    # Can change the way we extract the text here
    estimate_list.append(_extract_estimate(soup))
|
||||
Loading…
Add table
Reference in a new issue