"""Basic script for scraping Zoopla sale estimates by UPRN.

For every UPRN in ``uprns`` the script opens the matching Zoopla property
page in a fresh SeleniumBase browser session, parses the rendered page and
stores the ``aria-label`` text of the sale estimate in ``estimate_list``.

``estimate_list`` ends up with exactly one entry per element of ``uprns``,
in the same order; an entry is ``None`` when the page did not contain the
expected sale-estimate markup.
"""

import time

from seleniumbase import SB

# NOTE(review): the same five UPRNs appear twice in this list — confirm
# whether the repetition is intentional before de-duplicating.
uprns = [
    100071297618,
    100080893397,
    100060778033,
    200004793081,
    100071265143,
    100071297618,
    100080893397,
    100060778033,
    200004793081,
    100071265143,
]

# Filled by the loop below; index-aligned with `uprns`.
estimate_list = []


def _extract_estimate(soup):
    """Return the sale-estimate text found in *soup*, or ``None``.

    The value is the ``aria-label`` attribute of the last ``<span>`` inside
    the last ``<p>`` of the last ``<div data-testid="sale-estimate">``.

    Args:
        soup: Parsed page as returned by ``sb.get_beautiful_soup()``.

    Returns:
        The ``aria-label`` string, or ``None`` when any level of the expected
        markup (div, p, span or the attribute itself) is missing.
    """
    estimates = soup.find_all("div", {"data-testid": "sale-estimate"})
    try:
        # Can change the way we extract the text here
        return estimates[-1].find_all("p")[-1].find_all("span")[-1]["aria-label"]
    except (IndexError, KeyError):
        # IndexError: one of the find_all() calls matched nothing.
        # KeyError: the span exists but carries no aria-label attribute.
        return None


for uprn in uprns:

    # Pause between page loads. Probably can change the timings here
    time.sleep(5)

    # A new browser session is started (and closed) for every UPRN.
    with SB(uc=True) as sb:
        sb.uc_open_with_reconnect(
            f"https://www.zoopla.co.uk/property/uprn/{uprn}/",
            3,
        )
        soup = sb.get_beautiful_soup()

    # A page without an estimate yields None instead of aborting the run and
    # discarding the estimates that were already collected.
    estimate_list.append(_extract_estimate(soup))