From a69ec1dd6b53422490bf9171c5965573f7f110c5 Mon Sep 17 00:00:00 2001
From: Michael Duong <michaelduong22@gmail.com>
Date: Tue, 20 Aug 2024 16:28:39 +0100
Subject: [PATCH] add basic script for scraping zoopla

---
 etl/webscrape/Zoopla.py | 38 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 38 insertions(+)
 create mode 100644 etl/webscrape/Zoopla.py

diff --git a/etl/webscrape/Zoopla.py b/etl/webscrape/Zoopla.py
new file mode 100644
index 00000000..bb86c759
--- /dev/null
+++ b/etl/webscrape/Zoopla.py
@@ -0,0 +1,38 @@
+# Scrape Zoopla sale estimates for a fixed list of UPRNs (initial version).
+
+from seleniumbase import SB
+import time
+
+# UPRNs to scrape. NOTE(review): the same five values appear twice - intentional?
+uprns = [
+    100071297618,
+    100080893397,
+    100060778033,
+    200004793081,
+    100071265143,
+    100071297618,
+    100080893397,
+    100060778033,
+    200004793081,
+    100071265143,
+]
+
+# One entry per UPRN, in order: the estimate's aria-label text, or None if absent.
+estimate_list = []
+
+for uprn in uprns:
+    # Pause before each page load; the 5 s value is provisional and can be tuned.
+    time.sleep(5)
+    with SB(uc=True) as sb:
+        # The 3 is the reconnect time for SeleniumBase's UC-mode open.
+        sb.uc_open_with_reconnect(f"https://www.zoopla.co.uk/property/uprn/{uprn}/", 3)
+        soup = sb.get_beautiful_soup()
+        estimates = soup.find_all("div", {"data-testid": "sale-estimate"})
+        # A page without the expected markup raises IndexError/KeyError in the
+        # lookups below; record None instead of aborting and losing earlier results.
+        try:
+            spans = estimates[-1].find_all("p")[-1].find_all("span")
+            estimate_text = spans[-1]["aria-label"]
+        except (IndexError, KeyError):
+            estimate_text = None
+        estimate_list.append(estimate_text)