diff --git a/.idea/Model.iml b/.idea/Model.iml
index b0f9c00d..4413bb06 100644
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@@ -7,7 +7,7 @@
-
+
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 1122b380..6f308057 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,7 +3,7 @@
-
+
diff --git a/etl/customers/goldman/property_ownership.py b/etl/customers/goldman/property_ownership.py
index d30205ae..44fa7142 100644
--- a/etl/customers/goldman/property_ownership.py
+++ b/etl/customers/goldman/property_ownership.py
@@ -3,6 +3,7 @@ import pandas as pd
from tqdm import tqdm
import Levenshtein
from backend.SearchEpc import SearchEpc
+from utils.s3 import read_dataframe_from_s3_parquet
# Average value of a property in the midlands in 2024 was £238,000. Since these are EPC F & G properties, we assume
# £207,000 since they trade at a discount. This is based on the rightmove study where moving from an EPC F/G -> C has a
@@ -248,6 +249,13 @@ def app():
"""
This script is for scoping property ownership for EPC F & G rated properties in Birmingam, for Goldman Sachs
"""
+
+ # TODO: This property:
+ # https://epc.opendatacommunities.org/domestic/search?address=&postcode=&local-authority=&constituency
+ # =&uprn=100031179243&from-month=1&from-year=2008&to-month=12&to-year=2024
+ # is actually listed in two local authorities, causing us to think it's an EPC F & G property, but
+ # it's actually EPC E. Need to handle this, probably by reading in all of the EPC data, concatenating it together
+ # and performing a single filter for the most recent EPC by UPRN
# paths = [
# "local_data/all-domestic-certificates/domestic-E08000025-Birmingham/certificates.csv",
# "local_data/all-domestic-certificates/domestic-E08000031-Wolverhampton/certificates.csv",
@@ -477,6 +485,35 @@ def app():
portfolio_epc_data_50m.to_excel("portfolio_epc_data_50m 28th May.xlsx", index=False)
portfolio_epc_data_20m.to_excel("portfolio_epc_data_20m 28th May.xlsx", index=False)
+ # We check if any of these properties are in a conservation area, or are flagged as listed / heritage buildings
+ valuations = pd.read_excel("property value.xlsx")
+
+ uprn_filenames = read_dataframe_from_s3_parquet(
+ bucket_name="retrofit-data-dev", file_key="spatial/filename_meta.parquet"
+ )
+
+ geospatial_data = []
+ for _, row in tqdm(valuations.iterrows(), total=len(valuations)):
+ filtered_df = uprn_filenames[
+ (uprn_filenames["lower"] <= row["UPRN"])
+ & (uprn_filenames["upper"] >= row["UPRN"])
+ ]
+ if filtered_df.empty:
+ raise Exception("No match found")
+
+ filename = filtered_df.iloc[0]["filenames"]
+
+ spatial_data = read_dataframe_from_s3_parquet(
+ bucket_name="retrofit-data-dev", file_key=f"spatial/{filename}"
+ )
+ spatial = spatial_data[
+ spatial_data["UPRN"] == row["UPRN"]
+ ][["UPRN", "conservation_status", "is_listed_building", "is_heritage_building"]]
+ geospatial_data.append(spatial.to_dict("records")[0])
+
+ geospatial_data = pd.DataFrame(geospatial_data)
+ geospatial_data.to_excel("geospatial_data.xlsx", index=False)
+
def company_aggregation():
company_ownership = pd.read_csv("/Users/khalimconn-kowlessar/Downloads/CCOD_FULL_2024_04.csv")
diff --git a/etl/property_valuation/requirements.txt b/etl/property_valuation/requirements.txt
new file mode 100644
index 00000000..8a4a1924
--- /dev/null
+++ b/etl/property_valuation/requirements.txt
@@ -0,0 +1,7 @@
+seleniumbase
+beautifulsoup4
+requests
+pandas
+tqdm
+openpyxl
+undetected_chromedriver
\ No newline at end of file
diff --git a/etl/property_valuation/scrape_valuations.py b/etl/property_valuation/scrape_valuations.py
new file mode 100644
index 00000000..67713a4e
--- /dev/null
+++ b/etl/property_valuation/scrape_valuations.py
@@ -0,0 +1,83 @@
+import requests
+import random
+import time
+import pandas as pd
+from bs4 import BeautifulSoup
+from tqdm import tqdm
+from seleniumbase import Driver
+from seleniumbase import page_actions
+
+import undetected_chromedriver as webdriver
+from selenium.webdriver.chrome.service import Service
+from selenium.webdriver.common.by import By
+from selenium.webdriver.common.keys import Keys
+import time
+import pandas as pd
+
+BASE_URL = "https://www.zoopla.co.uk/property/uprn/{uprn}/"
+
+
+def initialize_driver():
+    """Return a headless SeleniumBase driver with undetected-chromedriver (uc) mode enabled."""
+    return Driver(headless=True, uc=True)
+
+
+def app():
+    """Scrape Zoopla's estimate and its low/high range for each UPRN in the asset list, saving a CSV as we go."""
+    # Read in the starting asset list
+    asset_list = pd.read_excel("portfolio_epc_data_50m 28th May.xlsx")
+    asset_list = asset_list[["UPRN", "ADDRESS", "POSTCODE"]]
+
+    # asset_list.to_excel("property value.xlsx", index=False)
+
+    # Generate the list of urls
+    urls = [BASE_URL.format(uprn=uprn) for uprn in asset_list["UPRN"]]
+
+    # A single SeleniumBase driver serves every request (do not also start a plain webdriver.Chrome() here)
+    driver = initialize_driver()
+    driver.set_page_load_timeout(30)  # Increase page load timeout
+
+    result = []
+    try:
+        for i, (url, uprn) in tqdm(enumerate(zip(urls, asset_list["UPRN"].tolist())), total=len(urls)):
+            # Every 10 requests sleep for an extra 7 seconds. This is keyed on the request index rather than
+            # len(result), which stops growing on failures and would then trigger the pause on every retry
+            if i % 10 == 0 and i != 0:
+                time.sleep(7)
+
+            try:
+                driver.get(url)
+                page_actions.wait_for_element_visible(driver, "p[data-testid='estimate-blurred']", timeout=30)
+
+                # find_element returns a Selenium WebElement: its text is the `.text` attribute, whereas
+                # get_text(strip=True) is the BeautifulSoup API and fails on a WebElement
+                price_element = driver.find_element("css selector", "p[data-testid='estimate-blurred']")
+                price = price_element.text.strip()
+
+                low_price_element = driver.find_element("css selector", "span[data-testid='low-estimate-blurred']")
+                low_price = low_price_element.text.strip()
+
+                high_price_element = driver.find_element("css selector", "span[data-testid='high-estimate-blurred']")
+                high_price = high_price_element.text.strip()
+
+                result.append(
+                    {"UPRN": uprn, "price": price, "lower_estimate": low_price, "upper_estimate": high_price}
+                )
+
+                # Sleep a random amount of time between 5 and 20 seconds
+                sleep_time = 5 + (15 * random.random())
+                time.sleep(sleep_time)
+            except Exception as e:
+                print(f"Failed to retrieve data for UPRN {uprn} at iteration {i}: {e}")
+
+            # Store the result depending on where we are
+            # NOTE(review): indentation was lost in this diff; the savepoint is assumed to run on every iteration
+            savepoint = pd.DataFrame(result)
+            savepoint.to_csv(f"savepoint_index_{i}.csv", index=False)
+    finally:
+        # Always release the browser, even if the run is interrupted part-way through
+        driver.quit()
+
+
+if __name__ == "__main__":
+ app()
diff --git a/recommendations/WallRecommendations.py b/recommendations/WallRecommendations.py
index 243a5edb..939bef80 100644
--- a/recommendations/WallRecommendations.py
+++ b/recommendations/WallRecommendations.py
@@ -55,23 +55,24 @@ class WallRecommendations(Definitions):
NEW_BUILD_INSULATED = 0.75
# These are the ending descriptions we consider for walls with external insulation
+ # This maps the clean descriptions to the ending descriptions
EXTERNALLY_INSULATED_WALL_DESCRIPTIONS = {
- "solid_brick": "Solid brick, with external insulation",
- "cob": "Cob, with external insulation",
- "system_built": "System built, with external insulation",
- "granite_or_whinstone": 'Granite or whinstone, with external insulation',
- "sandstone_or_limestone": 'Sandstone or limestone, with external insulation',
- "timber_frame": "Timber frame, with external insulation"
+ "Cavity wall, as built, insulated": "Cavity wall, filled cavity and external insulation",
+ "Solid brick, as built, no insulation": "Solid brick, with external insulation",
+ "Cob, as built": "Cob, with external insulation",
+ "System built, as built, no insulation": "System built, with external insulation",
+ "Granite or whinstone, as built, no insulation": 'Granite or whinstone, with external insulation',
+ "Timber frame, as built, no insulation": "Timber frame, with external insulation",
}
# These are the ending descriptions we consider for walls with internal insulation
INTERNALLY_INSULATED_WALL_DESCRIPTIONS = {
- "solid_brick": "Solid brick, with internal insulation",
- "cob": "Cob, with internal insulation",
- "system_built": "System built, with internal insulation",
- "granite_or_whinstone": 'Granite or whinstone, with internal insulation',
- "sandstone_or_limestone": 'Sandstone or limestone, with internal insulation',
- "timber_frame": "Timber frame, with internal insulation"
+ "Cavity wall, as built, insulated": "Cavity wall, filled cavity and internal insulation",
+ "Solid brick, as built, no insulation": "Solid brick, with internal insulation",
+ "Cob, as built": "Cob, with internal insulation",
+ "System built, as built, no insulation": "System built, with internal insulation",
+ "Granite or whinstone, as built, no insulation": 'Granite or whinstone, with internal insulation',
+ "Timber frame, as built, no insulation": "Timber frame, with internal insulation",
}
def __init__(
@@ -302,17 +303,14 @@ class WallRecommendations(Definitions):
wall_ending_config = WallAttributes("Cavity wall, filled cavity").process()
- simulation_config = {}
- if self.property.data["walls-energy-eff"] not in ["Good", "Very Good"]:
- simulation_config = {
- "walls_energy_eff_ending": "Good",
- "walls_thermal_transmittance_ending": new_u_value
- }
-
walls_simulation_config = check_simulation_difference(
new_config=wall_ending_config, old_config=self.property.walls, prefix="walls_"
)
+ simulation_config = self.set_starting_simulation_config(
+ wall_ending_config=wall_ending_config
+ )
+
simulation_config = {**simulation_config, **walls_simulation_config}
recommendations.append(
@@ -340,30 +338,35 @@ class WallRecommendations(Definitions):
self.recommendations = recommendations
def get_internal_external_wall_description(self, description_map, new_u_value):
+ """
+ Look up the ending wall description for the property's current walls.
+
+ When the current clean description contains "Average thermal transmittance", the ending description is
+ rebuilt from new_u_value (a ValueError is raised if new_u_value is None). Otherwise the ending description
+ is the description_map entry keyed by the current clean description.
+ """
- if self.property.walls["is_solid_brick"]:
- return description_map["solid_brick"]
-
- if self.property.walls["is_cob"]:
- return description_map["cob"]
-
- if self.property.walls["is_system_built"]:
- return description_map["system_built"]
-
- if self.property.walls["is_granite_or_whinstone"]:
- return description_map["granite_or_whinstone"]
-
- if self.property.walls["is_sandstone_or_limestone"]:
- return description_map["sandstone_or_limestone"]
-
- if self.property.walls["is_timber_frame"]:
- return description_map["timber_frame"]
if "Average thermal transmittance" in self.property.walls["clean_description"]:
if new_u_value is None:
raise ValueError("New u value is None")
return f'Average thermal transmittance {new_u_value} W/m-¦K'
- raise NotImplementedError("Not implemented yet")
+ # NOTE(review): a clean description that is missing from description_map raises KeyError here, where the
+ # removed code raised NotImplementedError - confirm callers cope (e.g. sandstone or limestone walls, which no
+ # longer have an entry in either description map)
+ return description_map[self.property.walls["clean_description"]]
+
+ def set_starting_simulation_config(self, wall_ending_config):
+ """
+ Helper function to set the starting simulation config, i.e. the ending walls energy efficiency rating.
+
+ wall_ending_config is indexed by "is_filled_cavity", "external_insulation" and "internal_insulation".
+ Returns a dict that is either empty or holds a single "walls_energy_eff_ending" key:
+ - "Good" when the property's current "walls-energy-eff" is neither "Good" nor "Very Good"
+ - "Very Good" when at least two of the three insulation flags above are set (this takes precedence)
+ """
+
+ simulation_config = {}
+ # Walls that are not already rated Good / Very Good get an ending rating of Good
+ if self.property.data["walls-energy-eff"] not in ["Good", "Very Good"]:
+ simulation_config = {
+ "walls_energy_eff_ending": "Good"
+ }
+
+ # We check if we have double insulation in any instances, i.e. any two of filled cavity, external
+ # insulation and internal insulation are present together in the ending config
+ double_insulation = (
+ (wall_ending_config["is_filled_cavity"] and wall_ending_config["external_insulation"]) or
+ (wall_ending_config["is_filled_cavity"] and wall_ending_config["internal_insulation"]) or
+ (wall_ending_config["external_insulation"] and wall_ending_config["internal_insulation"])
+ )
+ # Double insulation overrides whatever ending rating was set above
+ if double_insulation:
+ simulation_config["walls_energy_eff_ending"] = "Very Good"
+
+ return simulation_config
def _find_insulation(self, u_value, insulation_materials, non_insulation_materials, phase):
@@ -425,16 +428,14 @@ class WallRecommendations(Definitions):
wall_ending_config = WallAttributes(new_description).process()
- simulation_config = {}
- if self.property.data["walls-energy-eff"] not in ["Good", "Very Good"]:
- simulation_config = {
- "walls_energy_eff_ending": "Good"
- }
-
walls_simulation_config = check_simulation_difference(
new_config=wall_ending_config, old_config=self.property.walls, prefix="walls_"
)
+ simulation_config = self.set_starting_simulation_config(
+ wall_ending_config=wall_ending_config
+ )
+
simulation_config = {
**walls_simulation_config,
**simulation_config,