Updating how we set the simulation config for walls

This commit is contained in:
Khalim Conn-Kowlessar 2024-05-30 11:55:52 +01:00
parent 27f4e4634f
commit 54b6761803
6 changed files with 173 additions and 45 deletions

2
.idea/Model.iml generated
View file

@ -7,7 +7,7 @@
<sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
</content>
<orderEntry type="jdk" jdkName="Python 3.10 (model_data)" jdkType="Python SDK" />
<orderEntry type="jdk" jdkName="Python 3.10 (backend)" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
<component name="PyNamespacePackagesService">

2
.idea/misc.xml generated
View file

@ -3,7 +3,7 @@
<component name="Black">
<option name="sdkName" value="Python 3.10 (backend)" />
</component>
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (model_data)" project-jdk-type="Python SDK" />
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (backend)" project-jdk-type="Python SDK" />
<component name="PythonCompatibilityInspectionAdvertiser">
<option name="version" value="3" />
</component>

View file

@ -3,6 +3,7 @@ import pandas as pd
from tqdm import tqdm
import Levenshtein
from backend.SearchEpc import SearchEpc
from utils.s3 import read_dataframe_from_s3_parquet
# Average value of a property in the midlands in 2024 was £238,000. Since these are EPC F & G properties, we assume
# £207,000 since they trade at a discount. This is based on the rightmove study where moving from an EPC F/G -> C has a
@ -248,6 +249,13 @@ def app():
"""
This script is for scoping property ownership for EPC F & G rated properties in Birmingham, for Goldman Sachs
"""
# TODO: This property:
# https://epc.opendatacommunities.org/domestic/search?address=&postcode=&local-authority=&constituency
# =&uprn=100031179243&from-month=1&from-year=2008&to-month=12&to-year=2024
# is actually listed in two local authorities, causing us to think it's an EPC F & G property, but
# it's actually EPC E. Need to handle this, probably by reading in all of the EPC data, concatenating it together
# and performing a single filter for the most recent EPC by UPRN
# paths = [
# "local_data/all-domestic-certificates/domestic-E08000025-Birmingham/certificates.csv",
# "local_data/all-domestic-certificates/domestic-E08000031-Wolverhampton/certificates.csv",
@ -477,6 +485,35 @@ def app():
portfolio_epc_data_50m.to_excel("portfolio_epc_data_50m 28th May.xlsx", index=False)
portfolio_epc_data_20m.to_excel("portfolio_epc_data_20m 28th May.xlsx", index=False)
# We check if any of these properties are in a conservation area
valuations = pd.read_excel("property value.xlsx")
uprn_filenames = read_dataframe_from_s3_parquet(
bucket_name="retrofit-data-dev", file_key="spatial/filename_meta.parquet"
)
geospatial_data = []
for _, row in tqdm(valuations.iterrows(), total=len(valuations)):
filtered_df = uprn_filenames[
(uprn_filenames["lower"] <= row["UPRN"])
& (uprn_filenames["upper"] >= row["UPRN"])
]
if filtered_df.empty:
raise Exception("No match found")
filename = filtered_df.iloc[0]["filenames"]
spatial_data = read_dataframe_from_s3_parquet(
bucket_name="retrofit-data-dev", file_key=f"spatial/{filename}"
)
spatial = spatial_data[
spatial_data["UPRN"] == row["UPRN"]
][["UPRN", "conservation_status", "is_listed_building", "is_heritage_building"]]
geospatial_data.append(spatial.to_dict("records")[0])
geospatial_data = pd.DataFrame(geospatial_data)
geospatial_data.to_excel("geospatial_data.xlsx", index=False)
def company_aggregation():
company_ownership = pd.read_csv("/Users/khalimconn-kowlessar/Downloads/CCOD_FULL_2024_04.csv")

View file

@ -0,0 +1,7 @@
seleniumbase
beautifulsoup4
requests
pandas
tqdm
openpyxl
undetected_chromedriver

View file

@ -0,0 +1,83 @@
import requests
import random
import time
import pandas as pd
from bs4 import BeautifulSoup
from tqdm import tqdm
from seleniumbase import Driver
from seleniumbase import page_actions
import undetected_chromedriver as webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
import time
import pandas as pd
# URL template for a property's Zoopla page; the {uprn} placeholder is filled in with str.format
BASE_URL = "https://www.zoopla.co.uk/property/uprn/{uprn}/"
def initialize_driver():
    """
    Create the SeleniumBase driver used by the scraper.

    :return: a new ``Driver`` started with ``headless=True`` and ``uc=True``
    """
    # Switch headless to False to watch the browser window while debugging
    return Driver(headless=True, uc=True)
def app():
    """
    Scrape Zoopla value estimates for each UPRN in the portfolio asset list.

    Reads "portfolio_epc_data_50m 28th May.xlsx", loads the Zoopla page for every UPRN and collects the
    headline, low and high estimate text. A failure for a single UPRN is printed and that UPRN is skipped.
    The results gathered so far are written to "savepoint_index_{i}.csv" as the loop progresses, and the
    browser is always closed when the function exits.
    """
    # Read in the starting asset list
    asset_list = pd.read_excel("portfolio_epc_data_50m 28th May.xlsx")
    asset_list = asset_list[["UPRN", "ADDRESS", "POSTCODE"]]
    # asset_list.to_excel("property value.xlsx", index=False)
    uprns = asset_list["UPRN"].tolist()

    # Start exactly one browser. Previously a second one was launched via webdriver.Chrome() and its handle
    # was immediately overwritten, so that browser could never be closed.
    driver = initialize_driver()

    result = []
    try:
        driver.set_page_load_timeout(30)  # Increase page load timeout

        for i, uprn in enumerate(tqdm(uprns)):
            # After every 10 collected results sleep for an extra 7 seconds
            if result and len(result) % 10 == 0:
                time.sleep(7)

            url = BASE_URL.format(uprn=uprn)
            try:
                driver.get(url)
                page_actions.wait_for_element_visible(driver, "p[data-testid='estimate-blurred']", timeout=30)

                # Selenium elements expose their text through the `.text` attribute; `get_text(strip=True)`
                # is a BeautifulSoup method and raised AttributeError on every page
                price_element = driver.find_element("css selector", "p[data-testid='estimate-blurred']")
                low_price_element = driver.find_element("css selector", "span[data-testid='low-estimate-blurred']")
                high_price_element = driver.find_element("css selector", "span[data-testid='high-estimate-blurred']")

                result.append(
                    {
                        "UPRN": uprn,
                        "price": price_element.text.strip(),
                        "lower_estimate": low_price_element.text.strip(),
                        "upper_estimate": high_price_element.text.strip(),
                    }
                )

                # Sleep a random amount of time between 5 and 20 seconds
                time.sleep(5 + (15 * random.random()))
            except Exception as e:
                # Best effort: report the failure and carry on with the next UPRN
                print(f"Failed to retrieve data for UPRN {uprn} at iteration {i}: {e}")

            # Store the result depending on where we are
            # NOTE(review): the original nesting of this savepoint could not be recovered; it is written once
            # per iteration here - confirm whether it was meant to run only on failure
            savepoint = pd.DataFrame(result)
            savepoint.to_csv(f"savepoint_index_{i}.csv", index=False)
    finally:
        # Always release the browser, even if the loop is interrupted
        driver.quit()
# Run the scraper only when this file is executed directly as a script
if __name__ == "__main__":
    app()

View file

@ -55,23 +55,24 @@ class WallRecommendations(Definitions):
NEW_BUILD_INSULATED = 0.75
# These are the ending descriptions we consider for walls with external insulation
# This maps the clean descriptions to the ending descriptions
EXTERNALLY_INSULATED_WALL_DESCRIPTIONS = {
"solid_brick": "Solid brick, with external insulation",
"cob": "Cob, with external insulation",
"system_built": "System built, with external insulation",
"granite_or_whinstone": 'Granite or whinstone, with external insulation',
"sandstone_or_limestone": 'Sandstone or limestone, with external insulation',
"timber_frame": "Timber frame, with external insulation"
"Cavity wall, as built, insulated": "Cavity wall, filled cavity and external insulation",
"Solid brick, as built, no insulation": "Solid brick, with external insulation",
"Cob, as built": "Cob, with external insulation",
"System built, as built, no insulation": "System built, with external insulation",
"Granite or whinstone, as built, no insulation": 'Granite or whinstone, with external insulation',
"Timber frame, as built, no insulation": "Timber frame, with external insulation",
}
# These are the ending descriptions we consider for walls with internal insulation
INTERNALLY_INSULATED_WALL_DESCRIPTIONS = {
"solid_brick": "Solid brick, with internal insulation",
"cob": "Cob, with internal insulation",
"system_built": "System built, with internal insulation",
"granite_or_whinstone": 'Granite or whinstone, with internal insulation',
"sandstone_or_limestone": 'Sandstone or limestone, with internal insulation',
"timber_frame": "Timber frame, with internal insulation"
"Cavity wall, as built, insulated": "Cavity wall, filled cavity and internal insulation",
"Solid brick, as built, no insulation": "Solid brick, with internal insulation",
"Cob, as built": "Cob, with internal insulation",
"System built, as built, no insulation": "System built, with internal insulation",
"Granite or whinstone, as built, no insulation": 'Granite or whinstone, with internal insulation',
"Timber frame, as built, no insulation": "Timber frame, with internal insulation",
}
def __init__(
@ -302,17 +303,14 @@ class WallRecommendations(Definitions):
wall_ending_config = WallAttributes("Cavity wall, filled cavity").process()
simulation_config = {}
if self.property.data["walls-energy-eff"] not in ["Good", "Very Good"]:
simulation_config = {
"walls_energy_eff_ending": "Good",
"walls_thermal_transmittance_ending": new_u_value
}
walls_simulation_config = check_simulation_difference(
new_config=wall_ending_config, old_config=self.property.walls, prefix="walls_"
)
simulation_config = self.set_starting_simulation_config(
wall_ending_config=wall_ending_config
)
simulation_config = {**simulation_config, **walls_simulation_config}
recommendations.append(
@ -340,30 +338,35 @@ class WallRecommendations(Definitions):
self.recommendations = recommendations
def get_internal_external_wall_description(self, description_map, new_u_value):
if self.property.walls["is_solid_brick"]:
return description_map["solid_brick"]
if self.property.walls["is_cob"]:
return description_map["cob"]
if self.property.walls["is_system_built"]:
return description_map["system_built"]
if self.property.walls["is_granite_or_whinstone"]:
return description_map["granite_or_whinstone"]
if self.property.walls["is_sandstone_or_limestone"]:
return description_map["sandstone_or_limestone"]
if self.property.walls["is_timber_frame"]:
return description_map["timber_frame"]
if "Average thermal transmittance" in self.property.walls["clean_description"]:
if new_u_value is None:
raise ValueError("New u value is None")
return f'Average thermal transmittance {new_u_value} W/m-¦K'
raise NotImplementedError("Not implemented yet")
return description_map[self.property.walls["clean_description"]]
def set_starting_simulation_config(self, wall_ending_config):
    """
    Build the initial simulation config for a wall recommendation.

    The ending wall efficiency is set to "Good" when the property's current "walls-energy-eff" is neither
    "Good" nor "Very Good". It is then set to "Very Good" whenever the ending wall config combines at least
    two of: filled cavity, external insulation, internal insulation.

    :param wall_ending_config: mapping read for "is_filled_cavity", "external_insulation" and
        "internal_insulation"
    :return: dict that is either empty or holds a single "walls_energy_eff_ending" entry
    """
    config = {}

    current_rating = self.property.data["walls-energy-eff"]
    if current_rating not in ("Good", "Very Good"):
        config["walls_energy_eff_ending"] = "Good"

    # Two insulation layers are present when a filled cavity is paired with either other layer, or when
    # external and internal insulation are both present
    if wall_ending_config["is_filled_cavity"]:
        has_two_layers = wall_ending_config["external_insulation"] or wall_ending_config["internal_insulation"]
    else:
        has_two_layers = wall_ending_config["external_insulation"] and wall_ending_config["internal_insulation"]

    if has_two_layers:
        config["walls_energy_eff_ending"] = "Very Good"

    return config
def _find_insulation(self, u_value, insulation_materials, non_insulation_materials, phase):
@ -425,16 +428,14 @@ class WallRecommendations(Definitions):
wall_ending_config = WallAttributes(new_description).process()
simulation_config = {}
if self.property.data["walls-energy-eff"] not in ["Good", "Very Good"]:
simulation_config = {
"walls_energy_eff_ending": "Good"
}
walls_simulation_config = check_simulation_difference(
new_config=wall_ending_config, old_config=self.property.walls, prefix="walls_"
)
simulation_config = self.set_starting_simulation_config(
wall_ending_config=wall_ending_config
)
simulation_config = {
**walls_simulation_config,
**simulation_config,