mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
updating how we set the simulation config for walls
This commit is contained in:
parent
27f4e4634f
commit
54b6761803
6 changed files with 173 additions and 45 deletions
2
.idea/Model.iml
generated
2
.idea/Model.iml
generated
|
|
@ -7,7 +7,7 @@
|
|||
<sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
|
||||
</content>
|
||||
<orderEntry type="jdk" jdkName="Python 3.10 (model_data)" jdkType="Python SDK" />
|
||||
<orderEntry type="jdk" jdkName="Python 3.10 (backend)" jdkType="Python SDK" />
|
||||
<orderEntry type="sourceFolder" forTests="false" />
|
||||
</component>
|
||||
<component name="PyNamespacePackagesService">
|
||||
|
|
|
|||
2
.idea/misc.xml
generated
2
.idea/misc.xml
generated
|
|
@ -3,7 +3,7 @@
|
|||
<component name="Black">
|
||||
<option name="sdkName" value="Python 3.10 (backend)" />
|
||||
</component>
|
||||
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (model_data)" project-jdk-type="Python SDK" />
|
||||
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (backend)" project-jdk-type="Python SDK" />
|
||||
<component name="PythonCompatibilityInspectionAdvertiser">
|
||||
<option name="version" value="3" />
|
||||
</component>
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ import pandas as pd
|
|||
from tqdm import tqdm
|
||||
import Levenshtein
|
||||
from backend.SearchEpc import SearchEpc
|
||||
from utils.s3 import read_dataframe_from_s3_parquet
|
||||
|
||||
# Average value of a property in the midlands in 2024 was £238,000. Since these are EPC F & G properties, we assume
|
||||
# £207,000 since they trade at a discount. This is based on the rightmove study where moving from an EPC F/G -> C has a
|
||||
|
|
@ -248,6 +249,13 @@ def app():
|
|||
"""
|
||||
This script is for scoping property ownership for EPC F & G rated properties in Birmingham, for Goldman Sachs
|
||||
"""
|
||||
|
||||
# TODO: This property:
|
||||
# https://epc.opendatacommunities.org/domestic/search?address=&postcode=&local-authority=&constituency
|
||||
# =&uprn=100031179243&from-month=1&from-year=2008&to-month=12&to-year=2024
|
||||
# is actually listed in two local authorities, causing us to think it's an EPC F & G property, but
|
||||
# it's actually EPC E. Need to handle this, probably by reading in all of the EPC data, concatenating together
|
||||
# and performing a singular filter for most recent EPC by UPRN
|
||||
# paths = [
|
||||
# "local_data/all-domestic-certificates/domestic-E08000025-Birmingham/certificates.csv",
|
||||
# "local_data/all-domestic-certificates/domestic-E08000031-Wolverhampton/certificates.csv",
|
||||
|
|
@ -477,6 +485,35 @@ def app():
|
|||
portfolio_epc_data_50m.to_excel("portfolio_epc_data_50m 28th May.xlsx", index=False)
|
||||
portfolio_epc_data_20m.to_excel("portfolio_epc_data_20m 28th May.xlsx", index=False)
|
||||
|
||||
# We check if any of these properties are in a conservation area
|
||||
valuations = pd.read_excel("property value.xlsx")
|
||||
|
||||
uprn_filenames = read_dataframe_from_s3_parquet(
|
||||
bucket_name="retrofit-data-dev", file_key="spatial/filename_meta.parquet"
|
||||
)
|
||||
|
||||
geospatial_data = []
|
||||
for _, row in tqdm(valuations.iterrows(), total=len(valuations)):
|
||||
filtered_df = uprn_filenames[
|
||||
(uprn_filenames["lower"] <= row["UPRN"])
|
||||
& (uprn_filenames["upper"] >= row["UPRN"])
|
||||
]
|
||||
if filtered_df.empty:
|
||||
raise Exception("No match found")
|
||||
|
||||
filename = filtered_df.iloc[0]["filenames"]
|
||||
|
||||
spatial_data = read_dataframe_from_s3_parquet(
|
||||
bucket_name="retrofit-data-dev", file_key=f"spatial/{filename}"
|
||||
)
|
||||
spatial = spatial_data[
|
||||
spatial_data["UPRN"] == row["UPRN"]
|
||||
][["UPRN", "conservation_status", "is_listed_building", "is_heritage_building"]]
|
||||
geospatial_data.append(spatial.to_dict("records")[0])
|
||||
|
||||
geospatial_data = pd.DataFrame(geospatial_data)
|
||||
geospatial_data.to_excel("geospatial_data.xlsx", index=False)
|
||||
|
||||
|
||||
def company_aggregation():
|
||||
company_ownership = pd.read_csv("/Users/khalimconn-kowlessar/Downloads/CCOD_FULL_2024_04.csv")
|
||||
|
|
|
|||
7
etl/property_valuation/requirements.txt
Normal file
7
etl/property_valuation/requirements.txt
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
seleniumbase
|
||||
beautifulsoup4
|
||||
requests
|
||||
pandas
|
||||
tqdm
|
||||
openpyxl
|
||||
undetected_chromedriver
|
||||
83
etl/property_valuation/scrape_valuations.py
Normal file
83
etl/property_valuation/scrape_valuations.py
Normal file
|
|
@ -0,0 +1,83 @@
|
|||
import requests
|
||||
import random
|
||||
import time
|
||||
import pandas as pd
|
||||
from bs4 import BeautifulSoup
|
||||
from tqdm import tqdm
|
||||
from seleniumbase import Driver
|
||||
from seleniumbase import page_actions
|
||||
|
||||
import undetected_chromedriver as webdriver
|
||||
from selenium.webdriver.chrome.service import Service
|
||||
from selenium.webdriver.common.by import By
|
||||
from selenium.webdriver.common.keys import Keys
|
||||
import time
|
||||
import pandas as pd
|
||||
|
||||
BASE_URL = "https://www.zoopla.co.uk/property/uprn/{uprn}/"
|
||||
|
||||
|
||||
def initialize_driver():
    """
    Create the browser driver used by the scraper.

    :return: the object returned by seleniumbase's ``Driver`` when called with
        ``headless=True`` and ``uc=True``
    """
    # Switch headless to False to watch the browser window while debugging.
    return Driver(headless=True, uc=True)
|
||||
|
||||
|
||||
def app():
    """
    Scrape Zoopla value estimates for every UPRN in the portfolio workbook.

    Reads "portfolio_epc_data_50m 28th May.xlsx" from the working directory,
    visits ``BASE_URL`` for each UPRN and collects the estimate, low estimate
    and high estimate text shown on the page. Progress is written to
    ``savepoint_index_{i}.csv`` after every property so an interrupted run
    loses nothing.

    Failures for a single property are printed and skipped (best effort); the
    browser is always closed when the run ends, even on an unexpected error.
    """
    # Read in the starting asset list
    asset_list = pd.read_excel("portfolio_epc_data_50m 28th May.xlsx")
    asset_list = asset_list[["UPRN", "ADDRESS", "POSTCODE"]]

    # asset_list.to_excel("property value.xlsx", index=False)

    uprns = asset_list["UPRN"].tolist()

    # Output column -> CSS selector of the element holding that value.
    # Insertion order fixes the column order of the savepoint files.
    selectors = {
        "price": "p[data-testid='estimate-blurred']",
        "lower_estimate": "span[data-testid='low-estimate-blurred']",
        "upper_estimate": "span[data-testid='high-estimate-blurred']",
    }

    # Only one browser is started: the previous extra webdriver.Chrome() call
    # was immediately shadowed and its window was never closed.
    driver = initialize_driver()
    result = []

    try:
        driver.set_page_load_timeout(30)  # Increase page load timeout

        for i, uprn in tqdm(enumerate(uprns), total=len(uprns)):

            # Every 10 requests sleep for an extra 7 seconds. Keyed on the
            # request index so failed requests count towards the pause too.
            if i % 10 == 0 and i != 0:
                time.sleep(7)

            try:
                driver.get(BASE_URL.format(uprn=uprn))
                page_actions.wait_for_element_visible(driver, selectors["price"], timeout=30)

                record = {"UPRN": uprn}
                for column, selector in selectors.items():
                    element = driver.find_element("css selector", selector)
                    # Selenium elements expose `.text`; `get_text(strip=True)`
                    # is BeautifulSoup API and raised on every property.
                    record[column] = element.text.strip()

                result.append(record)

            except Exception as e:
                # Best effort: report the property and carry on with the rest.
                print(f"Failed to retrieve data for UPRN {uprn} at iteration {i}: {e}")

            # Store the result depending on where we are
            # NOTE(review): the original nesting of this savepoint is not
            # recoverable from the diff; it is written every iteration here so
            # the latest file always holds everything scraped so far - confirm.
            savepoint = pd.DataFrame(result)
            savepoint.to_csv(f"savepoint_index_{i}.csv", index=False)

            # Sleep a random amount of time between 5 and 20 seconds. Done
            # after failures as well so an error never removes the delay.
            sleep_time = 5 + (15 * random.random())
            time.sleep(sleep_time)

    finally:
        # Always release the browser process.
        driver.quit()
|
||||
|
||||
if __name__ == "__main__":
|
||||
app()
|
||||
|
|
@ -55,23 +55,24 @@ class WallRecommendations(Definitions):
|
|||
NEW_BUILD_INSULATED = 0.75
|
||||
|
||||
# These are the ending descriptions we consider for walls with external insulation
|
||||
# This maps the clean descriptions to the ending descriptions
|
||||
EXTERNALLY_INSULATED_WALL_DESCRIPTIONS = {
|
||||
"solid_brick": "Solid brick, with external insulation",
|
||||
"cob": "Cob, with external insulation",
|
||||
"system_built": "System built, with external insulation",
|
||||
"granite_or_whinstone": 'Granite or whinstone, with external insulation',
|
||||
"sandstone_or_limestone": 'Sandstone or limestone, with external insulation',
|
||||
"timber_frame": "Timber frame, with external insulation"
|
||||
"Cavity wall, as built, insulated": "Cavity wall, filled cavity and external insulation",
|
||||
"Solid brick, as built, no insulation": "Solid brick, with external insulation",
|
||||
"Cob, as built": "Cob, with external insulation",
|
||||
"System built, as built, no insulation": "System built, with external insulation",
|
||||
"Granite or whinstone, as built, no insulation": 'Granite or whinstone, with external insulation',
|
||||
"Timber frame, as built, no insulation": "Timber frame, with external insulation",
|
||||
}
|
||||
|
||||
# These are the ending descriptions we consider for walls with internal insulation
|
||||
INTERNALLY_INSULATED_WALL_DESCRIPTIONS = {
|
||||
"solid_brick": "Solid brick, with internal insulation",
|
||||
"cob": "Cob, with internal insulation",
|
||||
"system_built": "System built, with internal insulation",
|
||||
"granite_or_whinstone": 'Granite or whinstone, with internal insulation',
|
||||
"sandstone_or_limestone": 'Sandstone or limestone, with internal insulation',
|
||||
"timber_frame": "Timber frame, with internal insulation"
|
||||
"Cavity wall, as built, insulated": "Cavity wall, filled cavity and internal insulation",
|
||||
"Solid brick, as built, no insulation": "Solid brick, with internal insulation",
|
||||
"Cob, as built": "Cob, with internal insulation",
|
||||
"System built, as built, no insulation": "System built, with internal insulation",
|
||||
"Granite or whinstone, as built, no insulation": 'Granite or whinstone, with internal insulation',
|
||||
"Timber frame, as built, no insulation": "Timber frame, with internal insulation",
|
||||
}
|
||||
|
||||
def __init__(
|
||||
|
|
@ -302,17 +303,14 @@ class WallRecommendations(Definitions):
|
|||
|
||||
wall_ending_config = WallAttributes("Cavity wall, filled cavity").process()
|
||||
|
||||
simulation_config = {}
|
||||
if self.property.data["walls-energy-eff"] not in ["Good", "Very Good"]:
|
||||
simulation_config = {
|
||||
"walls_energy_eff_ending": "Good",
|
||||
"walls_thermal_transmittance_ending": new_u_value
|
||||
}
|
||||
|
||||
walls_simulation_config = check_simulation_difference(
|
||||
new_config=wall_ending_config, old_config=self.property.walls, prefix="walls_"
|
||||
)
|
||||
|
||||
simulation_config = self.set_starting_simulation_config(
|
||||
wall_ending_config=wall_ending_config
|
||||
)
|
||||
|
||||
simulation_config = {**simulation_config, **walls_simulation_config}
|
||||
|
||||
recommendations.append(
|
||||
|
|
@ -340,30 +338,35 @@ class WallRecommendations(Definitions):
|
|||
self.recommendations = recommendations
|
||||
|
||||
def get_internal_external_wall_description(self, description_map, new_u_value):
    """
    Return the wall description to use once insulation has been added.

    The starting point is ``self.property.walls["clean_description"]``.

    :param description_map: mapping from a clean starting wall description to
        the insulated ending description (e.g. the externally / internally
        insulated maps defined on the class)
    :param new_u_value: U-value to report; only used when the starting
        description is an "Average thermal transmittance" one
    :return: the ending wall description
    :raises ValueError: if the starting description is an average thermal
        transmittance and ``new_u_value`` is None
    :raises KeyError: if the starting description has no entry in
        ``description_map``
    """
    clean_description = self.property.walls["clean_description"]

    if "Average thermal transmittance" in clean_description:
        if new_u_value is None:
            raise ValueError("New u value is None")
        # The unit text is reproduced exactly as it was written in the file.
        return f'Average thermal transmittance {new_u_value} W/m-¦K'

    # The maps are keyed by clean description, so a direct lookup replaces the
    # per-material flag chain (whose short keys such as "solid_brick" are no
    # longer present in the maps) and the NotImplementedError fallback that
    # made this lookup unreachable.
    return description_map[clean_description]
|
||||
|
||||
def set_starting_simulation_config(self, wall_ending_config):
    """
    Build the initial simulation config for a wall recommendation.

    :param wall_ending_config: mapping describing the wall after the measure;
        read for "is_filled_cavity", "external_insulation" and
        "internal_insulation"
    :return: an empty dict, or a dict with "walls_energy_eff_ending" set to
        "Good" or "Very Good"
    """
    current_rating = self.property.data["walls-energy-eff"]

    # Walls already rated Good / Very Good get no efficiency override here.
    if current_rating in ["Good", "Very Good"]:
        starting_config = {}
    else:
        starting_config = {"walls_energy_eff_ending": "Good"}

    # Double insulation means any two of: filled cavity, external insulation,
    # internal insulation.
    if wall_ending_config["is_filled_cavity"]:
        two_layers = wall_ending_config["external_insulation"] or wall_ending_config["internal_insulation"]
    else:
        two_layers = wall_ending_config["external_insulation"] and wall_ending_config["internal_insulation"]

    if two_layers:
        starting_config["walls_energy_eff_ending"] = "Very Good"

    return starting_config
|
||||
|
||||
def _find_insulation(self, u_value, insulation_materials, non_insulation_materials, phase):
|
||||
|
||||
|
|
@ -425,16 +428,14 @@ class WallRecommendations(Definitions):
|
|||
|
||||
wall_ending_config = WallAttributes(new_description).process()
|
||||
|
||||
simulation_config = {}
|
||||
if self.property.data["walls-energy-eff"] not in ["Good", "Very Good"]:
|
||||
simulation_config = {
|
||||
"walls_energy_eff_ending": "Good"
|
||||
}
|
||||
|
||||
walls_simulation_config = check_simulation_difference(
|
||||
new_config=wall_ending_config, old_config=self.property.walls, prefix="walls_"
|
||||
)
|
||||
|
||||
simulation_config = self.set_starting_simulation_config(
|
||||
wall_ending_config=wall_ending_config
|
||||
)
|
||||
|
||||
simulation_config = {
|
||||
**walls_simulation_config,
|
||||
**simulation_config,
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue