mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
Merge pull request #540 from Hestia-Homes/eco-eligiblity-bug
Fixing boiler upgrade recommendation
This commit is contained in:
commit
82b6e39334
3 changed files with 90 additions and 21 deletions
|
|
@ -43,18 +43,6 @@ async def trigger_plan_entrypoint(body: PlanTriggerRequest):
|
|||
logger.error("Failed to parse request body: %s", e)
|
||||
return {"message": "Invalid request"}, 400
|
||||
|
||||
# TODO: Warm up the lambdas here
|
||||
# from backend.ml_models.api import ModelApi
|
||||
# model_api = ModelApi(
|
||||
# portfolio_id=body.portfolio_id,
|
||||
# timestamp="2020-01-01T00:00:00",
|
||||
# prediction_buckets=[],
|
||||
# max_retries=1
|
||||
# )
|
||||
# await model_api.async_warm_up_lambdas(
|
||||
# model_prefies=model_api.KWH_MODEL_PREFIXES + model_api.MODEL_PREFIXES
|
||||
# )
|
||||
|
||||
# If file_format is domna_asset_list and type is xlsx, read and chunk it
|
||||
if data.get("file_format") == "domna_asset_list" and data.get("file_type") == "xlsx":
|
||||
try:
|
||||
|
|
@ -94,6 +82,7 @@ async def trigger_plan_entrypoint(body: PlanTriggerRequest):
|
|||
data["scenario_id"] = scenario_id
|
||||
|
||||
for i in range(total_chunks):
|
||||
# Create an entry in the request logs table
|
||||
index_start = i * chunk_size
|
||||
index_end = min((i + 1) * chunk_size, total_rows)
|
||||
|
||||
|
|
|
|||
|
|
@ -6,6 +6,8 @@ import random
|
|||
import os
|
||||
from multiprocessing import Pool
|
||||
from tqdm import tqdm
|
||||
import re
|
||||
import json
|
||||
|
||||
ENGINES = ["safari", "chrome"]
|
||||
CACHE_DIR = "zoopla_cache"
|
||||
|
|
@ -17,13 +19,69 @@ def random_delay():
|
|||
time.sleep(random.uniform(0.5, 2))
|
||||
|
||||
|
||||
def extract_feature(soup, icon_id, container_class="_1pbf8i53"):
    """Return the text of the container that holds a given icon.

    Finds the first ``<use>`` element whose ``href`` is ``#<icon_id>``, walks
    up to its nearest ``<div>`` ancestor carrying ``container_class`` and
    returns that ancestor's text with surrounding whitespace stripped.

    Args:
        soup: Parsed document (or element) exposing ``find``.
        icon_id: Icon identifier, without the leading ``#``.
        container_class: CSS class of the enclosing ``<div>``. The default is
            the class name previously hard-coded here; it looks
            machine-generated, so callers can override it without editing
            this function if the page markup changes.

    Returns:
        The container text, or ``None`` when the icon or its container is
        not found.
    """
    tag = soup.find("use", href=f"#{icon_id}")
    if not tag:
        return None

    parent = tag.find_parent("div", class_=container_class)
    if not parent:
        return None

    return parent.get_text(strip=True)
|
||||
|
||||
|
||||
def extract_embedded_json(text):
    """Extract embedded property JSON from raw page text.

    Two strategies are tried in order:

    1. Fast path: grab the span running from ``"attributes": {...},`` up to
       the first ``]`` after ``"historicSales"``, wrap it in braces and parse
       it as one JSON object. Every key inside that span is returned.
    2. Fallback: look up each known key independently and parse the object
       or array that follows it.

    Args:
        text: Raw page source to search.

    Returns:
        A dict of the keys that could be parsed; empty when nothing is found.
    """
    match = re.search(
        r'"attributes"\s*:\s*\{.*?\}\s*,.*?"historicSales".*?\]',
        text,
        re.DOTALL,
    )
    if match:
        snippet = "{" + match.group(0) + "}"
        # Turn literal \u0022 sequences into quotes, then drop trailing
        # commas that strict JSON does not allow.
        snippet = snippet.replace("\\u0022", '"')
        snippet = re.sub(r",(\s*[}\]])", r"\1", snippet)
        try:
            return json.loads(snippet)
        except json.JSONDecodeError:
            # The regex span is only a heuristic (e.g. it stops at the first
            # "]"), so fall through to the per-key lookup.
            pass

    # Fallback for independent keys. A JSON decoder is used instead of a
    # non-greedy regex so that nested objects/arrays are consumed up to their
    # matching closing bracket rather than being cut at the first one.
    decoder = json.JSONDecoder()
    result = {}
    for key in (
        "attributes", "energy", "rentEstimate",
        "saleEstimate", "saleHistory", "historicSales",
    ):
        # Only values that start an object or an array are considered.
        for key_match in re.finditer(rf'"{key}"\s*:\s*(?=[{{\[])', text):
            try:
                value, _ = decoder.raw_decode(text[key_match.end():])
            except json.JSONDecodeError:
                # Not valid JSON at this occurrence; try the next one.
                continue
            result[key] = value
            break
    return result
|
||||
|
||||
|
||||
def scrape_all_estimates(session, url):
    """Scrape valuation estimates and embedded property data for one Zoopla property URL.

    Args:
        session: Session object whose ``get`` accepts an ``impersonate``
            keyword; a random entry of ``ENGINES`` is used for each request.
        url: Property page URL to fetch.

    Returns:
        A dict with the keys:

        * ``estimates``: elements matching ``data-testid="sale-estimate"``.
        * ``is_blocked``: ``True`` when no such element was found.
        * ``response_html`` / ``html``: the raw response text.
        * ``attributes``, ``rent``: dicts taken from the embedded JSON
          (``attributes`` / ``rentEstimate``), empty when absent.
        * ``historicSales``: list taken from the embedded JSON, empty when
          absent.
    """
    resp = session.get(url, impersonate=random.choice(ENGINES))
    html = resp.text
    page_source = BeautifulSoup(html, "html.parser")
    estimates = page_source.find_all("div", {"data-testid": "sale-estimate"})

    data = extract_embedded_json(html)

    # A page without any sale-estimate element is treated as blocked.
    is_blocked = not estimates

    return {
        "estimates": estimates,
        "is_blocked": is_blocked,
        "response_html": html,
        # Alias: the caller in this file writes output["html"] to the cache.
        "html": html,
        # Empty containers instead of None so callers can unpack these with
        # ** and call len() on the sales list without extra guards.
        "attributes": data.get("attributes") or {},
        "rent": data.get("rentEstimate") or {},
        "historicSales": data.get("historicSales") or [],
    }
|
||||
|
||||
|
||||
def extract_estimates(estimates):
|
||||
|
|
@ -50,19 +108,36 @@ def parallel_task(url):
|
|||
html = open(cache_path, "r").read()
|
||||
page_source = BeautifulSoup(html, "html.parser")
|
||||
estimates = page_source.find_all("div", {"data-testid": "sale-estimate"})
|
||||
data = extract_embedded_json(html)
|
||||
history_sales = data.get("historicSales", [{}])
|
||||
if len(history_sales) == 0:
|
||||
history_sales = [{}]
|
||||
|
||||
if estimates:
|
||||
low, mid, high = extract_estimates(estimates)
|
||||
return {"URL": url, "Low Estimate": low, "Middle Estimate": mid, "High Estimate": high}
|
||||
return {
|
||||
"URL": url, "Low Estimate": low, "Middle Estimate": mid, "High Estimate": high,
|
||||
**data.get("attributes", {}), **data.get("rentEstimate", {}),
|
||||
**history_sales[0]
|
||||
}
|
||||
|
||||
# Otherwise scrape live
|
||||
with StealthSession() as session:
|
||||
attempts = 0
|
||||
while attempts < 5:
|
||||
estimates, is_blocked, html = scrape_all_estimates(session, url)
|
||||
if not is_blocked and estimates:
|
||||
open(cache_path, "w").write(html)
|
||||
low, mid, high = extract_estimates(estimates)
|
||||
return {"URL": url, "Low Estimate": low, "Middle Estimate": mid, "High Estimate": high}
|
||||
output = scrape_all_estimates(session, url)
|
||||
if not output["is_blocked"] and output["estimates"]:
|
||||
open(cache_path, "w").write(output["html"])
|
||||
low, mid, high = extract_estimates(output["estimates"])
|
||||
history_sales = output.get("historicSales", [{}])
|
||||
if len(history_sales) == 0:
|
||||
history_sales = [{}]
|
||||
return {
|
||||
"URL": url, "Low Estimate": low, "Middle Estimate": mid, "High Estimate": high,
|
||||
**output.get("attributes", {}),
|
||||
**output.get("rent", {}),
|
||||
**history_sales[0]
|
||||
}
|
||||
attempts += 1
|
||||
print(f"[Attempt {attempts}] Blocked or empty for {url}")
|
||||
random_delay()
|
||||
|
|
@ -108,6 +183,9 @@ if __name__ == "__main__":
|
|||
)
|
||||
|
||||
df = pd.DataFrame(estimates_list)
|
||||
|
||||
print(df.head())
|
||||
|
||||
df["uprn"] = df["URL"].str.extract(r"uprn/(\d+)/")
|
||||
df["valuation"] = df["Middle Estimate"].apply(parse_price)
|
||||
|
||||
|
|
|
|||
|
|
@ -1257,9 +1257,11 @@ class HeatingRecommender:
|
|||
# If there is no system change, we add the boiler recommendation at this point.
|
||||
self.heating_recommendations.extend([boiler_recommendation])
|
||||
|
||||
if system_change:
|
||||
if system_change and len(boiler_recommendation):
|
||||
# We combine the heating and controls recommendations, in the case of a system change
|
||||
# If this is true, we set SAP points to None and survey to False for the boiler recommendation
|
||||
# If this is true, we set SAP points to None and survey to False for the boiler recommendation.
|
||||
# We check if we actually have a boiler recommendation as we may not if the heating and hot water
|
||||
# are already efficient enough
|
||||
|
||||
combined_recommendations = []
|
||||
for controls_recommendation in controls_recommender.recommendation:
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue