From 4ffe09f5b8b8359bbbaf2d57105b1737aa9239c7 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar
Date: Tue, 21 Apr 2026 22:50:47 +0100
Subject: [PATCH] created triggering backend script and handling find epc errors

---
Review notes (ignored by `git am`):
- This patch was recovered from a copy in which line breaks and runs of
  whitespace had been collapsed. Indentation, and the position of blank
  context lines in the RetrieveFindMyEpc.py and Zoopla.py hunks, are
  reconstructed from the hunk line counts -- TODO confirm against the
  target files before applying.
- retrigger_backend.py was revised during review, so the blob hash on its
  `index` line no longer matches the content below.
- RetrieveFindMyEpc.py: measure_map.get(..., []) now skips unmapped
  recommendation names without any message -- TODO confirm that is intended.

 backend/scripts/retrigger_backend.py | 101 +++++++++++++++++++++++++++
 etl/find_my_epc/RetrieveFindMyEpc.py |   3 +-
 etl/webscrape/Zoopla.py              |  20 +++---
 3 files changed, 113 insertions(+), 11 deletions(-)
 create mode 100644 backend/scripts/retrigger_backend.py

diff --git a/backend/scripts/retrigger_backend.py b/backend/scripts/retrigger_backend.py
new file mode 100644
index 00000000..4ee0d06a
--- /dev/null
+++ b/backend/scripts/retrigger_backend.py
@@ -0,0 +1,101 @@
+"""One-off script: re-send plan-engine triggers recovered from a log export.
+
+Reads a CSV log export, pulls the payload out of every "Triggering plan with
+body:" message, creates a fresh task for each payload and sends it to the
+model-engine SQS queue.
+"""
+import csv
+import ast
+import re
+import json
+import boto3
+from backend.app.db.models.tasks import SourceEnum
+from backend.app.db.functions.tasks.Tasks import TasksInterface
+
+SQS_QUEUE_URL = "https://sqs.eu-west-2.amazonaws.com/337213553626/model-engine-queue"
+file_path = "/Users/khalimconn-kowlessar/Downloads/assessment-model-log-export-2026-04-21T19-57-35.csv"
+
+payloads = []
+
+# One token of the logged JS object literal, tried in this order: a quoted
+# string (copied through untouched), a bare key directly after "{" or ",",
+# or a JS-only literal.
+_JS_TOKEN_RE = re.compile(
+    r"""(?P<string>'(?:\\.|[^'\\])*'|"(?:\\.|[^"\\])*")"""
+    r"""|(?P<pre>[{,]\s*)(?P<key>[A-Za-z_$][\w$]*)(?=\s*:)"""
+    r"""|\b(?P<literal>true|false|null)\b"""
+)
+_JS_LITERALS = {"true": "True", "false": "False", "null": "None"}
+
+
+def _js_token_to_python(match):
+    """Return the Python-literal spelling of one _JS_TOKEN_RE match."""
+    if match.group("string") is not None:
+        return match.group("string")
+    if match.group("key") is not None:
+        return match.group("pre") + '"' + match.group("key") + '"'
+    return _JS_LITERALS[match.group("literal")]
+
+
+def clean_js_object(js_str):
+    """Extract the logged JS object from js_str as Python-literal source.
+
+    Returns the text after "Triggering plan with body:" with bare keys quoted
+    and true/false/null rewritten so ast.literal_eval can read it, or None
+    when no "{...}" body follows the marker.
+    """
+    match = re.search(r"Triggering plan with body:\s*(\{.*\})", js_str, re.DOTALL)
+    if not match:
+        return None
+
+    # Convert token by token so that colons and the words true/false/null
+    # inside quoted values (URLs, timestamps, free text) are left untouched.
+    return _JS_TOKEN_RE.sub(_js_token_to_python, match.group(1))
+
+
+with open(file_path, newline='', encoding='utf-8') as csvfile:
+    reader = csv.DictReader(csvfile)
+
+    for row in reader:
+        # DictReader fills missing columns of short rows with None.
+        message = row.get("message") or ""
+
+        if "Triggering plan with body:" in message:
+            cleaned = clean_js_object(message)
+            if cleaned:
+                try:
+                    payload = ast.literal_eval(cleaned)
+                    payloads.append(payload)
+                except Exception as e:
+                    print("Failed to parse:", e)
+
+# Print ready-to-copy output
+print("payloads = [")
+for p in payloads:
+    print(f"    {p},")
+print("]")
+
+# For this, we don't retrigger portfolio 685 since that was internal.
+# Compare as str: the parsed id may be an int or a str.
+payloads_to_retrigger = [x for x in payloads if str(x["portfolio_id"]) != "685"]
+
+# The queue lives in eu-west-2 (see SQS_QUEUE_URL); don't rely on the default region.
+sqs_client = boto3.client("sqs", region_name="eu-west-2")
+
+# Re-trigger:
+for payload in payloads_to_retrigger:
+    task_id, subtask_id = TasksInterface.create_task(
+        task_source="backend/plan/router.py:trigger_plan_entrypoint",
+        service="plan_engine",
+        inputs=payload,
+        task_only=False,
+        source=SourceEnum.PORTFOLIO,
+        source_id=str(payload["portfolio_id"]),
+    )
+    payload["task_id"] = str(task_id)
+    payload["subtask_id"] = str(subtask_id)
+    message_body = json.dumps(payload)
+    response = sqs_client.send_message(
+        QueueUrl=SQS_QUEUE_URL, MessageBody=message_body
+    )
+    print(f"SQS message sent. Message ID: {response.get('MessageId')}")
diff --git a/etl/find_my_epc/RetrieveFindMyEpc.py b/etl/find_my_epc/RetrieveFindMyEpc.py
index e6e4e5fd..594c5347 100644
--- a/etl/find_my_epc/RetrieveFindMyEpc.py
+++ b/etl/find_my_epc/RetrieveFindMyEpc.py
@@ -784,6 +784,7 @@ class RetrieveFindMyEpc:
             "Install cavity wall insulation": ["cavity_wall_insulation"],
             "Install solar water heating": ["solar_water_heating"],
             'Install photovoltaics, 25% of roof area': ["solar_pv"],
+            "Internal and cavity wall insulation": ["internal_wall_insulation"],
         }
 
         survey = True
@@ -794,7 +795,7 @@ class RetrieveFindMyEpc:
         formatted_recommendations = []
 
         for rec in recommendations:
-            mapped = measure_map[rec["measure"]]
+            mapped = measure_map.get(rec["measure"], [])
             for measure in mapped:
                 if measure == "cavity_wall_insulation" and "solid brick" in self.walls[0].lower():
                     measure = "extension_cavity_wall_insulation"
diff --git a/etl/webscrape/Zoopla.py b/etl/webscrape/Zoopla.py
index 9d15e019..f458bd02 100644
--- a/etl/webscrape/Zoopla.py
+++ b/etl/webscrape/Zoopla.py
@@ -164,16 +164,16 @@ def parse_price(p):
 
 if __name__ == "__main__":
     asset_list = pd.read_excel(
-        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting "
-        "Project/modelling_sample.xlsx",
-        sheet_name="Standardised Asset List"
+        "/Users/khalimconn-kowlessar/Downloads/Book.xlsx",
+        sheet_name="PFH"
     )
 
-    asset_list = asset_list[~pd.isnull(asset_list["epc_os_uprn"])]
-    asset_list = asset_list.drop_duplicates("epc_os_uprn")
-    asset_list["epc_os_uprn"] = asset_list["epc_os_uprn"].astype(int).astype(str)
+    asset_list = asset_list[~pd.isnull(asset_list["UPRN"])]
+    asset_list = asset_list.drop_duplicates("UPRN")
+    asset_list = asset_list[~asset_list["UPRN"].isin(["Check address"])]
+    asset_list["UPRN"] = asset_list["UPRN"].astype(int).astype(str)
 
-    uprns = asset_list["epc_os_uprn"].tolist()
+    uprns = asset_list["UPRN"].tolist()
     urls = [f"https://www.zoopla.co.uk/property/uprn/{uprn}/" for uprn in uprns]
 
     with Pool(processes=2) as pool:
@@ -188,14 +188,14 @@ if __name__ == "__main__":
     df.to_csv("zoopla_estimates.csv", index=False)
 
     merged = asset_list.merge(
-        df[["uprn", "valuation"]],
-        left_on="epc_os_uprn",
+        df,
+        left_on="UPRN",
         right_on="uprn",
         how="left"
     )
 
     merged.to_excel(
-        "20251029 AL Portfolio - Standardised - with valuations.xlsx",
+        "BH - with enhanced property data.xlsx",
         index=False
     )
 