From 942c2923daf5a18d528ef47847bc2b2b2b8d512e Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Wed, 8 Apr 2026 10:55:48 +0000 Subject: [PATCH 01/32] initial setup --- .../hubspot_trigger_orchestrator/handler.py | 61 +++++++++++++++++++ .../hubspot_deal_differ.py | 21 +++++++ ...ot_trigger_orchestrator_trigger_request.py | 5 ++ etl/hubspot/hubspotDataTodB.py | 6 +- etl/hubspot/scripts/scraper/main.py | 10 +-- 5 files changed, 96 insertions(+), 7 deletions(-) create mode 100644 backend/hubspot_trigger_orchestrator/handler.py create mode 100644 backend/hubspot_trigger_orchestrator/hubspot_deal_differ.py create mode 100644 backend/hubspot_trigger_orchestrator/hubspot_trigger_orchestrator_trigger_request.py diff --git a/backend/hubspot_trigger_orchestrator/handler.py b/backend/hubspot_trigger_orchestrator/handler.py new file mode 100644 index 00000000..1f83ed80 --- /dev/null +++ b/backend/hubspot_trigger_orchestrator/handler.py @@ -0,0 +1,61 @@ +import json +from typing import Any, Dict, Mapping, Optional + +from backend.app.db.models.organisation import HubspotDealData +from backend.hubspot_trigger_orchestrator.hubspot_deal_differ import HubspotDealDiffer +from backend.hubspot_trigger_orchestrator.hubspot_trigger_orchestrator_trigger_request import ( + HubspotTriggerOrchestratorTriggerRequest, +) +from backend.utils.subtasks import task_handler +from etl.hubspot.hubspotClient import HubspotClient +from etl.hubspot.hubspotDataTodB import HubspotDataToDb +from utils.logger import setup_logger + +logger = setup_logger() + + +@task_handler() +def handler(event: Mapping[str, Any], context: Any) -> None: + + db_client = HubspotDataToDb() + hubspot_client = HubspotClient() + + for record in event.get("Records", []): + body_dict = json.loads(record["body"]) + + logger.debug("Validating request body") + payload = HubspotTriggerOrchestratorTriggerRequest.model_validate(body_dict) + logger.debug("Successfully validated request body") + + hubspot_deal_id: str = payload.hubspot_deal_id + + db_deal: Optional[HubspotDealData] = db_client.find_deal_with_deal_id( + hubspot_deal_id + ) + if not db_deal: + # new hubspot deal, no diffing to do + # TODO: trigger hubspot to db ETL + return + + hubspot_deal: Dict[str, str] + company: Optional[str] + listing: Optional[dict[str, str]] + + hubspot_deal, company, listing = hubspot_client.get_deal_info_for_db( + hubspot_deal_id + ) + + if HubspotDealDiffer.check_for_pashub_trigger( + new_deal=hubspot_deal, old_deal=db_deal + ): + # TODO: trigger pashub file fetcher + return + + if HubspotDealDiffer.check_for_db_update_trigger( + new_deal=hubspot_deal, + new_company=company, + new_listing=listing, + old_deal=db_deal, + ): + # TODO: trigger db upsert + return diff --git a/backend/hubspot_trigger_orchestrator/hubspot_deal_differ.py b/backend/hubspot_trigger_orchestrator/hubspot_deal_differ.py new file mode 100644 index 00000000..9d66c637 --- /dev/null +++ b/backend/hubspot_trigger_orchestrator/hubspot_deal_differ.py @@ -0,0 +1,21 @@ +from typing import Dict, Optional + +from backend.app.db.models.organisation import HubspotDealData + + +class HubspotDealDiffer: + + @staticmethod + def check_for_pashub_trigger( + new_deal: Dict[str, str], old_deal: HubspotDealData + ) -> bool: + raise NotImplementedError + + @staticmethod + def check_for_db_update_trigger( + new_deal: Dict[str, str], + new_company: Optional[str], + new_listing: Optional[Dict[str, str]], + old_deal: HubspotDealData, + ) -> bool: + raise NotImplementedError diff --git a/backend/hubspot_trigger_orchestrator/hubspot_trigger_orchestrator_trigger_request.py b/backend/hubspot_trigger_orchestrator/hubspot_trigger_orchestrator_trigger_request.py new file mode 100644 index 00000000..1adfa07c --- /dev/null +++ b/backend/hubspot_trigger_orchestrator/hubspot_trigger_orchestrator_trigger_request.py @@ -0,0 +1,5 @@ +from pydantic import BaseModel + + +class HubspotTriggerOrchestratorTriggerRequest(BaseModel): + hubspot_deal_id: str diff --git a/etl/hubspot/hubspotDataTodB.py b/etl/hubspot/hubspotDataTodB.py index 6325efc2..36167bf0 100644 --- a/etl/hubspot/hubspotDataTodB.py +++ b/etl/hubspot/hubspotDataTodB.py @@ -74,7 +74,7 @@ class HubspotDataToDb: .all() ) - def find_deal_with_deal_id(self, deal_id): + def find_deal_with_deal_id(self, deal_id: str) -> Optional[HubspotDealData]: with db_read_session() as session: return ( session.query(HubspotDealData) @@ -477,7 +477,9 @@ class HubspotDataToDb: dealname=deal_data.get("dealname"), dealstage=deal_data.get("dealstage"), listing_id=listing.get("listing_id", None) if listing else None, - landlord_property_id=listing.get("owner_property_id") if listing else None, + landlord_property_id=( + listing.get("owner_property_id") if listing else None + ), uprn=listing.get("national_uprn") if listing else None, outcome=deal_data.get("outcome"), outcome_notes=deal_data.get("outcome_notes"), diff --git a/etl/hubspot/scripts/scraper/main.py b/etl/hubspot/scripts/scraper/main.py index 4f71c6d0..a003ad28 100644 --- a/etl/hubspot/scripts/scraper/main.py +++ b/etl/hubspot/scripts/scraper/main.py @@ -16,9 +16,9 @@ def handler(body: dict[str, Any], context: Any) -> None: hubspot: HubspotClient = HubspotClient() dbloader: HubspotDataToDb = HubspotDataToDb() - deal = dbloader.find_deal_with_deal_id(hubspot_deal_id) - if deal: - dbloader.update_deal_with_checks(deal, hubspot) + db_deal = dbloader.find_deal_with_deal_id(hubspot_deal_id) + if db_deal: + dbloader.update_deal_with_checks(db_deal, hubspot) else: - deal, company, listing = hubspot.get_deal_info_for_db(hubspot_deal_id) - dbloader.upsert_deal(deal, company, listing, hubspot) + hubspot_deal, company, listing = hubspot.get_deal_info_for_db(hubspot_deal_id) + dbloader.upsert_deal(hubspot_deal, company, listing, hubspot) From d6bfef59aff595c9aa3baa9a2aaccd4581d69b4d Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Wed, 8 Apr 2026 14:48:17 +0000 Subject: [PATCH 02/32] remove db update from hubspot client get method --- .../hubspot_trigger_orchestrator/handler.py | 2 +- etl/hubspot/hubspotClient.py | 10 ++---- etl/hubspot/hubspotDataTodB.py | 23 ++++++++------ etl/hubspot/scripts/scraper/main.py | 31 ++++++++++++++----- .../tests/test_hubspot_client_integration.py | 2 +- 5 files changed, 41 insertions(+), 27 deletions(-) diff --git a/backend/hubspot_trigger_orchestrator/handler.py b/backend/hubspot_trigger_orchestrator/handler.py index 1f83ed80..c79fe2b9 100644 --- a/backend/hubspot_trigger_orchestrator/handler.py +++ b/backend/hubspot_trigger_orchestrator/handler.py @@ -41,7 +41,7 @@ def handler(event: Mapping[str, Any], context: Any) -> None: company: Optional[str] listing: Optional[dict[str, str]] - hubspot_deal, company, listing = hubspot_client.get_deal_info_for_db( + hubspot_deal, company, listing = hubspot_client.get_deal_company_listing( hubspot_deal_id ) diff --git a/etl/hubspot/hubspotClient.py b/etl/hubspot/hubspotClient.py index a9ea535d..777ad482 100644 --- a/etl/hubspot/hubspotClient.py +++ b/etl/hubspot/hubspotClient.py @@ -26,10 +26,10 @@ from hubspot.crm.associations.v4.models import ( # type: ignore[reportMissingTy ForwardPaging as AssociationsPaging, NextPage as AssociationsPagingNext, ) -from etl.hubspot.hubspotDataTodB import CompanyData, HubspotDataToDb from backend.app.config import get_settings +from etl.hubspot.company_data import CompanyData from utils.logger import setup_logger import mimetypes @@ -279,18 +279,12 @@ class HubspotClient: deal_info: dict[str, str] = cast(dict[str, str], deal.properties) # type: ignore[reportUnknownMemberType] return deal_info - def get_deal_info_for_db( + def get_deal_company_listing( self, deal_id: str ) -> tuple[dict[str, str], Optional[str], Optional[dict[str, str]]]: deal: dict[str, str] = self.from_deal_id_get_info(deal_id) company: Optional[str] = self.from_deal_id_get_associated_company_id(deal_id) - - if company: - company_data: CompanyData = self.get_company_information(company) - dbloader: HubspotDataToDb = HubspotDataToDb() - dbloader.upsert_company(company_data) - listing: Optional[dict[str, str]] = self.from_deal_id_get_associated_listing( deal_id ) diff --git a/etl/hubspot/hubspotDataTodB.py b/etl/hubspot/hubspotDataTodB.py index 36167bf0..49dd1685 100644 --- a/etl/hubspot/hubspotDataTodB.py +++ b/etl/hubspot/hubspotDataTodB.py @@ -2,17 +2,14 @@ from backend.app.db.connection import db_read_session from backend.app.db.models.organisation import Organisation, HubspotDealData from sqlmodel import select from datetime import datetime, timezone -from typing import TypedDict, Optional +from typing import Dict, Optional +from etl.hubspot.company_data import CompanyData +from etl.hubspot.hubspotClient import HubspotClient from etl.hubspot.s3_uploader import S3Uploader import hashlib import os -class CompanyData(TypedDict): - hs_object_id: str - name: str - - class HubspotDataToDb: def __init__(self): self.s3 = S3Uploader( @@ -98,7 +95,9 @@ class HubspotDataToDb: sha256.update(chunk) return sha256.hexdigest() - def update_deal_with_checks(self, deal_in_db, hubspot_client) -> bool: + def update_deal_with_checks( + self, deal_in_db: HubspotDealData, hubspot_client: HubspotClient + ) -> bool: """ Checks if a deal needs updating and syncs it with HubSpot. Also handles major_condition_issue_photos file upload to S3 with integrity check. @@ -112,7 +111,7 @@ class HubspotDataToDb: print(f"🔍 Checking if deal needs updating (deal_id={deal_in_db.deal_id})") - hs_deal, hs_company_id, hs_listing = hubspot_client.get_deal_info_for_db( + hs_deal, hs_company_id, hs_listing = hubspot_client.get_deal_company_listing( deal_in_db.deal_id ) @@ -346,7 +345,13 @@ class HubspotDataToDb: return True - def upsert_deal(self, deal_data, company, listing, hubspot_client): + def upsert_deal( + self, + deal_data: Dict[str, str], + company: Optional[str], + listing: Optional[dict[str, str]], + hubspot_client: HubspotClient, + ): """ Inserts or updates a deal record. Also uploads photos if present and adds S3 URL. diff --git a/etl/hubspot/scripts/scraper/main.py b/etl/hubspot/scripts/scraper/main.py index a003ad28..e5658a20 100644 --- a/etl/hubspot/scripts/scraper/main.py +++ b/etl/hubspot/scripts/scraper/main.py @@ -1,7 +1,8 @@ +from backend.app.db.models.organisation import HubspotDealData from etl.hubspot.hubspotClient import HubspotClient -from etl.hubspot.hubspotDataTodB import HubspotDataToDb +from etl.hubspot.hubspotDataTodB import CompanyData, HubspotDataToDb from backend.utils.subtasks import task_handler -from typing import Any +from typing import Any, Dict, Optional @task_handler() @@ -14,11 +15,25 @@ def handler(body: dict[str, Any], context: Any) -> None: ) hubspot_deal_id = "327170793707" - hubspot: HubspotClient = HubspotClient() - dbloader: HubspotDataToDb = HubspotDataToDb() - db_deal = dbloader.find_deal_with_deal_id(hubspot_deal_id) + hubspot_client = HubspotClient() + db_client = HubspotDataToDb() + db_deal: Optional[HubspotDealData] = db_client.find_deal_with_deal_id( + hubspot_deal_id + ) if db_deal: - dbloader.update_deal_with_checks(db_deal, hubspot) + db_client.update_deal_with_checks(db_deal, hubspot_client) else: - hubspot_deal, company, listing = hubspot.get_deal_info_for_db(hubspot_deal_id) - dbloader.upsert_deal(hubspot_deal, company, listing, hubspot) + hubspot_deal: Dict[str, str] + company: Optional[str] + listing: Optional[dict[str, str]] + + hubspot_deal, company, listing = hubspot_client.get_deal_company_listing( + hubspot_deal_id + ) + + if company: + company_data: CompanyData = hubspot_client.get_company_information(company) + db_client: HubspotDataToDb = HubspotDataToDb() + db_client.upsert_company(company_data) + + db_client.upsert_deal(hubspot_deal, company, listing, hubspot_client) diff --git a/etl/hubspot/tests/test_hubspot_client_integration.py b/etl/hubspot/tests/test_hubspot_client_integration.py index a3d8ae54..d0dd818a 100644 --- a/etl/hubspot/tests/test_hubspot_client_integration.py +++ b/etl/hubspot/tests/test_hubspot_client_integration.py @@ -71,7 +71,7 @@ class TestHubspotClientIntegration: def test_get_deal_info_for_db(self, client: HubspotClient): deal_id: str = "263490768079" - deal, company, listing = client.get_deal_info_for_db(deal_id) + deal, company, listing = client.get_deal_company_listing(deal_id) assert "dealname" in deal assert "dealstage" in deal From b968fbab448c39aaabd21251c406f5ca7c0a8f83 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Wed, 8 Apr 2026 14:48:29 +0000 Subject: [PATCH 03/32] include missing file --- etl/hubspot/company_data.py | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 etl/hubspot/company_data.py diff --git a/etl/hubspot/company_data.py b/etl/hubspot/company_data.py new file mode 100644 index 00000000..13b2ee88 --- /dev/null +++ b/etl/hubspot/company_data.py @@ -0,0 +1,6 @@ +from typing import TypedDict + + +class CompanyData(TypedDict): + hs_object_id: str + name: str From 540054e12f83514aa5baf2861235514e4450bae1 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Wed, 8 Apr 2026 14:53:11 +0000 Subject: [PATCH 04/32] rename method --- backend/hubspot_trigger_orchestrator/handler.py | 4 ++-- etl/hubspot/hubspotClient.py | 2 +- etl/hubspot/hubspotDataTodB.py | 4 ++-- etl/hubspot/scripts/scraper/main.py | 4 ++-- etl/hubspot/tests/test_hubspot_client_integration.py | 2 +- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/backend/hubspot_trigger_orchestrator/handler.py b/backend/hubspot_trigger_orchestrator/handler.py index c79fe2b9..38724812 100644 --- a/backend/hubspot_trigger_orchestrator/handler.py +++ b/backend/hubspot_trigger_orchestrator/handler.py @@ -41,8 +41,8 @@ def handler(event: Mapping[str, Any], context: Any) -> None: company: Optional[str] listing: Optional[dict[str, str]] - hubspot_deal, company, listing = hubspot_client.get_deal_company_listing( - hubspot_deal_id + hubspot_deal, company, listing = ( + hubspot_client.get_deal_and_company_and_listing(hubspot_deal_id) ) if HubspotDealDiffer.check_for_pashub_trigger( diff --git a/etl/hubspot/hubspotClient.py b/etl/hubspot/hubspotClient.py index 777ad482..cedaa7f3 100644 --- a/etl/hubspot/hubspotClient.py +++ b/etl/hubspot/hubspotClient.py @@ -279,7 +279,7 @@ class HubspotClient: deal_info: dict[str, str] = cast(dict[str, str], deal.properties) # type: ignore[reportUnknownMemberType] return deal_info - def get_deal_company_listing( + def get_deal_and_company_and_listing( self, deal_id: str ) -> tuple[dict[str, str], Optional[str], Optional[dict[str, str]]]: diff --git a/etl/hubspot/hubspotDataTodB.py b/etl/hubspot/hubspotDataTodB.py index 49dd1685..e7008618 100644 --- a/etl/hubspot/hubspotDataTodB.py +++ b/etl/hubspot/hubspotDataTodB.py @@ -111,8 +111,8 @@ class HubspotDataToDb: print(f"🔍 Checking if deal needs updating (deal_id={deal_in_db.deal_id})") - hs_deal, hs_company_id, hs_listing = hubspot_client.get_deal_company_listing( - deal_in_db.deal_id + hs_deal, hs_company_id, hs_listing = ( + hubspot_client.get_deal_and_company_and_listing(deal_in_db.deal_id) ) # Soft compare key fields diff --git a/etl/hubspot/scripts/scraper/main.py b/etl/hubspot/scripts/scraper/main.py index e5658a20..d8d4a357 100644 --- a/etl/hubspot/scripts/scraper/main.py +++ b/etl/hubspot/scripts/scraper/main.py @@ -27,8 +27,8 @@ def handler(body: dict[str, Any], context: Any) -> None: company: Optional[str] listing: Optional[dict[str, str]] - hubspot_deal, company, listing = hubspot_client.get_deal_company_listing( - hubspot_deal_id + hubspot_deal, company, listing = ( + hubspot_client.get_deal_and_company_and_listing(hubspot_deal_id) ) if company: diff --git a/etl/hubspot/tests/test_hubspot_client_integration.py b/etl/hubspot/tests/test_hubspot_client_integration.py index d0dd818a..0f4b425c 100644 --- a/etl/hubspot/tests/test_hubspot_client_integration.py +++ b/etl/hubspot/tests/test_hubspot_client_integration.py @@ -71,7 +71,7 @@ class TestHubspotClientIntegration: def test_get_deal_info_for_db(self, client: HubspotClient): deal_id: str = "263490768079" - deal, company, listing = client.get_deal_company_listing(deal_id) + deal, company, listing = client.get_deal_and_company_and_listing(deal_id) assert "dealname" in deal assert "dealstage" in deal From 21ca0d7649bc2ff9be36dd236d80b3abed391894 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Wed, 8 Apr 2026 15:25:50 +0000 Subject: [PATCH 05/32] =?UTF-8?q?diff=20checker=20for=20pashub=20trigger?= =?UTF-8?q?=20=F0=9F=9F=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pytest.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytest.ini b/pytest.ini index db7afaf5..792b27e0 100644 --- a/pytest.ini +++ b/pytest.ini @@ -3,6 +3,6 @@ pythonpath = . log_cli = true log_cli_level = INFO addopts = --cov-report term-missing --cov=etl/epc --cov=recommendations --cov=backend --cov=etl/epc_clean --cov=etl/spatial -testpaths = recommendations/tests backend/tests etl/epc/tests etl/epc_clean/tests etl/spatial/tests backend/condition/tests backend/address2UPRN/tests backend/onboarders/tests backend/categorisation/tests backend/export/tests etl/hubspot/tests +testpaths = recommendations/tests backend/tests etl/epc/tests etl/epc_clean/tests etl/spatial/tests backend/condition/tests backend/address2UPRN/tests backend/onboarders/tests backend/categorisation/tests backend/export/tests etl/hubspot/tests backend/hubspot_trigger_orchestrator/tests markers = integration: mark a test as an integration test From 39f37f1668907db733ffa602e1a2b84c9e766fd0 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Wed, 8 Apr 2026 15:26:32 +0000 Subject: [PATCH 06/32] =?UTF-8?q?diff=20checker=20for=20pashub=20trigger?= =?UTF-8?q?=20=F0=9F=9F=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../tests/test_hubspot_deal_differ.py | 424 ++++++++++++++++++ 1 file changed, 424 insertions(+) create mode 100644 backend/hubspot_trigger_orchestrator/tests/test_hubspot_deal_differ.py diff --git a/backend/hubspot_trigger_orchestrator/tests/test_hubspot_deal_differ.py b/backend/hubspot_trigger_orchestrator/tests/test_hubspot_deal_differ.py new file mode 100644 index 00000000..ddca766a --- /dev/null +++ b/backend/hubspot_trigger_orchestrator/tests/test_hubspot_deal_differ.py @@ -0,0 +1,424 @@ +from datetime import datetime +from typing import Dict +import uuid + +from backend.app.db.models.organisation import HubspotDealData +from backend.hubspot_trigger_orchestrator.hubspot_deal_differ import HubspotDealDiffer + + +def test_pashub_trigger__outcome_note_added__returns_false() -> None: + # arrange + deal_id = uuid.uuid4() + + old_deal = HubspotDealData( + id=deal_id, + deal_id="1", + created_at=datetime(2025, 12, 1, 12, 0, 0), + updated_at=datetime(2025, 12, 1, 12, 0, 0), + ) + new_deal: Dict[str, str] = { + "id": str(deal_id), + "deal_id": "1", + "outcome_notes": "test note", + "created_at": datetime(2025, 12, 1, 12, 0, 0).isoformat(), + "updated_at": datetime(2025, 12, 1, 12, 30, 0).isoformat(), + } + + expected_output = False + + # act + actual_output: bool = HubspotDealDiffer.check_for_pashub_trigger( + new_deal=new_deal, old_deal=old_deal + ) + + # assert + assert actual_output == expected_output + + +def test_pashub_trigger__pashub_link_changed__returns_true() -> None: + # arrange + deal_id = uuid.uuid4() + + old_deal = HubspotDealData( + id=deal_id, + deal_id="1", + pashub_link="www.google.co.uk", + created_at=datetime(2025, 12, 1, 12, 0, 0), + updated_at=datetime(2025, 12, 1, 12, 0, 0), + ) + new_deal: Dict[str, str] = { + "id": str(deal_id), + "deal_id": "1", + "pashub_link": "www.bbc.co.uk", + "created_at": datetime(2025, 12, 1, 12, 0, 0).isoformat(), + "updated_at": datetime(2025, 12, 1, 12, 30, 0).isoformat(), + } + + expected_output = True + + # act + actual_output: bool = HubspotDealDiffer.check_for_pashub_trigger( + new_deal=new_deal, old_deal=old_deal + ) + + # assert + assert actual_output == expected_output + + +def test_pashub_trigger__coordination_completed_and_pashub_link_set__returns_true() -> ( + None +): + # arrange + deal_id = uuid.uuid4() + + old_deal = HubspotDealData( + id=deal_id, + deal_id="1", + pashub_link="www.google.co.uk", + coordination_status="random", + created_at=datetime(2025, 12, 1, 12, 0, 0), + updated_at=datetime(2025, 12, 1, 12, 0, 0), + ) + new_deal: Dict[str, str] = { + "id": str(deal_id), + "deal_id": "1", + "coordination_status": "v1 ioe/mtp complete", + "pashub_link": "www.google.co.uk", + "created_at": datetime(2025, 12, 1, 12, 0, 0).isoformat(), + "updated_at": datetime(2025, 12, 1, 12, 30, 0).isoformat(), + } + + expected_output = True + + # act + actual_output: bool = HubspotDealDiffer.check_for_pashub_trigger( + new_deal=new_deal, old_deal=old_deal + ) + + # assert + assert actual_output == expected_output + + +def test_pashub_trigger__coordination_completed_and_pashub_link_set__returns_true_2() -> ( + None +): + # arrange + deal_id = uuid.uuid4() + + old_deal = HubspotDealData( + id=deal_id, + deal_id="1", + pashub_link="www.google.co.uk", + coordination_status="random", + created_at=datetime(2025, 12, 1, 12, 0, 0), + updated_at=datetime(2025, 12, 1, 12, 0, 0), + ) + new_deal: Dict[str, str] = { + "id": str(deal_id), + "deal_id": "1", + "coordination_status": "v2 ioe/mtp complete", + "pashub_link": "www.google.co.uk", + "created_at": datetime(2025, 12, 1, 12, 0, 0).isoformat(), + "updated_at": datetime(2025, 12, 1, 12, 30, 0).isoformat(), + } + + expected_output = True + + # act + actual_output: bool = HubspotDealDiffer.check_for_pashub_trigger( + new_deal=new_deal, old_deal=old_deal + ) + + # assert + assert actual_output == expected_output + + +def test_pashub_trigger__coordination_completed_and_pashub_link_not_set__returns_false() -> ( + None +): + # arrange + deal_id = uuid.uuid4() + + old_deal = HubspotDealData( + id=deal_id, + deal_id="1", + coordination_status="random", + created_at=datetime(2025, 12, 1, 12, 0, 0), + updated_at=datetime(2025, 12, 1, 12, 0, 0), + ) + new_deal: Dict[str, str] = { + "id": str(deal_id), + "deal_id": "1", + "coordination_status": "v2 ioe/mtp complete", + "created_at": datetime(2025, 12, 1, 12, 0, 0).isoformat(), + "updated_at": datetime(2025, 12, 1, 12, 30, 0).isoformat(), + } + + expected_output = False + + # act + actual_output: bool = HubspotDealDiffer.check_for_pashub_trigger( + new_deal=new_deal, old_deal=old_deal + ) + + # assert + assert actual_output == expected_output + + +def test_pashub_trigger__coordination_status_not_completed_and_pashub_link_set__returns_false() -> ( + None +): + # arrange + deal_id = uuid.uuid4() + + old_deal = HubspotDealData( + id=deal_id, + deal_id="1", + pashub_link="www.google.co.uk", + coordination_status="random", + created_at=datetime(2025, 12, 1, 12, 0, 0), + updated_at=datetime(2025, 12, 1, 12, 0, 0), + ) + new_deal: Dict[str, str] = { + "id": str(deal_id), + "deal_id": "1", + "coordination_status": "not complete", + "pashub_link": "www.google.co.uk", + "created_at": datetime(2025, 12, 1, 12, 0, 0).isoformat(), + "updated_at": datetime(2025, 12, 1, 12, 30, 0).isoformat(), + } + + expected_output = False + + # act + actual_output: bool = HubspotDealDiffer.check_for_pashub_trigger( + new_deal=new_deal, old_deal=old_deal + ) + + # assert + assert actual_output == expected_output + + +def test_pashub_trigger__design_completed_and_pashub_link_set__returns_true() -> None: + # arrange + deal_id = uuid.uuid4() + + old_deal = HubspotDealData( + id=deal_id, + deal_id="1", + pashub_link="www.google.co.uk", + created_at=datetime(2025, 12, 1, 12, 0, 0), + updated_at=datetime(2025, 12, 1, 12, 0, 0), + ) + new_deal: Dict[str, str] = { + "id": str(deal_id), + "deal_id": "1", + "pashub_link": "www.google.co.uk", + "design_status": "uploaded", + "created_at": datetime(2025, 12, 1, 12, 0, 0).isoformat(), + "updated_at": datetime(2025, 12, 1, 12, 30, 0).isoformat(), + } + + expected_output = True + + # act + actual_output: bool = HubspotDealDiffer.check_for_pashub_trigger( + new_deal=new_deal, old_deal=old_deal + ) + + # assert + assert actual_output == expected_output + + +def test_pashub_trigger__design_completed_and_pashub_link_not_set__returns_false() -> ( + None +): + # arrange + deal_id = uuid.uuid4() + + old_deal = HubspotDealData( + id=deal_id, + deal_id="1", + created_at=datetime(2025, 12, 1, 12, 0, 0), + updated_at=datetime(2025, 12, 1, 12, 0, 0), + ) + new_deal: Dict[str, str] = { + "id": str(deal_id), + "deal_id": "1", + "design_status": "uploaded", + "created_at": datetime(2025, 12, 1, 12, 0, 0).isoformat(), + "updated_at": datetime(2025, 12, 1, 12, 30, 0).isoformat(), + } + + expected_output = False + + # act + actual_output: bool = HubspotDealDiffer.check_for_pashub_trigger( + new_deal=new_deal, old_deal=old_deal + ) + + # assert + assert actual_output == expected_output + + +def test_pashub_trigger__design_not_completed_and_pashub_link_set__returns_false() -> ( + None +): + # arrange + deal_id = uuid.uuid4() + + old_deal = HubspotDealData( + id=deal_id, + deal_id="1", + pashub_link="www.google.co.uk", + created_at=datetime(2025, 12, 1, 12, 0, 0), + updated_at=datetime(2025, 12, 1, 12, 0, 0), + ) + new_deal: Dict[str, str] = { + "id": str(deal_id), + "deal_id": "1", + "pashub_link": "www.google.co.uk", + "design_status": "not uploaded", + "created_at": datetime(2025, 12, 1, 12, 0, 0).isoformat(), + "updated_at": datetime(2025, 12, 1, 12, 30, 0).isoformat(), + } + + expected_output = False + + # act + actual_output: bool = HubspotDealDiffer.check_for_pashub_trigger( + new_deal=new_deal, old_deal=old_deal + ) + + # assert + assert actual_output == expected_output + + +def test_pashub_trigger__lodgement_completed_and_pashub_link_set__returns_true() -> ( + None +): + # arrange + deal_id = uuid.uuid4() + + old_deal = HubspotDealData( + id=deal_id, + deal_id="1", + pashub_link="www.google.co.uk", + created_at=datetime(2025, 12, 1, 12, 0, 0), + updated_at=datetime(2025, 12, 1, 12, 0, 0), + ) + new_deal: Dict[str, str] = { + "id": str(deal_id), + "deal_id": "1", + "pashub_link": "www.google.co.uk", + "lodgement_status": "lodgement complete", + "created_at": datetime(2025, 12, 1, 12, 0, 0).isoformat(), + "updated_at": datetime(2025, 12, 1, 12, 30, 0).isoformat(), + } + + expected_output = True + + # act + actual_output: bool = HubspotDealDiffer.check_for_pashub_trigger( + new_deal=new_deal, old_deal=old_deal + ) + + # assert + assert actual_output == expected_output + + +def test_pashub_trigger__lodgement_completed_and_pashub_link_set__returns_true_2() -> ( + None +): + # arrange + deal_id = uuid.uuid4() + + old_deal = HubspotDealData( + id=deal_id, + deal_id="1", + pashub_link="www.google.co.uk", + created_at=datetime(2025, 12, 1, 12, 0, 0), + updated_at=datetime(2025, 12, 1, 12, 0, 0), + ) + new_deal: Dict[str, str] = { + "id": str(deal_id), + "deal_id": "1", + "pashub_link": "www.google.co.uk", + "lodgement_status": "measures lodged", + "created_at": datetime(2025, 12, 1, 12, 0, 0).isoformat(), + "updated_at": datetime(2025, 12, 1, 12, 30, 0).isoformat(), + } + + expected_output = True + + # act + actual_output: bool = HubspotDealDiffer.check_for_pashub_trigger( + new_deal=new_deal, old_deal=old_deal + ) + + # assert + assert actual_output == expected_output + + +def test_pashub_trigger__lodgement_completed_and_pashub_link_not_set__returns_false() -> ( + None +): + # arrange + deal_id = uuid.uuid4() + + old_deal = HubspotDealData( + id=deal_id, + deal_id="1", + created_at=datetime(2025, 12, 1, 12, 0, 0), + updated_at=datetime(2025, 12, 1, 12, 0, 0), + ) + new_deal: Dict[str, str] = { + "id": str(deal_id), + "deal_id": "1", + "design_status": "lodgement complete", + "created_at": datetime(2025, 12, 1, 12, 0, 0).isoformat(), + "updated_at": datetime(2025, 12, 1, 12, 30, 0).isoformat(), + } + + expected_output = False + + # act + actual_output: bool = HubspotDealDiffer.check_for_pashub_trigger( + new_deal=new_deal, old_deal=old_deal + ) + + # assert + assert actual_output == expected_output + + +def test_pashub_trigger__lodgement_not_completed_and_pashub_link_set__returns_false() -> ( + None +): + # arrange + deal_id = uuid.uuid4() + + old_deal = HubspotDealData( + id=deal_id, + deal_id="1", + pashub_link="www.google.co.uk", + created_at=datetime(2025, 12, 1, 12, 0, 0), + updated_at=datetime(2025, 12, 1, 12, 0, 0), + ) + new_deal: Dict[str, str] = { + "id": str(deal_id), + "deal_id": "1", + "pashub_link": "www.google.co.uk", + "lodgement_status": "lodgement not complete", + "created_at": datetime(2025, 12, 1, 12, 0, 0).isoformat(), + "updated_at": datetime(2025, 12, 1, 12, 30, 0).isoformat(), + } + + expected_output = False + + # act + actual_output: bool = HubspotDealDiffer.check_for_pashub_trigger( + new_deal=new_deal, old_deal=old_deal + ) + + # assert + assert actual_output == expected_output From 9f7448ac438cbfd6ece4f91d556fa58f2798abce Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Wed, 8 Apr 2026 15:35:42 +0000 Subject: [PATCH 07/32] =?UTF-8?q?pashub=20trigger=20true=20if=20pashub=20l?= =?UTF-8?q?ink=20is=20changed=20=F0=9F=9F=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../hubspot_deal_differ.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/backend/hubspot_trigger_orchestrator/hubspot_deal_differ.py b/backend/hubspot_trigger_orchestrator/hubspot_deal_differ.py index 9d66c637..50f3af04 100644 --- a/backend/hubspot_trigger_orchestrator/hubspot_deal_differ.py +++ b/backend/hubspot_trigger_orchestrator/hubspot_deal_differ.py @@ -9,6 +9,23 @@ class HubspotDealDiffer: def check_for_pashub_trigger( new_deal: Dict[str, str], old_deal: HubspotDealData ) -> bool: + """ + Case 1: PasHub Link is updated + Case 2: Coordination is completed (and PasHub Link is populated) + Case 3: Design is completed (and PasHub Link is populated) + Case 4: Lodgement is completed (and PasHub Link is populated) + """ + new_pashub_link: Optional[str] = new_deal["pashub_link"] + # Case 1 + if not new_pashub_link: + return False + + if not old_deal.pashub_link: + return True + + if old_deal.pashub_link != new_pashub_link: + return True + raise NotImplementedError @staticmethod From ad2c979b155840f36a14b239aa1cccaa20e361ca Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Wed, 8 Apr 2026 15:35:52 +0000 Subject: [PATCH 08/32] =?UTF-8?q?pashub=20trigger=20false=20if=20pashub=20?= =?UTF-8?q?link=20not=20set=20=F0=9F=9F=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/hubspot_trigger_orchestrator/hubspot_deal_differ.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/hubspot_trigger_orchestrator/hubspot_deal_differ.py b/backend/hubspot_trigger_orchestrator/hubspot_deal_differ.py index 50f3af04..ab2b667e 100644 --- a/backend/hubspot_trigger_orchestrator/hubspot_deal_differ.py +++ b/backend/hubspot_trigger_orchestrator/hubspot_deal_differ.py @@ -15,7 +15,7 @@ class HubspotDealDiffer: Case 3: Design is completed (and PasHub Link is populated) Case 4: Lodgement is completed (and PasHub Link is populated) """ - new_pashub_link: Optional[str] = new_deal["pashub_link"] + new_pashub_link: Optional[str] = new_deal.get("pashub_link", "") # Case 1 if not new_pashub_link: return False From 832bcd96e457a71453e1d4d2aa97a73dcba1b243 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Wed, 8 Apr 2026 15:42:29 +0000 Subject: [PATCH 09/32] =?UTF-8?q?pashub=20trigger=20true=20if=20coordinati?= =?UTF-8?q?on=20complete=20and=20pashub=20link=20set=20=F0=9F=9F=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../hubspot_deal_differ.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/backend/hubspot_trigger_orchestrator/hubspot_deal_differ.py b/backend/hubspot_trigger_orchestrator/hubspot_deal_differ.py index ab2b667e..77208432 100644 --- a/backend/hubspot_trigger_orchestrator/hubspot_deal_differ.py +++ b/backend/hubspot_trigger_orchestrator/hubspot_deal_differ.py @@ -1,4 +1,4 @@ -from typing import Dict, Optional +from typing import Dict, List, Optional from backend.app.db.models.organisation import HubspotDealData @@ -15,7 +15,11 @@ class HubspotDealDiffer: Case 3: Design is completed (and PasHub Link is populated) Case 4: Lodgement is completed (and PasHub Link is populated) """ - new_pashub_link: Optional[str] = new_deal.get("pashub_link", "") + new_pashub_link: str = new_deal.get("pashub_link", "") + COORDINATION_COMPLETE: List[str] = [ + "v1 ioe/mtp complete", + "v2 ioe/mtp complete", + ] # Case 1 if not new_pashub_link: return False @@ -26,6 +30,16 @@ class HubspotDealDiffer: if old_deal.pashub_link != new_pashub_link: return True + # Case 2 + new_coordination_status: str = new_deal.get("coordination_status", "") + + if ( + new_coordination_status + and new_coordination_status in COORDINATION_COMPLETE + and new_coordination_status != old_deal.coordination_status + ): + return True + raise NotImplementedError @staticmethod From 0dfd3f5238e969d4d233135c857f2084461b84c5 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Wed, 8 Apr 2026 15:45:41 +0000 Subject: [PATCH 10/32] =?UTF-8?q?pashub=20trigger=20true=20if=20design=20c?= =?UTF-8?q?omplete=20and=20pashub=20link=20set=20=F0=9F=9F=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../hubspot_deal_differ.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/backend/hubspot_trigger_orchestrator/hubspot_deal_differ.py b/backend/hubspot_trigger_orchestrator/hubspot_deal_differ.py index 77208432..ad20aca7 100644 --- a/backend/hubspot_trigger_orchestrator/hubspot_deal_differ.py +++ b/backend/hubspot_trigger_orchestrator/hubspot_deal_differ.py @@ -15,11 +15,14 @@ class HubspotDealDiffer: Case 3: Design is completed (and PasHub Link is populated) Case 4: Lodgement is completed (and PasHub Link is populated) """ - new_pashub_link: str = new_deal.get("pashub_link", "") COORDINATION_COMPLETE: List[str] = [ "v1 ioe/mtp complete", "v2 ioe/mtp complete", ] + RETROFIT_DESIGN_COMPLETE = "uploaded" + + new_pashub_link: str = new_deal.get("pashub_link", "") + # Case 1 if not new_pashub_link: return False @@ -40,6 +43,16 @@ class HubspotDealDiffer: ): return True + # Case 3 + new_design_status: str = new_deal.get("design_status", "") + + if ( + new_design_status + and new_design_status == RETROFIT_DESIGN_COMPLETE + and new_design_status != old_deal.design_status + ): + return True + raise NotImplementedError @staticmethod From 9da0cabb0ffcbb7338d5dd0c9796234202acbb0a Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Wed, 8 Apr 2026 15:52:41 +0000 Subject: [PATCH 11/32] =?UTF-8?q?pashub=20trigger=20true=20if=20lodgement?= =?UTF-8?q?=20complete=20and=20pashub=20link=20set=20=F0=9F=9F=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../hubspot_deal_differ.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/backend/hubspot_trigger_orchestrator/hubspot_deal_differ.py b/backend/hubspot_trigger_orchestrator/hubspot_deal_differ.py index ad20aca7..8f96ce73 100644 --- a/backend/hubspot_trigger_orchestrator/hubspot_deal_differ.py +++ b/backend/hubspot_trigger_orchestrator/hubspot_deal_differ.py @@ -20,6 +20,7 @@ class HubspotDealDiffer: "v2 ioe/mtp complete", ] RETROFIT_DESIGN_COMPLETE = "uploaded" + LODGEMENT_COMPLETE: List[str] = ["lodgement complete", "measures lodged"] new_pashub_link: str = new_deal.get("pashub_link", "") @@ -53,7 +54,17 @@ class HubspotDealDiffer: ): return True - raise NotImplementedError + # Case 4 + new_lodgement_status: str = new_deal.get("lodgement_status", "") + + if ( + new_lodgement_status + and new_lodgement_status in LODGEMENT_COMPLETE + and new_lodgement_status != old_deal.lodgement_status + ): + return True + + return False @staticmethod def check_for_db_update_trigger( From 2d0bc67731239d045c6f7106ba87e6a7b640b2ff Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Wed, 8 Apr 2026 15:59:29 +0000 Subject: [PATCH 12/32] =?UTF-8?q?diff=20checker=20for=20pashub=20trigger?= =?UTF-8?q?=20=F0=9F=9F=AA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../hubspot_deal_differ.py | 133 ++++++++++-------- 1 file changed, 72 insertions(+), 61 deletions(-) diff --git a/backend/hubspot_trigger_orchestrator/hubspot_deal_differ.py b/backend/hubspot_trigger_orchestrator/hubspot_deal_differ.py index 8f96ce73..1dd4ed51 100644 --- a/backend/hubspot_trigger_orchestrator/hubspot_deal_differ.py +++ b/backend/hubspot_trigger_orchestrator/hubspot_deal_differ.py @@ -4,67 +4,12 @@ from backend.app.db.models.organisation import HubspotDealData class HubspotDealDiffer: - - @staticmethod - def check_for_pashub_trigger( - new_deal: Dict[str, str], old_deal: HubspotDealData - ) -> bool: - """ - Case 1: PasHub Link is updated - Case 2: Coordination is completed (and PasHub Link is populated) - Case 3: Design is completed (and PasHub Link is populated) - Case 4: Lodgement is completed (and PasHub Link is populated) - """ - COORDINATION_COMPLETE: List[str] = [ - "v1 ioe/mtp complete", - "v2 ioe/mtp complete", - ] - RETROFIT_DESIGN_COMPLETE = "uploaded" - LODGEMENT_COMPLETE: List[str] = ["lodgement complete", "measures lodged"] - - new_pashub_link: str = new_deal.get("pashub_link", "") - - # Case 1 - if not new_pashub_link: - return False - - if not old_deal.pashub_link: - return True - - if old_deal.pashub_link != new_pashub_link: - return True - - # Case 2 - new_coordination_status: str = new_deal.get("coordination_status", "") - - if ( - new_coordination_status - and new_coordination_status in COORDINATION_COMPLETE - and new_coordination_status != old_deal.coordination_status - ): - return True - - # Case 3 - new_design_status: str = new_deal.get("design_status", "") - - if ( - new_design_status - and new_design_status == RETROFIT_DESIGN_COMPLETE - and new_design_status != old_deal.design_status - ): - return True - - # Case 4 - new_lodgement_status: str = new_deal.get("lodgement_status", "") - - if ( - new_lodgement_status - and new_lodgement_status in LODGEMENT_COMPLETE - and new_lodgement_status != old_deal.lodgement_status - ): - return True - - return False + COORDINATION_COMPLETE: List[str] = [ + "v1 ioe/mtp complete", + "v2 ioe/mtp complete", + ] + RETROFIT_DESIGN_COMPLETE = "uploaded" + LODGEMENT_COMPLETE: List[str] = ["lodgement complete", "measures lodged"] @staticmethod def check_for_db_update_trigger( @@ -74,3 +19,69 @@ class HubspotDealDiffer: old_deal: HubspotDealData, ) -> bool: raise NotImplementedError + + @staticmethod + def check_for_pashub_trigger( + new_deal: Dict[str, str], old_deal: HubspotDealData + ) -> bool: + new_pashub_link: str = new_deal.get("pashub_link", "") + + if not HubspotDealDiffer._has_valid_pashub_link(new_pashub_link): + return False + + if HubspotDealDiffer._new_or_updated_pashub_link(new_pashub_link, old_deal): + return True + + if HubspotDealDiffer._coordination_completed(new_deal, old_deal): + return True + + if HubspotDealDiffer._design_completed(new_deal, old_deal): + return True + + if HubspotDealDiffer._lodgement_completed(new_deal, old_deal): + return True + + return False + + @staticmethod + def _has_valid_pashub_link(new_pashub_link: str) -> bool: + return bool(new_pashub_link) + + @staticmethod + def _new_or_updated_pashub_link( + new_pashub_link: str, old_deal: HubspotDealData + ) -> bool: + if not old_deal.pashub_link: + return True + return old_deal.pashub_link != new_pashub_link + + @staticmethod + def _coordination_completed( + new_deal: Dict[str, str], old_deal: HubspotDealData + ) -> bool: + new_status: str = new_deal.get("coordination_status", "") + return ( + new_status != "" + and new_status in HubspotDealDiffer.COORDINATION_COMPLETE + and new_status != old_deal.coordination_status + ) + + @staticmethod + def _design_completed(new_deal: Dict[str, str], old_deal: HubspotDealData) -> bool: + new_status: str = new_deal.get("design_status", "") + return ( + new_status != "" + and new_status == HubspotDealDiffer.RETROFIT_DESIGN_COMPLETE + and new_status != old_deal.design_status + ) + + @staticmethod + def _lodgement_completed( + new_deal: Dict[str, str], old_deal: HubspotDealData + ) -> bool: + new_status: str = new_deal.get("lodgement_status", "") + return ( + new_status != "" + and new_status in HubspotDealDiffer.LODGEMENT_COMPLETE + and new_status != old_deal.lodgement_status + ) From f719149c03fe1827499fbc66df1facc0ab325676 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Wed, 8 Apr 2026 16:04:30 +0000 Subject: [PATCH 13/32] =?UTF-8?q?replace=20incorrect=20tests=20?= =?UTF-8?q?=F0=9F=9F=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../tests/test_hubspot_deal_differ.py | 73 +------------------ 1 file changed, 4 insertions(+), 69 deletions(-) diff --git a/backend/hubspot_trigger_orchestrator/tests/test_hubspot_deal_differ.py b/backend/hubspot_trigger_orchestrator/tests/test_hubspot_deal_differ.py index ddca766a..ba6b80e4 100644 --- a/backend/hubspot_trigger_orchestrator/tests/test_hubspot_deal_differ.py +++ b/backend/hubspot_trigger_orchestrator/tests/test_hubspot_deal_differ.py @@ -165,40 +165,6 @@ def test_pashub_trigger__coordination_completed_and_pashub_link_not_set__returns assert actual_output == expected_output -def test_pashub_trigger__coordination_status_not_completed_and_pashub_link_set__returns_false() -> ( - None -): - # arrange - deal_id = uuid.uuid4() - - old_deal = HubspotDealData( - id=deal_id, - deal_id="1", - pashub_link="www.google.co.uk", - coordination_status="random", - created_at=datetime(2025, 12, 1, 12, 0, 0), - updated_at=datetime(2025, 12, 1, 12, 0, 0), - ) - new_deal: Dict[str, str] = { - "id": str(deal_id), - "deal_id": "1", - "coordination_status": "not complete", - "pashub_link": "www.google.co.uk", - "created_at": datetime(2025, 12, 1, 12, 0, 0).isoformat(), - "updated_at": datetime(2025, 12, 1, 12, 30, 0).isoformat(), - } - - expected_output = False - - # act - actual_output: bool = HubspotDealDiffer.check_for_pashub_trigger( - new_deal=new_deal, old_deal=old_deal - ) - - # assert - assert actual_output == expected_output - - def test_pashub_trigger__design_completed_and_pashub_link_set__returns_true() -> None: # arrange deal_id = uuid.uuid4() @@ -261,39 +227,6 @@ def test_pashub_trigger__design_completed_and_pashub_link_not_set__returns_false assert actual_output == expected_output -def test_pashub_trigger__design_not_completed_and_pashub_link_set__returns_false() -> ( - None -): - # arrange - deal_id = uuid.uuid4() - - old_deal = HubspotDealData( - id=deal_id, - deal_id="1", - pashub_link="www.google.co.uk", - created_at=datetime(2025, 12, 1, 12, 0, 0), - updated_at=datetime(2025, 12, 1, 12, 0, 0), - ) - new_deal: Dict[str, str] = { - "id": str(deal_id), - "deal_id": "1", - "pashub_link": "www.google.co.uk", - "design_status": "not uploaded", - "created_at": datetime(2025, 12, 1, 12, 0, 0).isoformat(), - "updated_at": datetime(2025, 12, 1, 12, 30, 0).isoformat(), - } - - expected_output = False - - # act - actual_output: bool = HubspotDealDiffer.check_for_pashub_trigger( - new_deal=new_deal, old_deal=old_deal - ) - - # assert - assert actual_output == expected_output - - def test_pashub_trigger__lodgement_completed_and_pashub_link_set__returns_true() -> ( None ): @@ -391,7 +324,7 @@ def test_pashub_trigger__lodgement_completed_and_pashub_link_not_set__returns_fa assert actual_output == expected_output -def test_pashub_trigger__lodgement_not_completed_and_pashub_link_set__returns_false() -> ( +def test_pashub_trigger__coordination_design_lodgement_not_completed_and_pashub_link_set__returns_false() -> ( None ): # arrange @@ -408,7 +341,9 @@ def test_pashub_trigger__lodgement_not_completed_and_pashub_link_set__returns_fa "id": str(deal_id), "deal_id": "1", "pashub_link": "www.google.co.uk", - "lodgement_status": "lodgement not complete", + "coordination_status": "not uploaded", + "design_status": "not uploaded", + "lodgement_status": "not uploaded", "created_at": datetime(2025, 12, 1, 12, 0, 0).isoformat(), "updated_at": datetime(2025, 12, 1, 12, 30, 0).isoformat(), } From dd0522713e85456b0b93fbb7a1d114795b1cb56d Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Thu, 9 Apr 2026 08:30:28 +0000 Subject: [PATCH 14/32] refactor upsert_deal by introducing helper methods --- etl/hubspot/hubspotDataTodB.py | 383 +++++++++++++++++---------------- 1 file changed, 198 insertions(+), 185 deletions(-) diff --git a/etl/hubspot/hubspotDataTodB.py b/etl/hubspot/hubspotDataTodB.py index e7008618..f0beeee8 100644 --- a/etl/hubspot/hubspotDataTodB.py +++ b/etl/hubspot/hubspotDataTodB.py @@ -366,109 +366,9 @@ class HubspotDataToDb: if existing: print(f"🔄 Updating existing deal (deal_id={deal_id})") + self._update_existing_deal(existing, deal_data, listing, company) - for attr, value in { - "dealname": deal_data.get("dealname"), - "dealstage": deal_data.get("dealstage"), - "listing_id": listing.get("listing_id", None) if listing else None, - "landlord_property_id": ( - listing.get("owner_property_id", None) if listing else None - ), - "uprn": listing.get("national_uprn", None) if listing else None, - "outcome": deal_data.get("outcome"), - "outcome_notes": deal_data.get("outcome_notes"), - "project_code": deal_data.get("project_code"), - "company_id": company, - "major_condition_issue_description": deal_data.get( - "major_condition_issue_description" - ), - "major_condition_issue_photos": deal_data.get( - "major_condition_issue_photos" - ), - "coordination_status": deal_data.get( - "coordination_status__stage_1_" - ), - "design_status": deal_data.get("retrofit_design_status"), - "pashub_link": deal_data.get("pashub_link"), - "sharepoint_link": deal_data.get("sharepoint_link"), - "dampmould_growth": deal_data.get("dampmould_growth"), - "damp_mould_and_repairs_comments": deal_data.get( - "damp_mould_and_repairs_comments" - ), - "pre_sap": deal_data.get("pre_sap"), - "coordinator": deal_data.get("coordinator"), - "mtp_completion_date": self._parse_hs_date( - deal_data.get("mtp_completion_date") - ), - "mtp_re_model_completion_date": self._parse_hs_date( - deal_data.get("mtp_re_model_completion_date") - ), - "ioe_v3_completion_date": self._parse_hs_date( - deal_data.get("ioe_v3_completion_date") - ), - "proposed_measures": deal_data.get("proposed_measures"), - "approved_package": deal_data.get("approved_package"), - "designer": deal_data.get("designer"), - "design_completion_date": self._parse_hs_date( - deal_data.get("design_completion_date") - ), - "actual_measures_installed": deal_data.get( - "actual_measures_installed" - ), - "installer": deal_data.get("installer"), - "installer_handover": deal_data.get("installer_handover"), - "lodgement_status": deal_data.get("lodgement_status"), - "measures_lodgement_date": self._parse_hs_date( - deal_data.get("measures_lodgement_date") - ), - "lodgement_date": self._parse_hs_date( - deal_data.get("lodgement_date") - ), - "expected_commencement_date": self._parse_hs_date( - deal_data.get("expected_commencement_date") - ), - "surveyor": deal_data.get("surveyor"), - "confirmed_survey_date": self._parse_hs_date( - deal_data.get("confirmed_survey_date") - ), - "confirmed_survey_time": deal_data.get("confirmed_survey_time"), - "surveyed_date": self._parse_hs_date( - deal_data.get("surveyed_date") - ), - "design_type": deal_data.get("design_type"), - }.items(): - setattr(existing, attr, value or getattr(existing, attr)) - - # Upload if photo exists but S3 link missing - if ( - existing.major_condition_issue_photos - and not existing.major_condition_issue_evidence_s3_url - ): - # Fetch fresh URL from HubSpot instead of using potentially expired stored URL - fresh_deal = hubspot_client.from_deal_id_get_info(existing.deal_id) - photo_url = fresh_deal.get("major_condition_issue_photos") - - if photo_url: - try: - local_file = hubspot_client.download_file_from_url( - photo_url - ) - s3_url = self.s3.upload_file( - local_file, - "retrofit-data-dev", - prefix="hubspot/awaabs_law_evidence/", - ) - existing.major_condition_issue_evidence_s3_url = s3_url - except Exception as e: - print( - f"⚠️ Failed to download photo for deal_id {existing.deal_id}: {e}" - ) - # Continue without the file — don't crash the update - finally: - if "local_file" in locals() and os.path.exists(local_file): - os.remove(local_file) - else: - print(f"⚠️ Photo URL missing for deal_id {existing.deal_id}") + self._handle_existing_photo_upload(existing, hubspot_client) session.add(existing) session.commit() @@ -477,94 +377,207 @@ class HubspotDataToDb: else: print(f"🆕 Inserting new deal (deal_id={deal_id})") - new_record = HubspotDealData( - deal_id=deal_id, - dealname=deal_data.get("dealname"), - dealstage=deal_data.get("dealstage"), - listing_id=listing.get("listing_id", None) if listing else None, - landlord_property_id=( - listing.get("owner_property_id") if listing else None - ), - uprn=listing.get("national_uprn") if listing else None, - outcome=deal_data.get("outcome"), - outcome_notes=deal_data.get("outcome_notes"), - project_code=deal_data.get("project_code"), - company_id=company, - major_condition_issue_description=deal_data.get( - "major_condition_issue_description" - ), - major_condition_issue_photos=deal_data.get( - "major_condition_issue_photos" - ), - coordination_status=deal_data.get("coordination_status__stage_1_"), - design_status=deal_data.get("retrofit_design_status"), - pashub_link=deal_data.get("pashub_link"), - sharepoint_link=deal_data.get("sharepoint_link"), - dampmould_growth=deal_data.get("dampmould_growth"), - damp_mould_and_repairs_comments=deal_data.get( - "damp_mould_and_repairs_comments" - ), - pre_sap=deal_data.get("pre_sap"), - coordinator=deal_data.get("coordinator"), - mtp_completion_date=self._parse_hs_date( - deal_data.get("mtp_completion_date") - ), - mtp_re_model_completion_date=self._parse_hs_date( - deal_data.get("mtp_re_model_completion_date") - ), - ioe_v3_completion_date=self._parse_hs_date( - deal_data.get("ioe_v3_completion_date") - ), - proposed_measures=deal_data.get("proposed_measures"), - approved_package=deal_data.get("approved_package"), - designer=deal_data.get("designer"), - design_completion_date=self._parse_hs_date( - deal_data.get("design_completion_date") - ), - actual_measures_installed=deal_data.get( - "actual_measures_installed" - ), - installer=deal_data.get("installer"), - installer_handover=deal_data.get("installer_handover"), - lodgement_status=deal_data.get("lodgement_status"), - measures_lodgement_date=self._parse_hs_date( - deal_data.get("measures_lodgement_date") - ), - lodgement_date=self._parse_hs_date(deal_data.get("lodgement_date")), - expected_commencement_date=self._parse_hs_date( - deal_data.get("expected_commencement_date") - ), - surveyor=deal_data.get("surveyor"), - confirmed_survey_date=self._parse_hs_date( - deal_data.get("confirmed_survey_date") - ), - confirmed_survey_time=deal_data.get("confirmed_survey_time"), - surveyed_date=self._parse_hs_date(deal_data.get("surveyed_date")), - design_type=deal_data.get("design_type"), + new_record: HubspotDealData = self._build_new_deal( + deal_id, deal_data, listing, company ) # Handle upload at insert time - if new_record.major_condition_issue_photos: - try: - local_file = hubspot_client.download_file_from_url( - new_record.major_condition_issue_photos - ) - s3_url = self.s3.upload_file( - local_file, - "retrofit-data-dev", - prefix="hubspot/awaabs_law_evidence/", - ) - new_record.major_condition_issue_evidence_s3_url = s3_url - except Exception as e: - print( - f"⚠️ Failed to download photo for deal_id {new_record.deal_id}: {e}" - ) - # Continue without the file — don't crash the insert - finally: - if "local_file" in locals() and os.path.exists(local_file): - os.remove(local_file) + self._handle_new_photo_upload(new_record, hubspot_client) session.add(new_record) session.commit() session.refresh(new_record) return new_record + + def _update_existing_deal( + self, + existing: HubspotDealData, + deal_data: Dict[str, str], + listing: Optional[dict[str, str]], + company: Optional[str], + ): + for attr, value in { + "dealname": deal_data.get("dealname"), + "dealstage": deal_data.get("dealstage"), + "listing_id": listing.get("listing_id", None) if listing else None, + "landlord_property_id": ( + listing.get("owner_property_id", None) if listing else None + ), + "uprn": listing.get("national_uprn", None) if listing else None, + "outcome": deal_data.get("outcome"), + "outcome_notes": deal_data.get("outcome_notes"), + "project_code": deal_data.get("project_code"), + "company_id": company, + "major_condition_issue_description": deal_data.get( + "major_condition_issue_description" + ), + "major_condition_issue_photos": deal_data.get( + "major_condition_issue_photos" + ), + "coordination_status": deal_data.get("coordination_status__stage_1_"), + "design_status": deal_data.get("retrofit_design_status"), + "pashub_link": deal_data.get("pashub_link"), + "sharepoint_link": deal_data.get("sharepoint_link"), + "dampmould_growth": deal_data.get("dampmould_growth"), + "damp_mould_and_repairs_comments": deal_data.get( + "damp_mould_and_repairs_comments" + ), + "pre_sap": deal_data.get("pre_sap"), + "coordinator": deal_data.get("coordinator"), + "mtp_completion_date": self._parse_hs_date( + deal_data.get("mtp_completion_date") + ), + "mtp_re_model_completion_date": self._parse_hs_date( + deal_data.get("mtp_re_model_completion_date") + ), + "ioe_v3_completion_date": self._parse_hs_date( + deal_data.get("ioe_v3_completion_date") + ), + "proposed_measures": deal_data.get("proposed_measures"), + "approved_package": deal_data.get("approved_package"), + "designer": deal_data.get("designer"), + "design_completion_date": self._parse_hs_date( + deal_data.get("design_completion_date") + ), + "actual_measures_installed": deal_data.get("actual_measures_installed"), + "installer": deal_data.get("installer"), + "installer_handover": deal_data.get("installer_handover"), + "lodgement_status": deal_data.get("lodgement_status"), + "measures_lodgement_date": self._parse_hs_date( + deal_data.get("measures_lodgement_date") + ), + "lodgement_date": self._parse_hs_date(deal_data.get("lodgement_date")), + "expected_commencement_date": self._parse_hs_date( + deal_data.get("expected_commencement_date") + ), + "surveyor": deal_data.get("surveyor"), + "confirmed_survey_date": self._parse_hs_date( + deal_data.get("confirmed_survey_date") + ), + "confirmed_survey_time": deal_data.get("confirmed_survey_time"), + "surveyed_date": self._parse_hs_date(deal_data.get("surveyed_date")), + "design_type": deal_data.get("design_type"), + }.items(): + setattr(existing, attr, value or getattr(existing, attr)) + + def _build_new_deal( + self, + deal_id: str, + deal_data: Dict[str, str], + listing: Optional[dict[str, str]], + company: Optional[str], + ) -> HubspotDealData: + return HubspotDealData( + deal_id=deal_id, + dealname=deal_data.get("dealname"), + dealstage=deal_data.get("dealstage"), + listing_id=listing.get("listing_id") if listing else None, + landlord_property_id=( + listing.get("owner_property_id") if listing else None + ), + uprn=listing.get("national_uprn") if listing else None, + outcome=deal_data.get("outcome"), + outcome_notes=deal_data.get("outcome_notes"), + project_code=deal_data.get("project_code"), + company_id=company, + major_condition_issue_description=deal_data.get( + "major_condition_issue_description" + ), + major_condition_issue_photos=deal_data.get("major_condition_issue_photos"), + coordination_status=deal_data.get("coordination_status__stage_1_"), + design_status=deal_data.get("retrofit_design_status"), + pashub_link=deal_data.get("pashub_link"), + sharepoint_link=deal_data.get("sharepoint_link"), + dampmould_growth=deal_data.get("dampmould_growth"), + damp_mould_and_repairs_comments=deal_data.get( + "damp_mould_and_repairs_comments" + ), + pre_sap=deal_data.get("pre_sap"), + coordinator=deal_data.get("coordinator"), + mtp_completion_date=self._parse_hs_date( + deal_data.get("mtp_completion_date") + ), + mtp_re_model_completion_date=self._parse_hs_date( + deal_data.get("mtp_re_model_completion_date") + ), + ioe_v3_completion_date=self._parse_hs_date( + deal_data.get("ioe_v3_completion_date") + ), + proposed_measures=deal_data.get("proposed_measures"), + approved_package=deal_data.get("approved_package"), + designer=deal_data.get("designer"), + design_completion_date=self._parse_hs_date( + deal_data.get("design_completion_date") + ), + actual_measures_installed=deal_data.get("actual_measures_installed"), + installer=deal_data.get("installer"), + installer_handover=deal_data.get("installer_handover"), + lodgement_status=deal_data.get("lodgement_status"), + measures_lodgement_date=self._parse_hs_date( + deal_data.get("measures_lodgement_date") + ), + lodgement_date=self._parse_hs_date(deal_data.get("lodgement_date")), + expected_commencement_date=self._parse_hs_date( + deal_data.get("expected_commencement_date") + ), + surveyor=deal_data.get("surveyor"), + confirmed_survey_date=self._parse_hs_date( + deal_data.get("confirmed_survey_date") + ), + confirmed_survey_time=deal_data.get("confirmed_survey_time"), + surveyed_date=self._parse_hs_date(deal_data.get("surveyed_date")), + design_type=deal_data.get("design_type"), + ) + + def _handle_existing_photo_upload( + self, + existing: HubspotDealData, + hubspot_client: HubspotClient, + ): + if ( + existing.major_condition_issue_photos + and not existing.major_condition_issue_evidence_s3_url + ): + fresh_deal = hubspot_client.from_deal_id_get_info(existing.deal_id) + photo_url = fresh_deal.get("major_condition_issue_photos") + + if not photo_url: + print(f"⚠️ Photo URL missing for deal_id {existing.deal_id}") + return + + self._upload_photo_to_s3(existing, photo_url, hubspot_client) + + def _handle_new_photo_upload( + self, + record: HubspotDealData, + hubspot_client: HubspotClient, + ): + if record.major_condition_issue_photos: + self._upload_photo_to_s3( + record, + record.major_condition_issue_photos, + hubspot_client, + ) + + def _upload_photo_to_s3( + self, + record: HubspotDealData, + photo_url: str, + hubspot_client: HubspotClient, + ): + try: + local_file = hubspot_client.download_file_from_url(photo_url) + + s3_url = self.s3.upload_file( + local_file, + "retrofit-data-dev", + prefix="hubspot/awaabs_law_evidence/", + ) + + record.major_condition_issue_evidence_s3_url = s3_url + + except Exception as e: + print(f"⚠️ Failed to upload photo for deal_id {record.deal_id}: {e}") + finally: + if "local_file" in locals() and os.path.exists(local_file): + os.remove(local_file) From 8ce76190442e5a7f8a9c1af038651521b7edb105 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Thu, 9 Apr 2026 09:14:20 +0000 Subject: [PATCH 15/32] refactor pashub trigger tests --- .../tests/test_hubspot_deal_differ.py | 470 ++++++++---------- etl/hubspot/hubspotDataTodB.py | 72 ++- 2 files changed, 233 insertions(+), 309 deletions(-) diff --git a/backend/hubspot_trigger_orchestrator/tests/test_hubspot_deal_differ.py b/backend/hubspot_trigger_orchestrator/tests/test_hubspot_deal_differ.py index ba6b80e4..75fa7927 100644 --- a/backend/hubspot_trigger_orchestrator/tests/test_hubspot_deal_differ.py +++ b/backend/hubspot_trigger_orchestrator/tests/test_hubspot_deal_differ.py @@ -1,359 +1,295 @@ from datetime import datetime -from typing import Dict +from typing import Any, Dict import uuid +import pytest + from backend.app.db.models.organisation import HubspotDealData from backend.hubspot_trigger_orchestrator.hubspot_deal_differ import HubspotDealDiffer -def test_pashub_trigger__outcome_note_added__returns_false() -> None: - # arrange - deal_id = uuid.uuid4() +BASE_TIME = datetime(2025, 12, 1, 12, 0, 0) - old_deal = HubspotDealData( - id=deal_id, + +def make_old_deal(**overrides: Any) -> HubspotDealData: + return HubspotDealData( + id=overrides.get("id", uuid.uuid4()), deal_id="1", - created_at=datetime(2025, 12, 1, 12, 0, 0), - updated_at=datetime(2025, 12, 1, 12, 0, 0), + created_at=BASE_TIME, + updated_at=BASE_TIME, + **{k: v for k, v in overrides.items() if k != "id"}, ) - new_deal: Dict[str, str] = { + + +def make_new_deal(deal_id: uuid.UUID, **overrides: Any) -> Dict[str, str]: + return { "id": str(deal_id), "deal_id": "1", - "outcome_notes": "test note", - "created_at": datetime(2025, 12, 1, 12, 0, 0).isoformat(), + "created_at": BASE_TIME.isoformat(), "updated_at": datetime(2025, 12, 1, 12, 30, 0).isoformat(), + **overrides, } - expected_output = False - # act - actual_output: bool = HubspotDealDiffer.check_for_pashub_trigger( - new_deal=new_deal, old_deal=old_deal +# ------------------------------------- +# Random change we aren't interested in +# ------------------------------------- + + +@pytest.mark.parametrize( + "new_overrides,expected", + [ + ({"outcome_notes": "test note"}, False), + ], +) +def test_pashub_trigger__outcome_note_added__returns_false( + new_overrides: Dict[str, str], + expected: bool, +) -> None: + deal_id = uuid.uuid4() + old_deal = make_old_deal(id=deal_id) + new_deal = make_new_deal(deal_id, **new_overrides) + + assert ( + HubspotDealDiffer.check_for_pashub_trigger( + new_deal=new_deal, + old_deal=old_deal, + ) + == expected ) - # assert - assert actual_output == expected_output + +# ------------------------- +# Pashub link changes +# ------------------------- -def test_pashub_trigger__pashub_link_changed__returns_true() -> None: - # arrange +@pytest.mark.parametrize( + "old_overrides,new_overrides,expected", + [ + ( + {"pashub_link": "www.google.co.uk"}, + {"pashub_link": "www.bbc.co.uk"}, + True, + ), + ], +) +def test_pashub_trigger__pashub_link_changed__returns_true( + old_overrides: Dict[str, str], + new_overrides: Dict[str, str], + expected: bool, +) -> None: + deal_id = uuid.uuid4() + old_deal = make_old_deal(id=deal_id, **old_overrides) + new_deal = make_new_deal(deal_id, **new_overrides) + + assert ( + HubspotDealDiffer.check_for_pashub_trigger( + new_deal=new_deal, + old_deal=old_deal, + ) + == expected + ) + + +# ------------------------- +# Coordination +# ------------------------- + + +@pytest.mark.parametrize( + "coordination_status,expected", + [ + ("v1 ioe/mtp complete", True), + ("v2 ioe/mtp complete", True), + ], +) +def test_pashub_trigger__coordination_completed_and_pashub_link_set__returns_true( + coordination_status: str, + expected: bool, +) -> None: deal_id = uuid.uuid4() - old_deal = HubspotDealData( + old_deal = make_old_deal( id=deal_id, - deal_id="1", - pashub_link="www.google.co.uk", - created_at=datetime(2025, 12, 1, 12, 0, 0), - updated_at=datetime(2025, 12, 1, 12, 0, 0), - ) - new_deal: Dict[str, str] = { - "id": str(deal_id), - "deal_id": "1", - "pashub_link": "www.bbc.co.uk", - "created_at": datetime(2025, 12, 1, 12, 0, 0).isoformat(), - "updated_at": datetime(2025, 12, 1, 12, 30, 0).isoformat(), - } - - expected_output = True - - # act - actual_output: bool = HubspotDealDiffer.check_for_pashub_trigger( - new_deal=new_deal, old_deal=old_deal - ) - - # assert - assert actual_output == expected_output - - -def test_pashub_trigger__coordination_completed_and_pashub_link_set__returns_true() -> ( - None -): - # arrange - deal_id = uuid.uuid4() - - old_deal = HubspotDealData( - id=deal_id, - deal_id="1", pashub_link="www.google.co.uk", coordination_status="random", - created_at=datetime(2025, 12, 1, 12, 0, 0), - updated_at=datetime(2025, 12, 1, 12, 0, 0), - ) - new_deal: Dict[str, str] = { - "id": str(deal_id), - "deal_id": "1", - "coordination_status": "v1 ioe/mtp complete", - "pashub_link": "www.google.co.uk", - "created_at": datetime(2025, 12, 1, 12, 0, 0).isoformat(), - "updated_at": datetime(2025, 12, 1, 12, 30, 0).isoformat(), - } - - expected_output = True - - # act - actual_output: bool = HubspotDealDiffer.check_for_pashub_trigger( - new_deal=new_deal, old_deal=old_deal ) - # assert - assert actual_output == expected_output - - -def test_pashub_trigger__coordination_completed_and_pashub_link_set__returns_true_2() -> ( - None -): - # arrange - deal_id = uuid.uuid4() - - old_deal = HubspotDealData( - id=deal_id, - deal_id="1", + new_deal = make_new_deal( + deal_id, pashub_link="www.google.co.uk", - coordination_status="random", - created_at=datetime(2025, 12, 1, 12, 0, 0), - updated_at=datetime(2025, 12, 1, 12, 0, 0), - ) - new_deal: Dict[str, str] = { - "id": str(deal_id), - "deal_id": "1", - "coordination_status": "v2 ioe/mtp complete", - "pashub_link": "www.google.co.uk", - "created_at": datetime(2025, 12, 1, 12, 0, 0).isoformat(), - "updated_at": datetime(2025, 12, 1, 12, 30, 0).isoformat(), - } - - expected_output = True - - # act - actual_output: bool = HubspotDealDiffer.check_for_pashub_trigger( - new_deal=new_deal, old_deal=old_deal + coordination_status=coordination_status, ) - # assert - assert actual_output == expected_output + assert ( + HubspotDealDiffer.check_for_pashub_trigger( + new_deal=new_deal, + old_deal=old_deal, + ) + == expected + ) def test_pashub_trigger__coordination_completed_and_pashub_link_not_set__returns_false() -> ( None ): - # arrange deal_id = uuid.uuid4() - old_deal = HubspotDealData( + old_deal = make_old_deal( id=deal_id, - deal_id="1", coordination_status="random", - created_at=datetime(2025, 12, 1, 12, 0, 0), - updated_at=datetime(2025, 12, 1, 12, 0, 0), - ) - new_deal: Dict[str, str] = { - "id": str(deal_id), - "deal_id": "1", - "coordination_status": "v2 ioe/mtp complete", - "created_at": datetime(2025, 12, 1, 12, 0, 0).isoformat(), - "updated_at": datetime(2025, 12, 1, 12, 30, 0).isoformat(), - } - - expected_output = False - - # act - actual_output: bool = HubspotDealDiffer.check_for_pashub_trigger( - new_deal=new_deal, old_deal=old_deal ) - # assert - assert actual_output == expected_output + new_deal = make_new_deal( + deal_id, + coordination_status="v2 ioe/mtp complete", + ) + + assert ( + HubspotDealDiffer.check_for_pashub_trigger( + new_deal=new_deal, + old_deal=old_deal, + ) + is False + ) + + +# ------------------------- +# Design +# ------------------------- def test_pashub_trigger__design_completed_and_pashub_link_set__returns_true() -> None: - # arrange deal_id = uuid.uuid4() - old_deal = HubspotDealData( + old_deal = make_old_deal( id=deal_id, - deal_id="1", pashub_link="www.google.co.uk", - created_at=datetime(2025, 12, 1, 12, 0, 0), - updated_at=datetime(2025, 12, 1, 12, 0, 0), - ) - new_deal: Dict[str, str] = { - "id": str(deal_id), - "deal_id": "1", - "pashub_link": "www.google.co.uk", - "design_status": "uploaded", - "created_at": datetime(2025, 12, 1, 12, 0, 0).isoformat(), - "updated_at": datetime(2025, 12, 1, 12, 30, 0).isoformat(), - } - - expected_output = True - - # act - actual_output: bool = HubspotDealDiffer.check_for_pashub_trigger( - new_deal=new_deal, old_deal=old_deal ) - # assert - assert actual_output == expected_output + new_deal = make_new_deal( + deal_id, + pashub_link="www.google.co.uk", + design_status="uploaded", + ) + + assert ( + HubspotDealDiffer.check_for_pashub_trigger( + new_deal=new_deal, + old_deal=old_deal, + ) + is True + ) def test_pashub_trigger__design_completed_and_pashub_link_not_set__returns_false() -> ( None ): - # arrange deal_id = uuid.uuid4() - old_deal = HubspotDealData( - id=deal_id, - deal_id="1", - created_at=datetime(2025, 12, 1, 12, 0, 0), - updated_at=datetime(2025, 12, 1, 12, 0, 0), - ) - new_deal: Dict[str, str] = { - "id": str(deal_id), - "deal_id": "1", - "design_status": "uploaded", - "created_at": datetime(2025, 12, 1, 12, 0, 0).isoformat(), - "updated_at": datetime(2025, 12, 1, 12, 30, 0).isoformat(), - } + old_deal = make_old_deal(id=deal_id) - expected_output = False - - # act - actual_output: bool = HubspotDealDiffer.check_for_pashub_trigger( - new_deal=new_deal, old_deal=old_deal + new_deal = make_new_deal( + deal_id, + design_status="uploaded", ) - # assert - assert actual_output == expected_output + assert ( + HubspotDealDiffer.check_for_pashub_trigger( + new_deal=new_deal, + old_deal=old_deal, + ) + is False + ) -def test_pashub_trigger__lodgement_completed_and_pashub_link_set__returns_true() -> ( - None -): - # arrange +# ------------------------- +# Lodgement +# ------------------------- + + +@pytest.mark.parametrize( + "lodgement_status,expected", + [ + ("lodgement complete", True), + ("measures lodged", True), + ], +) +def test_pashub_trigger__lodgement_completed_and_pashub_link_set__returns_true( + lodgement_status: str, + expected: bool, +) -> None: deal_id = uuid.uuid4() - old_deal = HubspotDealData( + old_deal = make_old_deal( id=deal_id, - deal_id="1", pashub_link="www.google.co.uk", - created_at=datetime(2025, 12, 1, 12, 0, 0), - updated_at=datetime(2025, 12, 1, 12, 0, 0), - ) - new_deal: Dict[str, str] = { - "id": str(deal_id), - "deal_id": "1", - "pashub_link": "www.google.co.uk", - "lodgement_status": "lodgement complete", - "created_at": datetime(2025, 12, 1, 12, 0, 0).isoformat(), - "updated_at": datetime(2025, 12, 1, 12, 30, 0).isoformat(), - } - - expected_output = True - - # act - actual_output: bool = HubspotDealDiffer.check_for_pashub_trigger( - new_deal=new_deal, old_deal=old_deal ) - # assert - assert actual_output == expected_output - - -def test_pashub_trigger__lodgement_completed_and_pashub_link_set__returns_true_2() -> ( - None -): - # arrange - deal_id = uuid.uuid4() - - old_deal = HubspotDealData( - id=deal_id, - deal_id="1", + new_deal = make_new_deal( + deal_id, pashub_link="www.google.co.uk", - created_at=datetime(2025, 12, 1, 12, 0, 0), - updated_at=datetime(2025, 12, 1, 12, 0, 0), - ) - new_deal: Dict[str, str] = { - "id": str(deal_id), - "deal_id": "1", - "pashub_link": "www.google.co.uk", - "lodgement_status": "measures lodged", - "created_at": datetime(2025, 12, 1, 12, 0, 0).isoformat(), - "updated_at": datetime(2025, 12, 1, 12, 30, 0).isoformat(), - } - - expected_output = True - - # act - actual_output: bool = HubspotDealDiffer.check_for_pashub_trigger( - new_deal=new_deal, old_deal=old_deal + lodgement_status=lodgement_status, ) - # assert - assert actual_output == expected_output + assert ( + HubspotDealDiffer.check_for_pashub_trigger( + new_deal=new_deal, + old_deal=old_deal, + ) + == expected + ) def test_pashub_trigger__lodgement_completed_and_pashub_link_not_set__returns_false() -> ( None ): - # arrange deal_id = uuid.uuid4() - old_deal = HubspotDealData( - id=deal_id, - deal_id="1", - created_at=datetime(2025, 12, 1, 12, 0, 0), - updated_at=datetime(2025, 12, 1, 12, 0, 0), - ) - new_deal: Dict[str, str] = { - "id": str(deal_id), - "deal_id": "1", - "design_status": "lodgement complete", - "created_at": datetime(2025, 12, 1, 12, 0, 0).isoformat(), - "updated_at": datetime(2025, 12, 1, 12, 30, 0).isoformat(), - } + old_deal = make_old_deal(id=deal_id) - expected_output = False - - # act - actual_output: bool = HubspotDealDiffer.check_for_pashub_trigger( - new_deal=new_deal, old_deal=old_deal + new_deal = make_new_deal( + deal_id, + design_status="lodgement complete", ) - # assert - assert actual_output == expected_output + assert ( + HubspotDealDiffer.check_for_pashub_trigger( + new_deal=new_deal, + old_deal=old_deal, + ) + is False + ) + + +# ------------------------- +# Negative case +# ------------------------- def test_pashub_trigger__coordination_design_lodgement_not_completed_and_pashub_link_set__returns_false() -> ( None ): - # arrange deal_id = uuid.uuid4() - old_deal = HubspotDealData( + old_deal = make_old_deal( id=deal_id, - deal_id="1", pashub_link="www.google.co.uk", - created_at=datetime(2025, 12, 1, 12, 0, 0), - updated_at=datetime(2025, 12, 1, 12, 0, 0), - ) - new_deal: Dict[str, str] = { - "id": str(deal_id), - "deal_id": "1", - "pashub_link": "www.google.co.uk", - "coordination_status": "not uploaded", - "design_status": "not uploaded", - "lodgement_status": "not uploaded", - "created_at": datetime(2025, 12, 1, 12, 0, 0).isoformat(), - "updated_at": datetime(2025, 12, 1, 12, 30, 0).isoformat(), - } - - expected_output = False - - # act - actual_output: bool = HubspotDealDiffer.check_for_pashub_trigger( - new_deal=new_deal, old_deal=old_deal ) - # assert - assert actual_output == expected_output + new_deal = make_new_deal( + deal_id, + pashub_link="www.google.co.uk", + coordination_status="not uploaded", + design_status="not uploaded", + lodgement_status="not uploaded", + ) + + assert ( + HubspotDealDiffer.check_for_pashub_trigger( + new_deal=new_deal, + old_deal=old_deal, + ) + is False + ) diff --git a/etl/hubspot/hubspotDataTodB.py b/etl/hubspot/hubspotDataTodB.py index f0beeee8..06cc3be9 100644 --- a/etl/hubspot/hubspotDataTodB.py +++ b/etl/hubspot/hubspotDataTodB.py @@ -291,54 +291,33 @@ class HubspotDataToDb: return False # Handle photo upload if it exists but S3 URL is missing - if ( - deal_in_db.major_condition_issue_photos - and not deal_in_db.major_condition_issue_evidence_s3_url - ): + if self._needs_photo_upload(deal_in_db): print( f"🖼️ Found photo for deal_id {deal_in_db.deal_id} — uploading to S3..." ) photo_url = hs_deal.get("major_condition_issue_photos") + if photo_url: - try: - # Download from HubSpot using fresh URL from hs_deal (not stale DB URL) - local_file = hubspot_client.download_file_from_url(photo_url) + self._upload_photo_to_s3( + deal_in_db, + photo_url, + hubspot_client, + verify=True, # 👈 key difference + ) - # Upload to S3 - bucket = "retrofit-data-dev" - s3_url = self.s3.upload_file( - local_file, bucket, prefix="hubspot/awaabs_law_evidence/" + # persist change + with db_read_session() as session: + db_record = session.get(HubspotDealData, deal_in_db.id) + db_record.major_condition_issue_evidence_s3_url = ( + deal_in_db.major_condition_issue_evidence_s3_url ) + session.add(db_record) + session.commit() - # Download again to verify integrity - downloaded = self.s3.download_from_url(s3_url) - if self._sha256(local_file) == self._sha256(downloaded): - print("✅ SHA256 match verified — upload successful.") - else: - print("❌ SHA256 mismatch — integrity check failed.") - raise ValueError("File integrity check failed after S3 upload.") - - # Update DB record with S3 URL - with db_read_session() as session: - db_record = session.get(HubspotDealData, deal_in_db.id) - db_record.major_condition_issue_evidence_s3_url = s3_url - session.add(db_record) - session.commit() - print( - f"✅ Updated DB with S3 URL for deal_id={deal_in_db.deal_id}" - ) - return False - except Exception as e: - print( - f"⚠️ Failed to download/upload photo for deal_id {deal_in_db.deal_id}: {e}" - ) - # Continue without the file — don't crash the entire update - finally: - if "local_file" in locals() and os.path.exists(local_file): - os.remove(local_file) + return False else: - print(f"⚠️ Photo URL missing for deal_id {deal_in_db.deal_id}") + print(f"⚠️ Photo URL missing for deal_id {deal_in_db.deal_id}") else: print(f"✅ No update or upload required for deal_id {deal_in_db.deal_id}.") @@ -534,10 +513,7 @@ class HubspotDataToDb: existing: HubspotDealData, hubspot_client: HubspotClient, ): - if ( - existing.major_condition_issue_photos - and not existing.major_condition_issue_evidence_s3_url - ): + if self._needs_photo_upload(existing): fresh_deal = hubspot_client.from_deal_id_get_info(existing.deal_id) photo_url = fresh_deal.get("major_condition_issue_photos") @@ -564,6 +540,7 @@ class HubspotDataToDb: record: HubspotDealData, photo_url: str, hubspot_client: HubspotClient, + verify: bool = False, ): try: local_file = hubspot_client.download_file_from_url(photo_url) @@ -574,6 +551,11 @@ class HubspotDataToDb: prefix="hubspot/awaabs_law_evidence/", ) + if verify: + downloaded = self.s3.download_from_url(s3_url) + if self._sha256(local_file) != self._sha256(downloaded): + raise ValueError("File integrity check failed after S3 upload.") + record.major_condition_issue_evidence_s3_url = s3_url except Exception as e: @@ -581,3 +563,9 @@ class HubspotDataToDb: finally: if "local_file" in locals() and os.path.exists(local_file): os.remove(local_file) + + def _needs_photo_upload(self, deal: HubspotDealData) -> bool: + return bool( + deal.major_condition_issue_photos + and not deal.major_condition_issue_evidence_s3_url + ) From c439d5f55794f8b1c8f9041d568f28d44f43d0fd Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Thu, 9 Apr 2026 09:41:43 +0000 Subject: [PATCH 16/32] move everything to etl/hubspot/ --- .../hubspot_trigger_orchestrator/handler.py | 61 -------------- .../hubspot}/hubspot_deal_differ.py | 0 ...ot_trigger_orchestrator_trigger_request.py | 0 etl/hubspot/scripts/scraper/main.py | 80 +++++++++++++------ .../tests/test_hubspot_deal_differ.py | 2 +- 5 files changed, 57 insertions(+), 86 deletions(-) delete mode 100644 backend/hubspot_trigger_orchestrator/handler.py rename {backend/hubspot_trigger_orchestrator => etl/hubspot}/hubspot_deal_differ.py (100%) rename {backend/hubspot_trigger_orchestrator => etl/hubspot}/hubspot_trigger_orchestrator_trigger_request.py (100%) rename {backend/hubspot_trigger_orchestrator => etl/hubspot}/tests/test_hubspot_deal_differ.py (98%) diff --git a/backend/hubspot_trigger_orchestrator/handler.py b/backend/hubspot_trigger_orchestrator/handler.py deleted file mode 100644 index 38724812..00000000 --- a/backend/hubspot_trigger_orchestrator/handler.py +++ /dev/null @@ -1,61 +0,0 @@ -import json -from typing import Any, Dict, Mapping, Optional - -from backend.app.db.models.organisation import HubspotDealData -from backend.hubspot_trigger_orchestrator.hubspot_deal_differ import HubspotDealDiffer -from backend.hubspot_trigger_orchestrator.hubspot_trigger_orchestrator_trigger_request import ( - HubspotTriggerOrchestratorTriggerRequest, -) -from backend.utils.subtasks import task_handler -from etl.hubspot.hubspotClient import HubspotClient -from etl.hubspot.hubspotDataTodB import HubspotDataToDb -from utils.logger import setup_logger - -logger = setup_logger() - - -@task_handler() -def handler(event: Mapping[str, Any], context: Any) -> None: - - db_client = HubspotDataToDb() - hubspot_client = HubspotClient() - - for record in event.get("Records", []): - body_dict = json.loads(record["body"]) - - logger.debug("Validating request body") - payload = HubspotTriggerOrchestratorTriggerRequest.model_validate(body_dict) - logger.debug("Successfully validated request body") - - hubspot_deal_id: str = payload.hubspot_deal_id - - db_deal: Optional[HubspotDealData] = db_client.find_deal_with_deal_id( - hubspot_deal_id - ) - if not db_deal: - # new hubspot deal, no diffing to do - # TODO: trigger hubspot to db ETL - return - - hubspot_deal: Dict[str, str] - company: Optional[str] - listing: Optional[dict[str, str]] - - hubspot_deal, company, listing = ( - hubspot_client.get_deal_and_company_and_listing(hubspot_deal_id) - ) - - if HubspotDealDiffer.check_for_pashub_trigger( - new_deal=hubspot_deal, old_deal=db_deal - ): - # TODO: trigger pashub file fetcher - return - - if HubspotDealDiffer.check_for_db_update_trigger( - new_deal=hubspot_deal, - new_company=company, - new_listing=listing, - old_deal=db_deal, - ): - # TODO: trigger db upsert - return diff --git a/backend/hubspot_trigger_orchestrator/hubspot_deal_differ.py b/etl/hubspot/hubspot_deal_differ.py similarity index 100% rename from backend/hubspot_trigger_orchestrator/hubspot_deal_differ.py rename to etl/hubspot/hubspot_deal_differ.py diff --git a/backend/hubspot_trigger_orchestrator/hubspot_trigger_orchestrator_trigger_request.py b/etl/hubspot/hubspot_trigger_orchestrator_trigger_request.py similarity index 100% rename from backend/hubspot_trigger_orchestrator/hubspot_trigger_orchestrator_trigger_request.py rename to etl/hubspot/hubspot_trigger_orchestrator_trigger_request.py diff --git a/etl/hubspot/scripts/scraper/main.py b/etl/hubspot/scripts/scraper/main.py index d8d4a357..8c4af1a7 100644 --- a/etl/hubspot/scripts/scraper/main.py +++ b/etl/hubspot/scripts/scraper/main.py @@ -1,39 +1,71 @@ from backend.app.db.models.organisation import HubspotDealData from etl.hubspot.hubspotClient import HubspotClient -from etl.hubspot.hubspotDataTodB import CompanyData, HubspotDataToDb + +# from etl.hubspot.hubspotDataTodB import CompanyData, HubspotDataToDb +from etl.hubspot.hubspotDataTodB import HubspotDataToDb from backend.utils.subtasks import task_handler from typing import Any, Dict, Optional +from etl.hubspot.hubspot_deal_differ import HubspotDealDiffer +from etl.hubspot.hubspot_trigger_orchestrator_trigger_request import ( + HubspotTriggerOrchestratorTriggerRequest, +) + @task_handler() def handler(body: dict[str, Any], context: Any) -> None: - hubspot_deal_id = body.get("hubspot_deal_id", "") - - if hubspot_deal_id == "": - raise RuntimeError( - "Missing Hubspot Deal ID in SQS body request, 'hubspot_deal_id'" - ) - hubspot_deal_id = "327170793707" - - hubspot_client = HubspotClient() db_client = HubspotDataToDb() + hubspot_client = HubspotClient() + + payload = HubspotTriggerOrchestratorTriggerRequest.model_validate(body) + hubspot_deal_id: str = payload.hubspot_deal_id + db_deal: Optional[HubspotDealData] = db_client.find_deal_with_deal_id( hubspot_deal_id ) - if db_deal: - db_client.update_deal_with_checks(db_deal, hubspot_client) - else: - hubspot_deal: Dict[str, str] - company: Optional[str] - listing: Optional[dict[str, str]] - hubspot_deal, company, listing = ( - hubspot_client.get_deal_and_company_and_listing(hubspot_deal_id) - ) + if not db_deal: + # New hubspot deal, no diffing to do + # TODO: Trigger hubspot to db ETL + return - if company: - company_data: CompanyData = hubspot_client.get_company_information(company) - db_client: HubspotDataToDb = HubspotDataToDb() - db_client.upsert_company(company_data) + hubspot_deal: Dict[str, str] + company: Optional[str] + listing: Optional[dict[str, str]] - db_client.upsert_deal(hubspot_deal, company, listing, hubspot_client) + hubspot_deal, company, listing = hubspot_client.get_deal_and_company_and_listing( + hubspot_deal_id + ) + + if HubspotDealDiffer.check_for_pashub_trigger( + new_deal=hubspot_deal, old_deal=db_deal + ): + # TODO: trigger pashub file fetcher + return + + if HubspotDealDiffer.check_for_db_update_trigger( + new_deal=hubspot_deal, + new_company=company, + new_listing=listing, + old_deal=db_deal, + ): + # TODO: trigger db upsert + return + + # if db_deal: + # db_client.update_deal_with_checks(db_deal, hubspot_client) + # else: + # hubspot_deal: Dict[str, str] + # company: Optional[str] + # listing: Optional[dict[str, str]] + + # hubspot_deal, company, listing = ( + # hubspot_client.get_deal_and_company_and_listing(hubspot_deal_id) + # ) + + # if company: + # company_data: CompanyData = hubspot_client.get_company_information(company) + # db_client: HubspotDataToDb = HubspotDataToDb() + # db_client.upsert_company(company_data) + + # db_client.upsert_deal(hubspot_deal, company, listing, hubspot_client) diff --git a/backend/hubspot_trigger_orchestrator/tests/test_hubspot_deal_differ.py b/etl/hubspot/tests/test_hubspot_deal_differ.py similarity index 98% rename from backend/hubspot_trigger_orchestrator/tests/test_hubspot_deal_differ.py rename to etl/hubspot/tests/test_hubspot_deal_differ.py index 75fa7927..12c5a288 100644 --- a/backend/hubspot_trigger_orchestrator/tests/test_hubspot_deal_differ.py +++ b/etl/hubspot/tests/test_hubspot_deal_differ.py @@ -5,7 +5,7 @@ import uuid import pytest from backend.app.db.models.organisation import HubspotDealData -from backend.hubspot_trigger_orchestrator.hubspot_deal_differ import HubspotDealDiffer +from etl.hubspot.hubspot_deal_differ import HubspotDealDiffer BASE_TIME = datetime(2025, 12, 1, 12, 0, 0) From 605652b30969156a445bc5aebf8cf083246aabc9 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Thu, 9 Apr 2026 09:56:58 +0000 Subject: [PATCH 17/32] =?UTF-8?q?diff=20checker=20for=20db=20load=20trigge?= =?UTF-8?q?r=20=F0=9F=9F=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- etl/hubspot/hubspotDataTodB.py | 8 +- etl/hubspot/tests/test_hubspot_deal_differ.py | 138 ++++++++++++++---- 2 files changed, 116 insertions(+), 30 deletions(-) diff --git a/etl/hubspot/hubspotDataTodB.py b/etl/hubspot/hubspotDataTodB.py index 06cc3be9..4f43f1f7 100644 --- a/etl/hubspot/hubspotDataTodB.py +++ b/etl/hubspot/hubspotDataTodB.py @@ -2,7 +2,7 @@ from backend.app.db.connection import db_read_session from backend.app.db.models.organisation import Organisation, HubspotDealData from sqlmodel import select from datetime import datetime, timezone -from typing import Dict, Optional +from typing import Dict, Optional, Tuple from etl.hubspot.company_data import CompanyData from etl.hubspot.hubspotClient import HubspotClient from etl.hubspot.s3_uploader import S3Uploader @@ -103,7 +103,7 @@ class HubspotDataToDb: Also handles major_condition_issue_photos file upload to S3 with integrity check. """ - def soft_assert(condition, message="Assertion Failed"): + def soft_assert(condition: bool, message: str = "Assertion Failed"): if not condition: print(f"⚠️ Soft Assert Failed: {message}") return False @@ -111,6 +111,10 @@ class HubspotDataToDb: print(f"🔍 Checking if deal needs updating (deal_id={deal_in_db.deal_id})") + hs_deal: Dict[str, str] + hs_company_id: Optional[str] + hs_listing: Optional[Dict[str, str]] + hs_deal, hs_company_id, hs_listing = ( hubspot_client.get_deal_and_company_and_listing(deal_in_db.deal_id) ) diff --git a/etl/hubspot/tests/test_hubspot_deal_differ.py b/etl/hubspot/tests/test_hubspot_deal_differ.py index 12c5a288..876fcab9 100644 --- a/etl/hubspot/tests/test_hubspot_deal_differ.py +++ b/etl/hubspot/tests/test_hubspot_deal_differ.py @@ -31,9 +31,9 @@ def make_new_deal(deal_id: uuid.UUID, **overrides: Any) -> Dict[str, str]: } -# ------------------------------------- -# Random change we aren't interested in -# ------------------------------------- +# ==================== +# PASHUB TRIGGER TESTS +# ==================== @pytest.mark.parametrize( @@ -59,11 +59,6 @@ def test_pashub_trigger__outcome_note_added__returns_false( ) -# ------------------------- -# Pashub link changes -# ------------------------- - - @pytest.mark.parametrize( "old_overrides,new_overrides,expected", [ @@ -92,11 +87,6 @@ def test_pashub_trigger__pashub_link_changed__returns_true( ) -# ------------------------- -# Coordination -# ------------------------- - - @pytest.mark.parametrize( "coordination_status,expected", [ @@ -155,11 +145,6 @@ def test_pashub_trigger__coordination_completed_and_pashub_link_not_set__returns ) -# ------------------------- -# Design -# ------------------------- - - def test_pashub_trigger__design_completed_and_pashub_link_set__returns_true() -> None: deal_id = uuid.uuid4() @@ -204,11 +189,6 @@ def test_pashub_trigger__design_completed_and_pashub_link_not_set__returns_false ) -# ------------------------- -# Lodgement -# ------------------------- - - @pytest.mark.parametrize( "lodgement_status,expected", [ @@ -263,11 +243,6 @@ def test_pashub_trigger__lodgement_completed_and_pashub_link_not_set__returns_fa ) -# ------------------------- -# Negative case -# ------------------------- - - def test_pashub_trigger__coordination_design_lodgement_not_completed_and_pashub_link_set__returns_false() -> ( None ): @@ -293,3 +268,110 @@ def test_pashub_trigger__coordination_design_lodgement_not_completed_and_pashub_ ) is False ) + + +# ======================= +# DB UPDATE TRIGGER TESTS +# ======================= + + +def test_db_update_trigger__no_changes__returns_false() -> None: + deal_id = uuid.uuid4() + + old_deal = make_old_deal( + id=deal_id, + dealname="Test Deal", + dealstage="stage_1", + outcome="won", + ) + + new_deal = make_new_deal( + deal_id, + hs_object_id="1", + dealname="Test Deal", + dealstage="stage_1", + outcome="won", + ) + + result = HubspotDealDiffer.check_for_db_update_trigger( + new_deal=new_deal, + new_company=None, + new_listing=None, + old_deal=old_deal, + ) + + assert result is False + + +def test_db_update_trigger__dealname_changed__returns_true() -> None: + deal_id = uuid.uuid4() + + old_deal = make_old_deal( + id=deal_id, + dealname="Old Name", + ) + + new_deal = make_new_deal( + deal_id, + hs_object_id="1", + dealname="New Name", + ) + + result = HubspotDealDiffer.check_for_db_update_trigger( + new_deal=new_deal, + new_company=None, + new_listing=None, + old_deal=old_deal, + ) + + assert result is True + + +def test_db_update_trigger__company_changed__returns_true() -> None: + deal_id = uuid.uuid4() + + old_deal = make_old_deal( + id=deal_id, + company_id="old_company", + ) + + new_deal = make_new_deal( + deal_id, + hs_object_id="1", + ) + + new_company = "new_company" + + result = HubspotDealDiffer.check_for_db_update_trigger( + new_deal=new_deal, + new_company=new_company, + new_listing=None, + old_deal=old_deal, + ) + + assert result is True + + +def test_db_update_trigger__listing_changed__returns_true() -> None: + deal_id = uuid.uuid4() + + old_deal = make_old_deal( + id=deal_id, + listing_id="abc", + ) + + new_deal = make_new_deal( + deal_id, + hs_object_id="1", + ) + + new_listing = {"listing_id": "xyz"} + + result = HubspotDealDiffer.check_for_db_update_trigger( + new_deal=new_deal, + new_company=None, + new_listing=new_listing, + old_deal=old_deal, + ) + + assert result is True From 01636514aaa58f8749af194fac0ac31cd8a79284 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Thu, 9 Apr 2026 10:32:59 +0000 Subject: [PATCH 18/32] pull diffing logic out of loading method --- etl/hubspot/hubspotDataTodB.py | 181 ++++++++++++++-------------- etl/hubspot/scripts/scraper/main.py | 37 ++++-- 2 files changed, 118 insertions(+), 100 deletions(-) diff --git a/etl/hubspot/hubspotDataTodB.py b/etl/hubspot/hubspotDataTodB.py index 4f43f1f7..1c4b6b54 100644 --- a/etl/hubspot/hubspotDataTodB.py +++ b/etl/hubspot/hubspotDataTodB.py @@ -96,13 +96,103 @@ class HubspotDataToDb: return sha256.hexdigest() def update_deal_with_checks( - self, deal_in_db: HubspotDealData, hubspot_client: HubspotClient + self, + deal_in_db: HubspotDealData, + hubspot_client: HubspotClient, + hs_deal: Dict[str, str], + hs_company_id: Optional[str], + hs_listing: Optional[Dict[str, str]], ) -> bool: """ - Checks if a deal needs updating and syncs it with HubSpot. - Also handles major_condition_issue_photos file upload to S3 with integrity check. + Updates deal in database and handles major_condition_issue_photos file upload to S3 with integrity check. """ + self.upsert_deal(hs_deal, hs_company_id, hs_listing, hubspot_client) + # Handle photo upload if it exists but S3 URL is missing + if self._needs_photo_upload(deal_in_db): + print( + f"🖼️ Found photo for deal_id {deal_in_db.deal_id} — uploading to S3..." + ) + + photo_url = hs_deal.get("major_condition_issue_photos") + + if photo_url: + self._upload_photo_to_s3( + deal_in_db, + photo_url, + hubspot_client, + verify=True, + ) + + # persist change + with db_read_session() as session: + db_record = session.get(HubspotDealData, deal_in_db.id) + db_record.major_condition_issue_evidence_s3_url = ( + deal_in_db.major_condition_issue_evidence_s3_url + ) + session.add(db_record) + session.commit() + + return False + else: + print(f"⚠️ Photo URL missing for deal_id {deal_in_db.deal_id}") + + else: + print(f"✅ No update or upload required for deal_id {deal_in_db.deal_id}.") + + return True + + def upsert_deal( + self, + deal_data: Dict[str, str], + company: Optional[str], + listing: Optional[dict[str, str]], + hubspot_client: HubspotClient, + ): + """ + Inserts or updates a deal record. + Also uploads photos if present and adds S3 URL. + """ + with db_read_session() as session: + deal_id = deal_data.get("hs_object_id") + + statement = select(HubspotDealData).where( + HubspotDealData.deal_id == deal_id + ) + existing = session.exec(statement).first() + + if existing: + print(f"🔄 Updating existing deal (deal_id={deal_id})") + self._update_existing_deal(existing, deal_data, listing, company) + + self._handle_existing_photo_upload(existing, hubspot_client) + + session.add(existing) + session.commit() + session.refresh(existing) + return existing + + else: + print(f"🆕 Inserting new deal (deal_id={deal_id})") + new_record: HubspotDealData = self._build_new_deal( + deal_id, deal_data, listing, company + ) + + # Handle upload at insert time + self._handle_new_photo_upload(new_record, hubspot_client) + + session.add(new_record) + session.commit() + session.refresh(new_record) + return new_record + + def _deprecated_diff( + self, + deal_in_db: HubspotDealData, + hs_deal: Dict[str, str], + hs_company_id: Optional[str], + hs_listing: Optional[Dict[str, str]], + ): def soft_assert(condition: bool, message: str = "Assertion Failed"): if not condition: print(f"⚠️ Soft Assert Failed: {message}") @@ -111,14 +201,6 @@ class HubspotDataToDb: print(f"🔍 Checking if deal needs updating (deal_id={deal_in_db.deal_id})") - hs_deal: Dict[str, str] - hs_company_id: Optional[str] - hs_listing: Optional[Dict[str, str]] - - hs_deal, hs_company_id, hs_listing = ( - hubspot_client.get_deal_and_company_and_listing(deal_in_db.deal_id) - ) - # Soft compare key fields checks = [ soft_assert( @@ -291,87 +373,10 @@ class HubspotDataToDb: print( f"❗ Discrepancies found for deal_id {deal_in_db.deal_id} — syncing with HubSpot." ) - self.upsert_deal(hs_deal, hs_company_id, hs_listing, hubspot_client) return False - # Handle photo upload if it exists but S3 URL is missing - if self._needs_photo_upload(deal_in_db): - print( - f"🖼️ Found photo for deal_id {deal_in_db.deal_id} — uploading to S3..." - ) - - photo_url = hs_deal.get("major_condition_issue_photos") - - if photo_url: - self._upload_photo_to_s3( - deal_in_db, - photo_url, - hubspot_client, - verify=True, # 👈 key difference - ) - - # persist change - with db_read_session() as session: - db_record = session.get(HubspotDealData, deal_in_db.id) - db_record.major_condition_issue_evidence_s3_url = ( - deal_in_db.major_condition_issue_evidence_s3_url - ) - session.add(db_record) - session.commit() - - return False - else: - print(f"⚠️ Photo URL missing for deal_id {deal_in_db.deal_id}") - - else: - print(f"✅ No update or upload required for deal_id {deal_in_db.deal_id}.") - return True - def upsert_deal( - self, - deal_data: Dict[str, str], - company: Optional[str], - listing: Optional[dict[str, str]], - hubspot_client: HubspotClient, - ): - """ - Inserts or updates a deal record. - Also uploads photos if present and adds S3 URL. - """ - with db_read_session() as session: - deal_id = deal_data.get("hs_object_id") - - statement = select(HubspotDealData).where( - HubspotDealData.deal_id == deal_id - ) - existing = session.exec(statement).first() - - if existing: - print(f"🔄 Updating existing deal (deal_id={deal_id})") - self._update_existing_deal(existing, deal_data, listing, company) - - self._handle_existing_photo_upload(existing, hubspot_client) - - session.add(existing) - session.commit() - session.refresh(existing) - return existing - - else: - print(f"🆕 Inserting new deal (deal_id={deal_id})") - new_record: HubspotDealData = self._build_new_deal( - deal_id, deal_data, listing, company - ) - - # Handle upload at insert time - self._handle_new_photo_upload(new_record, hubspot_client) - - session.add(new_record) - session.commit() - session.refresh(new_record) - return new_record - def _update_existing_deal( self, existing: HubspotDealData, diff --git a/etl/hubspot/scripts/scraper/main.py b/etl/hubspot/scripts/scraper/main.py index 8c4af1a7..768a86eb 100644 --- a/etl/hubspot/scripts/scraper/main.py +++ b/etl/hubspot/scripts/scraper/main.py @@ -1,8 +1,7 @@ from backend.app.db.models.organisation import HubspotDealData from etl.hubspot.hubspotClient import HubspotClient -# from etl.hubspot.hubspotDataTodB import CompanyData, HubspotDataToDb -from etl.hubspot.hubspotDataTodB import HubspotDataToDb +from etl.hubspot.hubspotDataTodB import CompanyData, HubspotDataToDb from backend.utils.subtasks import task_handler from typing import Any, Dict, Optional @@ -24,11 +23,6 @@ def handler(body: dict[str, Any], context: Any) -> None: hubspot_deal_id ) - if not db_deal: - # New hubspot deal, no diffing to do - # TODO: Trigger hubspot to db ETL - return - hubspot_deal: Dict[str, str] company: Optional[str] listing: Optional[dict[str, str]] @@ -37,10 +31,14 @@ def handler(body: dict[str, Any], context: Any) -> None: hubspot_deal_id ) - if HubspotDealDiffer.check_for_pashub_trigger( - new_deal=hubspot_deal, old_deal=db_deal - ): - # TODO: trigger pashub file fetcher + if not db_deal: + # New hubspot deal, no diffing to do + if company: + company_data: CompanyData = hubspot_client.get_company_information(company) + db_client: HubspotDataToDb = HubspotDataToDb() + db_client.upsert_company(company_data) + + db_client.upsert_deal(hubspot_deal, company, listing, hubspot_client) return if HubspotDealDiffer.check_for_db_update_trigger( @@ -49,7 +47,22 @@ def handler(body: dict[str, Any], context: Any) -> None: new_listing=listing, old_deal=db_deal, ): - # TODO: trigger db upsert + db_client.update_deal_with_checks( + deal_in_db=db_deal, + hubspot_client=hubspot_client, + hs_deal=hubspot_deal, + hs_company_id=company, + hs_listing=listing, + ) + return + + # ============================== + # Orchestration of other lambdas + # ============================== + if HubspotDealDiffer.check_for_pashub_trigger( + new_deal=hubspot_deal, old_deal=db_deal + ): + # TODO: trigger pashub file fetcher return # if db_deal: From 125527baa9a56573c3f8120fbf9daca805d8b20e Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Thu, 9 Apr 2026 10:46:52 +0000 Subject: [PATCH 19/32] move HubspotDealData object to dedicated file --- backend/app/db/models/hubspot_deal_data.py | 77 +++++++++++++++++++ backend/app/db/models/organisation.py | 77 +------------------ etl/hubspot/hubspotDataTodB.py | 10 ++- etl/hubspot/hubspot_deal_differ.py | 2 +- etl/hubspot/scripts/scraper/main.py | 9 +-- etl/hubspot/tests/test_hubspot_deal_differ.py | 2 +- 6 files changed, 91 insertions(+), 86 deletions(-) create mode 100644 backend/app/db/models/hubspot_deal_data.py diff --git a/backend/app/db/models/hubspot_deal_data.py b/backend/app/db/models/hubspot_deal_data.py new file mode 100644 index 00000000..d5a51ace --- /dev/null +++ b/backend/app/db/models/hubspot_deal_data.py @@ -0,0 +1,77 @@ +import uuid +from sqlmodel import SQLModel, Field, Column, text +from datetime import datetime +from typing import Optional +from sqlalchemy import DateTime +from sqlalchemy.sql import func + + +class HubspotDealData(SQLModel, table=True): + __tablename__ = "hubspot_deal_data" + + id: uuid.UUID = Field(default_factory=uuid.uuid4, primary_key=True) + + # HubSpot Deal identifiers + deal_id: str = Field(index=True, nullable=False) + dealname: Optional[str] = Field(default=None) + dealstage: Optional[str] = Field(default=None) + company_id: Optional[str] = Field(default=None) + project_code: Optional[str] = Field(default=None) + + # HubSpot custom properties + landlord_property_id: Optional[str] = Field(default=None) + uprn: Optional[str] = Field(default=None) + outcome: Optional[str] = Field(default=None) + outcome_notes: Optional[str] = Field(default=None) + + major_condition_issue_description: Optional[str] = Field(default=None) + major_condition_issue_photos: Optional[str] = Field(default=None) + major_condition_issue_evidence_s3_url: Optional[str] = Field(default=None) + + coordination_status: Optional[str] = Field(default=None) + coordination_comments: Optional[str] = Field(default=None) + design_status: Optional[str] = Field(default=None) + + listing_id: Optional[str] = Field(default=None) + pashub_link: Optional[str] = Field(default=None) + sharepoint_link: Optional[str] = Field(default=None) + dampmould_growth: Optional[str] = Field(default=None) + damp_mould_and_repairs_comments: Optional[str] = Field(default=None) + pre_sap: Optional[str] = Field(default=None) + coordinator: Optional[str] = Field(default=None) + mtp_completion_date: Optional[datetime] = Field(default=None) + mtp_re_model_completion_date: Optional[datetime] = Field(default=None) + ioe_v3_completion_date: Optional[datetime] = Field(default=None) + proposed_measures: Optional[str] = Field(default=None) + approved_package: Optional[str] = Field(default=None) + designer: Optional[str] = Field(default=None) + design_completion_date: Optional[datetime] = Field(default=None) + actual_measures_installed: Optional[str] = Field(default=None) + installer: Optional[str] = Field(default=None) + installer_handover: Optional[str] = Field(default=None) + lodgement_status: Optional[str] = Field(default=None) + measures_lodgement_date: Optional[datetime] = Field(default=None) + lodgement_date: Optional[datetime] = Field(default=None) + expected_commencement_date: Optional[datetime] = Field(default=None) + surveyor: Optional[str] = Field(default=None) + confirmed_survey_date: Optional[datetime] = Field(default=None) + confirmed_survey_time: Optional[str] = Field(default=None) + surveyed_date: Optional[datetime] = Field(default=None) + design_type: Optional[str] = Field(default=None) + + created_at: datetime = Field( + sa_column=Column( + DateTime(timezone=True), + server_default=text("(NOW() AT TIME ZONE 'utc')"), + nullable=False, + ) + ) + + updated_at: datetime = Field( + sa_column=Column( + DateTime(timezone=True), + server_default=text("(NOW() AT TIME ZONE 'utc')"), + onupdate=func.now(), + nullable=False, + ) + ) diff --git a/backend/app/db/models/organisation.py b/backend/app/db/models/organisation.py index 784cc4ad..8afc5d63 100644 --- a/backend/app/db/models/organisation.py +++ b/backend/app/db/models/organisation.py @@ -1,9 +1,7 @@ -from sqlmodel import SQLModel, Field, Column, text +import uuid +from sqlmodel import SQLModel, Field from datetime import datetime, timezone from typing import Optional -from sqlalchemy import DateTime -from sqlalchemy.sql import func -import uuid class Organisation(SQLModel, table=True): @@ -13,74 +11,3 @@ class Organisation(SQLModel, table=True): updated_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc)) hubspot_company_id: Optional[str] = None name: Optional[str] = None - - -class HubspotDealData(SQLModel, table=True): - __tablename__ = "hubspot_deal_data" - - id: uuid.UUID = Field(default_factory=uuid.uuid4, primary_key=True) - - # HubSpot Deal identifiers - deal_id: str = Field(index=True, nullable=False) - dealname: Optional[str] = Field(default=None) - dealstage: Optional[str] = Field(default=None) - company_id: Optional[str] = Field(default=None) - project_code: Optional[str] = Field(default=None) - - # HubSpot custom properties - landlord_property_id: Optional[str] = Field(default=None) - uprn: Optional[str] = Field(default=None) - outcome: Optional[str] = Field(default=None) - outcome_notes: Optional[str] = Field(default=None) - - major_condition_issue_description: Optional[str] = Field(default=None) - major_condition_issue_photos: Optional[str] = Field(default=None) - major_condition_issue_evidence_s3_url: Optional[str] = Field(default=None) - - coordination_status: Optional[str] = Field(default=None) - coordination_comments: Optional[str] = Field(default=None) - design_status: Optional[str] = Field(default=None) - - listing_id: Optional[str] = Field(default=None) - pashub_link: Optional[str] = Field(default=None) - sharepoint_link: Optional[str] = Field(default=None) - dampmould_growth: Optional[str] = Field(default=None) - damp_mould_and_repairs_comments: Optional[str] = Field(default=None) - pre_sap: Optional[str] = Field(default=None) - coordinator: Optional[str] = Field(default=None) - mtp_completion_date: Optional[datetime] = Field(default=None) - mtp_re_model_completion_date: Optional[datetime] = Field(default=None) - ioe_v3_completion_date: Optional[datetime] = Field(default=None) - proposed_measures: Optional[str] = Field(default=None) - approved_package: Optional[str] = Field(default=None) - designer: Optional[str] = Field(default=None) - design_completion_date: Optional[datetime] = Field(default=None) - actual_measures_installed: Optional[str] = Field(default=None) - installer: Optional[str] = Field(default=None) - installer_handover: Optional[str] = Field(default=None) - lodgement_status: Optional[str] = Field(default=None) - measures_lodgement_date: Optional[datetime] = Field(default=None) - lodgement_date: Optional[datetime] = Field(default=None) - expected_commencement_date: Optional[datetime] = Field(default=None) - surveyor: Optional[str] = Field(default=None) - confirmed_survey_date: Optional[datetime] = Field(default=None) - confirmed_survey_time: Optional[str] = Field(default=None) - surveyed_date: Optional[datetime] = Field(default=None) - design_type: Optional[str] = Field(default=None) - - created_at: datetime = Field( - sa_column=Column( - DateTime(timezone=True), - server_default=text("(NOW() AT TIME ZONE 'utc')"), - nullable=False, - ) - ) - - updated_at: datetime = Field( - sa_column=Column( - DateTime(timezone=True), - server_default=text("(NOW() AT TIME ZONE 'utc')"), - onupdate=func.now(), - nullable=False, - ) - ) diff --git a/etl/hubspot/hubspotDataTodB.py b/etl/hubspot/hubspotDataTodB.py index 3c017f0e..5ebc8c73 100644 --- a/etl/hubspot/hubspotDataTodB.py +++ b/etl/hubspot/hubspotDataTodB.py @@ -1,13 +1,15 @@ -from backend.app.db.connection import db_read_session -from backend.app.db.models.organisation import Organisation, HubspotDealData +import hashlib +import os from sqlmodel import select from datetime import datetime, timezone from typing import Dict, Optional + +from backend.app.db.models.hubspot_deal_data import HubspotDealData from etl.hubspot.company_data import CompanyData from etl.hubspot.hubspotClient import HubspotClient from etl.hubspot.s3_uploader import S3Uploader -import hashlib -import os +from backend.app.db.connection import db_read_session +from backend.app.db.models.organisation import Organisation class HubspotDataToDb: diff --git a/etl/hubspot/hubspot_deal_differ.py b/etl/hubspot/hubspot_deal_differ.py index 1dd4ed51..dd992243 100644 --- a/etl/hubspot/hubspot_deal_differ.py +++ b/etl/hubspot/hubspot_deal_differ.py @@ -1,6 +1,6 @@ from typing import Dict, List, Optional -from backend.app.db.models.organisation import HubspotDealData +from backend.app.db.models.hubspot_deal_data import HubspotDealData class HubspotDealDiffer: diff --git a/etl/hubspot/scripts/scraper/main.py b/etl/hubspot/scripts/scraper/main.py index 768a86eb..826d7e05 100644 --- a/etl/hubspot/scripts/scraper/main.py +++ b/etl/hubspot/scripts/scraper/main.py @@ -1,14 +1,13 @@ -from backend.app.db.models.organisation import HubspotDealData -from etl.hubspot.hubspotClient import HubspotClient - -from etl.hubspot.hubspotDataTodB import CompanyData, HubspotDataToDb -from backend.utils.subtasks import task_handler from typing import Any, Dict, Optional +from etl.hubspot.hubspotClient import HubspotClient +from etl.hubspot.hubspotDataTodB import CompanyData, HubspotDataToDb from etl.hubspot.hubspot_deal_differ import HubspotDealDiffer from etl.hubspot.hubspot_trigger_orchestrator_trigger_request import ( HubspotTriggerOrchestratorTriggerRequest, ) +from backend.utils.subtasks import task_handler +from backend.app.db.models.hubspot_deal_data import HubspotDealData @task_handler() diff --git a/etl/hubspot/tests/test_hubspot_deal_differ.py b/etl/hubspot/tests/test_hubspot_deal_differ.py index 876fcab9..74d3f057 100644 --- a/etl/hubspot/tests/test_hubspot_deal_differ.py +++ b/etl/hubspot/tests/test_hubspot_deal_differ.py @@ -4,7 +4,7 @@ import uuid import pytest -from backend.app.db.models.organisation import HubspotDealData +from backend.app.db.models.hubspot_deal_data import HubspotDealData from etl.hubspot.hubspot_deal_differ import HubspotDealDiffer From 36aaabb3cfa05d776484f2af8fa53973936dc5b5 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Thu, 9 Apr 2026 11:19:33 +0000 Subject: [PATCH 20/32] =?UTF-8?q?diff=20checker=20for=20db=20load=20trigge?= =?UTF-8?q?r=20=F0=9F=9F=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/app/db/models/hubspot_deal_data.py | 10 +- etl/hubspot/hubspotDataTodB.py | 247 ++------------------- etl/hubspot/hubspot_deal_differ.py | 90 +++++++- etl/hubspot/scripts/scraper/main.py | 18 -- etl/hubspot/utils.py | 11 + 5 files changed, 127 insertions(+), 249 deletions(-) create mode 100644 etl/hubspot/utils.py diff --git a/backend/app/db/models/hubspot_deal_data.py b/backend/app/db/models/hubspot_deal_data.py index d5a51ace..1d7607e0 100644 --- a/backend/app/db/models/hubspot_deal_data.py +++ b/backend/app/db/models/hubspot_deal_data.py @@ -59,19 +59,21 @@ class HubspotDealData(SQLModel, table=True): surveyed_date: Optional[datetime] = Field(default=None) design_type: Optional[str] = Field(default=None) - created_at: datetime = Field( + created_at: Optional[datetime] = Field( sa_column=Column( DateTime(timezone=True), server_default=text("(NOW() AT TIME ZONE 'utc')"), nullable=False, - ) + ), + default=None, # Nullable in db but optional here as value is set on db save for new record ) - updated_at: datetime = Field( + updated_at: Optional[datetime] = Field( sa_column=Column( DateTime(timezone=True), server_default=text("(NOW() AT TIME ZONE 'utc')"), onupdate=func.now(), nullable=False, - ) + ), + default=None, # Nullable in db but optional here as value is set on db save for new record ) diff --git a/etl/hubspot/hubspotDataTodB.py b/etl/hubspot/hubspotDataTodB.py index 5ebc8c73..210c9593 100644 --- a/etl/hubspot/hubspotDataTodB.py +++ b/etl/hubspot/hubspotDataTodB.py @@ -10,6 +10,7 @@ from etl.hubspot.hubspotClient import HubspotClient from etl.hubspot.s3_uploader import S3Uploader from backend.app.db.connection import db_read_session from backend.app.db.models.organisation import Organisation +from etl.hubspot.utils import parse_hs_date class HubspotDataToDb: @@ -60,11 +61,7 @@ class HubspotDataToDb: session.commit() return record - def new_record_to_hubspot_data(self, deal_data, company, listing, hubspot_client): - print("⚠️ Deprecated — use the new interface instead.") - return self.upsert_deal(deal_data, company, listing, hubspot_client) - - def find_all_deals_with_company_id(self, company_id): + def find_all_deals_with_company_id(self, company_id: str): """Returns a list of deals for a given company_id.""" with db_read_session() as session: return ( @@ -137,7 +134,7 @@ class HubspotDataToDb: return False else: - print(f"⚠️ Photo URL missing for deal_id {deal_in_db.deal_id}") + print(f"⚠️ Photo URL missing for deal_id {deal_in_db.deal_id}") else: print(f"✅ No update or upload required for deal_id {deal_in_db.deal_id}.") @@ -188,202 +185,6 @@ class HubspotDataToDb: session.refresh(new_record) return new_record - def _deprecated_diff( - self, - deal_in_db: HubspotDealData, - hs_deal: Dict[str, str], - hs_company_id: Optional[str], - hs_listing: Optional[Dict[str, str]], - ): - def soft_assert(condition: bool, message: str = "Assertion Failed"): - if not condition: - print(f"⚠️ Soft Assert Failed: {message}") - return False - return True - - print(f"🔍 Checking if deal needs updating (deal_id={deal_in_db.deal_id})") - - # Soft compare key fields - checks = [ - soft_assert( - deal_in_db.deal_id == hs_deal.get("hs_object_id"), "deal_id mismatch" - ), - soft_assert(deal_in_db.company_id == hs_company_id, "company_id mismatch"), - soft_assert( - deal_in_db.listing_id == hs_listing.get("listing_id"), - "listing_id mismatch", - ), - soft_assert( - deal_in_db.landlord_property_id == hs_listing.get("owner_property_id"), - "landlord_property_id mismatch", - ), - soft_assert( - deal_in_db.outcome == hs_deal.get("outcome"), "outcome mismatch" - ), - soft_assert( - deal_in_db.dealstage == hs_deal.get("dealstage"), "dealstage mismatch" - ), - soft_assert( - deal_in_db.dealname == hs_deal.get("dealname"), "dealname mismatch" - ), - soft_assert( - deal_in_db.project_code == hs_deal.get("project_code"), - "project_code mismatch", - ), - soft_assert( - deal_in_db.uprn == hs_listing.get("national_uprn"), "uprn mismatch" - ), - soft_assert( - deal_in_db.outcome_notes == hs_deal.get("outcome_notes"), - "outcome_notes mismatch", - ), - soft_assert( - deal_in_db.major_condition_issue_description - == hs_deal.get("major_condition_issue_description"), - "major condition description mismatch", - ), - soft_assert( - deal_in_db.major_condition_issue_photos - == hs_deal.get("major_condition_issue_photos"), - "major condition issue photos mismatch", - ), - soft_assert( - deal_in_db.coordination_status - == hs_deal.get("coordination_status__stage_1_"), - "coordination stage 1 status mismatch", - ), - soft_assert( - deal_in_db.coordination_comments - == hs_deal.get("coordination_comments"), - "coordination_comments mismatch", - ), - soft_assert( - deal_in_db.design_status == hs_deal.get("retrofit_design_status"), - "retrofit design mismatch", - ), - soft_assert( - deal_in_db.pashub_link == hs_deal.get("pashub_link"), - "pashub_link mismatch", - ), - soft_assert( - deal_in_db.sharepoint_link == hs_deal.get("sharepoint_link"), - "sharepoint_link mismatch", - ), - soft_assert( - deal_in_db.dampmould_growth == hs_deal.get("dampmould_growth"), - "dampmould_growth mismatch", - ), - soft_assert( - deal_in_db.damp_mould_and_repairs_comments - == hs_deal.get("damp_mould_and_repairs_comments"), - "damp_mould_and_repairs_comments mismatch", - ), - soft_assert( - deal_in_db.pre_sap == hs_deal.get("pre_sap"), - "pre_sap mismatch", - ), - soft_assert( - deal_in_db.coordinator == hs_deal.get("coordinator"), - "coordinator mismatch", - ), - soft_assert( - deal_in_db.mtp_completion_date - == self._parse_hs_date(hs_deal.get("mtp_completion_date")), - "mtp_completion_date mismatch", - ), - soft_assert( - deal_in_db.mtp_re_model_completion_date - == self._parse_hs_date(hs_deal.get("mtp_re_model_completion_date")), - "mtp_re_model_completion_date mismatch", - ), - soft_assert( - deal_in_db.ioe_v3_completion_date - == self._parse_hs_date(hs_deal.get("ioe_v3_completion_date")), - "ioe_v3_completion_date mismatch", - ), - soft_assert( - deal_in_db.proposed_measures == hs_deal.get("proposed_measures"), - "proposed_measures mismatch", - ), - soft_assert( - deal_in_db.approved_package == hs_deal.get("approved_package"), - "approved_package mismatch", - ), - soft_assert( - deal_in_db.designer == hs_deal.get("designer"), - "designer mismatch", - ), - soft_assert( - deal_in_db.design_completion_date - == self._parse_hs_date(hs_deal.get("design_completion_date")), - "design_completion_date mismatch", - ), - soft_assert( - deal_in_db.actual_measures_installed - == hs_deal.get("actual_measures_installed"), - "actual_measures_installed mismatch", - ), - soft_assert( - deal_in_db.installer == hs_deal.get("installer"), - "installer mismatch", - ), - soft_assert( - deal_in_db.installer_handover == hs_deal.get("installer_handover"), - "installer_handover mismatch", - ), - soft_assert( - deal_in_db.lodgement_status == hs_deal.get("lodgement_status"), - "lodgement_status mismatch", - ), - soft_assert( - deal_in_db.measures_lodgement_date - == self._parse_hs_date(hs_deal.get("measures_lodgement_date")), - "measures_lodgement_date mismatch", - ), - soft_assert( - deal_in_db.lodgement_date - == self._parse_hs_date(hs_deal.get("lodgement_date")), - "lodgement_date mismatch", - ), - soft_assert( - deal_in_db.expected_commencement_date - == self._parse_hs_date(hs_deal.get("expected_commencement_date")), - "expected_commencement_date mismatch", - ), - soft_assert( - deal_in_db.surveyor == hs_deal.get("surveyor"), - "surveyor mismatch", - ), - soft_assert( - deal_in_db.confirmed_survey_date - == self._parse_hs_date(hs_deal.get("confirmed_survey_date")), - "confirmed_survey_date mismatch", - ), - soft_assert( - deal_in_db.confirmed_survey_time - == hs_deal.get("confirmed_survey_time"), - "confirmed_survey_time mismatch", - ), - soft_assert( - deal_in_db.surveyed_date - == self._parse_hs_date(hs_deal.get("surveyed_date")), - "surveyed_date mismatch", - ), - soft_assert( - deal_in_db.design_type == hs_deal.get("design_type"), - "design_type mismatch", - ), - ] - - # If discrepancies found, update from HubSpot - if not all(checks): - print( - f"❗ Discrepancies found for deal_id {deal_in_db.deal_id} — syncing with HubSpot." - ) - return False - - return True - def _update_existing_deal( self, existing: HubspotDealData, @@ -420,38 +221,36 @@ class HubspotDataToDb: ), "pre_sap": deal_data.get("pre_sap"), "coordinator": deal_data.get("coordinator"), - "mtp_completion_date": self._parse_hs_date( - deal_data.get("mtp_completion_date") - ), - "mtp_re_model_completion_date": self._parse_hs_date( + "mtp_completion_date": parse_hs_date(deal_data.get("mtp_completion_date")), + "mtp_re_model_completion_date": parse_hs_date( deal_data.get("mtp_re_model_completion_date") ), - "ioe_v3_completion_date": self._parse_hs_date( + "ioe_v3_completion_date": parse_hs_date( deal_data.get("ioe_v3_completion_date") ), "proposed_measures": deal_data.get("proposed_measures"), "approved_package": deal_data.get("approved_package"), "designer": deal_data.get("designer"), - "design_completion_date": self._parse_hs_date( + "design_completion_date": parse_hs_date( deal_data.get("design_completion_date") ), "actual_measures_installed": deal_data.get("actual_measures_installed"), "installer": deal_data.get("installer"), "installer_handover": deal_data.get("installer_handover"), "lodgement_status": deal_data.get("lodgement_status"), - "measures_lodgement_date": self._parse_hs_date( + "measures_lodgement_date": parse_hs_date( deal_data.get("measures_lodgement_date") ), - "lodgement_date": self._parse_hs_date(deal_data.get("lodgement_date")), - "expected_commencement_date": self._parse_hs_date( + "lodgement_date": parse_hs_date(deal_data.get("lodgement_date")), + "expected_commencement_date": parse_hs_date( deal_data.get("expected_commencement_date") ), "surveyor": deal_data.get("surveyor"), - "confirmed_survey_date": self._parse_hs_date( + "confirmed_survey_date": parse_hs_date( deal_data.get("confirmed_survey_date") ), "confirmed_survey_time": deal_data.get("confirmed_survey_time"), - "surveyed_date": self._parse_hs_date(deal_data.get("surveyed_date")), + "surveyed_date": parse_hs_date(deal_data.get("surveyed_date")), "design_type": deal_data.get("design_type"), }.items(): setattr(existing, attr, value or getattr(existing, attr)) @@ -491,38 +290,34 @@ class HubspotDataToDb: ), pre_sap=deal_data.get("pre_sap"), coordinator=deal_data.get("coordinator"), - mtp_completion_date=self._parse_hs_date( - deal_data.get("mtp_completion_date") - ), - mtp_re_model_completion_date=self._parse_hs_date( + mtp_completion_date=parse_hs_date(deal_data.get("mtp_completion_date")), + mtp_re_model_completion_date=parse_hs_date( deal_data.get("mtp_re_model_completion_date") ), - ioe_v3_completion_date=self._parse_hs_date( + ioe_v3_completion_date=parse_hs_date( deal_data.get("ioe_v3_completion_date") ), proposed_measures=deal_data.get("proposed_measures"), approved_package=deal_data.get("approved_package"), designer=deal_data.get("designer"), - design_completion_date=self._parse_hs_date( + design_completion_date=parse_hs_date( deal_data.get("design_completion_date") ), actual_measures_installed=deal_data.get("actual_measures_installed"), installer=deal_data.get("installer"), installer_handover=deal_data.get("installer_handover"), lodgement_status=deal_data.get("lodgement_status"), - measures_lodgement_date=self._parse_hs_date( + measures_lodgement_date=parse_hs_date( deal_data.get("measures_lodgement_date") ), - lodgement_date=self._parse_hs_date(deal_data.get("lodgement_date")), - expected_commencement_date=self._parse_hs_date( + lodgement_date=parse_hs_date(deal_data.get("lodgement_date")), + expected_commencement_date=parse_hs_date( deal_data.get("expected_commencement_date") ), surveyor=deal_data.get("surveyor"), - confirmed_survey_date=self._parse_hs_date( - deal_data.get("confirmed_survey_date") - ), + confirmed_survey_date=parse_hs_date(deal_data.get("confirmed_survey_date")), confirmed_survey_time=deal_data.get("confirmed_survey_time"), - surveyed_date=self._parse_hs_date(deal_data.get("surveyed_date")), + surveyed_date=parse_hs_date(deal_data.get("surveyed_date")), design_type=deal_data.get("design_type"), ) diff --git a/etl/hubspot/hubspot_deal_differ.py b/etl/hubspot/hubspot_deal_differ.py index dd992243..42def3b2 100644 --- a/etl/hubspot/hubspot_deal_differ.py +++ b/etl/hubspot/hubspot_deal_differ.py @@ -1,6 +1,7 @@ from typing import Dict, List, Optional from backend.app.db.models.hubspot_deal_data import HubspotDealData +from etl.hubspot.utils import parse_hs_date class HubspotDealDiffer: @@ -18,7 +19,94 @@ class HubspotDealDiffer: new_listing: Optional[Dict[str, str]], old_deal: HubspotDealData, ) -> bool: - raise NotImplementedError + """ + Returns True if ANY difference exists between HubSpot data and DB. + Returns False if everything matches (i.e. no update needed). + """ + + # --- Deal ID --- + if str(old_deal.deal_id) != str(new_deal.get("hs_object_id")): + return True + + # --- Company --- + if new_company is not None: + if old_deal.company_id != new_company: + return True + + # --- Listing --- + hs_listing = new_listing or {} + + if old_deal.listing_id != hs_listing.get("listing_id"): + return True + + if old_deal.landlord_property_id != hs_listing.get("owner_property_id"): + return True + + if old_deal.uprn != hs_listing.get("national_uprn"): + return True + + # --- Field mappings --- + FIELD_MAP = { + "outcome": "outcome", + "dealstage": "dealstage", + "dealname": "dealname", + "project_code": "project_code", + "outcome_notes": "outcome_notes", + "major_condition_issue_description": "major_condition_issue_description", + "major_condition_issue_photos": "major_condition_issue_photos", + "coordination_status__stage_1_": "coordination_status", + "coordination_comments": "coordination_comments", + "retrofit_design_status": "design_status", + "pashub_link": "pashub_link", + "sharepoint_link": "sharepoint_link", + "dampmould_growth": "dampmould_growth", + "damp_mould_and_repairs_comments": "damp_mould_and_repairs_comments", + "pre_sap": "pre_sap", + "coordinator": "coordinator", + "proposed_measures": "proposed_measures", + "approved_package": "approved_package", + "designer": "designer", + "actual_measures_installed": "actual_measures_installed", + "installer": "installer", + "installer_handover": "installer_handover", + "lodgement_status": "lodgement_status", + "design_type": "design_type", + "surveyor": "surveyor", + } + + for hs_field, db_field in FIELD_MAP.items(): + old_value = getattr(old_deal, db_field) + new_value = new_deal.get(hs_field) + + if old_value != new_value: + return True + + # --- Date fields --- + date_fields = [ + ("mtp_completion_date", "mtp_completion_date"), + ("mtp_re_model_completion_date", "mtp_re_model_completion_date"), + ("ioe_v3_completion_date", "ioe_v3_completion_date"), + ("design_completion_date", "design_completion_date"), + ("measures_lodgement_date", "measures_lodgement_date"), + ("lodgement_date", "lodgement_date"), + ("expected_commencement_date", "expected_commencement_date"), + ("confirmed_survey_date", "confirmed_survey_date"), + ("surveyed_date", "surveyed_date"), + ] + + for hs_field, db_field in date_fields: + old_value = getattr(old_deal, db_field) + new_value = parse_hs_date(new_deal.get(hs_field)) + + if old_value != new_value: + return True + + # --- Time field --- + if old_deal.confirmed_survey_time != new_deal.get("confirmed_survey_time"): + return True + + # No differences found + return False @staticmethod def check_for_pashub_trigger( diff --git a/etl/hubspot/scripts/scraper/main.py b/etl/hubspot/scripts/scraper/main.py index 826d7e05..5d5b2b26 100644 --- a/etl/hubspot/scripts/scraper/main.py +++ b/etl/hubspot/scripts/scraper/main.py @@ -63,21 +63,3 @@ def handler(body: dict[str, Any], context: Any) -> None: ): # TODO: trigger pashub file fetcher return - - # if db_deal: - # db_client.update_deal_with_checks(db_deal, hubspot_client) - # else: - # hubspot_deal: Dict[str, str] - # company: Optional[str] - # listing: Optional[dict[str, str]] - - # hubspot_deal, company, listing = ( - # hubspot_client.get_deal_and_company_and_listing(hubspot_deal_id) - # ) - - # if company: - # company_data: CompanyData = hubspot_client.get_company_information(company) - # db_client: HubspotDataToDb = HubspotDataToDb() - # db_client.upsert_company(company_data) - - # db_client.upsert_deal(hubspot_deal, company, listing, hubspot_client) diff --git a/etl/hubspot/utils.py b/etl/hubspot/utils.py new file mode 100644 index 00000000..9fbeae62 --- /dev/null +++ b/etl/hubspot/utils.py @@ -0,0 +1,11 @@ +from datetime import datetime +from typing import Optional + + +def parse_hs_date(value: Optional[str]) -> Optional[datetime]: + if not value: + return None + try: + return datetime.fromisoformat(value.replace("Z", "+00:00")) + except ValueError: + return None From f572dfd2b316c17636061c72d323803394f0343c Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Thu, 9 Apr 2026 12:53:48 +0000 Subject: [PATCH 21/32] trigger pashub to ara lambda if necessary --- etl/hubspot/scripts/scraper/main.py | 31 +++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/etl/hubspot/scripts/scraper/main.py b/etl/hubspot/scripts/scraper/main.py index 5d5b2b26..18e425a4 100644 --- a/etl/hubspot/scripts/scraper/main.py +++ b/etl/hubspot/scripts/scraper/main.py @@ -1,3 +1,5 @@ +import json +import boto3 from typing import Any, Dict, Optional from etl.hubspot.hubspotClient import HubspotClient @@ -8,6 +10,9 @@ from etl.hubspot.hubspot_trigger_orchestrator_trigger_request import ( ) from backend.utils.subtasks import task_handler from backend.app.db.models.hubspot_deal_data import HubspotDealData +from utils.logger import setup_logger + +logger = setup_logger() @task_handler() @@ -15,6 +20,9 @@ def handler(body: dict[str, Any], context: Any) -> None: db_client = HubspotDataToDb() hubspot_client = HubspotClient() + sqs_client = boto3.client("sqs") + PASHUB_TRIGGER_QUEUE_URL = "pashub_to_ara-queue-dev" # TODO: get from env var + payload = HubspotTriggerOrchestratorTriggerRequest.model_validate(body) hubspot_deal_id: str = payload.hubspot_deal_id @@ -40,6 +48,9 @@ def handler(body: dict[str, Any], context: Any) -> None: db_client.upsert_deal(hubspot_deal, company, listing, hubspot_client) return + deal_unchanged = True + + # Deal already in db, check whether anything has changed if HubspotDealDiffer.check_for_db_update_trigger( new_deal=hubspot_deal, new_company=company, @@ -53,6 +64,9 @@ def handler(body: dict[str, Any], context: Any) -> None: hs_company_id=company, hs_listing=listing, ) + deal_unchanged = False + + if deal_unchanged: return # ============================== @@ -62,4 +76,21 @@ def handler(body: dict[str, Any], context: Any) -> None: new_deal=hubspot_deal, old_deal=db_deal ): # TODO: trigger pashub file fetcher + message_body: Dict[str, Optional[str]] = { + "pashub_link": hubspot_deal["pashub_link"], + "address": None, # can we get this? + "sharepoint_link": hubspot_deal["sharepoint_link"], + "uprn": hubspot_deal["national_uprn"], + "landlord_property_id": hubspot_deal["owner_property_id"], + "deal_stage": hubspot_deal["deal_stage"], + } + + response = sqs_client.send_message( + QueueUrl=PASHUB_TRIGGER_QUEUE_URL, MessageBody=json.dumps(message_body) + ) + + logger.info( + f"Sent message to Pashub To Ara queue. MessageId: {response['MessageId']}" + ) + return From c718e36c1180a0fd2413a6a5d6cf93562eebc462 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Thu, 9 Apr 2026 12:54:01 +0000 Subject: [PATCH 22/32] trigger pashub to ara lambda if necessary --- etl/hubspot/scripts/scraper/main.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/etl/hubspot/scripts/scraper/main.py b/etl/hubspot/scripts/scraper/main.py index 18e425a4..0bc285a7 100644 --- a/etl/hubspot/scripts/scraper/main.py +++ b/etl/hubspot/scripts/scraper/main.py @@ -92,5 +92,3 @@ def handler(body: dict[str, Any], context: Any) -> None: logger.info( f"Sent message to Pashub To Ara queue. MessageId: {response['MessageId']}" ) - - return From 2b93e06629e146c8d536d019414946cf958bf405 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Thu, 9 Apr 2026 13:07:13 +0000 Subject: [PATCH 23/32] add todo --- etl/hubspot/hubspotClient.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/etl/hubspot/hubspotClient.py b/etl/hubspot/hubspotClient.py index 8053b41f..6bdf71ed 100644 --- a/etl/hubspot/hubspotClient.py +++ b/etl/hubspot/hubspotClient.py @@ -230,7 +230,9 @@ class HubspotClient: self.logger.info(f"Listing info for deal {deal_id}: {listing_info}") return listing_info - def from_deal_id_get_info(self, deal_id: str) -> dict[str, str]: + def from_deal_id_get_info( + self, deal_id: str + ) -> dict[str, str]: # TODO: add dataclass for this deals_api: DealsBasicApi = self.client.crm.deals.basic_api # type: ignore[reportUnknownMemberType] deal: HubspotObject = self._call_with_retry( From ff0027dbc46f04c2383a2a8630f476c0c9b071f8 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Thu, 9 Apr 2026 13:09:48 +0000 Subject: [PATCH 24/32] remove todo --- etl/hubspot/scripts/scraper/main.py | 1 - 1 file changed, 1 deletion(-) diff --git a/etl/hubspot/scripts/scraper/main.py b/etl/hubspot/scripts/scraper/main.py index 0bc285a7..cec03da8 100644 --- a/etl/hubspot/scripts/scraper/main.py +++ b/etl/hubspot/scripts/scraper/main.py @@ -75,7 +75,6 @@ def handler(body: dict[str, Any], context: Any) -> None: if HubspotDealDiffer.check_for_pashub_trigger( new_deal=hubspot_deal, old_deal=db_deal ): - # TODO: trigger pashub file fetcher message_body: Dict[str, Optional[str]] = { "pashub_link": hubspot_deal["pashub_link"], "address": None, # can we get this? From 4425b28d4fd15e8ab23fc20abcb25b1728dcc085 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Thu, 9 Apr 2026 14:24:52 +0000 Subject: [PATCH 25/32] address review comments and add logging --- etl/hubspot/hubspotDataTodB.py | 24 +++---- etl/hubspot/scripts/scraper/main.py | 99 ++++++++++++++++------------- 2 files changed, 64 insertions(+), 59 deletions(-) diff --git a/etl/hubspot/hubspotDataTodB.py b/etl/hubspot/hubspotDataTodB.py index 210c9593..65fad572 100644 --- a/etl/hubspot/hubspotDataTodB.py +++ b/etl/hubspot/hubspotDataTodB.py @@ -78,22 +78,6 @@ class HubspotDataToDb: .one_or_none() ) - def _parse_hs_date(self, value: Optional[str]) -> Optional[datetime]: - if not value: - return None - try: - return datetime.fromisoformat(value.replace("Z", "+00:00")) - except ValueError: - return None - - def _sha256(self, file_path: str) -> str: - """Compute SHA-256 checksum of a file.""" - sha256 = hashlib.sha256() - with open(file_path, "rb") as f: - for chunk in iter(lambda: f.read(8192), b""): - sha256.update(chunk) - return sha256.hexdigest() - def update_deal_with_checks( self, deal_in_db: HubspotDealData, @@ -185,6 +169,14 @@ class HubspotDataToDb: session.refresh(new_record) return new_record + def _sha256(self, file_path: str) -> str: + """Compute SHA-256 checksum of a file.""" + sha256 = hashlib.sha256() + with open(file_path, "rb") as f: + for chunk in iter(lambda: f.read(8192), b""): + sha256.update(chunk) + return sha256.hexdigest() + def _update_existing_deal( self, existing: HubspotDealData, diff --git a/etl/hubspot/scripts/scraper/main.py b/etl/hubspot/scripts/scraper/main.py index cec03da8..8fa71bf7 100644 --- a/etl/hubspot/scripts/scraper/main.py +++ b/etl/hubspot/scripts/scraper/main.py @@ -38,56 +38,69 @@ def handler(body: dict[str, Any], context: Any) -> None: hubspot_deal_id ) + deal_changed = False if not db_deal: # New hubspot deal, no diffing to do + logger.info(f"New HubSpot deal of ID {hubspot_deal_id}. Loading to database...") if company: company_data: CompanyData = hubspot_client.get_company_information(company) db_client: HubspotDataToDb = HubspotDataToDb() db_client.upsert_company(company_data) db_client.upsert_deal(hubspot_deal, company, listing, hubspot_client) - return - - deal_unchanged = True - - # Deal already in db, check whether anything has changed - if HubspotDealDiffer.check_for_db_update_trigger( - new_deal=hubspot_deal, - new_company=company, - new_listing=listing, - old_deal=db_deal, - ): - db_client.update_deal_with_checks( - deal_in_db=db_deal, - hubspot_client=hubspot_client, - hs_deal=hubspot_deal, - hs_company_id=company, - hs_listing=listing, - ) - deal_unchanged = False - - if deal_unchanged: - return - - # ============================== - # Orchestration of other lambdas - # ============================== - if HubspotDealDiffer.check_for_pashub_trigger( - new_deal=hubspot_deal, old_deal=db_deal - ): - message_body: Dict[str, Optional[str]] = { - "pashub_link": hubspot_deal["pashub_link"], - "address": None, # can we get this? - "sharepoint_link": hubspot_deal["sharepoint_link"], - "uprn": hubspot_deal["national_uprn"], - "landlord_property_id": hubspot_deal["owner_property_id"], - "deal_stage": hubspot_deal["deal_stage"], - } - - response = sqs_client.send_message( - QueueUrl=PASHUB_TRIGGER_QUEUE_URL, MessageBody=json.dumps(message_body) - ) - + else: + # Deal already in db, check whether anything has changed logger.info( - f"Sent message to Pashub To Ara queue. MessageId: {response['MessageId']}" + f"HubSpot deal {hubspot_deal_id} already in database. Checking for changes..." ) + if HubspotDealDiffer.check_for_db_update_trigger( + new_deal=hubspot_deal, + new_company=company, + new_listing=listing, + old_deal=db_deal, + ): + logger.info( + f"Deal {hubspot_deal_id} has been changed, updating database..." + ) + db_client.update_deal_with_checks( + deal_in_db=db_deal, + hubspot_client=hubspot_client, + hs_deal=hubspot_deal, + hs_company_id=company, + hs_listing=listing, + ) + deal_changed = True + + if not deal_changed: + logger.info(f"No changes to deal {hubspot_deal_id}") + return + + # ============================== + # Orchestration of other lambdas + # ============================== + if HubspotDealDiffer.check_for_pashub_trigger( + new_deal=hubspot_deal, old_deal=db_deal + ): + logger.info( + f"Triggering Pas Hub file fetcher for HubSpot deal ID {hubspot_deal_id}" + ) + message_body: Dict[str, Optional[str]] = { + "pashub_link": hubspot_deal["pashub_link"], + "address": None, # potentially available from Listing, leave as None for now + "sharepoint_link": hubspot_deal["sharepoint_link"], + "uprn": hubspot_deal["national_uprn"], + "landlord_property_id": hubspot_deal["owner_property_id"], + "deal_stage": hubspot_deal["deal_stage"], + } + + response = sqs_client.send_message( + QueueUrl=PASHUB_TRIGGER_QUEUE_URL, MessageBody=json.dumps(message_body) + ) + + logger.info( + f"Sent message to Pashub To Ara queue. MessageId: {response['MessageId']}" + ) + else: + logger.info( + f"Not Triggering PasHub file fetcher for HubSpot deal ID {hubspot_deal_id}" + ) From bd891a7a85365b4fca64ad95ebc7c4fb0ed4b82a Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Thu, 9 Apr 2026 14:41:14 +0000 Subject: [PATCH 26/32] address JTK review comments --- backend/app/db/models/hubspot_deal_data.py | 8 ++++---- etl/hubspot/hubspotDataTodB.py | 2 +- etl/hubspot/hubspot_deal_differ.py | 6 ++---- etl/hubspot/scripts/onboarding/new_organisation.py | 2 +- etl/hubspot/scripts/scraper/main.py | 2 +- 5 files changed, 9 insertions(+), 11 deletions(-) diff --git a/backend/app/db/models/hubspot_deal_data.py b/backend/app/db/models/hubspot_deal_data.py index 1d7607e0..758f688d 100644 --- a/backend/app/db/models/hubspot_deal_data.py +++ b/backend/app/db/models/hubspot_deal_data.py @@ -65,8 +65,8 @@ class HubspotDealData(SQLModel, table=True): server_default=text("(NOW() AT TIME ZONE 'utc')"), nullable=False, ), - default=None, # Nullable in db but optional here as value is set on db save for new record - ) + default=func.now(), + ) # Nullable in db but optional here as value is set on db save for new record updated_at: Optional[datetime] = Field( sa_column=Column( @@ -75,5 +75,5 @@ class HubspotDealData(SQLModel, table=True): onupdate=func.now(), nullable=False, ), - default=None, # Nullable in db but optional here as value is set on db save for new record - ) + default=func.now(), + ) # Nullable in db but optional here as value is set on db save for new record diff --git a/etl/hubspot/hubspotDataTodB.py b/etl/hubspot/hubspotDataTodB.py index 65fad572..a50c99da 100644 --- a/etl/hubspot/hubspotDataTodB.py +++ b/etl/hubspot/hubspotDataTodB.py @@ -31,7 +31,7 @@ class HubspotDataToDb: records = self.read_org_table(limit) return [org.name for org in records if org.name] - def upsert_company(self, company_data: CompanyData) -> Organisation: + def upsert_organisation(self, company_data: CompanyData) -> Organisation: """Upserts a company record. Updates if hubspot_company_id exists, otherwise creates new.""" with db_read_session() as session: hubspot_id = company_data.get("hs_object_id") diff --git a/etl/hubspot/hubspot_deal_differ.py b/etl/hubspot/hubspot_deal_differ.py index 42def3b2..4db303ab 100644 --- a/etl/hubspot/hubspot_deal_differ.py +++ b/etl/hubspot/hubspot_deal_differ.py @@ -8,6 +8,7 @@ class HubspotDealDiffer: COORDINATION_COMPLETE: List[str] = [ "v1 ioe/mtp complete", "v2 ioe/mtp complete", + "v3 ioe/mtp complete", ] RETROFIT_DESIGN_COMPLETE = "uploaded" LODGEMENT_COMPLETE: List[str] = ["lodgement complete", "measures lodged"] @@ -72,6 +73,7 @@ class HubspotDealDiffer: "lodgement_status": "lodgement_status", "design_type": "design_type", "surveyor": "surveyor", + "confirmed_survey_time": "confirmed_survey_time", } for hs_field, db_field in FIELD_MAP.items(): @@ -101,10 +103,6 @@ class HubspotDealDiffer: if old_value != new_value: return True - # --- Time field --- - if old_deal.confirmed_survey_time != new_deal.get("confirmed_survey_time"): - return True - # No differences found return False diff --git a/etl/hubspot/scripts/onboarding/new_organisation.py b/etl/hubspot/scripts/onboarding/new_organisation.py index f8c6ba7a..0785949a 100644 --- a/etl/hubspot/scripts/onboarding/new_organisation.py +++ b/etl/hubspot/scripts/onboarding/new_organisation.py @@ -22,7 +22,7 @@ companies_to_add_or_ensure_it_exists = [ for company in companies_to_add_or_ensure_it_exists: company_info: CompanyData = hubspot.get_company_information(company.value) - dbRead.upsert_company(company_info) + dbRead.upsert_organisation(company_info) dbRead = HubspotDataToDb() diff --git a/etl/hubspot/scripts/scraper/main.py b/etl/hubspot/scripts/scraper/main.py index 8fa71bf7..31945705 100644 --- a/etl/hubspot/scripts/scraper/main.py +++ b/etl/hubspot/scripts/scraper/main.py @@ -45,7 +45,7 @@ def handler(body: dict[str, Any], context: Any) -> None: if company: company_data: CompanyData = hubspot_client.get_company_information(company) db_client: HubspotDataToDb = HubspotDataToDb() - db_client.upsert_company(company_data) + db_client.upsert_organisation(company_data) db_client.upsert_deal(hubspot_deal, company, listing, hubspot_client) else: From 62fe46adc43d5c9ae98ad3b9de798bfaa1a82374 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Thu, 9 Apr 2026 15:00:36 +0000 Subject: [PATCH 27/32] get queue name from settings --- .github/workflows/deploy_terraform.yml | 2 +- backend/app/config.py | 1 + etl/hubspot/scripts/scraper/main.py | 3 ++- .../terraform/lambda/hubspot_deal_etl/main.tf | 10 ++++++++++ .../terraform/lambda/pashub_to_ara/outputs.tf | 4 ++++ 5 files changed, 18 insertions(+), 2 deletions(-) create mode 100644 infrastructure/terraform/lambda/pashub_to_ara/outputs.tf diff --git a/.github/workflows/deploy_terraform.yml b/.github/workflows/deploy_terraform.yml index fccc6da4..22f16fee 100644 --- a/.github/workflows/deploy_terraform.yml +++ b/.github/workflows/deploy_terraform.yml @@ -505,7 +505,7 @@ jobs: # Deploy Hubspot ETL Lambda # ============================================================ hubspot_etl_lambda: - needs: [hubspot_etl_image, determine_stage] + needs: [hubspot_etl_image, determine_stage, pashub_to_ara_lambda] uses: ./.github/workflows/_deploy_lambda.yml with: lambda_name: hubspot-etl-to-ara diff --git a/backend/app/config.py b/backend/app/config.py index 80a2d46a..9532ddd6 100644 --- a/backend/app/config.py +++ b/backend/app/config.py @@ -38,6 +38,7 @@ class Settings(BaseSettings): PLAN_TRIGGER_BUCKET: str = "changeme" ENGINE_SQS_URL: str = "changeme" CATEGORISATION_SQS_URL: str = "changeme" + PASHUB_TO_ARA_SQS_URL: str = "changeme" # Third parties EPC_AUTH_TOKEN: str = "changeme" diff --git a/etl/hubspot/scripts/scraper/main.py b/etl/hubspot/scripts/scraper/main.py index 31945705..ea79bc18 100644 --- a/etl/hubspot/scripts/scraper/main.py +++ b/etl/hubspot/scripts/scraper/main.py @@ -2,6 +2,7 @@ import json import boto3 from typing import Any, Dict, Optional +from backend.app.config import get_settings from etl.hubspot.hubspotClient import HubspotClient from etl.hubspot.hubspotDataTodB import CompanyData, HubspotDataToDb from etl.hubspot.hubspot_deal_differ import HubspotDealDiffer @@ -21,7 +22,7 @@ def handler(body: dict[str, Any], context: Any) -> None: hubspot_client = HubspotClient() sqs_client = boto3.client("sqs") - PASHUB_TRIGGER_QUEUE_URL = "pashub_to_ara-queue-dev" # TODO: get from env var + PASHUB_TRIGGER_QUEUE_URL = get_settings().PASHUB_TO_ARA_SQS_URL payload = HubspotTriggerOrchestratorTriggerRequest.model_validate(body) hubspot_deal_id: str = payload.hubspot_deal_id diff --git a/infrastructure/terraform/lambda/hubspot_deal_etl/main.tf b/infrastructure/terraform/lambda/hubspot_deal_etl/main.tf index 6ce7a386..518e1e05 100644 --- a/infrastructure/terraform/lambda/hubspot_deal_etl/main.tf +++ b/infrastructure/terraform/lambda/hubspot_deal_etl/main.tf @@ -7,6 +7,14 @@ data "terraform_remote_state" "shared" { } } +data "terraform_remote_state" "pashub_to_ara" { + backend = "s3" + config = { + bucket = "pashub-to-ara-terraform-state" + key = "ev:/${var.stage}/terraform.tfstate" + region = "eu-west-2" + } +} data "aws_secretsmanager_secret_version" "db_credentials" { secret_id = "${var.stage}/assessment_model/db_credentials" @@ -39,6 +47,8 @@ module "hubspot_deal_etl" { DB_NAME = var.db_name DB_PORT = var.db_port HUBSPOT_API_KEY = var.hubspot_api_key + + PASHUB_TO_ARA_SQS_URL = data.terraform_remote_state.pashub_to_ara.pashhub_to_ara_queue_url } } diff --git a/infrastructure/terraform/lambda/pashub_to_ara/outputs.tf b/infrastructure/terraform/lambda/pashub_to_ara/outputs.tf new file mode 100644 index 00000000..738aa4fc --- /dev/null +++ b/infrastructure/terraform/lambda/pashub_to_ara/outputs.tf @@ -0,0 +1,4 @@ +output "pashhub_to_ara_queue_url" { + value = module.lambda.queue_url + description = "URL of the PasHub to Ara SQS queue" +} From f1f3b84cbdadcecd4010658f0b119295a805e4ee Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Thu, 9 Apr 2026 15:49:13 +0000 Subject: [PATCH 28/32] simplify photo upload logic --- etl/hubspot/hubspotDataTodB.py | 89 +++++------------------------ etl/hubspot/scripts/scraper/main.py | 9 ++- 2 files changed, 19 insertions(+), 79 deletions(-) diff --git a/etl/hubspot/hubspotDataTodB.py b/etl/hubspot/hubspotDataTodB.py index a50c99da..6763f19c 100644 --- a/etl/hubspot/hubspotDataTodB.py +++ b/etl/hubspot/hubspotDataTodB.py @@ -78,53 +78,6 @@ class HubspotDataToDb: .one_or_none() ) - def update_deal_with_checks( - self, - deal_in_db: HubspotDealData, - hubspot_client: HubspotClient, - hs_deal: Dict[str, str], - hs_company_id: Optional[str], - hs_listing: Optional[Dict[str, str]], - ) -> bool: - """ - Updates deal in database and handles major_condition_issue_photos file upload to S3 with integrity check. - """ - self.upsert_deal(hs_deal, hs_company_id, hs_listing, hubspot_client) - - # Handle photo upload if it exists but S3 URL is missing - if self._needs_photo_upload(deal_in_db): - print( - f"🖼️ Found photo for deal_id {deal_in_db.deal_id} — uploading to S3..." - ) - - photo_url = hs_deal.get("major_condition_issue_photos") - - if photo_url: - self._upload_photo_to_s3( - deal_in_db, - photo_url, - hubspot_client, - verify=True, - ) - - # persist change - with db_read_session() as session: - db_record = session.get(HubspotDealData, deal_in_db.id) - db_record.major_condition_issue_evidence_s3_url = ( - deal_in_db.major_condition_issue_evidence_s3_url - ) - session.add(db_record) - session.commit() - - return False - else: - print(f"⚠️ Photo URL missing for deal_id {deal_in_db.deal_id}") - - else: - print(f"✅ No update or upload required for deal_id {deal_in_db.deal_id}.") - - return True - def upsert_deal( self, deal_data: Dict[str, str], @@ -169,14 +122,6 @@ class HubspotDataToDb: session.refresh(new_record) return new_record - def _sha256(self, file_path: str) -> str: - """Compute SHA-256 checksum of a file.""" - sha256 = hashlib.sha256() - with open(file_path, "rb") as f: - for chunk in iter(lambda: f.read(8192), b""): - sha256.update(chunk) - return sha256.hexdigest() - def _update_existing_deal( self, existing: HubspotDealData, @@ -315,18 +260,20 @@ class HubspotDataToDb: def _handle_existing_photo_upload( self, - existing: HubspotDealData, + existing_deal: HubspotDealData, hubspot_client: HubspotClient, ): - if self._needs_photo_upload(existing): - fresh_deal = hubspot_client.from_deal_id_get_info(existing.deal_id) - photo_url = fresh_deal.get("major_condition_issue_photos") + # if self._needs_photo_upload(existing): - if not photo_url: - print(f"⚠️ Photo URL missing for deal_id {existing.deal_id}") - return + fresh_deal = hubspot_client.from_deal_id_get_info(existing_deal.deal_id) + fresh_photo_url = fresh_deal.get("major_condition_issue_photos") - self._upload_photo_to_s3(existing, photo_url, hubspot_client) + if not fresh_photo_url: + print(f"⚠️ Photo URL missing for deal_id {existing_deal.deal_id}") + return + + if fresh_photo_url != existing_deal.major_condition_issue_photos: + self._upload_photo_to_s3(existing_deal, fresh_photo_url, hubspot_client) def _handle_new_photo_upload( self, @@ -343,12 +290,11 @@ class HubspotDataToDb: def _upload_photo_to_s3( self, record: HubspotDealData, - photo_url: str, + hubspot_photo_url: str, hubspot_client: HubspotClient, - verify: bool = False, ): try: - local_file = hubspot_client.download_file_from_url(photo_url) + local_file = hubspot_client.download_file_from_url(hubspot_photo_url) s3_url = self.s3.upload_file( local_file, @@ -356,11 +302,6 @@ class HubspotDataToDb: prefix="hubspot/awaabs_law_evidence/", ) - if verify: - downloaded = self.s3.download_from_url(s3_url) - if self._sha256(local_file) != self._sha256(downloaded): - raise ValueError("File integrity check failed after S3 upload.") - record.major_condition_issue_evidence_s3_url = s3_url except Exception as e: @@ -369,8 +310,8 @@ class HubspotDataToDb: if "local_file" in locals() and os.path.exists(local_file): os.remove(local_file) - def _needs_photo_upload(self, deal: HubspotDealData) -> bool: + def _needs_photo_upload(self, old_deal: HubspotDealData) -> bool: return bool( - deal.major_condition_issue_photos - and not deal.major_condition_issue_evidence_s3_url + old_deal.major_condition_issue_photos + and not old_deal.major_condition_issue_evidence_s3_url ) diff --git a/etl/hubspot/scripts/scraper/main.py b/etl/hubspot/scripts/scraper/main.py index ea79bc18..f41ef154 100644 --- a/etl/hubspot/scripts/scraper/main.py +++ b/etl/hubspot/scripts/scraper/main.py @@ -63,12 +63,11 @@ def handler(body: dict[str, Any], context: Any) -> None: logger.info( f"Deal {hubspot_deal_id} has been changed, updating database..." ) - db_client.update_deal_with_checks( - deal_in_db=db_deal, + db_client.upsert_deal( + deal_data=hubspot_deal, + company=company, + listing=listing, hubspot_client=hubspot_client, - hs_deal=hubspot_deal, - hs_company_id=company, - hs_listing=listing, ) deal_changed = True From a495c930a1bdc0510f7339890467cc0da050268f Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Thu, 9 Apr 2026 15:49:37 +0000 Subject: [PATCH 29/32] remove unused import --- etl/hubspot/hubspotDataTodB.py | 1 - 1 file changed, 1 deletion(-) diff --git a/etl/hubspot/hubspotDataTodB.py b/etl/hubspot/hubspotDataTodB.py index 6763f19c..c24d5813 100644 --- a/etl/hubspot/hubspotDataTodB.py +++ b/etl/hubspot/hubspotDataTodB.py @@ -1,4 +1,3 @@ -import hashlib import os from sqlmodel import select from datetime import datetime, timezone From 757c2241132a6796c5c75133bd5b8c14466340c5 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Thu, 9 Apr 2026 15:54:33 +0000 Subject: [PATCH 30/32] add image update logging --- etl/hubspot/hubspotDataTodB.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/etl/hubspot/hubspotDataTodB.py b/etl/hubspot/hubspotDataTodB.py index c24d5813..b7171290 100644 --- a/etl/hubspot/hubspotDataTodB.py +++ b/etl/hubspot/hubspotDataTodB.py @@ -10,6 +10,10 @@ from etl.hubspot.s3_uploader import S3Uploader from backend.app.db.connection import db_read_session from backend.app.db.models.organisation import Organisation from etl.hubspot.utils import parse_hs_date +from utils.logger import setup_logger + + +logger = setup_logger() class HubspotDataToDb: @@ -272,7 +276,12 @@ class HubspotDataToDb: return if fresh_photo_url != existing_deal.major_condition_issue_photos: + logger.info( + f"Hubspot image URL changed from {existing_deal.major_condition_issue_photos} to {fresh_photo_url}" + ) self._upload_photo_to_s3(existing_deal, fresh_photo_url, hubspot_client) + else: + logger.info(f"Hubspot iamge URL unchanged: {fresh_photo_url}") def _handle_new_photo_upload( self, From 3123723e8b284811d5459befdb277ed2b77e695a Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Thu, 9 Apr 2026 16:25:44 +0000 Subject: [PATCH 31/32] =?UTF-8?q?differ=20handles=20missing=20timezone=20f?= =?UTF-8?q?rom=20hubspot=20=F0=9F=9F=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- etl/hubspot/hubspotDataTodB.py | 6 ++--- etl/hubspot/scripts/scraper/main.py | 7 +++++ etl/hubspot/tests/test_hubspot_deal_differ.py | 27 ++++++++++++++++++- 3 files changed, 36 insertions(+), 4 deletions(-) diff --git a/etl/hubspot/hubspotDataTodB.py b/etl/hubspot/hubspotDataTodB.py index b7171290..9756833b 100644 --- a/etl/hubspot/hubspotDataTodB.py +++ b/etl/hubspot/hubspotDataTodB.py @@ -101,11 +101,11 @@ class HubspotDataToDb: existing = session.exec(statement).first() if existing: + self._handle_existing_photo_upload(existing, hubspot_client) + print(f"🔄 Updating existing deal (deal_id={deal_id})") self._update_existing_deal(existing, deal_data, listing, company) - self._handle_existing_photo_upload(existing, hubspot_client) - session.add(existing) session.commit() session.refresh(existing) @@ -281,7 +281,7 @@ class HubspotDataToDb: ) self._upload_photo_to_s3(existing_deal, fresh_photo_url, hubspot_client) else: - logger.info(f"Hubspot iamge URL unchanged: {fresh_photo_url}") + logger.info(f"Hubspot image URL unchanged: {fresh_photo_url}") def _handle_new_photo_upload( self, diff --git a/etl/hubspot/scripts/scraper/main.py b/etl/hubspot/scripts/scraper/main.py index f41ef154..d754cbb1 100644 --- a/etl/hubspot/scripts/scraper/main.py +++ b/etl/hubspot/scripts/scraper/main.py @@ -104,3 +104,10 @@ def handler(body: dict[str, Any], context: Any) -> None: logger.info( f"Not Triggering PasHub file fetcher for HubSpot deal ID {hubspot_deal_id}" ) + + print("done") + + +if __name__ == "__main__": + handler({"hubspot_deal_id": "371470706915"}, "") + print("beep") diff --git a/etl/hubspot/tests/test_hubspot_deal_differ.py b/etl/hubspot/tests/test_hubspot_deal_differ.py index 74d3f057..9f41a5e6 100644 --- a/etl/hubspot/tests/test_hubspot_deal_differ.py +++ b/etl/hubspot/tests/test_hubspot_deal_differ.py @@ -1,4 +1,4 @@ -from datetime import datetime +from datetime import datetime, timezone from typing import Any, Dict import uuid @@ -352,6 +352,31 @@ def test_db_update_trigger__company_changed__returns_true() -> None: assert result is True +def test_db_update_trigger__missing_hubspot_timezone__returns_false() -> None: + deal_id = uuid.uuid4() + + old_deal = make_old_deal( + id=deal_id, + design_completion_date=datetime(2025, 11, 3, 0, 0, tzinfo=timezone.utc), + ) + + new_deal = make_new_deal( + deal_id, + design_completion_date=datetime(2025, 11, 3, 0, 0), + ) + + new_company = "new_company" + + result = HubspotDealDiffer.check_for_db_update_trigger( + new_deal=new_deal, + new_company=new_company, + new_listing=None, + old_deal=old_deal, + ) + + assert result is False + + def test_db_update_trigger__listing_changed__returns_true() -> None: deal_id = uuid.uuid4() From 9852aa2809ad61667da39c2d612cadb79d55f9b2 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Thu, 9 Apr 2026 16:40:47 +0000 Subject: [PATCH 32/32] =?UTF-8?q?differ=20handles=20missing=20timezone=20f?= =?UTF-8?q?rom=20hubspot=20=F0=9F=9F=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- etl/hubspot/hubspot_deal_differ.py | 4 ++++ etl/hubspot/tests/test_hubspot_deal_differ.py | 7 +++---- etl/hubspot/utils.py | 9 +++++++-- 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/etl/hubspot/hubspot_deal_differ.py b/etl/hubspot/hubspot_deal_differ.py index 4db303ab..b95b544c 100644 --- a/etl/hubspot/hubspot_deal_differ.py +++ b/etl/hubspot/hubspot_deal_differ.py @@ -103,6 +103,10 @@ class HubspotDealDiffer: if old_value != new_value: return True + # --- Time field --- + if old_deal.confirmed_survey_time != new_deal.get("confirmed_survey_time"): + return True + # No differences found return False diff --git a/etl/hubspot/tests/test_hubspot_deal_differ.py b/etl/hubspot/tests/test_hubspot_deal_differ.py index 9f41a5e6..69f7668b 100644 --- a/etl/hubspot/tests/test_hubspot_deal_differ.py +++ b/etl/hubspot/tests/test_hubspot_deal_differ.py @@ -362,14 +362,13 @@ def test_db_update_trigger__missing_hubspot_timezone__returns_false() -> None: new_deal = make_new_deal( deal_id, - design_completion_date=datetime(2025, 11, 3, 0, 0), + hs_object_id="1", + design_completion_date=datetime(2025, 11, 3, 0, 0).isoformat(), ) - new_company = "new_company" - result = HubspotDealDiffer.check_for_db_update_trigger( new_deal=new_deal, - new_company=new_company, + new_company=None, new_listing=None, old_deal=old_deal, ) diff --git a/etl/hubspot/utils.py b/etl/hubspot/utils.py index 9fbeae62..b7331f94 100644 --- a/etl/hubspot/utils.py +++ b/etl/hubspot/utils.py @@ -1,4 +1,4 @@ -from datetime import datetime +from datetime import datetime, timezone from typing import Optional @@ -6,6 +6,11 @@ def parse_hs_date(value: Optional[str]) -> Optional[datetime]: if not value: return None try: - return datetime.fromisoformat(value.replace("Z", "+00:00")) + dt = datetime.fromisoformat(value.replace("Z", "+00:00")) + + if dt.tzinfo is None: + return dt.replace(tzinfo=timezone.utc) + + return dt.astimezone(timezone.utc) except ValueError: return None