hubspot projects data is scraped

This commit is contained in:
Jun-te Kim 2026-05-28 10:54:15 +00:00
parent 37bcde435f
commit b72a0c2be4
10 changed files with 273 additions and 22 deletions

View file

@ -17,6 +17,7 @@ class HubspotDealData(SQLModel, table=True):
dealstage: Optional[str] = Field(default=None)
company_id: Optional[str] = Field(default=None)
project_code: Optional[str] = Field(default=None)
project_id: Optional[str] = Field(default=None)
# HubSpot custom properties
landlord_property_id: Optional[str] = Field(default=None)

View file

@ -0,0 +1,34 @@
import uuid
from sqlmodel import SQLModel, Field, Column, text
from datetime import datetime
from typing import Optional
from sqlalchemy import DateTime
from sqlalchemy.sql import func
class HubspotProjectData(SQLModel, table=True):
__tablename__ = "hubspot_projects_data"
id: uuid.UUID = Field(default_factory=uuid.uuid4, primary_key=True)
project_id: str = Field(index=True, nullable=False, unique=True)
name: Optional[str] = Field(default=None)
created_at: Optional[datetime] = Field(
sa_column=Column(
DateTime(timezone=True),
server_default=text("(NOW() AT TIME ZONE 'utc')"),
nullable=False,
),
default=func.now(),
)
updated_at: Optional[datetime] = Field(
sa_column=Column(
DateTime(timezone=True),
server_default=text("(NOW() AT TIME ZONE 'utc')"),
onupdate=func.now(),
nullable=False,
),
default=func.now(),
)

View file

@ -31,6 +31,7 @@ from hubspot.crm.associations.v4.models import ( # type: ignore[reportMissingTy
from backend.app.config import get_settings
from etl.hubspot.company_data import CompanyData
from etl.hubspot.project_data import ProjectData
from utils.logger import setup_logger
import mimetypes
@ -239,6 +240,47 @@ class HubspotClient:
self.logger.info(f"Listing info for deal {deal_id}: {listing_info}")
return listing_info
def from_deal_id_get_associated_project(
self, deal_id: str
) -> Optional[ProjectData]:
"""
Get the associated project (custom object "0-970") for a given deal.
Returns a ProjectData with "project_id" and "name", or None if not found.
"""
associations_api: AssociationsBasicApi = self.client.crm.associations.v4.basic_api # type: ignore[reportUnknownMemberType]
projects_api: ObjectsBasicApi = self.client.crm.objects.basic_api # type: ignore[reportUnknownMemberType] # works for custom objects like "project"
response: AssociationsPageResponse = self._call_with_retry(
lambda: associations_api.get_page( # type: ignore[reportUnknownMemberType]
object_type="deals",
object_id=deal_id,
to_object_type="0-970",
limit=1,
)
)
results: list[AssociationsResult] = cast(list[AssociationsResult], response.results) # type: ignore[reportUnknownMemberType]
if not results:
self.logger.info(f"No project association found for deal {deal_id}")
return None
first: AssociationsResult = results[0]
project_id: str = cast(str, first.to_object_id) # type: ignore[reportUnknownMemberType, reportUnknownVariableType]
self.logger.info(f"Associated project ID for deal {deal_id}: {project_id}")
project: HubspotObject = self._call_with_retry(
lambda: projects_api.get_by_id( # type: ignore[reportUnknownMemberType]
object_type="0-970",
object_id=project_id,
properties=["hs_name"],
)
)
project_properties: dict[str, str] = cast(dict[str, str], project.properties) # type: ignore[reportUnknownMemberType]
return ProjectData(
project_id=project_id, name=project_properties.get("hs_name")
)
def from_deal_id_get_info(
self, deal_id: str
) -> dict[str, str]: # TODO: add dataclass for this
@ -325,17 +367,22 @@ class HubspotClient:
self.logger.warning(f"Failed to fetch HubSpot owner {owner_id}")
return None
def get_deal_and_company_and_listing(
def get_deal_and_company_and_listing_and_project(
self, deal_id: str
) -> tuple[dict[str, str], Optional[str], Optional[dict[str, str]]]:
) -> tuple[
dict[str, str], Optional[str], Optional[dict[str, str]], Optional[ProjectData]
]:
deal: dict[str, str] = self.from_deal_id_get_info(deal_id)
company: Optional[str] = self.from_deal_id_get_associated_company_id(deal_id)
listing: Optional[dict[str, str]] = self.from_deal_id_get_associated_listing(
deal_id
)
project: Optional[ProjectData] = self.from_deal_id_get_associated_project(
deal_id
)
return deal, company, listing
return deal, company, listing, project
def get_company_information(self, company_id: str) -> CompanyData:
companies_api: CompaniesBasicApi = self.client.crm.companies.basic_api # type: ignore[reportUnknownMemberType]

View file

@ -4,9 +4,11 @@ from datetime import datetime, timezone
from typing import Dict, Optional
from backend.app.db.models.hubspot_deal_data import HubspotDealData
from backend.app.db.models.hubspot_project_data import HubspotProjectData
from backend.app.db.models.hubspot_user import HubspotUser
from etl.hubspot.company_data import CompanyData
from etl.hubspot.hubspotClient import HubspotClient
from etl.hubspot.project_data import ProjectData
from etl.hubspot.s3_uploader import S3Uploader
from backend.app.db.connection import db_read_session
from backend.app.db.models.organisation import Organisation
@ -64,6 +66,30 @@ class HubspotDataToDb:
session.commit()
return record
def upsert_project(self, project: ProjectData) -> HubspotProjectData:
"""Upserts a project record. Updates if project_id exists, otherwise creates new."""
with db_read_session() as session:
project_id = project["project_id"]
name = project.get("name")
existing = session.exec(
select(HubspotProjectData).where(
HubspotProjectData.project_id == project_id
)
).first()
if existing:
existing.name = name
existing.updated_at = datetime.now(timezone.utc)
session.add(existing)
record = existing
else:
record = HubspotProjectData(project_id=project_id, name=name)
session.add(record)
session.commit()
return record
def find_all_deals_with_company_id(self, company_id: str):
"""Returns a list of deals for a given company_id."""
with db_read_session() as session:
@ -87,6 +113,7 @@ class HubspotDataToDb:
company: Optional[str],
listing: Optional[dict[str, str]],
hubspot_client: HubspotClient,
project: Optional[ProjectData] = None,
):
"""
Inserts or updates a deal record.
@ -111,7 +138,9 @@ class HubspotDataToDb:
self._handle_existing_photo_upload(existing, hubspot_client)
print(f"🔄 Updating existing deal (deal_id={deal_id})")
self._update_existing_deal(existing, deal_data, listing, company)
self._update_existing_deal(
existing, deal_data, listing, company, project
)
session.add(existing)
session.commit()
@ -121,7 +150,7 @@ class HubspotDataToDb:
else:
print(f"🆕 Inserting new deal (deal_id={deal_id})")
new_record: HubspotDealData = self._build_new_deal(
deal_id, deal_data, listing, company
deal_id, deal_data, listing, company, project
)
# Handle upload at insert time
@ -170,10 +199,12 @@ class HubspotDataToDb:
deal_data: Dict[str, str],
listing: Optional[dict[str, str]],
company: Optional[str],
project: Optional[ProjectData] = None,
):
for attr, value in {
"dealname": deal_data.get("dealname"),
"dealstage": deal_data.get("dealstage"),
"project_id": project["project_id"] if project else None,
"listing_id": listing.get("listing_id", None) if listing else None,
"landlord_property_id": (
listing.get("owner_property_id", None) if listing else None
@ -270,11 +301,13 @@ class HubspotDataToDb:
deal_data: Dict[str, str],
listing: Optional[dict[str, str]],
company: Optional[str],
project: Optional[ProjectData] = None,
) -> HubspotDealData:
return HubspotDealData(
deal_id=deal_id,
dealname=deal_data.get("dealname"),
dealstage=deal_data.get("dealstage"),
project_id=project["project_id"] if project else None,
listing_id=listing.get("listing_id") if listing else None,
landlord_property_id=(
listing.get("owner_property_id") if listing else None

View file

@ -0,0 +1,6 @@
from typing import Optional, TypedDict
class ProjectData(TypedDict):
project_id: str
name: Optional[str]

View file

@ -14,7 +14,7 @@ payload = {
{
"task_id": "e31f2f21-175b-4a91-a3ec-a6baa325e917",
"sub_task_id": "8673913b-1a88-42d7-8578-0449123d94b0",
"hubspot_deal_id": "254427203793",
"hubspot_deal_id": "467396027619",
}
)
}

View file

@ -5,6 +5,7 @@ from typing import Any, Dict, Optional
from backend.app.config import get_settings
from etl.hubspot.hubspotClient import HubspotClient
from etl.hubspot.hubspotDataTodB import CompanyData, HubspotDataToDb
from etl.hubspot.project_data import ProjectData
from etl.hubspot.hubspot_deal_differ import HubspotDealDiffer
from etl.hubspot.hubspot_trigger_orchestrator_trigger_request import (
HubspotTriggerOrchestratorTriggerRequest,
@ -34,9 +35,10 @@ def handler(body: dict[str, Any], context: Any) -> None:
hubspot_deal: Dict[str, str]
company: Optional[str]
listing: Optional[dict[str, str]]
project: Optional[ProjectData]
hubspot_deal, company, listing = hubspot_client.get_deal_and_company_and_listing(
hubspot_deal_id
hubspot_deal, company, listing, project = (
hubspot_client.get_deal_and_company_and_listing_and_project(hubspot_deal_id)
)
deal_changed = False
@ -47,7 +49,10 @@ def handler(body: dict[str, Any], context: Any) -> None:
db_client: HubspotDataToDb = HubspotDataToDb()
db_client.upsert_organisation(company_data)
db_client.upsert_deal(hubspot_deal, company, listing, hubspot_client)
if project:
db_client.upsert_project(project)
db_client.upsert_deal(hubspot_deal, company, listing, hubspot_client, project)
# ==============================
# Orchestration of other lambdas
@ -77,11 +82,15 @@ def handler(body: dict[str, Any], context: Any) -> None:
logger.info(
f"Deal {hubspot_deal_id} has been changed, updating database..."
)
if project:
db_client.upsert_project(project)
db_client.upsert_deal(
deal_data=hubspot_deal,
company=company,
listing=listing,
hubspot_client=hubspot_client,
project=project,
)
deal_changed = True

View file

@ -71,7 +71,9 @@ class TestHubspotClientIntegration:
def test_get_deal_info_for_db(self, client: HubspotClient):
deal_id: str = "263490768079"
deal, company, listing = client.get_deal_and_company_and_listing(deal_id)
deal, company, listing, project = (
client.get_deal_and_company_and_listing_and_project(deal_id)
)
assert "dealname" in deal
assert "dealstage" in deal
@ -81,6 +83,8 @@ class TestHubspotClientIntegration:
assert listing is None or "hs_object_id" in listing
assert project is None or "project_id" in project
def test_get_company_information(self, client: HubspotClient):
company_id: str = Companies.APPLE.value

View file

@ -1,4 +1,5 @@
from etl.hubspot.hubspotDataTodB import HubspotDataToDb
from etl.hubspot.project_data import ProjectData
from backend.app.db.models.hubspot_deal_data import HubspotDealData
@ -32,3 +33,55 @@ def test_update_existing_deal__designer_set__overwrites_existing() -> None:
)
assert existing.designer == "New Designer"
def test_build_new_deal__project_sets_project_id() -> None:
new_deal = _make_instance()._build_new_deal(
deal_id="MOCK_DEAL_ID",
deal_data={},
listing=None,
company=None,
project=ProjectData(project_id="proj-1", name="Project One"),
)
assert new_deal.project_id == "proj-1"
def test_build_new_deal__no_project__project_id_none() -> None:
new_deal = _make_instance()._build_new_deal(
deal_id="MOCK_DEAL_ID",
deal_data={},
listing=None,
company=None,
project=None,
)
assert new_deal.project_id is None
def test_update_existing_deal__project_sets_project_id() -> None:
existing = HubspotDealData(deal_id="MOCK_DEAL_ID")
_make_instance()._update_existing_deal(
existing=existing,
deal_data={},
listing=None,
company=None,
project=ProjectData(project_id="proj-1", name="Project One"),
)
assert existing.project_id == "proj-1"
def test_update_existing_deal__no_project__clears_project_id() -> None:
existing = HubspotDealData(deal_id="MOCK_DEAL_ID", project_id="old-proj")
_make_instance()._update_existing_deal(
existing=existing,
deal_data={},
listing=None,
company=None,
project=None,
)
assert existing.project_id is None

View file

@ -12,6 +12,7 @@ DEAL_ID = "999"
PASHUB_LINK = "https://pashub.example.com/deal/999"
MAGICPLAN_QUEUE_URL = "https://sqs.eu-west-2.amazonaws.com/123/magic-plan-dev"
PASHUB_QUEUE_URL = "https://sqs.test/pashub"
PROJECT = {"project_id": "proj-1", "name": "Project One"}
def make_hubspot_deal(**kwargs: Any) -> Dict[str, Any]:
@ -35,7 +36,8 @@ def run_handler(
hubspot_deal: Dict[str, Any],
db_deal: Optional[HubspotDealData],
listing: Optional[dict],
) -> MagicMock:
project: Optional[dict] = None,
) -> tuple[MagicMock, MagicMock]:
mock_sqs = MagicMock()
mock_sqs.send_message.return_value = {"MessageId": "test-id"}
@ -47,10 +49,12 @@ def run_handler(
):
mock_db_cls.return_value.find_deal_with_deal_id.return_value = db_deal
mock_db_cls.return_value.upsert_deal.return_value = None
mock_hs_cls.return_value.get_deal_and_company_and_listing.return_value = (
mock_hs = mock_hs_cls.return_value
mock_hs.get_deal_and_company_and_listing_and_project.return_value = (
hubspot_deal,
None,
listing,
project,
)
mock_boto3.client.return_value = mock_sqs
mock_settings.return_value.MAGICPLAN_SQS_URL = MAGICPLAN_QUEUE_URL
@ -58,7 +62,7 @@ def run_handler(
handler.__wrapped__({"hubspot_deal_id": DEAL_ID}, "")
return mock_sqs
return mock_sqs, mock_db_cls.return_value
# ====================================
@ -72,7 +76,7 @@ def test_new_deal_surveyed__sends_magicplan_sqs() -> None:
listing = {"national_uprn": UPRN}
# Act
mock_sqs = run_handler(hubspot_deal=hubspot_deal, db_deal=None, listing=listing)
mock_sqs, _ = run_handler(hubspot_deal=hubspot_deal, db_deal=None, listing=listing)
# Assert
mock_sqs.send_message.assert_called_once_with(
@ -88,7 +92,7 @@ def test_new_deal_not_surveyed__no_magicplan_sqs() -> None:
hubspot_deal = make_hubspot_deal(outcome="assessed")
# Act
mock_sqs = run_handler(hubspot_deal=hubspot_deal, db_deal=None, listing=None)
mock_sqs, _ = run_handler(hubspot_deal=hubspot_deal, db_deal=None, listing=None)
# Assert
mock_sqs.send_message.assert_not_called()
@ -99,7 +103,7 @@ def test_new_deal_surveyed_no_listing__magicplan_sqs_uprn_is_null() -> None:
hubspot_deal = make_hubspot_deal(outcome="surveyed")
# Act
mock_sqs = run_handler(hubspot_deal=hubspot_deal, db_deal=None, listing=None)
mock_sqs, _ = run_handler(hubspot_deal=hubspot_deal, db_deal=None, listing=None)
# Assert
mock_sqs.send_message.assert_called_once_with(
@ -122,7 +126,7 @@ def test_existing_deal_surveyed_transition__sends_magicplan_sqs() -> None:
listing = {"national_uprn": UPRN}
# Act
mock_sqs = run_handler(hubspot_deal=hubspot_deal, db_deal=db_deal, listing=listing)
mock_sqs, _ = run_handler(hubspot_deal=hubspot_deal, db_deal=db_deal, listing=listing)
# Assert
mock_sqs.send_message.assert_called_once_with(
@ -139,7 +143,7 @@ def test_existing_deal_already_surveyed__no_magicplan_sqs() -> None:
hubspot_deal = make_hubspot_deal(outcome="surveyed", dealname="New Name")
# Act
mock_sqs = run_handler(hubspot_deal=hubspot_deal, db_deal=db_deal, listing=None)
mock_sqs, _ = run_handler(hubspot_deal=hubspot_deal, db_deal=db_deal, listing=None)
# Assert
mock_sqs.send_message.assert_not_called()
@ -155,7 +159,7 @@ def test_new_deal_with_pashub_link__sends_pashub_sqs() -> None:
hubspot_deal = make_hubspot_deal(pashub_link=PASHUB_LINK)
# Act
mock_sqs = run_handler(hubspot_deal=hubspot_deal, db_deal=None, listing=None)
mock_sqs, _ = run_handler(hubspot_deal=hubspot_deal, db_deal=None, listing=None)
# Assert
mock_sqs.send_message.assert_called_once_with(
@ -179,7 +183,7 @@ def test_new_deal_no_pashub_link__no_pashub_sqs() -> None:
hubspot_deal = make_hubspot_deal()
# Act
mock_sqs = run_handler(hubspot_deal=hubspot_deal, db_deal=None, listing=None)
mock_sqs, _ = run_handler(hubspot_deal=hubspot_deal, db_deal=None, listing=None)
# Assert
mock_sqs.send_message.assert_not_called()
@ -196,7 +200,7 @@ def test_existing_deal_pashub_link_added__sends_pashub_sqs() -> None:
hubspot_deal = make_hubspot_deal(pashub_link=PASHUB_LINK)
# Act
mock_sqs = run_handler(hubspot_deal=hubspot_deal, db_deal=db_deal, listing=None)
mock_sqs, _ = run_handler(hubspot_deal=hubspot_deal, db_deal=db_deal, listing=None)
# Assert
mock_sqs.send_message.assert_called_once_with(
@ -221,7 +225,67 @@ def test_existing_deal_pashub_link_unchanged__no_pashub_sqs() -> None:
hubspot_deal = make_hubspot_deal(pashub_link=PASHUB_LINK, dealname="New Name")
# Act
mock_sqs = run_handler(hubspot_deal=hubspot_deal, db_deal=db_deal, listing=None)
mock_sqs, _ = run_handler(hubspot_deal=hubspot_deal, db_deal=db_deal, listing=None)
# Assert
mock_sqs.send_message.assert_not_called()
# ====================================
# PROJECT upsert
# ====================================
def test_new_deal_with_project__upserts_project() -> None:
# Arrange
hubspot_deal = make_hubspot_deal()
# Act
_, mock_db = run_handler(
hubspot_deal=hubspot_deal, db_deal=None, listing=None, project=PROJECT
)
# Assert
mock_db.upsert_project.assert_called_once_with(PROJECT)
def test_new_deal_no_project__no_project_upsert() -> None:
# Arrange
hubspot_deal = make_hubspot_deal()
# Act
_, mock_db = run_handler(
hubspot_deal=hubspot_deal, db_deal=None, listing=None, project=None
)
# Assert
mock_db.upsert_project.assert_not_called()
def test_existing_deal_changed_with_project__upserts_project() -> None:
# Arrange
db_deal = make_db_deal(outcome="assessed")
hubspot_deal = make_hubspot_deal(outcome="surveyed")
# Act
_, mock_db = run_handler(
hubspot_deal=hubspot_deal, db_deal=db_deal, listing=None, project=PROJECT
)
# Assert
mock_db.upsert_project.assert_called_once_with(PROJECT)
def test_existing_deal_unchanged__no_project_upsert() -> None:
# Arrange: db deal matches hubspot deal, so the differ reports no change
db_deal = make_db_deal(dealname=DEAL_NAME)
hubspot_deal = make_hubspot_deal()
# Act
_, mock_db = run_handler(
hubspot_deal=hubspot_deal, db_deal=db_deal, listing=None, project=PROJECT
)
# Assert
mock_db.upsert_project.assert_not_called()
mock_db.upsert_deal.assert_not_called()