Merge branch 'main' into feature/ecmk-to-ara

This commit is contained in:
Daniel Roth 2026-04-01 16:26:36 +00:00
commit f3ebe122f8
11 changed files with 112 additions and 15 deletions

View file

@ -80,6 +80,8 @@ on:
required: false required: false
TF_VAR_pashub_password: TF_VAR_pashub_password:
required: false required: false
TF_VAR_hubspot_api_key:
required: false
jobs: jobs:
deploy: deploy:
runs-on: ubuntu-latest runs-on: ubuntu-latest
@ -146,6 +148,7 @@ jobs:
TF_VAR_social_housing_wave_3_sharepoint_id: ${{ secrets.TF_VAR_social_housing_wave_3_sharepoint_id }} TF_VAR_social_housing_wave_3_sharepoint_id: ${{ secrets.TF_VAR_social_housing_wave_3_sharepoint_id }}
TF_VAR_pashub_email: ${{ secrets.TF_VAR_pashub_email }} TF_VAR_pashub_email: ${{ secrets.TF_VAR_pashub_email }}
TF_VAR_pashub_password: ${{ secrets.TF_VAR_pashub_password }} TF_VAR_pashub_password: ${{ secrets.TF_VAR_pashub_password }}
TF_VAR_hubspot_api_key: ${{ secrets.TF_VAR_hubspot_api_key }}
run: | run: |
ECR_REPO_URL_VAR="" ECR_REPO_URL_VAR=""
if [[ -n "${{ inputs.ecr_repo }}" ]]; then if [[ -n "${{ inputs.ecr_repo }}" ]]; then
@ -191,6 +194,7 @@ jobs:
TF_VAR_social_housing_wave_3_sharepoint_id: ${{ secrets.TF_VAR_social_housing_wave_3_sharepoint_id }} TF_VAR_social_housing_wave_3_sharepoint_id: ${{ secrets.TF_VAR_social_housing_wave_3_sharepoint_id }}
TF_VAR_pashub_email: ${{ secrets.TF_VAR_pashub_email }} TF_VAR_pashub_email: ${{ secrets.TF_VAR_pashub_email }}
TF_VAR_pashub_password: ${{ secrets.TF_VAR_pashub_password }} TF_VAR_pashub_password: ${{ secrets.TF_VAR_pashub_password }}
TF_VAR_hubspot_api_key: ${{ secrets.TF_VAR_hubspot_api_key }}
run: | run: |
EXTRA_VARS="" EXTRA_VARS=""
if [[ -n "${{ inputs.ecr_repo }}" ]]; then if [[ -n "${{ inputs.ecr_repo }}" ]]; then

View file

@ -518,6 +518,7 @@ jobs:
TF_VAR_db_host: ${{ secrets.DEV_DB_HOST }} TF_VAR_db_host: ${{ secrets.DEV_DB_HOST }}
TF_VAR_db_name: ${{ secrets.DEV_DB_NAME }} TF_VAR_db_name: ${{ secrets.DEV_DB_NAME }}
TF_VAR_db_port: ${{ secrets.DEV_DB_PORT }} TF_VAR_db_port: ${{ secrets.DEV_DB_PORT }}
TF_VAR_hubspot_api_key: ${{ secrets.HUBSPOT_API_KEY }}
AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }} AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }} AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
AWS_REGION: ${{ secrets.DEV_AWS_REGION }} AWS_REGION: ${{ secrets.DEV_AWS_REGION }}

View file

@ -63,6 +63,8 @@ class HubspotDealData(SQLModel, table=True):
surveyor: Optional[str] = Field(default=None) surveyor: Optional[str] = Field(default=None)
confirmed_survey_date: Optional[datetime] = Field(default=None) confirmed_survey_date: Optional[datetime] = Field(default=None)
confirmed_survey_time: Optional[str] = Field(default=None) confirmed_survey_time: Optional[str] = Field(default=None)
surveyed_date: Optional[datetime] = Field(default=None)
design_type: Optional[str] = Field(default=None)
created_at: datetime = Field( created_at: datetime = Field(
sa_column=Column( sa_column=Column(

View file

@ -5,7 +5,8 @@ from typing import Callable, Any
from uuid import UUID from uuid import UUID
import json import json
from backend.app.db.functions.tasks.Tasks import SubTaskInterface from backend.app.db.functions.tasks.Tasks import SubTaskInterface, TasksInterface
from utils.logger import setup_logger
def subtask_handler(): def subtask_handler():
@ -93,3 +94,76 @@ def subtask_handler():
return wrapper return wrapper
return decorator return decorator
def task_handler():
"""
Decorator that wraps a Lambda handler and automatically:
- Parses body from the first SQS record (or uses the event dict directly)
- Creates a fresh Task + SubTask in the database
- Marks the subtask as in progress
- Executes the handler, passing the parsed body
- Marks complete on success, failed on exception (and re-raises)
"""
def decorator(func: Callable[..., Any]):
task_source = f"{func.__module__}.{func.__qualname__}"
@wraps(func)
def wrapper(event: dict[str, Any], context: Any, *args, **kwargs):
logger = setup_logger()
# Parse body: Records-style SQS or plain dict event
if "Records" in event:
raw_body = event["Records"][0].get("body", {})
if isinstance(raw_body, str):
try:
body = json.loads(raw_body)
except Exception:
body = {}
else:
body = raw_body or {}
else:
body = event
# Create fresh task + subtask
logger.info("Creating task for source: %s", task_source)
task_id, subtask_id = TasksInterface.create_task(
task_source=task_source,
inputs=body,
)
logger.info("Created task_id=%s subtask_id=%s", task_id, subtask_id)
interface = SubTaskInterface()
interface.update_subtask_status(
subtask_id=subtask_id,
status="in progress",
)
try:
result = func(body, context, *args, **kwargs)
interface.update_subtask_status(
subtask_id=subtask_id,
status="complete",
outputs={"result": result} if result else None,
)
logger.info("Task %s completed successfully", task_id)
return result
except Exception as e:
logger.exception("Task %s failed: %s", task_id, e)
interface.update_subtask_status(
subtask_id=subtask_id,
status="failed",
outputs={"error": str(e)},
)
raise
return wrapper
return decorator

View file

@ -231,6 +231,8 @@ class HubspotClient:
"surveyor", "surveyor",
"confirmed_survey_date", "confirmed_survey_date",
"confirmed_survey_time", "confirmed_survey_time",
"surveyed_date",
"design_type",
], ],
) )

View file

@ -257,6 +257,14 @@ class HubspotDataToDb:
deal_in_db.confirmed_survey_time == hs_deal.get("confirmed_survey_time"), deal_in_db.confirmed_survey_time == hs_deal.get("confirmed_survey_time"),
"confirmed_survey_time mismatch", "confirmed_survey_time mismatch",
), ),
soft_assert(
deal_in_db.surveyed_date == self._parse_hs_date(hs_deal.get("surveyed_date")),
"surveyed_date mismatch",
),
soft_assert(
deal_in_db.design_type == hs_deal.get("design_type"),
"design_type mismatch",
),
] ]
# If discrepancies found, update from HubSpot # If discrepancies found, update from HubSpot
@ -380,6 +388,8 @@ class HubspotDataToDb:
"surveyor": deal_data.get("surveyor"), "surveyor": deal_data.get("surveyor"),
"confirmed_survey_date": self._parse_hs_date(deal_data.get("confirmed_survey_date")), "confirmed_survey_date": self._parse_hs_date(deal_data.get("confirmed_survey_date")),
"confirmed_survey_time": deal_data.get("confirmed_survey_time"), "confirmed_survey_time": deal_data.get("confirmed_survey_time"),
"surveyed_date": self._parse_hs_date(deal_data.get("surveyed_date")),
"design_type": deal_data.get("design_type"),
}.items(): }.items():
setattr(existing, attr, value or getattr(existing, attr)) setattr(existing, attr, value or getattr(existing, attr))
@ -462,6 +472,8 @@ class HubspotDataToDb:
surveyor=deal_data.get("surveyor"), surveyor=deal_data.get("surveyor"),
confirmed_survey_date=self._parse_hs_date(deal_data.get("confirmed_survey_date")), confirmed_survey_date=self._parse_hs_date(deal_data.get("confirmed_survey_date")),
confirmed_survey_time=deal_data.get("confirmed_survey_time"), confirmed_survey_time=deal_data.get("confirmed_survey_time"),
surveyed_date=self._parse_hs_date(deal_data.get("surveyed_date")),
design_type=deal_data.get("design_type"),
) )
# Handle upload at insert time # Handle upload at insert time

View file

@ -1,6 +1,7 @@
from etl.hubspot.hubspotClient import HubspotClient, Companies, Pipeline from etl.hubspot.hubspotClient import HubspotClient, Companies, Pipeline
from etl.hubspot.scripts.scraper.main import handler from etl.hubspot.scripts.scraper.main import handler
from tqdm import tqdm from tqdm import tqdm
import json
PIPELINE_ID = Pipeline.OPERATIONS_SOCIAL_HOUSING.value PIPELINE_ID = Pipeline.OPERATIONS_SOCIAL_HOUSING.value
@ -29,7 +30,7 @@ def bulk_load(companies: list[Companies] | None = None) -> None:
continue continue
deal_bar.set_postfix({"status": "uploading", "deal": deal_id}) deal_bar.set_postfix({"status": "uploading", "deal": deal_id})
handler({"hubspot_deal_id": deal_id}, context=None) handler({"Records": [{"body": json.dumps({"hubspot_deal_id": deal_id})}]}, context=None)
processed += 1 processed += 1
deal_bar.set_postfix({"status": "done", "deal": deal_id}) deal_bar.set_postfix({"status": "done", "deal": deal_id})

View file

@ -9,16 +9,13 @@
from etl.hubspot.hubspotClient import HubspotClient from etl.hubspot.hubspotClient import HubspotClient
from etl.hubspot.hubspotDataTodB import HubspotDataToDb from etl.hubspot.hubspotDataTodB import HubspotDataToDb
from backend.utils.subtasks import task_handler
from typing import Any from typing import Any
import json
# @subtask_handler() TODO: Do this without subtask_handler but task_handler() that creates task_id and subtask_id @task_handler()
def handler(body: dict[str, Any], context: Any, local: bool = False) -> None: def handler(body: dict[str, Any], context: Any) -> None:
if local is True:
body = {
"hubspot_deal_id": "254427203793",
}
hubspot_deal_id = body.get("hubspot_deal_id", "") hubspot_deal_id = body.get("hubspot_deal_id", "")
if hubspot_deal_id == "": if hubspot_deal_id == "":
@ -36,5 +33,3 @@ def handler(body: dict[str, Any], context: Any, local: bool = False) -> None:
else: else:
deal, company, listing = hubspot.get_deal_info_for_db(hubspot_deal_id) deal, company, listing = hubspot.get_deal_info_for_db(hubspot_deal_id)
dbloader.upsert_deal(deal, company, listing, hubspot) dbloader.upsert_deal(deal, company, listing, hubspot)
print("Finsihed running")

View file

@ -38,6 +38,7 @@ module "hubspot_deal_etl" {
DB_HOST = var.db_host DB_HOST = var.db_host
DB_NAME = var.db_name DB_NAME = var.db_name
DB_PORT = var.db_port DB_PORT = var.db_port
HUBSPOT_API_KEY = var.hubspot_api_key
} }
} }

View file

@ -41,6 +41,11 @@ variable "db_host" {
type = string type = string
} }
variable "hubspot_api_key" {
type = string
}
variable "db_name" { variable "db_name" {
type = string type = string
} }

View file

@ -26,13 +26,13 @@ from backend.app.db.functions.materials_functions import get_materials
from collections import defaultdict from collections import defaultdict
from sqlalchemy import func from sqlalchemy import func
PORTFOLIO_ID = 640 PORTFOLIO_ID = 656
SCENARIOS = [1154] SCENARIOS = [1177]
scenario_names = { scenario_names = {
1154: "EPC - 10k Budget", 1177: "EPC C; Proposed Measures",
} }
project_name = "First Charterhouse Investments" project_name = "Walsall Council | WH:LG"
def get_data(portfolio_id, scenario_ids): def get_data(portfolio_id, scenario_ids):