Merge pull request #1090 from Hestia-Homes/trigger-pashub-fetcher-lambda

Pashub fetcher: improve job ID extraction logic and write script to trigger deployed lambda
This commit is contained in:
Daniel Roth 2026-05-15 12:07:39 +01:00 committed by GitHub
commit d3a4365d6e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 202 additions and 5 deletions

View file

@ -74,6 +74,10 @@ class PashubClient:
logger.info(f"Getting UPRN for job ID {job_id}")
url = f"{self.base}/jobs/{job_id}"
logger.debug(
f"About to make API request with session headers: {self.session.headers}"
)
r = self.session.get(url)
if r.status_code == 401:
raise UnauthorizedError("Token expired or invalid")
@ -82,7 +86,10 @@ class PashubClient:
try:
return r.json()["uprn"]
except Exception:
except Exception as e:
logger.warning(
f"Failed to get UPRN for Job ID {job_id} with exception: {e}"
)
return None
def _select_latest_core_files(

View file

@ -1,11 +1,10 @@
import re
from typing import Optional
from pydantic import BaseModel
class PashubToAraTriggerRequest(BaseModel):
pashub_link: (
str # e.g. https://pashub.net/jobs/12345-abcd-1234-abcd-12345abcde/details
)
pashub_link: str # e.g. https://pashub.net/jobs/{id}/details, /jobs/{id}/evidence/view, /jobs/{id}
address: Optional[str] = None
sharepoint_link: Optional[str] = None
@ -17,4 +16,7 @@ class PashubToAraTriggerRequest(BaseModel):
@property
def pashub_job_id(self) -> str:
    """Return the job ID embedded in ``pashub_link``.

    The ID is the path segment that directly follows ``/jobs/``, so the
    link may point at the job itself (``/jobs/{id}``) or at any page
    beneath it (``/jobs/{id}/details``, ``/jobs/{id}/evidence/view``).
    A trailing query string or fragment is not treated as part of the ID.

    Raises:
        ValueError: if the link contains no ``/jobs/{id}`` segment.
    """
    # Stop at "/", "?" or "#" so that a bare job link carrying a query
    # string or fragment still yields just the ID.
    match = re.search(r"/jobs/([^/?#]+)", self.pashub_link)
    if not match:
        raise ValueError(f"No job ID found in PasHub link: {self.pashub_link}")
    return match.group(1)

View file

@ -0,0 +1,51 @@
import pytest
from backend.pashub_fetcher.pashub_to_ara_trigger_request import (
PashubToAraTriggerRequest,
)
def make_request(pashub_link: str) -> PashubToAraTriggerRequest:
    """Build a trigger request that carries only the given PasHub link."""
    request = PashubToAraTriggerRequest(pashub_link=pashub_link)
    return request
def test_pashub_job_id_extracts_id_from_details_link() -> None:
    """A ``/jobs/{id}/details`` link yields the job ID."""
    details_request = make_request("https://pashub.net/jobs/job-id-123/details")

    assert details_request.pashub_job_id == "job-id-123"
def test_pashub_job_id_raises_for_invalid_link() -> None:
    """Reading the job ID from a link with no ``/jobs/`` segment raises ``ValueError``."""
    dashboard_request = make_request("https://pashub.net/rcs-dashboard")

    with pytest.raises(ValueError):
        _ = dashboard_request.pashub_job_id
def test_pashub_job_id_extracts_id_from_bare_job_link() -> None:
    """A bare ``/jobs/{id}`` link (nothing after the ID) yields the job ID."""
    bare_request = make_request("https://pashub.net/jobs/job-id-123")

    assert bare_request.pashub_job_id == "job-id-123"
def test_pashub_job_id_extracts_id_from_evidence_view_link() -> None:
    """A ``/jobs/{id}/evidence/view`` link yields the job ID."""
    evidence_request = make_request("https://pashub.net/jobs/job-id-123/evidence/view")

    assert evidence_request.pashub_job_id == "job-id-123"

View file

@ -0,0 +1,137 @@
import json
import logging
import os
from typing import Any, Optional, cast
import boto3
from openpyxl import load_workbook
from backend.app.config import get_settings
from backend.pashub_fetcher.pashub_to_ara_trigger_request import (
PashubToAraTriggerRequest,
)
# Emit bare messages (no level/timestamp prefix) at INFO and above.
logging.basicConfig(level=logging.INFO, format="%(message)s")
logger: logging.Logger = logging.getLogger(__name__)
# When True, main() only logs each request it would send; nothing is sent to SQS.
DRY_RUN: bool = False
# Record IDs to trigger. main() keeps only requests whose hubspot_deal_id is in
# this set; an empty set disables the filter so every row with a link is used.
DEAL_ID_FILTER: frozenset[str] = frozenset(
{
"379452094688",
"379466504437",
"379660170452",
"380016925932",
"379848065216",
"379466504434",
"379452094690",
"379965924567",
"380016925923",
"379792072898",
"379654754502",
"379560262861",
"379969670369",
"379248717001",
"379971468493",
"379999888607",
"379606372580",
"379969603797",
"379967743213",
"379263155434",
"379855267025",
"379889899719",
"379071064307",
"379867925741",
}
)
# The workbook to read; resolved relative to this script's own directory.
EXCEL_PATH: str = os.path.join(
os.path.dirname(__file__),
"united-infrastructure-exports-all-deals-2026-05-14.xlsx",
)
def _stripped_or_none(value: Any) -> Optional[str]:
    """Return the cell value as a whitespace-stripped string, or None for an empty cell."""
    return str(value).strip() if value is not None else None


def _record_id_as_str(value: Any) -> Optional[str]:
    """Return a "Record ID" cell as a plain ID string, or None for an empty cell.

    A numeric cell may be read back as a float (e.g. ``379452094688.0``);
    stringifying that directly would append ``.0`` and the ID would then never
    match a plain digit string such as the entries of ``DEAL_ID_FILTER``.
    """
    if isinstance(value, float) and value.is_integer():
        return str(int(value))
    return _stripped_or_none(value)


def _build_requests(excel_path: str) -> list[PashubToAraTriggerRequest]:
    """Build one trigger request per row of the workbook that has a PasHub link.

    Reads the first worksheet of ``excel_path``. Row 1 is the header row and
    must contain the columns "PasHub link", "Record ID", "Deal Name" and
    "Deal Stage" (matched after stripping surrounding whitespace). Rows whose
    "PasHub link" cell is empty or whitespace-only are skipped.

    Args:
        excel_path: Path to the ``.xlsx`` export to read.

    Returns:
        The requests, in worksheet row order.

    Raises:
        KeyError: if any of the required columns is missing from the header row.
    """
    wb = load_workbook(excel_path, data_only=True)
    ws = wb.worksheets[0]

    # Map each header name to its 0-based position within a row tuple.
    header_row = next(ws.iter_rows(min_row=1, max_row=1, values_only=True), ())
    headers: dict[str, int] = {
        str(header_val).strip(): index
        for index, header_val in enumerate(header_row)
        if header_val is not None
    }

    required = ("PasHub link", "Record ID", "Deal Name", "Deal Stage")
    missing = [name for name in required if name not in headers]
    if missing:
        # Same exception type as a plain dict lookup, but say which file and columns.
        raise KeyError(
            f"Missing required column(s) in {excel_path}: {', '.join(missing)}"
        )
    pashub_idx, record_id_idx, deal_name_idx, deal_stage_idx = (
        headers[name] for name in required
    )

    requests: list[PashubToAraTriggerRequest] = []
    for row in ws.iter_rows(min_row=2, values_only=True):
        pashub_link_raw = row[pashub_idx]
        if not pashub_link_raw:
            continue
        pashub_link: str = str(pashub_link_raw).strip()
        if not pashub_link:
            # A whitespace-only cell carries no usable link.
            continue

        requests.append(
            PashubToAraTriggerRequest(
                pashub_link=pashub_link,
                hubspot_deal_id=_record_id_as_str(row[record_id_idx]),
                address=_stripped_or_none(row[deal_name_idx]),
                deal_stage=_stripped_or_none(row[deal_stage_idx]),
            )
        )
    return requests
def main() -> None:
    """Queue one SQS message per selected row of the workbook at ``EXCEL_PATH``.

    Requests are built from the workbook and, when ``DEAL_ID_FILTER`` is
    non-empty, narrowed to those whose ``hubspot_deal_id`` is in it. Each
    remaining request is logged and, unless ``DRY_RUN`` is set, sent to the
    queue at ``PASHUB_TO_ARA_SQS_URL`` as a JSON body. The SQS client and the
    settings are created even in a dry run. A final count is printed.
    """
    all_requests: list[PashubToAraTriggerRequest] = _build_requests(EXCEL_PATH)
    # An empty filter means "no filtering".
    trigger_requests: list[PashubToAraTriggerRequest] = (
        [r for r in all_requests if r.hubspot_deal_id in DEAL_ID_FILTER]
        if DEAL_ID_FILTER
        else all_requests
    )

    sqs: Any = cast(Any, boto3.client("sqs"))  # type: ignore[reportUnknownMemberType]
    queue_url: str = get_settings().PASHUB_TO_ARA_SQS_URL

    # DRY_RUN is fixed for the whole run, so the wording is chosen once.
    if DRY_RUN:
        action, label = "DRY RUN", "would send"
    else:
        action, label = "SENDING", "sent"

    count: int = 0
    for request in trigger_requests:
        logger.info(
            f"[{action}] deal_id={request.hubspot_deal_id} pashub_link={request.pashub_link}"
        )
        if not DRY_RUN:
            body: str = json.dumps(request.model_dump())
            response: dict[str, Any] = sqs.send_message(
                QueueUrl=queue_url,
                MessageBody=body,
            )
            message_id: str = response["MessageId"]
            logger.info(f" MessageId: {message_id}")
        count += 1

    print(f"{count} messages {label}")


if __name__ == "__main__":
    main()