diff --git a/.github/workflows/_deploy_lambda.yml b/.github/workflows/_deploy_lambda.yml index 1cc7d462..0d702155 100644 --- a/.github/workflows/_deploy_lambda.yml +++ b/.github/workflows/_deploy_lambda.yml @@ -80,6 +80,10 @@ on: required: false TF_VAR_pashub_password: required: false + TF_VAR_pashub_coordination_email: + required: false + TF_VAR_pashub_coordination_password: + required: false TF_VAR_hubspot_api_key: required: false @@ -154,6 +158,8 @@ jobs: TF_VAR_social_housing_wave_3_sharepoint_id: ${{ secrets.TF_VAR_social_housing_wave_3_sharepoint_id }} TF_VAR_pashub_email: ${{ secrets.TF_VAR_pashub_email }} TF_VAR_pashub_password: ${{ secrets.TF_VAR_pashub_password }} + TF_VAR_pashub_coordination_email: ${{ secrets.TF_VAR_pashub_coordination_email }} + TF_VAR_pashub_coordination_password: ${{ secrets.TF_VAR_pashub_coordination_password }} TF_VAR_hubspot_api_key: ${{ secrets.TF_VAR_hubspot_api_key }} TF_VAR_magicplan_customer_id: ${{ secrets.TF_VAR_magicplan_customer_id }} TF_VAR_magicplan_api_key: ${{ secrets.TF_VAR_magicplan_api_key }} @@ -202,6 +208,8 @@ jobs: TF_VAR_social_housing_wave_3_sharepoint_id: ${{ secrets.TF_VAR_social_housing_wave_3_sharepoint_id }} TF_VAR_pashub_email: ${{ secrets.TF_VAR_pashub_email }} TF_VAR_pashub_password: ${{ secrets.TF_VAR_pashub_password }} + TF_VAR_pashub_coordination_email: ${{ secrets.TF_VAR_pashub_coordination_email }} + TF_VAR_pashub_coordination_password: ${{ secrets.TF_VAR_pashub_coordination_password }} TF_VAR_hubspot_api_key: ${{ secrets.TF_VAR_hubspot_api_key }} TF_VAR_magicplan_customer_id: ${{ secrets.TF_VAR_magicplan_customer_id }} TF_VAR_magicplan_api_key: ${{ secrets.TF_VAR_magicplan_api_key }} diff --git a/.github/workflows/_smoke_test_lambda.yml b/.github/workflows/_smoke_test_lambda.yml new file mode 100644 index 00000000..3fcf0de4 --- /dev/null +++ b/.github/workflows/_smoke_test_lambda.yml @@ -0,0 +1,85 @@ +name: Lambda smoke test + +on: + workflow_call: + inputs: + dockerfile_path: + required: true + 
type: string + build_context: + required: false + default: "." + type: string + service_name: + required: true + type: string + +jobs: + smoke-test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Download AWS Lambda RIE + run: | + mkdir -p ~/.aws-lambda-rie + curl -fsSL -o ~/.aws-lambda-rie/aws-lambda-rie \ + https://github.com/aws/aws-lambda-runtime-interface-emulator/releases/latest/download/aws-lambda-rie + chmod +x ~/.aws-lambda-rie/aws-lambda-rie + + - name: Build Lambda image + run: | + docker build \ + --platform linux/amd64 \ + -f ${{ inputs.dockerfile_path }} \ + -t ${{ inputs.service_name }}-smoke-test:latest \ + ${{ inputs.build_context }} + + - name: Start Lambda container + run: | + IMG=${{ inputs.service_name }}-smoke-test:latest + ENTRY=$(docker inspect --format='{{range .Config.Entrypoint}}{{.}} {{end}}' "$IMG") + CMD_ARGS=$(docker inspect --format='{{range .Config.Cmd}}{{.}} {{end}}' "$IMG") + + if echo "$ENTRY" | grep -q "lambda-entrypoint.sh"; then + # AWS base image — RIE is bundled + docker run -d --name ${{ inputs.service_name }}-smoke-test \ + -p 9000:8080 \ + "$IMG" + else + # Custom base — mount RIE from runner and re-wire entrypoint + docker run -d --name ${{ inputs.service_name }}-smoke-test \ + -v "$HOME/.aws-lambda-rie:/aws-lambda-rie" \ + -p 9000:8080 \ + --entrypoint /aws-lambda-rie/aws-lambda-rie \ + "$IMG" \ + $ENTRY $CMD_ARGS + fi + + - name: Invoke Lambda and check for import errors + run: | + response=$(curl -s --retry-connrefused --retry 15 --retry-delay 1 \ + -X POST \ + http://localhost:9000/2015-03-31/functions/function/invocations \ + -H "Content-Type: application/json" \ + -d '{"Records":[{"body":"{}"}]}') + + echo "Response: $response" + + if [ -z "$response" ]; then + echo "No response from Lambda RIE" + exit 1 + fi + + if echo "$response" | grep -qE 'ImportModuleError|ModuleNotFoundError|ImportError'; then + echo "Import error detected in handler" + exit 1 + fi + + - name: Dump container logs 
+ if: always() + run: docker logs ${{ inputs.service_name }}-smoke-test + + - name: Tear down container + if: always() + run: docker rm -f ${{ inputs.service_name }}-smoke-test diff --git a/.github/workflows/deploy_terraform.yml b/.github/workflows/deploy_terraform.yml index e0343974..bd014e3d 100644 --- a/.github/workflows/deploy_terraform.yml +++ b/.github/workflows/deploy_terraform.yml @@ -407,6 +407,8 @@ jobs: TF_VAR_social_housing_wave_3_sharepoint_id: ${{ secrets.SOCIAL_HOUSING_WAVE_3_SHAREPOINT_ID }} TF_VAR_pashub_email: ${{ secrets.PASHUB_EMAIL }} TF_VAR_pashub_password: ${{ secrets.PASHUB_PASSWORD }} + TF_VAR_pashub_coordination_email: ${{ secrets.PASHUB_COORDINATION_EMAIL }} + TF_VAR_pashub_coordination_password: ${{ secrets.PASHUB_COORDINATION_PASSWORD }} # ============================================================ diff --git a/.github/workflows/lambda_smoke_tests.yml b/.github/workflows/lambda_smoke_tests.yml new file mode 100644 index 00000000..5ff5420a --- /dev/null +++ b/.github/workflows/lambda_smoke_tests.yml @@ -0,0 +1,107 @@ +name: Lambda Smoke Tests + +on: + pull_request: + branches: + - main + +jobs: + # ============================================================ + # Ara Engine + # ============================================================ + ara_engine_smoke_test: + uses: ./.github/workflows/_smoke_test_lambda.yml + with: + dockerfile_path: backend/docker/engine.Dockerfile + build_context: . + service_name: ara-engine + + # ============================================================ + # Address 2 UPRN + # ============================================================ + address2uprn_smoke_test: + uses: ./.github/workflows/_smoke_test_lambda.yml + with: + dockerfile_path: backend/address2UPRN/handler/Dockerfile + build_context: . 
+ service_name: address2uprn + + # ============================================================ + # Postcode Splitter + # ============================================================ + postcode_splitter_smoke_test: + uses: ./.github/workflows/_smoke_test_lambda.yml + with: + dockerfile_path: backend/postcode_splitter/handler/Dockerfile + build_context: . + service_name: postcode-splitter + + # ============================================================ + # Bulk Address2UPRN Combiner + # ============================================================ + bulk_address2uprn_combiner_smoke_test: + uses: ./.github/workflows/_smoke_test_lambda.yml + with: + dockerfile_path: backend/bulk_address2uprn_combiner/handler/Dockerfile + build_context: . + service_name: bulk-address2uprn-combiner + + # ============================================================ + # Condition ETL + # ============================================================ + condition_etl_smoke_test: + uses: ./.github/workflows/_smoke_test_lambda.yml + with: + dockerfile_path: backend/condition/handler/Dockerfile + build_context: . + service_name: condition-etl + + # ============================================================ + # Categorisation + # ============================================================ + categorisation_smoke_test: + uses: ./.github/workflows/_smoke_test_lambda.yml + with: + dockerfile_path: backend/categorisation/handler/Dockerfile + build_context: . + service_name: categorisation + + # ============================================================ + # Ordnance Survey + # ============================================================ + ordnance_survey_smoke_test: + uses: ./.github/workflows/_smoke_test_lambda.yml + with: + dockerfile_path: backend/ordnanceSurvey/handler/Dockerfile + build_context: . 
+ service_name: ordnance-survey + + # ============================================================ + # Pas Hub Fetcher + # ============================================================ + pashub_smoke_test: + uses: ./.github/workflows/_smoke_test_lambda.yml + with: + dockerfile_path: backend/pashub_fetcher/handler/Dockerfile + build_context: . + service_name: pashub + + # ============================================================ + # MagicPlan + # ============================================================ + magic_plan_smoke_test: + uses: ./.github/workflows/_smoke_test_lambda.yml + with: + dockerfile_path: backend/magic_plan/handler/Dockerfile + build_context: . + service_name: magic-plan + + # ============================================================ + # HubSpot Scraper + # ============================================================ + hubspot_scraper_smoke_test: + uses: ./.github/workflows/_smoke_test_lambda.yml + with: + dockerfile_path: etl/hubspot/scripts/scraper/handler/Dockerfile + build_context: . + service_name: hubspot-scraper diff --git a/backend/address2UPRN/tests/test_csv.py b/backend/address2UPRN/tests/test_csv.py index 73d94388..5c97e691 100644 --- a/backend/address2UPRN/tests/test_csv.py +++ b/backend/address2UPRN/tests/test_csv.py @@ -12,12 +12,21 @@ FIXTURE_PATH = Path(__file__).parent / "test_data.csv" # Each parametrized case fires at least one EPC request; without throttling, # GitHub-hosted runners burst fast enough to hit 429s. 
EPC_THROTTLE_SECONDS = 1.0 +EPC_LONG_PAUSE_EVERY = 100 +EPC_LONG_PAUSE_SECONDS = 5.0 + +_epc_request_count = 0 @pytest.fixture(autouse=True) def _throttle_epc_requests(): + global _epc_request_count yield - time.sleep(EPC_THROTTLE_SECONDS) + _epc_request_count += 1 + if _epc_request_count % EPC_LONG_PAUSE_EVERY == 0: + time.sleep(EPC_LONG_PAUSE_SECONDS) + else: + time.sleep(EPC_THROTTLE_SECONDS) def load_test_cases(): diff --git a/backend/address2UPRN/tests/test_data.csv b/backend/address2UPRN/tests/test_data.csv index 408edc29..1c1ce58a 100644 --- a/backend/address2UPRN/tests/test_data.csv +++ b/backend/address2UPRN/tests/test_data.csv @@ -364,4 +364,7 @@ FLAT B 158 LEAHURST ROAD,SE13 5NL,100021976974 164a Victoria Square,M4 5FA,77211315 165a Victoria Square,M4 5FA,77211316 166a Victoria Square,M4 5FA,None -"FLAT 3; 42 MORETON ROAD, SOUTH CROYDON, SURREY",CR2 7DL,None \ No newline at end of file +"FLAT 3; 42 MORETON ROAD, SOUTH CROYDON, SURREY",CR2 7DL,None +71A Stoneleigh Avenue,NE12 8NP,None +71B Stoneleigh Avenue,NE12 8NP,None +71 Stoneleigh Avenue,NE12 8NP,47086009 \ No newline at end of file diff --git a/backend/app/config.py b/backend/app/config.py index bdfc9ace..fcfb6d5b 100644 --- a/backend/app/config.py +++ b/backend/app/config.py @@ -86,6 +86,8 @@ class Settings(BaseSettings): # Pas Hub PASHUB_EMAIL: Optional[str] = None PASHUB_PASSWORD: Optional[str] = None + PASHUB_COORDINATION_EMAIL: Optional[str] = None + PASHUB_COORDINATION_PASSWORD: Optional[str] = None # Optional AWS creds (only required in local) AWS_ACCESS_KEY_ID: Optional[str] = None diff --git a/backend/condition/handler/Dockerfile b/backend/condition/handler/Dockerfile index 71556895..fa130573 100644 --- a/backend/condition/handler/Dockerfile +++ b/backend/condition/handler/Dockerfile @@ -32,6 +32,7 @@ COPY utils/ utils/ COPY backend/condition/ backend/condition/ COPY backend/app/db/models/condition.py backend/app/db/models/condition.py +COPY backend/app/db/base.py backend/app/db/base.py 
COPY backend/app/db/connection.py backend/app/db/connection.py COPY backend/app/config.py backend/app/config.py diff --git a/backend/pashub_fetcher/handler/handler.py b/backend/pashub_fetcher/handler/handler.py index cd0c8113..626ce59d 100644 --- a/backend/pashub_fetcher/handler/handler.py +++ b/backend/pashub_fetcher/handler/handler.py @@ -1,9 +1,11 @@ -from typing import Any, Dict, List +from typing import Any, Callable, Dict, List, Optional from backend.app.config import get_settings -from backend.pashub_fetcher.pashub_client import PashubClient, UnauthorizedError +from backend.pashub_fetcher.pashub_client import PashubClient from backend.pashub_fetcher.pashub_service import PashubService -from backend.pashub_fetcher.pashub_to_ara_trigger_request import PashubToAraTriggerRequest +from backend.pashub_fetcher.pashub_to_ara_trigger_request import ( + PashubToAraTriggerRequest, +) from backend.pashub_fetcher.token_getter import get_token_from_local_storage from backend.app.db.models.tasks import SourceEnum from backend.utils.subtasks import task_handler @@ -28,38 +30,41 @@ def handler(body: Dict[str, Any], context: Any) -> List[str]: settings = get_settings() - pas_hub_email = settings.PASHUB_EMAIL - pas_hub_password = settings.PASHUB_PASSWORD + pashub_email = settings.PASHUB_EMAIL + pashub_password = settings.PASHUB_PASSWORD - if (not pas_hub_email) or (not pas_hub_password): + coordination_hub_email = settings.PASHUB_COORDINATION_EMAIL + coordination_hub_password = settings.PASHUB_COORDINATION_PASSWORD + coordination_client_factory: Optional[Callable[[], PashubClient]] = None + + if (not pashub_email) or (not pashub_password): raise ValueError("Pas Hub credentials not provided") sharepoint_client = DomnaSharepointClient( sharepoint_location=DomnaSites.SOCIAL_HOUSING_WAVE_3 ) + if coordination_hub_email and coordination_hub_password: + _coord_email, _coord_password = ( + coordination_hub_email, + coordination_hub_password, + ) + coordination_client_factory = 
lambda: get_pashub_client( + _coord_email, _coord_password + ) + logger.debug("Validating request body") payload = PashubToAraTriggerRequest.model_validate(body) logger.debug("Successfully validated request body") service = PashubService( - pashub_client=get_pashub_client(pas_hub_email, pas_hub_password), + pashub_client=get_pashub_client(pashub_email, pashub_password), sharepoint_client=sharepoint_client, s3_bucket=S3_BUCKET, + coordination_client_factory=coordination_client_factory, ) - try: - files: List[str] = service.run(payload) - except UnauthorizedError: - logger.warning("Token expired - refreshing") - - service = PashubService( - pashub_client=get_pashub_client(pas_hub_email, pas_hub_password), - sharepoint_client=sharepoint_client, - s3_bucket=S3_BUCKET, - ) - - files = service.run(payload) + files: List[str] = service.run(payload) logger.info(f"Saved {len(files)} files") diff --git a/backend/pashub_fetcher/pashub_service.py b/backend/pashub_fetcher/pashub_service.py index b3302fd9..13498a32 100644 --- a/backend/pashub_fetcher/pashub_service.py +++ b/backend/pashub_fetcher/pashub_service.py @@ -1,6 +1,6 @@ import os from datetime import datetime, timezone -from typing import List, NamedTuple, Optional, cast +from typing import Callable, List, NamedTuple, Optional, cast from backend.app.db.connection import db_session from backend.app.db.models.uploaded_file import ( @@ -11,7 +11,7 @@ from backend.app.db.models.uploaded_file import ( from backend.documents_parser.db_writer import save_epc_property_data from backend.documents_parser.parser import parse_site_notes_pdf from backend.pashub_fetcher.core_files import get_file_type_string -from backend.pashub_fetcher.pashub_client import PashubClient +from backend.pashub_fetcher.pashub_client import PashubClient, UnauthorizedError from backend.pashub_fetcher.pashub_to_ara_trigger_request import ( PashubToAraTriggerRequest, ) @@ -36,17 +36,36 @@ class PashubService: pashub_client: PashubClient, sharepoint_client: 
DomnaSharepointClient, s3_bucket: str, + coordination_client_factory: Optional[Callable[[], PashubClient]] = None, ) -> None: self._pashub_client = pashub_client self._sharepoint_client = sharepoint_client self._s3_bucket = s3_bucket + self._coordination_client_factory = coordination_client_factory + self._coordination_client: Optional[PashubClient] = None + + def _get_coordination_client(self) -> PashubClient: + if self._coordination_client_factory is None: + raise UnauthorizedError("No coordination client factory configured") + if self._coordination_client is None: + self._coordination_client = self._coordination_client_factory() + return self._coordination_client def run(self, request: PashubToAraTriggerRequest) -> List[str]: job_id = request.pashub_job_id + active_client = self._pashub_client + + if request.uprn: + uprn: Optional[str] = request.uprn + else: + try: + uprn = active_client.get_uprn_by_job_id(job_id) + except UnauthorizedError:  # primary PasHub credentials were rejected for this job + logger.info(f"Failed to access job {job_id} with PasHub credentials") + logger.info(f"Trying CoordinationHub credentials for job {job_id}") + active_client = self._get_coordination_client() + uprn = active_client.get_uprn_by_job_id(job_id) - uprn: Optional[str] = request.uprn or self._pashub_client.get_uprn_by_job_id( - job_id - ) hubspot_deal_id: Optional[str] = request.hubspot_deal_id if uprn: @@ -54,9 +73,15 @@ class PashubService: else: logger.info(f"No UPRN found for job {job_id}") - job_files: List[str] = self._pashub_client.get_core_evidence_files_by_job_id( - job_id - ) + try: + job_files: List[str] = active_client.get_core_evidence_files_by_job_id( + job_id + ) + except UnauthorizedError: + if active_client is not self._pashub_client: + raise  # already on coordination credentials; no further fallback + active_client = self._get_coordination_client() + job_files = active_client.get_core_evidence_files_by_job_id(job_id) if uprn or hubspot_deal_id: logger.info("Uploading files to s3") diff --git a/backend/pashub_fetcher/tests/test_pashub_service.py 
b/backend/pashub_fetcher/tests/test_pashub_service.py index 2aff416b..991d2a46 100644 --- a/backend/pashub_fetcher/tests/test_pashub_service.py +++ b/backend/pashub_fetcher/tests/test_pashub_service.py @@ -1,8 +1,9 @@ -from typing import Optional +import pytest +from typing import Callable, Optional from unittest.mock import MagicMock, call, patch -from backend.pashub_fetcher.pashub_client import PashubClient +from backend.pashub_fetcher.pashub_client import PashubClient, UnauthorizedError from backend.pashub_fetcher.pashub_service import PashubService from backend.pashub_fetcher.pashub_to_ara_trigger_request import ( PashubToAraTriggerRequest, @@ -31,11 +32,13 @@ def make_service( pashub_client: Optional[PashubClient] = None, sharepoint_client: Optional[DomnaSharepointClient] = None, s3_bucket: str = "test-bucket", + coordination_client_factory: Optional[Callable[[], PashubClient]] = None, ) -> PashubService: return PashubService( pashub_client=pashub_client or MagicMock(spec=PashubClient), sharepoint_client=sharepoint_client or MagicMock(spec=DomnaSharepointClient), s3_bucket=s3_bucket, + coordination_client_factory=coordination_client_factory, ) @@ -225,6 +228,84 @@ def test_run_parses_and_saves_site_notes_for_rd_sap_site_note_file() -> None: # --------------------------------------------------------------------------- +# --------------------------------------------------------------------------- +# run(): coordination fallback +# --------------------------------------------------------------------------- + + +def test_run_uses_coordination_client_when_pas_401_on_uprn_lookup() -> None: + pas_client = MagicMock(spec=PashubClient) + pas_client.get_uprn_by_job_id.side_effect = UnauthorizedError() + + coord_client = MagicMock(spec=PashubClient) + coord_client.get_uprn_by_job_id.return_value = "99999" + coord_client.get_core_evidence_files_by_job_id.return_value = ["/tmp/a.pdf"] + + factory = MagicMock(return_value=coord_client) + + service = 
make_service(pashub_client=pas_client, coordination_client_factory=factory) + + with ( + patch("backend.pashub_fetcher.pashub_service.upload_file_to_s3"), + patch("backend.pashub_fetcher.pashub_service.db_session"), + patch("backend.pashub_fetcher.pashub_service.os.remove"), + ): + result = service.run(make_request()) + + assert result == ["/tmp/a.pdf"] + coord_client.get_uprn_by_job_id.assert_called_once() + coord_client.get_core_evidence_files_by_job_id.assert_called_once() + assert factory.call_count == 1 + + +def test_run_uses_coordination_client_when_pas_401_on_file_listing() -> None: + pas_client = MagicMock(spec=PashubClient) + pas_client.get_core_evidence_files_by_job_id.side_effect = UnauthorizedError() + + coord_client = MagicMock(spec=PashubClient) + coord_client.get_core_evidence_files_by_job_id.return_value = ["/tmp/a.pdf"] + + factory = MagicMock(return_value=coord_client) + + service = make_service(pashub_client=pas_client, coordination_client_factory=factory) + + with ( + patch("backend.pashub_fetcher.pashub_service.upload_file_to_s3"), + patch("backend.pashub_fetcher.pashub_service.db_session"), + patch("backend.pashub_fetcher.pashub_service.os.remove"), + ): + result = service.run(make_request(uprn="12345")) + + assert result == ["/tmp/a.pdf"] + coord_client.get_core_evidence_files_by_job_id.assert_called_once() + pas_client.get_uprn_by_job_id.assert_not_called() + + +def test_run_raises_unauthorized_when_pas_401_and_no_factory() -> None: + pas_client = MagicMock(spec=PashubClient) + pas_client.get_uprn_by_job_id.side_effect = UnauthorizedError() + + service = make_service(pashub_client=pas_client) + + with pytest.raises(UnauthorizedError): + service.run(make_request()) + + +def test_run_raises_unauthorized_when_both_clients_401() -> None: + pas_client = MagicMock(spec=PashubClient) + pas_client.get_uprn_by_job_id.side_effect = UnauthorizedError() + + coord_client = MagicMock(spec=PashubClient) + coord_client.get_uprn_by_job_id.side_effect = 
UnauthorizedError() + + factory = MagicMock(return_value=coord_client) + + service = make_service(pashub_client=pas_client, coordination_client_factory=factory) + + with pytest.raises(UnauthorizedError): + service.run(make_request()) + + def test_run_warns_and_continues_when_site_notes_parsing_fails() -> None: mock_client = MagicMock(spec=PashubClient) mock_client.get_uprn_by_job_id.return_value = None diff --git a/infrastructure/terraform/lambda/pashub_to_ara/main.tf b/infrastructure/terraform/lambda/pashub_to_ara/main.tf index 902d7845..eba9c874 100644 --- a/infrastructure/terraform/lambda/pashub_to_ara/main.tf +++ b/infrastructure/terraform/lambda/pashub_to_ara/main.tf @@ -49,6 +49,8 @@ module "lambda" { SOCIAL_HOUSING_WAVE_3_SHAREPOINT_ID = var.social_housing_wave_3_sharepoint_id PASHUB_EMAIL = var.pashub_email PASHUB_PASSWORD = var.pashub_password + PASHUB_COORDINATION_EMAIL = var.pashub_coordination_email + PASHUB_COORDINATION_PASSWORD = var.pashub_coordination_password } } diff --git a/infrastructure/terraform/lambda/pashub_to_ara/variables.tf b/infrastructure/terraform/lambda/pashub_to_ara/variables.tf index 0e99d378..cdeff256 100644 --- a/infrastructure/terraform/lambda/pashub_to_ara/variables.tf +++ b/infrastructure/terraform/lambda/pashub_to_ara/variables.tf @@ -100,4 +100,16 @@ variable "pashub_email" { variable "pashub_password" { type = string sensitive = true +} + +variable "pashub_coordination_email" { + type = string + sensitive = true + default = null +} + +variable "pashub_coordination_password" { + type = string + sensitive = true + default = null } \ No newline at end of file