diff --git a/.github/workflows/deploy_terraform.yml b/.github/workflows/deploy_terraform.yml index 5ac4cee4..2077f686 100644 --- a/.github/workflows/deploy_terraform.yml +++ b/.github/workflows/deploy_terraform.yml @@ -372,17 +372,10 @@ jobs: ecr_repo: pashub_to_ara-${{ needs.determine_stage.outputs.stage }} dockerfile_path: backend/pashub_fetcher/handler/Dockerfile build_context: . - build_args: | - DEV_DB_HOST=$DEV_DB_HOST - DEV_DB_PORT=$DEV_DB_PORT - DEV_DB_NAME=$DEV_DB_NAME secrets: AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }} AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }} AWS_REGION: ${{ secrets.DEV_AWS_REGION }} - DEV_DB_HOST: ${{ secrets.DEV_DB_HOST }} - DEV_DB_PORT: ${{ secrets.DEV_DB_PORT }} - DEV_DB_NAME: ${{ secrets.DEV_DB_NAME }} # ============================================================ @@ -402,6 +395,9 @@ jobs: AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }} AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }} AWS_REGION: ${{ secrets.DEV_AWS_REGION }} + TF_VAR_db_host: ${{ secrets.DEV_DB_HOST }} + TF_VAR_db_name: ${{ secrets.DEV_DB_NAME }} + TF_VAR_db_port: ${{ secrets.DEV_DB_PORT }} TF_VAR_sharepoint_client_id: ${{ secrets.SHAREPOINT_CLIENT_ID }} TF_VAR_sharepoint_client_secret: ${{ secrets.SHAREPOINT_CLIENT_SECRET }} TF_VAR_sharepoint_tenant_id: ${{ secrets.SHAREPOINT_TENANT_ID }} diff --git a/backend/app/bulk_uploads/router.py b/backend/app/bulk_uploads/router.py index 1e341790..9928b456 100644 --- a/backend/app/bulk_uploads/router.py +++ b/backend/app/bulk_uploads/router.py @@ -16,7 +16,6 @@ from backend.app.bulk_uploads.schema import ( PostcodeSplitterTriggerRequest, ) from backend.app.bulk_uploads.scoring import score_bucket -from utils.s3 import parse_s3_uri, read_csv_from_s3 ADDRESS_COLS = ("Address 1", "Address 2", "Address 3", "postcode") INTERNAL_REF_COL = "Internal Reference" @@ -139,7 +138,8 @@ async def get_combined_results( matched_address_raw = _normalize(raw.get(MATCHED_ADDRESS_COL)) matched_address = ( None - if not matched_address_raw or matched_address_raw.lower() == MISSING_SENTINEL + if not matched_address_raw + or matched_address_raw.lower() == MISSING_SENTINEL else matched_address_raw ) @@ -176,4 +176,3 @@ async def get_combined_results( ), rows=rows, ) - diff --git a/backend/app/db/models/hubspot_deal_data.py b/backend/app/db/models/hubspot_deal_data.py index 758f688d..fa508fbe 100644 --- a/backend/app/db/models/hubspot_deal_data.py +++ b/backend/app/db/models/hubspot_deal_data.py @@ -38,6 +38,14 @@ class HubspotDealData(SQLModel, table=True): dampmould_growth: Optional[str] = Field(default=None) damp_mould_and_repairs_comments: Optional[str] = Field(default=None) pre_sap: Optional[str] = Field(default=None) + batch: Optional[str] = Field(default=None) + block_reference: Optional[str] = Field(default=None) + epc_prn: Optional[str] = Field(default=None) + potential_post_sap_score_dropdown: Optional[str] = Field(default=None) + ei_score: Optional[str] = Field(default=None) + ei_score__potential_: Optional[str] = Field(default=None) + epc_sap_score: Optional[str] = Field(default=None) + epc_sap_score__potential_: Optional[str] = Field(default=None) coordinator: Optional[str] = Field(default=None) mtp_completion_date: Optional[datetime] = Field(default=None) mtp_re_model_completion_date: Optional[datetime] = Field(default=None) diff --git a/backend/pashub_fetcher/token_getter.py b/backend/pashub_fetcher/token_getter.py index 5534d114..2e2d1440 100644 --- a/backend/pashub_fetcher/token_getter.py +++ b/backend/pashub_fetcher/token_getter.py @@ -7,24 +7,40 @@ from utils.logger import setup_logger logger = setup_logger() -def get_token_from_local_storage(email: str, password: str) -> str: +def get_token_from_local_storage( + email: str, password: str, record_video: bool = False +) -> str: logger.info("Starting Playwright flow") - # For local testing / debugging, we save videos of the flow - video_dir = os.path.join(os.path.dirname(__file__), "videos") - os.makedirs(video_dir, exist_ok=True) - with sync_playwright() as p: + logger.info("Playwright server started") + browser = p.chromium.launch( headless=True, - args=["--no-sandbox", "--disable-dev-shm-usage"], + args=[ + "--no-sandbox", + "--disable-dev-shm-usage", + "--disable-gpu", + "--single-process", + "--no-zygote", + ], ) + logger.info("Chromium launched successfully") - context = browser.new_context( - record_video_dir=video_dir, - record_video_size={"width": 1280, "height": 720}, - ) + video_dir = None + if record_video: + video_dir = os.path.join(os.path.dirname(__file__), "videos") + os.makedirs(video_dir, exist_ok=True) + + if record_video: + context = browser.new_context( + record_video_dir=video_dir, + record_video_size={"width": 1280, "height": 720}, + ) + else: + context = browser.new_context() page = context.new_page() + logger.info("Page created") try: logger.info("Navigating to site...") @@ -71,8 +87,8 @@ def get_token_from_local_storage(email: str, password: str) -> str: raise Exception(f"Unexpected error: {str(e)}") finally: - logger.info("Closing browser context (saving video)...") context.close() browser.close() - logger.info(f"Video(s) saved in: {video_dir}") + if record_video and video_dir: + logger.info(f"Video(s) saved in: {video_dir}") diff --git a/etl/hubspot/hubspotClient.py b/etl/hubspot/hubspotClient.py index 6bdf71ed..df28e4d6 100644 --- a/etl/hubspot/hubspotClient.py +++ b/etl/hubspot/hubspotClient.py @@ -254,12 +254,12 @@ class HubspotClient: "sharepoint_link", "dampmould_growth", "damp_mould_and_repairs_comments", - "pre_sap", + "pre_sap_score_dropdown", "coordinator", "mtp_completion_date", "mtp_re_model_completion_date", "ioe_v3_completion_date", - "proposed_measures", + "proposed_measures_dropdown", "approved_package", "designer", "design_completion_date", @@ -275,6 +275,14 @@ class HubspotClient: "confirmed_survey_time", "surveyed_date", "design_type", + "batch", + "block_reference", + "epc_prn", + "potential_post_sap_score_dropdown", + "ei_score", + "ei_score__potential_", + "epc_sap_score", + "epc_sap_score__potential_", ], ) ) diff --git a/etl/hubspot/hubspotDataTodB.py b/etl/hubspot/hubspotDataTodB.py index 9756833b..a2eb24c2 100644 --- a/etl/hubspot/hubspotDataTodB.py +++ b/etl/hubspot/hubspotDataTodB.py @@ -159,7 +159,17 @@ class HubspotDataToDb: "damp_mould_and_repairs_comments": deal_data.get( "damp_mould_and_repairs_comments" ), - "pre_sap": deal_data.get("pre_sap"), + "pre_sap": deal_data.get("pre_sap_score_dropdown"), + "batch": deal_data.get("batch"), + "block_reference": deal_data.get("block_reference"), + "epc_prn": deal_data.get("epc_prn"), + "potential_post_sap_score_dropdown": deal_data.get( + "potential_post_sap_score_dropdown" + ), + "ei_score": deal_data.get("ei_score"), + "ei_score__potential_": deal_data.get("ei_score__potential_"), + "epc_sap_score": deal_data.get("epc_sap_score"), + "epc_sap_score__potential_": deal_data.get("epc_sap_score__potential_"), "coordinator": deal_data.get("coordinator"), "mtp_completion_date": parse_hs_date(deal_data.get("mtp_completion_date")), "mtp_re_model_completion_date": parse_hs_date( @@ -168,7 +178,7 @@ class HubspotDataToDb: "ioe_v3_completion_date": parse_hs_date( deal_data.get("ioe_v3_completion_date") ), - "proposed_measures": deal_data.get("proposed_measures"), + "proposed_measures": deal_data.get("proposed_measures_dropdown"), "approved_package": deal_data.get("approved_package"), "designer": deal_data.get("designer"), "design_completion_date": parse_hs_date( @@ -228,7 +238,17 @@ class HubspotDataToDb: damp_mould_and_repairs_comments=deal_data.get( "damp_mould_and_repairs_comments" ), - pre_sap=deal_data.get("pre_sap"), + pre_sap=deal_data.get("pre_sap_score_dropdown"), + batch=deal_data.get("batch"), + block_reference=deal_data.get("block_reference"), + epc_prn=deal_data.get("epc_prn"), + potential_post_sap_score_dropdown=deal_data.get( + "potential_post_sap_score_dropdown" + ), + ei_score=deal_data.get("ei_score"), + ei_score__potential_=deal_data.get("ei_score__potential_"), + epc_sap_score=deal_data.get("epc_sap_score"), + epc_sap_score__potential_=deal_data.get("epc_sap_score__potential_"), coordinator=deal_data.get("coordinator"), mtp_completion_date=parse_hs_date(deal_data.get("mtp_completion_date")), mtp_re_model_completion_date=parse_hs_date( @@ -237,7 +257,7 @@ class HubspotDataToDb: ioe_v3_completion_date=parse_hs_date( deal_data.get("ioe_v3_completion_date") ), - proposed_measures=deal_data.get("proposed_measures"), + proposed_measures=deal_data.get("proposed_measures_dropdown"), approved_package=deal_data.get("approved_package"), designer=deal_data.get("designer"), design_completion_date=parse_hs_date( diff --git a/etl/hubspot/hubspot_deal_differ.py b/etl/hubspot/hubspot_deal_differ.py index 74c8264d..fa5bbe42 100644 --- a/etl/hubspot/hubspot_deal_differ.py +++ b/etl/hubspot/hubspot_deal_differ.py @@ -62,9 +62,17 @@ class HubspotDealDiffer: "sharepoint_link": "sharepoint_link", "dampmould_growth": "dampmould_growth", "damp_mould_and_repairs_comments": "damp_mould_and_repairs_comments", - "pre_sap": "pre_sap", + "pre_sap_score_dropdown": "pre_sap", + "batch": "batch", + "block_reference": "block_reference", + "epc_prn": "epc_prn", + "potential_post_sap_score_dropdown": "potential_post_sap_score_dropdown", + "ei_score": "ei_score", + "ei_score__potential_": "ei_score__potential_", + "epc_sap_score": "epc_sap_score", + "epc_sap_score__potential_": "epc_sap_score__potential_", "coordinator": "coordinator", - "proposed_measures": "proposed_measures", + "proposed_measures_dropdown": "proposed_measures", "approved_package": "approved_package", "designer": "designer", "actual_measures_installed": "actual_measures_installed", @@ -158,7 +166,7 @@ class HubspotDealDiffer: @staticmethod def _design_completed(new_deal: Dict[str, str], old_deal: HubspotDealData) -> bool: - new_status: str = new_deal.get("coordination_status") or "" + new_status: str = new_deal.get("design_status") or "" return ( new_status != "" and new_status.lower() == HubspotDealDiffer.RETROFIT_DESIGN_COMPLETE @@ -169,7 +177,7 @@ class HubspotDealDiffer: def _lodgement_completed( new_deal: Dict[str, str], old_deal: HubspotDealData ) -> bool: - new_status: str = new_deal.get("coordination_status") or "" + new_status: str = new_deal.get("lodgement_status") or "" return ( new_status != "" and new_status.lower() in HubspotDealDiffer.LODGEMENT_COMPLETE diff --git a/etl/hubspot/scripts/scraper/bulk_load.py b/etl/hubspot/scripts/scraper/bulk_load.py index f0529905..91aa89e2 100644 --- a/etl/hubspot/scripts/scraper/bulk_load.py +++ b/etl/hubspot/scripts/scraper/bulk_load.py @@ -9,8 +9,8 @@ PIPELINE_ID = Pipeline.OPERATIONS_SOCIAL_HOUSING.value companies = list( [ # Companies.THE_GUINESS_PARTNERSHIP, - Companies.SOUTHERN_HOUSING_GROUP, - # Companies.CALICO_HOMES, + # Companies.SOUTHERN_HOUSING_GROUP, + Companies.CALICO_HOMES, ] ) diff --git a/etl/hubspot/tests/test_hubspot_deal_differ.py b/etl/hubspot/tests/test_hubspot_deal_differ.py index 69f7668b..0523c982 100644 --- a/etl/hubspot/tests/test_hubspot_deal_differ.py +++ b/etl/hubspot/tests/test_hubspot_deal_differ.py @@ -90,8 +90,8 @@ def test_pashub_trigger__pashub_link_changed__returns_true( @pytest.mark.parametrize( "coordination_status,expected", [ - ("v1 ioe/mtp complete", True), - ("v2 ioe/mtp complete", True), + ("(v1) ioe/mtp complete", True), + ("(v2) ioe/mtp complete", True), ], ) def test_pashub_trigger__coordination_completed_and_pashub_link_set__returns_true( diff --git a/infrastructure/terraform/lambda/_template/main.tf b/infrastructure/terraform/lambda/_template/main.tf index 81b1c7f1..1cd63ffe 100644 --- a/infrastructure/terraform/lambda/_template/main.tf +++ b/infrastructure/terraform/lambda/_template/main.tf @@ -36,6 +36,8 @@ module "lambda" { # Optional: Set maximum_concurrency to limit concurrent SQS-triggered invocations (2-1000) maximum_concurrency = var.maximum_concurrency + reserved_concurrent_executions = var.reserved_concurrent_executions + batch_size = var.batch_size environment = { diff --git a/infrastructure/terraform/lambda/_template/variables.tf b/infrastructure/terraform/lambda/_template/variables.tf index 0a3092ee..daaa0b7c 100644 --- a/infrastructure/terraform/lambda/_template/variables.tf +++ b/infrastructure/terraform/lambda/_template/variables.tf @@ -23,6 +23,12 @@ variable "maximum_concurrency" { description = "Maximum number of concurrent Lambda invocations from SQS (2-1000). null = no limit." } +variable "reserved_concurrent_executions" { + type = number + default = -1 + description = "Reserved concurrency for the Lambda function. -1 = unreserved. 1+ = hard limit across all triggers." +} + variable "batch_size" { type = number default = 1 diff --git a/infrastructure/terraform/lambda/pashub_to_ara/main.tf b/infrastructure/terraform/lambda/pashub_to_ara/main.tf index e898e949..ae719a99 100644 --- a/infrastructure/terraform/lambda/pashub_to_ara/main.tf +++ b/infrastructure/terraform/lambda/pashub_to_ara/main.tf @@ -7,6 +7,14 @@ data "terraform_remote_state" "shared" { } } +data "aws_secretsmanager_secret_version" "db_credentials" { + secret_id = "${var.stage}/assessment_model/db_credentials" +} + +locals { + db_credentials = jsondecode(data.aws_secretsmanager_secret_version.db_credentials.secret_string) +} + module "lambda" { source = "../../modules/lambda_with_sqs" @@ -18,10 +26,27 @@ module "lambda" { # Optional: Set maximum_concurrency to limit concurrent SQS-triggered invocations (2-1000) maximum_concurrency = var.maximum_concurrency + reserved_concurrent_executions = var.reserved_concurrent_executions + batch_size = var.batch_size environment = { STAGE = var.stage LOG_LEVEL = "info" + + DB_USERNAME = local.db_credentials.db_assessment_model_username + DB_PASSWORD = local.db_credentials.db_assessment_model_password + DB_HOST = var.db_host + DB_NAME = var.db_name + DB_PORT = var.db_port + + SHAREPOINT_CLIENT_ID = var.sharepoint_client_id + SHAREPOINT_CLIENT_SECRET = var.sharepoint_client_secret + DOMNA_SHAREPOINT_ID = var.domna_sharepoint_id + OSMOSIS_ACD_SHAREPOINT_ID = var.osmosis_acd_sharepoint_id + PRIVATE_PAY_SHAREPOINT_ID = var.private_pay_sharepoint_id + SOCIAL_HOUSING_WAVE_3_SHAREPOINT_ID = var.social_housing_wave_3_sharepoint_id + PASHUB_EMAIL = var.pashub_email + PASHUB_PASSWORD = var.pashub_password } } diff --git a/infrastructure/terraform/lambda/pashub_to_ara/variables.tf b/infrastructure/terraform/lambda/pashub_to_ara/variables.tf index e7646811..e68a26b6 100644 --- a/infrastructure/terraform/lambda/pashub_to_ara/variables.tf +++ b/infrastructure/terraform/lambda/pashub_to_ara/variables.tf @@ -23,6 +23,12 @@ variable "maximum_concurrency" { description = "Maximum number of concurrent Lambda invocations from SQS (2-1000). null = no limit." } +variable "reserved_concurrent_executions" { + type = number + default = 1 + description = "Reserved concurrency. Defaults to 1 to prevent concurrent Playwright browser collisions." +} + variable "batch_size" { type = number default = 1 @@ -35,3 +41,58 @@ locals { output "resolved_image_uri" { value = local.image_uri } + +variable "db_host" { + type = string + sensitive = true +} + +variable "db_name" { + type = string + sensitive = true +} + +variable "db_port" { + type = string + sensitive = true +} + +variable "sharepoint_client_id" { + type = string + sensitive = true +} + +variable "sharepoint_client_secret" { + type = string + sensitive = true +} + +variable "domna_sharepoint_id" { + type = string + sensitive = true +} + +variable "osmosis_acd_sharepoint_id" { + type = string + sensitive = true +} + +variable "private_pay_sharepoint_id" { + type = string + sensitive = true +} + +variable "social_housing_wave_3_sharepoint_id" { + type = string + sensitive = true +} + +variable "pashub_email" { + type = string + sensitive = true +} + +variable "pashub_password" { + type = string + sensitive = true +} \ No newline at end of file diff --git a/infrastructure/terraform/modules/lambda_service/main.tf b/infrastructure/terraform/modules/lambda_service/main.tf index 8a159db1..3250110b 100644 --- a/infrastructure/terraform/modules/lambda_service/main.tf +++ b/infrastructure/terraform/modules/lambda_service/main.tf @@ -9,6 +9,8 @@ resource "aws_lambda_function" "this" { memory_size = var.memory_size publish = true + reserved_concurrent_executions = var.reserved_concurrent_executions + environment { variables = var.environment } diff --git a/infrastructure/terraform/modules/lambda_service/variables.tf b/infrastructure/terraform/modules/lambda_service/variables.tf index 43def6ad..46241f30 100644 --- a/infrastructure/terraform/modules/lambda_service/variables.tf +++ b/infrastructure/terraform/modules/lambda_service/variables.tf @@ -16,3 +16,9 @@ variable "environment" { type = map(string) default = {} } + +variable "reserved_concurrent_executions" { + type = number + default = -1 + description = "Reserved concurrency for the Lambda function. -1 = unreserved (default). 0 = throttle all. 1+ = hard limit." +} diff --git a/infrastructure/terraform/modules/lambda_with_sqs/main.tf b/infrastructure/terraform/modules/lambda_with_sqs/main.tf index 35626487..97f86793 100644 --- a/infrastructure/terraform/modules/lambda_with_sqs/main.tf +++ b/infrastructure/terraform/modules/lambda_with_sqs/main.tf @@ -31,7 +31,8 @@ module "lambda" { timeout = var.timeout memory_size = var.memory_size - environment = var.environment + environment = var.environment + reserved_concurrent_executions = var.reserved_concurrent_executions } ############################################ diff --git a/infrastructure/terraform/modules/lambda_with_sqs/variables.tf b/infrastructure/terraform/modules/lambda_with_sqs/variables.tf index 7c2832d2..90585e92 100644 --- a/infrastructure/terraform/modules/lambda_with_sqs/variables.tf +++ b/infrastructure/terraform/modules/lambda_with_sqs/variables.tf @@ -40,3 +40,9 @@ variable "maximum_concurrency" { default = null description = "Maximum number of concurrent Lambda invocations from SQS. null = no limit." } + +variable "reserved_concurrent_executions" { + type = number + default = -1 + description = "Reserved concurrency for the Lambda function. -1 = unreserved. 1 = single-threaded." +}