From 7911bb4db0746f94bd7f01c7e82f8ffdc47c39bc Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Tue, 10 Feb 2026 21:08:39 +0000 Subject: [PATCH] parse uri --- backend/postcode_splitter/main.py | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/backend/postcode_splitter/main.py b/backend/postcode_splitter/main.py index adb8e5c9..5a63d920 100644 --- a/backend/postcode_splitter/main.py +++ b/backend/postcode_splitter/main.py @@ -17,15 +17,30 @@ from backend.address2UPRN.main import ( logger = setup_logger() -def parse_s3_console_url(s3_uri: str) -> tuple[str, str]: +def parse_s3_uri(s3_uri: str) -> tuple[str, str]: """ - Parse AWS console S3 URL to extract bucket and key. + Parse S3 URI to extract bucket and key. - Format: https://account-id-hash.region.console.aws.amazon.com/s3/object/bucket?region=...&prefix=path + Supports two formats: + 1. S3 URI format: s3://bucket/key + 2. AWS console URL: https://account-id-hash.region.console.aws.amazon.com/s3/object/bucket?region=...&prefix=path """ - logger.info("Parsing S3 console URL") + logger.info("Parsing S3 URI") try: + # Check if it's an S3 URI format + if s3_uri.startswith("s3://"): + parts = s3_uri[5:].split("/", 1) + if len(parts) < 2: + raise ValueError("S3 URI must include both bucket and key") + bucket = parts[0] + key = parts[1] + logger.info(f"Extracted bucket: {bucket}, key: {key}") + return bucket, key + + # Otherwise, treat as AWS console URL + logger.info("Parsing as AWS console URL") + # Split base URL and query string if "?" not in s3_uri: raise ValueError("No query string found") @@ -215,7 +230,7 @@ def handler(event, context): # Read CSV from S3 logger.info(f"Processing S3 URI: {s3_uri}") - bucket, key = parse_s3_console_url(s3_uri) + bucket, key = parse_s3_uri(s3_uri) logger.info(f"S3 Bucket: {bucket}, Key: {key}") csv_data = read_csv_from_s3_dict(bucket, key)