Parse S3 URIs (s3://bucket/key) in addition to AWS console URLs

This commit is contained in:
Jun-te Kim 2026-02-10 21:08:39 +00:00
parent 9dc5e0b984
commit 7911bb4db0

View file

@ -17,15 +17,30 @@ from backend.address2UPRN.main import (
logger = setup_logger()
def parse_s3_console_url(s3_uri: str) -> tuple[str, str]:
def parse_s3_uri(s3_uri: str) -> tuple[str, str]:
"""
Parse AWS console S3 URL to extract bucket and key.
Parse S3 URI to extract bucket and key.
Format: https://account-id-hash.region.console.aws.amazon.com/s3/object/bucket?region=...&prefix=path
Supports two formats:
1. S3 URI format: s3://bucket/key
2. AWS console URL: https://account-id-hash.region.console.aws.amazon.com/s3/object/bucket?region=...&prefix=path
"""
logger.info("Parsing S3 console URL")
logger.info("Parsing S3 URI")
try:
# Check if it's an S3 URI format
if s3_uri.startswith("s3://"):
parts = s3_uri[5:].split("/", 1)
if len(parts) < 2:
raise ValueError("S3 URI must include both bucket and key")
bucket = parts[0]
key = parts[1]
logger.info(f"Extracted bucket: {bucket}, key: {key}")
return bucket, key
# Otherwise, treat as AWS console URL
logger.info("Parsing as AWS console URL")
# Split base URL and query string
if "?" not in s3_uri:
raise ValueError("No query string found")
@ -215,7 +230,7 @@ def handler(event, context):
# Read CSV from S3
logger.info(f"Processing S3 URI: {s3_uri}")
bucket, key = parse_s3_console_url(s3_uri)
bucket, key = parse_s3_uri(s3_uri)
logger.info(f"S3 Bucket: {bucket}, Key: {key}")
csv_data = read_csv_from_s3_dict(bucket, key)