From a94e5ca592fd1e83d320bc2d8ae0bf2c34996282 Mon Sep 17 00:00:00 2001
From: Jun-te Kim
Date: Tue, 10 Feb 2026 08:04:57 +0000
Subject: [PATCH] s3 url processing

---
 backend/postcode_splitter/main.py | 43 ++++++++++++-------------------
 1 file changed, 17 insertions(+), 26 deletions(-)

diff --git a/backend/postcode_splitter/main.py b/backend/postcode_splitter/main.py
index 1d0e56a0..adb8e5c9 100644
--- a/backend/postcode_splitter/main.py
+++ b/backend/postcode_splitter/main.py
@@ -23,41 +23,32 @@ def parse_s3_console_url(s3_uri: str) -> tuple[str, str]:
     Format: https://account-id-hash.region.console.aws.amazon.com/s3/object/bucket?region=...&prefix=path
     """
-    logger.info(f"Parsing S3 URI: {s3_uri}")
-
-    if "console.aws.amazon.com" not in s3_uri:
-        logger.error("URI does not contain 'console.aws.amazon.com'")
-        raise ValueError(f"Could not parse S3 URI: {s3_uri}")
-
-    if "?prefix=" not in s3_uri:
-        logger.error("URI does not contain '?prefix='")
-        raise ValueError(f"Could not parse S3 URI: {s3_uri}")
+    logger.info("Parsing S3 console URL")
 
     try:
+        # Split base URL and query string
+        if "?" not in s3_uri:
+            raise ValueError("No query string found")
+
         base, query = s3_uri.split("?", 1)
-        logger.debug(f"Base: {base}")
-        logger.debug(f"Query: {query}")
+
+        # Extract bucket from base URL
+        if "/s3/object/" not in base:
+            raise ValueError("No '/s3/object/' found in URL path")
 
         path_parts = base.split("/s3/object/")
-        logger.debug(f"Path parts: {path_parts}")
+        bucket = path_parts[1]
+        logger.info(f"Extracted bucket: {bucket}")
 
-        if len(path_parts) > 1:
-            bucket = path_parts[1]
-            logger.info(f"Extracted bucket: {bucket}")
+        # Extract prefix from query parameters
+        params = dict(item.split("=") for item in query.split("&") if "=" in item)
+        key = unquote(params.get("prefix", ""))
+        logger.info(f"Extracted key: {key}")
 
-            params = dict(item.split("=") for item in query.split("&") if "=" in item)
-            logger.debug(f"Query params: {params}")
-
-            key = unquote(params.get("prefix", ""))
-            logger.info(f"Extracted key: {key}")
-
-            return bucket, key
-        else:
-            logger.error(f"Could not find '/s3/object/' in URI")
-            raise ValueError(f"Could not parse S3 URI: {s3_uri}")
+        return bucket, key
 
     except Exception as e:
         logger.error(f"Error parsing S3 URI: {type(e).__name__}: {e}")
-        raise ValueError(f"Could not parse S3 URI: {s3_uri}") from e
+        raise ValueError(f"Could not parse S3 URI") from e
 
 
 def sanitise_postcode(postcode: str) -> str | None: