S3 URL processing

This commit is contained in:
Jun-te Kim 2026-02-10 08:04:57 +00:00
parent 8121e6d5b6
commit a94e5ca592

View file

@ -23,41 +23,32 @@ def parse_s3_console_url(s3_uri: str) -> tuple[str, str]:
Format: https://account-id-hash.region.console.aws.amazon.com/s3/object/bucket?region=...&prefix=path
"""
logger.info(f"Parsing S3 URI: {s3_uri}")
if "console.aws.amazon.com" not in s3_uri:
logger.error("URI does not contain 'console.aws.amazon.com'")
raise ValueError(f"Could not parse S3 URI: {s3_uri}")
if "?prefix=" not in s3_uri:
logger.error("URI does not contain '?prefix='")
raise ValueError(f"Could not parse S3 URI: {s3_uri}")
logger.info("Parsing S3 console URL")
try:
# Split base URL and query string
if "?" not in s3_uri:
raise ValueError("No query string found")
base, query = s3_uri.split("?", 1)
logger.debug(f"Base: {base}")
logger.debug(f"Query: {query}")
# Extract bucket from base URL
if "/s3/object/" not in base:
raise ValueError("No '/s3/object/' found in URL path")
path_parts = base.split("/s3/object/")
logger.debug(f"Path parts: {path_parts}")
bucket = path_parts[1]
logger.info(f"Extracted bucket: {bucket}")
if len(path_parts) > 1:
bucket = path_parts[1]
logger.info(f"Extracted bucket: {bucket}")
# Extract prefix from query parameters
params = dict(item.split("=") for item in query.split("&") if "=" in item)
key = unquote(params.get("prefix", ""))
logger.info(f"Extracted key: {key}")
params = dict(item.split("=") for item in query.split("&") if "=" in item)
logger.debug(f"Query params: {params}")
key = unquote(params.get("prefix", ""))
logger.info(f"Extracted key: {key}")
return bucket, key
else:
logger.error(f"Could not find '/s3/object/' in URI")
raise ValueError(f"Could not parse S3 URI: {s3_uri}")
return bucket, key
except Exception as e:
logger.error(f"Error parsing S3 URI: {type(e).__name__}: {e}")
raise ValueError(f"Could not parse S3 URI: {s3_uri}") from e
raise ValueError(f"Could not parse S3 URI") from e
def sanitise_postcode(postcode: str) -> str | None: