Model/infrastructure/s3/s3_uri.py

25 lines
888 B
Python

from urllib.parse import unquote
def parse_s3_uri(s3_uri: str) -> tuple[str, str]:
    """Split an S3 location string into a ``(bucket, key)`` pair.

    Two input shapes are accepted:

    * ``s3://<bucket>/<key>`` -- the bucket is everything up to the first
      ``/`` after the scheme and the key is everything after it.
    * A URL containing a ``/s3/object/<bucket>`` path segment and a query
      string with a ``prefix=<key>`` parameter. The ``prefix`` value is
      percent-decoded before being returned.

    Args:
        s3_uri: The location string to parse.

    Returns:
        A ``(bucket, key)`` tuple; both elements are non-empty.

    Raises:
        ValueError: If the input matches neither shape, or if the bucket
            or the key is missing or empty.
    """
    scheme = "s3://"
    if s3_uri.startswith(scheme):
        # partition() yields an empty key when there is no "/" at all, so a
        # single emptiness check covers both "s3://bucket" and "s3://bucket/".
        bucket, _, key = s3_uri[len(scheme):].partition("/")
        if not bucket or not key:
            raise ValueError("S3 URI must include both a bucket and a key")
        return bucket, key

    if "?" not in s3_uri:
        raise ValueError(f"Not an s3:// URI and has no query string: {s3_uri!r}")
    base, query = s3_uri.split("?", 1)

    marker = "/s3/object/"
    if marker not in base:
        raise ValueError(f"Console URL has no '/s3/object/' segment: {s3_uri!r}")
    bucket = base.split(marker, 1)[1]

    # Parsed by hand rather than with parse_qs so that a literal "+" in a key
    # is preserved (parse_qs would turn it into a space). When a parameter is
    # repeated the last occurrence wins; items without "=" are ignored.
    params: dict[str, str] = {
        name: value
        for name, sep, value in (item.partition("=") for item in query.split("&"))
        if sep
    }
    key = unquote(params.get("prefix", ""))

    # Apply the same "both parts required" rule as the s3:// branch instead of
    # silently handing back an empty bucket or key.
    if not bucket or not key:
        raise ValueError(
            f"Console URL must include both a bucket and a 'prefix' key: {s3_uri!r}"
        )
    return bucket, key