mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
parse uri
This commit is contained in:
parent
9dc5e0b984
commit
7911bb4db0
1 changed file with 20 additions and 5 deletions
|
|
@ -17,15 +17,30 @@ from backend.address2UPRN.main import (
|
|||
logger = setup_logger()
|
||||
|
||||
|
||||
def parse_s3_console_url(s3_uri: str) -> tuple[str, str]:
|
||||
def parse_s3_uri(s3_uri: str) -> tuple[str, str]:
|
||||
"""
|
||||
Parse AWS console S3 URL to extract bucket and key.
|
||||
Parse S3 URI to extract bucket and key.
|
||||
|
||||
Format: https://account-id-hash.region.console.aws.amazon.com/s3/object/bucket?region=...&prefix=path
|
||||
Supports two formats:
|
||||
1. S3 URI format: s3://bucket/key
|
||||
2. AWS console URL: https://account-id-hash.region.console.aws.amazon.com/s3/object/bucket?region=...&prefix=path
|
||||
"""
|
||||
logger.info("Parsing S3 console URL")
|
||||
logger.info("Parsing S3 URI")
|
||||
|
||||
try:
|
||||
# Check if it's an S3 URI format
|
||||
if s3_uri.startswith("s3://"):
|
||||
parts = s3_uri[5:].split("/", 1)
|
||||
if len(parts) < 2:
|
||||
raise ValueError("S3 URI must include both bucket and key")
|
||||
bucket = parts[0]
|
||||
key = parts[1]
|
||||
logger.info(f"Extracted bucket: {bucket}, key: {key}")
|
||||
return bucket, key
|
||||
|
||||
# Otherwise, treat as AWS console URL
|
||||
logger.info("Parsing as AWS console URL")
|
||||
|
||||
# Split base URL and query string
|
||||
if "?" not in s3_uri:
|
||||
raise ValueError("No query string found")
|
||||
|
|
@ -215,7 +230,7 @@ def handler(event, context):
|
|||
|
||||
# Read CSV from S3
|
||||
logger.info(f"Processing S3 URI: {s3_uri}")
|
||||
bucket, key = parse_s3_console_url(s3_uri)
|
||||
bucket, key = parse_s3_uri(s3_uri)
|
||||
logger.info(f"S3 Bucket: {bucket}, Key: {key}")
|
||||
|
||||
csv_data = read_csv_from_s3_dict(bucket, key)
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue