Model/etl/hubspot/s3_uploader.py
2026-03-31 10:42:02 +00:00

116 lines
3.7 KiB
Python

import os
import posixpath
from datetime import datetime, timezone
from urllib.parse import urlparse

import boto3
import requests
from boto3.exceptions import S3UploadFailedError
from botocore.exceptions import ClientError
class S3Uploader:
    """Thin helper around a boto3 S3 client.

    Uploads local files, builds presigned GET URLs and downloads objects that
    are addressed by a virtual-hosted-style S3 HTTPS URL.
    """

    def __init__(
        self,
        aws_access_key: str,
        aws_secret_key: str,
        region: str = "eu-west-2",
    ):
        """Create the underlying boto3 S3 client.

        Args:
            aws_access_key: AWS access key id passed to boto3.
            aws_secret_key: AWS secret access key passed to boto3.
            region: AWS region used for the client and for the URLs built by
                ``upload_file``.
        """
        self.aws_access_key = aws_access_key
        self.aws_secret_key = aws_secret_key
        self.region = region
        self.s3 = boto3.client(
            "s3",
            aws_access_key_id=self.aws_access_key,
            aws_secret_access_key=self.aws_secret_key,
            region_name=self.region,
        )

    def upload_file(self, file_path: str, bucket: str, prefix: str = "uploads/") -> str:
        """Upload a local file to an S3 bucket and return its HTTPS URI.

        The object key is ``<prefix>/<UTC timestamp>_<file name>``.

        Args:
            file_path: Path to the local file.
            bucket: S3 bucket name.
            prefix: Folder/prefix in the bucket.

        Returns:
            HTTPS-style S3 URI (not signed). The key is inserted as-is, i.e.
            it is not percent-encoded.

        Raises:
            RuntimeError: If the upload is rejected by S3.
        """
        filename = os.path.basename(file_path)
        timestamp = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S")
        # S3 keys always use "/" as separator; os.path.join would produce
        # backslashes when this runs on Windows.
        s3_key = posixpath.join(prefix, f"{timestamp}_{filename}")
        try:
            self.s3.upload_file(file_path, bucket, s3_key)
        except (ClientError, S3UploadFailedError) as e:
            # boto3's managed transfer re-raises ClientError as
            # S3UploadFailedError, so both must be handled here.
            raise RuntimeError(f"❌ S3 upload failed: {e}") from e
        return f"https://{bucket}.s3.{self.region}.amazonaws.com/{s3_key}"

    def print_bucket(self, bucket: str = "retrofit-data-dev"):
        """Print the ``head_bucket`` response for ``bucket``.

        Args:
            bucket: Bucket to probe. Defaults to the bucket name that used to
                be hard-coded in this method.
        """
        print(self.s3.head_bucket(Bucket=bucket))

    def generate_presigned_url(
        self, bucket: str, key: str, expires_in: int = 3600
    ) -> str:
        """Generate a temporary presigned GET URL for an S3 object.

        Args:
            bucket: S3 bucket name.
            key: Object key inside the bucket.
            expires_in: URL lifetime in seconds.

        Returns:
            The presigned URL.

        Raises:
            RuntimeError: If boto3 fails to build the URL.
        """
        try:
            return self.s3.generate_presigned_url(
                "get_object",
                Params={"Bucket": bucket, "Key": key},
                ExpiresIn=expires_in,
            )
        except ClientError as e:
            raise RuntimeError(f"❌ Failed to generate signed URL: {e}") from e

    def download_from_url(
        self,
        s3_url: str,
        local_dir: str = ".",
        expires_in: int = 3600,
        timeout: float = 60.0,
    ) -> str:
        """Download the object behind an S3 HTTPS URL into ``local_dir``.

        A presigned URL is always generated first, so the same code path works
        for public and private objects.

        Args:
            s3_url: Full virtual-hosted-style S3 URL
                (e.g. https://bucket.s3.region.amazonaws.com/path/file.txt).
            local_dir: Folder to save the file in; created if missing.
            expires_in: Presigned URL lifetime (seconds).
            timeout: Timeout in seconds handed to ``requests.get`` so a
                stalled connection cannot block forever.

        Returns:
            Local file path of the downloaded file.

        Raises:
            ValueError: If the URL is not an S3 URL or names no object.
            RuntimeError: If signing or the HTTP download fails.
        """
        parsed = urlparse(s3_url)
        # Split on the first ".s3." so bucket names that contain dots are
        # handled as well as plain ones.
        bucket, marker, endpoint = parsed.netloc.partition(".s3.")
        if not (bucket and marker and endpoint):
            raise ValueError("❌ Not a valid S3 HTTPS URL")

        key = parsed.path.lstrip("/")
        filename = os.path.basename(key)
        if not filename:
            # An empty key (or one ending in "/") has no file name to save to.
            raise ValueError("❌ S3 URL does not point to an object key")

        # Generate presigned URL (whether public or private)
        presigned_url = self.generate_presigned_url(bucket, key, expires_in)
        local_path = os.path.join(local_dir, filename)

        try:
            # The context manager releases the streamed connection even when
            # the status check or the chunked read fails.
            with requests.get(
                presigned_url, stream=True, timeout=timeout
            ) as response:
                response.raise_for_status()
                os.makedirs(local_dir, exist_ok=True)
                with open(local_path, "wb") as f:
                    for chunk in response.iter_content(chunk_size=8192):
                        f.write(chunk)
        except requests.exceptions.RequestException as e:
            raise RuntimeError(f"❌ Failed to download file: {e}") from e

        print(f"✅ Downloaded: {local_path}")
        return local_path