import os

import boto3

# Local directory under which downloaded S3 objects are written by default.
DEFAULT_TEMP_DIR = "/tmp/s3"


def print_hello_from_etl_module():
    """Print a fixed greeting showing that this module was imported."""
    print("You are printing from a etl module we made in poetry")


def split_s3_url(s3_url):
    """Split an ``s3://bucket/key`` URL into its bucket name and object key.

    Args:
        s3_url: URL of the form ``s3://<bucket>/<key>``. The key may itself
            contain ``/`` characters; only the first one separates the
            bucket from the key.

    Returns:
        A ``(bucket_name, key)`` tuple of non-empty strings.

    Raises:
        ValueError: If the URL does not start with ``s3://``, has no bucket
            name, or has no key after the bucket name.
    """
    if not s3_url.startswith("s3://"):
        raise ValueError("Invalid S3 URL. Must start with 's3://'")

    path = s3_url[5:]  # drop the leading "s3://"
    bucket_name, separator, key = path.partition("/")

    # A trailing slash with nothing after it ("s3://bucket/") is not a key.
    if not separator or not key:
        raise ValueError("S3 URL must include a key after the bucket name")
    if not bucket_name:
        raise ValueError("S3 URL must include a bucket name after 's3://'")

    return bucket_name, key


def create_temp_file(content_bytes, relative_path, base_dir=DEFAULT_TEMP_DIR):
    """Write bytes to ``base_dir/relative_path`` and return the file's path.

    Missing parent directories are created. An existing file at the same
    path is overwritten.

    Args:
        content_bytes: Bytes to write to the file.
        relative_path: Path of the file relative to ``base_dir``. Leading
            ``/`` characters are ignored so the path is always treated as
            relative.
        base_dir: Directory the file is placed under. Defaults to
            ``/tmp/s3``.

    Returns:
        The path of the file that was written.

    Raises:
        ValueError: If ``relative_path`` would resolve to a location outside
            ``base_dir`` (for example through ``..`` segments).
    """
    # Save under /tmp/s3/ (or the caller-supplied base directory).
    # os.path.join discards the base when the second argument is absolute,
    # so strip leading slashes before joining.
    full_path = os.path.join(base_dir, relative_path.lstrip("/"))

    # Refuse paths that climb out of the base directory via ".." segments.
    base_abs = os.path.abspath(base_dir)
    target_abs = os.path.abspath(full_path)
    if os.path.commonpath([base_abs, target_abs]) != base_abs:
        raise ValueError(
            f"Refusing to write outside {base_abs}: {relative_path!r}"
        )

    # Make sure the directory exists
    os.makedirs(os.path.dirname(full_path), exist_ok=True)

    # Write content to file
    with open(full_path, 'wb') as temp_file:
        temp_file.write(content_bytes)

    print(f"Temporary file created at: {full_path}")
    return full_path


def download_data_from_s3(s3_uri):
    """Download one S3 object and save it to a local file.

    The whole object body is read into memory before it is written to disk.

    Args:
        s3_uri: URL of the object, of the form ``s3://<bucket>/<key>``.

    Returns:
        The local path of the saved file, as returned by
        ``create_temp_file``.

    Raises:
        ValueError: If ``s3_uri`` is not a valid S3 URL, or its key would
            resolve to a path outside the local download directory.
    """
    s3 = boto3.resource('s3')
    bucket_name, file_name = split_s3_url(s3_uri)

    obj = s3.Object(bucket_name, file_name)
    data = obj.get()['Body'].read()

    # Save using full S3 key as relative path
    return create_temp_file(data, file_name)


# Example usage
# download_data_from_s3("s3://retrofit-energy-assessments-dev/JAFFERSONS ENERGY CONSULTANTS/VDE001/12103116/docs & plans/77 Perryn Road, W3 7LT EPR.pdf")