Photos ETL: write resized image variants into a per-UPRN local directory,
read the CDN bucket and CloudFront domain from the environment, and delete
the local files after upload.

create_images(input_path, uprn) now writes thumbnail.jpg, 1080p.jpg and
webp.webp under non_invasive_photos/<uprn>/ and returns those three paths.
process_and_upload_images() takes the UPRN as its first parameter and must
forward the arguments to create_images() in (input_path, uprn) order.

NOTE(review): indentation and blank-line placement below were reconstructed
from a whitespace-collapsed copy of this patch; confirm against the target
file before applying.

diff --git a/etl/non_invasive_surveys/photos/app.py b/etl/non_invasive_surveys/photos/app.py
index 1b6790f9..ffd993a6 100644
--- a/etl/non_invasive_surveys/photos/app.py
+++ b/etl/non_invasive_surveys/photos/app.py
@@ -1,4 +1,5 @@
 import boto3
+import os
 from PIL import Image
 from pathlib import Path
 from dotenv import load_dotenv
@@ -19,6 +20,10 @@ FOLDER_UPRN_LOOKUP = {
     "8 Corporation Rd DY2 7PX": 90070461
 }
 
+load_dotenv(ENV_FILEPATH)
+CLOUDFRONT_DISTRIBUTION_DOMAIN_NAME = os.getenv("CLOUDFRONT_DISTRIBUTION_DOMAIN_NAME", None)
+CDN_BUCKET_NAME = os.getenv("CDN_BUCKET_NAME", None)
+
 
 def list_subdirectories(directory_path):
     """
@@ -49,21 +54,33 @@ def list_files_in_directory(directory_path, file_extension=".jpg"):
     return file_list
 
 
-def create_images(input_path):
+def create_images(input_path, uprn):
+    # Need to create local directory if it doesn't exist
+    os.makedirs(f"non_invasive_photos/{uprn}", exist_ok=True)
+
     # Load the image
     with Image.open(input_path) as img:
+        # Define output paths
+        thumbnail_path = f"non_invasive_photos/{uprn}/thumbnail.jpg"
+        full_hd_path = f"non_invasive_photos/{uprn}/1080p.jpg"
+        webp_path = f"non_invasive_photos/{uprn}/webp.webp"  # Save as WebP format
+
         # Create a thumbnail
         thumbnail = img.copy()
-        thumbnail.thumbnail((128, 128), Image.ANTIALIAS)  # Resize to 128x128 (or any desired size)
-        thumbnail.save('thumbnail.jpg')
+        thumbnail.thumbnail((128, 128), Image.Resampling.LANCZOS)  # High-quality downsampling
+        thumbnail.save(thumbnail_path, 'JPEG', quality=85)  # Save as JPEG with quality setting
 
         # Create a 1080p version
         full_hd = img.copy()
-        full_hd.thumbnail((1920, 1080), Image.ANTIALIAS)  # Resize to 1080p
-        full_hd.save('1080p.jpg')
+        full_hd.thumbnail((1920, 1080), Image.Resampling.LANCZOS)
+        full_hd.save(full_hd_path, 'JPEG', quality=90)  # Slightly higher quality for larger image
+
+        # Convert to WebP for better compression
+        webp = img.copy()
+        webp.save(webp_path, 'WEBP', quality=90)
 
     # Return paths to the processed images
-    return 'thumbnail.jpg', '1080p.jpg', input_path
+    return thumbnail_path, full_hd_path, webp_path
 
 
 def upload_to_s3(bucket_name, file_path, object_name):
@@ -83,9 +100,9 @@ def generate_cdn_url(distribution_domain, object_name):
     return f"https://{distribution_domain}/{object_name}"
 
 
-def process_and_upload_images(input_image_path, bucket_name, distribution_domain):
+def process_and_upload_images(uprn, input_image_path, bucket_name, distribution_domain):
     # Create images
-    thumbnail, full_hd, original = create_images(input_image_path)
+    thumbnail, full_hd, original = create_images(input_image_path, str(uprn))
 
     # Upload images
     upload_photos_to_s3(bucket_name, [thumbnail, full_hd, original])
@@ -93,6 +110,10 @@ def process_and_upload_images(input_image_path, bucket_name, distribution_domain
     # Generate CDN links
     cdn_links = [generate_cdn_url(distribution_domain, path.split('/')[-1]) for path in [thumbnail, full_hd, original]]
 
+    # Delete local files
+    for path in [thumbnail, full_hd, original]:
+        os.remove(path)
+
     return cdn_links
 
 
@@ -112,9 +133,13 @@ def app():
     # For each property, we want to list all of the photos in the directory
     for property_dir in property_directories:
         photo_files = list_files_in_directory(property_dir)
+        uprn = FOLDER_UPRN_LOOKUP[property_dir.name]
 
         # We now want to convert each file, and upload it to s3
         for photo_filepath in photo_files:
             process_and_upload_images(
-                photo_filepath, "retrofit-datalake-dev", "cdn.retrofit.com"
+                uprn=uprn,
+                input_image_path=photo_filepath,
+                bucket_name=CDN_BUCKET_NAME,
+                distribution_domain=CLOUDFRONT_DISTRIBUTION_DOMAIN_NAME
             )