diff --git a/.idea/Model.iml b/.idea/Model.iml
index 4413bb06..c75af922 100644
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@@ -7,7 +7,7 @@
-
+
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 6f308057..1f2c584d 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,7 +3,7 @@
-
+
diff --git a/etl/non_invasive_surveys/photos/README.md b/etl/non_invasive_surveys/photos/README.md
new file mode 100644
index 00000000..9dbe951f
--- /dev/null
+++ b/etl/non_invasive_surveys/photos/README.md
@@ -0,0 +1,19 @@
+# Non Intrusive Surveys - photo upload
+
+This folder contains photos taken during non-intrusive surveys. Photos are stored in folders named after the survey ID.
+
+## Getting started
+
+Install the required packages by running the following command:
+
+```bash
+pip install -r requirements.txt
+```
+
+## Usage
+
+The main application is found in the app.py file. To run the application, use the following command:
+
+```bash
+python app.py
+```
\ No newline at end of file
diff --git a/etl/non_invasive_surveys/photos/app.py b/etl/non_invasive_surveys/photos/app.py
new file mode 100644
index 00000000..1b6790f9
--- /dev/null
+++ b/etl/non_invasive_surveys/photos/app.py
@@ -0,0 +1,210 @@
+import tempfile
+from pathlib import Path
+from urllib.parse import quote
+
+import boto3
+from dotenv import load_dotenv
+from PIL import Image
+
+# Inputs
+ENV_FILEPATH = "etl/non_invasive_surveys/photos/.env"
+PHOTO_DIRECTORY = "/Users/khalimconn-kowlessar/Downloads/IMMO - Dudley Pilot - non-invasive raw data"
+S3_BUCKET_NAME = "retrofit-datalake-dev"
+CDN_DOMAIN = "cdn.retrofit.com"
+FOLDER_UPRN_LOOKUP = {
+    "91 Osprey Drive DY1 2JS": 90048026,
+    "195 Ashenhurst Rd DY1 2JB": 90051858,
+    "6 Beech Rd DY1 4BP": 90055152,
+    "53 Bromley DY5 4PJ": 90060989,
+    "5 Oaklands B62 0JA": 90028499,
+    "47 Fairfield Rd DY8 5UJ": 90077535,
+    "150 Huntingtree Rd B63 4HP": 90093693,
+    "27 Milton Rd DY1 2JB": 90106884,
+    "21 Wells Rd DY5 3TB": 90022227,
+    "8 Corporation Rd DY2 7PX": 90070461
+}
+
+
+def list_subdirectories(directory_path):
+    """
+    List all subdirectories within a given directory.
+
+    :param directory_path: Path to the directory.
+    :return: A list of paths to the subdirectories.
+    """
+    directory = Path(directory_path)
+    return [subdir for subdir in directory.iterdir() if subdir.is_dir()]
+
+
+def list_files_in_directory(directory_path, file_extension=".jpg"):
+    """
+    List all files with a specific extension within a given directory and its subdirectories.
+
+    The extension is compared case-insensitively, so the default also picks up files saved as ".JPG".
+
+    :param directory_path: Path to the directory to scan.
+    :param file_extension: File extension to filter by.
+    :return: A list of paths to the files.
+    """
+    directory = Path(directory_path)
+    wanted_suffix = file_extension.lower()
+    return [
+        file
+        for file in directory.rglob("*")
+        if file.is_file() and file.name.lower().endswith(wanted_suffix)
+    ]
+
+
+def create_images(input_path, output_dir=None):
+    """
+    Create a thumbnail and a 1080p version of an image.
+
+    The resized copies are named after the source file ("<stem>_thumbnail.jpg" and "<stem>_1080p.jpg") so that
+    copies made from different photos do not overwrite each other.
+
+    :param input_path: Path to the source image.
+    :param output_dir: Directory to write the resized copies to. Defaults to the current working directory.
+    :return: A tuple of paths: (thumbnail, 1080p version, original image).
+    """
+    input_path = Path(input_path)
+    output_dir = Path(output_dir) if output_dir is not None else Path()
+    thumbnail_path = output_dir / f"{input_path.stem}_thumbnail.jpg"
+    full_hd_path = output_dir / f"{input_path.stem}_1080p.jpg"
+
+    with Image.open(input_path) as img:
+        # Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS is the same filter under its current name.
+        # thumbnail() resizes in place and keeps the aspect ratio, so the sizes below are upper bounds.
+        thumbnail = img.copy()
+        thumbnail.thumbnail((128, 128), Image.LANCZOS)
+        thumbnail.save(thumbnail_path)
+
+        full_hd = img.copy()
+        full_hd.thumbnail((1920, 1080), Image.LANCZOS)
+        full_hd.save(full_hd_path)
+
+    return thumbnail_path, full_hd_path, input_path
+
+
+def build_object_name(file_path, key_prefix=""):
+    """
+    Build the S3 object name for a local file.
+
+    :param file_path: Path to the local file; only its file name is used.
+    :param key_prefix: Optional prefix ("folder") to place the object under.
+    :return: The object name: "<key_prefix>/<file name>", or just the file name when no prefix is given.
+    """
+    filename = Path(file_path).name
+    prefix = str(key_prefix).strip("/")
+    return f"{prefix}/{filename}" if prefix else filename
+
+
+def upload_to_s3(bucket_name, file_path, object_name, s3_client=None):
+    """
+    Upload a single file to S3.
+
+    :param bucket_name: Name of the destination bucket.
+    :param file_path: Path to the local file.
+    :param object_name: Object name (key) to store the file under.
+    :param s3_client: Optional boto3 S3 client to reuse. A new client is created when omitted.
+    """
+    if s3_client is None:
+        s3_client = boto3.client('s3')
+    # Pass the filename as a string so that Path objects work regardless of the installed boto3 version
+    s3_client.upload_file(str(file_path), bucket_name, object_name)
+    print(f"Uploaded {object_name} to S3 bucket {bucket_name}")
+
+
+def upload_photos_to_s3(bucket_name, photo_paths, key_prefix="", s3_client=None):
+    """
+    Upload several files to S3, naming each object after its file name.
+
+    :param bucket_name: Name of the destination bucket.
+    :param photo_paths: Paths (strings or Path objects) to the local files.
+    :param key_prefix: Optional prefix ("folder") to place the objects under.
+    :param s3_client: Optional boto3 S3 client to reuse. A new client is created when omitted.
+    :return: The object names the files were uploaded under, in the same order as photo_paths.
+    """
+    if s3_client is None:
+        # Create the client once rather than once per file
+        s3_client = boto3.client('s3')
+
+    object_names = []
+    for path in photo_paths:
+        # The paths may be Path objects, which have no .split(), so the file name is taken via pathlib
+        object_name = build_object_name(path, key_prefix)
+        upload_to_s3(bucket_name, path, object_name, s3_client)
+        object_names.append(object_name)
+    return object_names
+
+
+def generate_cdn_url(distribution_domain, object_name):
+    """
+    Build the CDN URL for an uploaded object.
+
+    :param distribution_domain: Domain name of the CDN distribution.
+    :param object_name: Object name (key) of the uploaded file.
+    :return: The https URL of the object.
+    """
+    # Percent-encode characters such as spaces; "/" is left as-is so that key prefixes stay intact
+    return f"https://{distribution_domain}/{quote(object_name)}"
+
+
+def process_and_upload_images(input_image_path, bucket_name, distribution_domain, key_prefix="", s3_client=None):
+    """
+    Create the resized versions of a photo, upload all versions to S3 and return their CDN links.
+
+    :param input_image_path: Path to the source image.
+    :param bucket_name: Name of the destination bucket.
+    :param distribution_domain: Domain name of the CDN distribution.
+    :param key_prefix: Optional prefix ("folder") to place the objects under.
+    :param s3_client: Optional boto3 S3 client to reuse. A new client is created when omitted.
+    :return: A list of CDN links in the order: thumbnail, 1080p version, original image.
+    """
+    # The resized copies are only needed for the upload, so they are written to a temporary directory
+    with tempfile.TemporaryDirectory() as working_dir:
+        image_paths = create_images(input_image_path, working_dir)
+        object_names = upload_photos_to_s3(bucket_name, image_paths, key_prefix, s3_client)
+
+    return [generate_cdn_url(distribution_domain, object_name) for object_name in object_names]
+
+
+def app(photo_directory=PHOTO_DIRECTORY, bucket_name=S3_BUCKET_NAME, distribution_domain=CDN_DOMAIN):
+    """
+    This application is tasked with uploading the photos, recorded during the non-invasive surveys, to s3 and the
+    database.
+    To begin with, this app will simply read the files from the local machine, however we will come up with a more
+    efficient way to do this in the future.
+
+    Only the s3 upload is implemented here. Objects are stored under the property's UPRN (or the folder name, when
+    the folder is not in FOLDER_UPRN_LOOKUP) followed by the photo's sub-folders, so that photos sharing a file
+    name do not overwrite each other.
+
+    :param photo_directory: Directory containing one folder of photos per property.
+    :param bucket_name: Name of the destination bucket.
+    :param distribution_domain: Domain name of the CDN distribution.
+    :return: A dictionary mapping each photo path to its list of CDN links.
+    """
+    # Load environment variables from the .env file (presumably the AWS settings); a missing file is ignored
+    load_dotenv(ENV_FILEPATH)
+    s3_client = boto3.client('s3')
+
+    # List all files in the directory using pathlib
+    property_directories = list_subdirectories(photo_directory)
+
+    cdn_links = {}
+    for property_dir in property_directories:
+        key_root = FOLDER_UPRN_LOOKUP.get(property_dir.name, property_dir.name)
+
+        # For each property, we want to convert every photo in the directory and upload it to s3
+        for photo_filepath in list_files_in_directory(property_dir):
+            sub_folders = photo_filepath.relative_to(property_dir).parent.parts
+            key_prefix = "/".join([str(key_root), *sub_folders])
+            cdn_links[photo_filepath] = process_and_upload_images(
+                photo_filepath, bucket_name, distribution_domain, key_prefix, s3_client
+            )
+
+    return cdn_links
+
+
+if __name__ == "__main__":
+    app()
diff --git a/etl/non_invasive_surveys/photos/requirements.txt b/etl/non_invasive_surveys/photos/requirements.txt
new file mode 100644
index 00000000..2199a0b4
--- /dev/null
+++ b/etl/non_invasive_surveys/photos/requirements.txt
@@ -0,0 +1,3 @@
+Pillow
+boto3
+python-dotenv
\ No newline at end of file