building photo upload app

This commit is contained in:
Khalim Conn-Kowlessar 2024-04-15 14:50:00 +01:00
parent fd8e4a8d64
commit 6076eb4f24
5 changed files with 144 additions and 2 deletions

2
.idea/Model.iml generated
View file

@ -7,7 +7,7 @@
<sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
</content>
<orderEntry type="jdk" jdkName="Python 3.10 (backend)" jdkType="Python SDK" />
<orderEntry type="jdk" jdkName="non_invasive_surveys-photos" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
<component name="PyNamespacePackagesService">

2
.idea/misc.xml generated
View file

@ -3,7 +3,7 @@
<component name="Black">
<option name="sdkName" value="Python 3.10 (backend)" />
</component>
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (backend)" project-jdk-type="Python SDK" />
<component name="ProjectRootManager" version="2" project-jdk-name="non_invasive_surveys-photos" project-jdk-type="Python SDK" />
<component name="PythonCompatibilityInspectionAdvertiser">
<option name="version" value="3" />
</component>

View file

@ -0,0 +1,19 @@
# Non-Invasive Surveys - photo upload
This folder contains photos taken during non-invasive surveys. Photos are stored in folders named after the survey ID.
## Getting started
Install the required packages by running the following command:
```bash
pip install -r requirements.txt
```
## Usage
The main application is found in the app.py file. To run the application, use the following command:
```bash
python app.py
```

View file

@ -0,0 +1,120 @@
import boto3
from PIL import Image
from pathlib import Path
from dotenv import load_dotenv
# Inputs
# Path to the .env file for this tool.
# NOTE(review): nothing in the visible code loads this file yet - confirm intended use.
ENV_FILEPATH = "etl/non_invasive_surveys/photos/.env"
# Local root directory scanned by app(); each immediate subdirectory is handled as one property.
PHOTO_DIRECTORY = "/Users/khalimconn-kowlessar/Downloads/IMMO - Dudley Pilot - non-invasive raw data"
# Maps a property folder name (address + postcode) to a numeric identifier,
# presumably the property's UPRN - confirm.
# NOTE(review): not referenced by the visible code yet.
FOLDER_UPRN_LOOKUP = {
"91 Osprey Drive DY1 2JS": 90048026,
"195 Ashenhurst Rd DY1 2JB": 90051858,
"6 Beech Rd DY1 4BP": 90055152,
"53 Bromley DY5 4PJ": 90060989,
"5 Oaklands B62 0JA": 90028499,
"47 Fairfield Rd DY8 5UJ": 90077535,
"150 Huntingtree Rd B63 4HP": 90093693,
"27 Milton Rd DY1 2JB": 90106884,
"21 Wells Rd DY5 3TB": 90022227,
"8 Corporation Rd DY2 7PX": 90070461
}
def list_subdirectories(directory_path):
    """
    Collect the immediate child directories of a directory.

    Only direct children are considered; nested directories are not descended into.

    :param directory_path: Path to the directory to inspect.
    :return: A list of Path objects, one per direct subdirectory.
    """
    children = Path(directory_path).iterdir()
    # Keep directory entries only, in the order the filesystem yields them.
    return list(filter(Path.is_dir, children))
def list_files_in_directory(directory_path, file_extension=".jpg"):
    """
    List all files with a specific extension within a given directory and its subdirectories.

    The extension is compared case-insensitively, so ``.JPG`` files are found
    alongside ``.jpg`` ones. Directories whose names happen to end with the
    extension are skipped, and the result is sorted so that processing order
    does not depend on the filesystem.

    :param directory_path: Path (str or Path) to the directory to scan.
    :param file_extension: File-name ending to filter by, e.g. ".jpg".
    :return: A sorted list of Path objects for the matching files.
    """
    directory = Path(directory_path)
    wanted_suffix = file_extension.lower()
    return sorted(
        candidate
        for candidate in directory.rglob("*")
        # Cheap name check first; only touch the filesystem for name matches.
        if candidate.name.lower().endswith(wanted_suffix) and candidate.is_file()
    )
def create_images(input_path):
    """
    Create a thumbnail and a 1080p-bounded copy of an image.

    Both derivatives are written to the current working directory and are named
    after the source file's stem, so the outputs for different photos do not
    overwrite each other.

    :param input_path: Path (str or Path) to the source image.
    :return: A tuple ``(thumbnail_path, full_hd_path, input_path)``. The two
        generated paths are strings; ``input_path`` is returned unchanged.
    """
    stem = Path(input_path).stem
    thumbnail_path = f"{stem}_thumbnail.jpg"
    full_hd_path = f"{stem}_1080p.jpg"

    # Load the image
    with Image.open(input_path) as img:
        # Image.thumbnail() shrinks in place to fit within the given box while
        # keeping the aspect ratio, so work on copies of the source image.
        # Image.LANCZOS is the filter formerly aliased as Image.ANTIALIAS,
        # which was removed in Pillow 10.
        thumbnail = img.copy()
        thumbnail.thumbnail((128, 128), Image.LANCZOS)  # Fit within 128x128
        thumbnail.save(thumbnail_path)

        # Create a 1080p version
        full_hd = img.copy()
        full_hd.thumbnail((1920, 1080), Image.LANCZOS)  # Fit within 1920x1080
        full_hd.save(full_hd_path)

    # Return paths to the processed images
    return thumbnail_path, full_hd_path, input_path
def upload_to_s3(bucket_name, file_path, object_name):
    """
    Upload a single local file to an S3 bucket and print a confirmation.

    :param bucket_name: Name of the destination S3 bucket.
    :param file_path: Local path (str or Path) of the file to upload.
    :param object_name: Key to store the object under in the bucket.
    """
    s3_client = boto3.client('s3')
    # Callers in this module pass pathlib.Path objects; normalise to str since
    # not every boto3 release accepts path-like filenames.
    s3_client.upload_file(str(file_path), bucket_name, object_name)
    print(f"Uploaded {object_name} to S3 bucket {bucket_name}")
def upload_photos_to_s3(bucket_name, photo_paths):
    """
    Upload each photo to S3, using its file name as the object key.

    :param bucket_name: Name of the destination S3 bucket.
    :param photo_paths: Iterable of local file paths (str or Path).
    """
    # Upload each photo
    for path in photo_paths:
        # Path(...).name works for both str and Path inputs and for any path
        # separator; str.split('/') raised AttributeError on Path objects.
        object_name = Path(path).name
        upload_to_s3(bucket_name, path, object_name)
def generate_cdn_url(distribution_domain, object_name):
    """
    Build the HTTPS URL of an object served from a CDN distribution.

    The object name is percent-encoded (``/`` is kept as a path separator) so
    that names containing spaces or other reserved characters still produce a
    valid URL. Names made only of URL-safe characters are returned unchanged.

    :param distribution_domain: Host name of the CDN distribution.
    :param object_name: Key of the object, expected unencoded.
    :return: The URL as a string.
    """
    from urllib.parse import quote

    return f"https://{distribution_domain}/{quote(object_name)}"
def process_and_upload_images(input_image_path, bucket_name, distribution_domain):
    """
    Create resized versions of an image, upload them with the original, and return their CDN links.

    :param input_image_path: Path (str or Path) to the source image.
    :param bucket_name: Name of the destination S3 bucket.
    :param distribution_domain: Host name of the CDN distribution serving the bucket.
    :return: List of CDN URLs in the order thumbnail, 1080p version, original.
    """
    # Create images
    thumbnail, full_hd, original = create_images(input_image_path)
    image_paths = [thumbnail, full_hd, original]

    # Upload images
    upload_photos_to_s3(bucket_name, image_paths)

    # Generate CDN links. Path(...).name handles both the str paths of the
    # generated images and a Path original; str.split('/') did not.
    cdn_links = [generate_cdn_url(distribution_domain, Path(path).name) for path in image_paths]
    return cdn_links
def app():
    """
    Upload the photos recorded during the non-invasive surveys to S3.

    Every immediate subdirectory of PHOTO_DIRECTORY is treated as one property.
    Each photo found beneath it is resized and uploaded together with the
    original.

    To begin with, this app simply reads the files from the local machine;
    a more efficient approach is planned for the future.

    :return: None
    """
    # For each property, list all of the photos in its directory
    for property_dir in list_subdirectories(PHOTO_DIRECTORY):
        # Convert each file and upload it to S3
        for photo_filepath in list_files_in_directory(property_dir):
            process_and_upload_images(
                photo_filepath, "retrofit-datalake-dev", "cdn.retrofit.com"
            )


# Entry point so that running the file as a script actually executes the app.
if __name__ == "__main__":
    app()

View file

@ -0,0 +1,3 @@
Pillow
boto3
python-dotenv