From 4a4099901ffef9dc01202c053af984a75e71dd08 Mon Sep 17 00:00:00 2001
From: Daniel Roth
Date: Thu, 26 Mar 2026 16:33:35 +0000
Subject: [PATCH] upload files to s3

---
 backend/pashub_fetcher/handler/handler.py | 28 +++++++++++++++--------
 utils/s3.py                               | 11 +++++++++
 2 files changed, 30 insertions(+), 9 deletions(-)

diff --git a/backend/pashub_fetcher/handler/handler.py b/backend/pashub_fetcher/handler/handler.py
index c728661f..45ca74e3 100644
--- a/backend/pashub_fetcher/handler/handler.py
+++ b/backend/pashub_fetcher/handler/handler.py
@@ -3,11 +3,13 @@ import re
 from typing import Any, Dict, List, Mapping, Optional
 
 from openpyxl import load_workbook
+from backend.app.config import get_settings
 from backend.pashub_fetcher.job import Job
 from backend.pashub_fetcher.pashub_client import PashubClient, UnauthorizedError
 from backend.pashub_fetcher.sharepoint_subfolders import SharepointSubfolders
 from backend.pashub_fetcher.token_getter import get_token_from_local_storage
 from utils.logger import setup_logger
+from utils.s3 import upload_file_to_s3
 from utils.sharepoint.domna_sharepoint_client import DomnaSharepointClient
 from utils.sharepoint.domna_sites import DomnaSites
 
@@ -87,13 +89,17 @@ def upload_job_to_sharepoint(
     )
 
 
-def upload_job_to_s3(
-    job: Job,
-    job_files: List[str],
-) -> None:
-    # Example:
-    # for file_path in job_files:
-    #     s3_client.upload_file(...)
+def upload_job_to_s3(job_files: List[str], uprn: str) -> None:
+    bucket = get_settings().DATA_BUCKET
+
+    base_path = f"uprn/{uprn}"
+
+    for file_path in job_files:
+        filename = os.path.basename(file_path)
+        file_key = f"{base_path}/{filename}"
+
+        upload_file_to_s3(file_path, bucket, file_key)
+
     pass
 
 
@@ -106,12 +112,16 @@ def process_job(
     job_id = job["id"]
 
     uprn: Optional[str] = pashub_client.get_uprn_by_job_id(job_id)
-    logger.info(f"Got UPRN {uprn} for job {job_id}")
+    if uprn:
+        logger.info(f"Got UPRN {uprn} for job {job_id}")
+    else:
+        logger.info(f"No UPRN found for job {job_id}")
 
     job_files: List[str] = pashub_client.get_core_evidence_files_by_job_id(job_id)
 
     upload_job_to_sharepoint(sharepoint_client, base_path, job, job_files)
-    upload_job_to_s3(job, job_files)
+    if uprn:
+        upload_job_to_s3(job_files, uprn)
 
     return job_files
 
diff --git a/utils/s3.py b/utils/s3.py
index 6aa3f44e..242e0db5 100644
--- a/utils/s3.py
+++ b/utils/s3.py
@@ -472,3 +472,14 @@ def list_xmls_in_s3_folder(bucket_name, folder_name):
             f"Failed to list XML files in folder {folder_name} in bucket {bucket_name}: {str(e)}"
         )
         return []
+
+
+def upload_file_to_s3(file_path: str, bucket_name: str, file_key: str) -> None:
+    try:
+        s3 = boto3.resource("s3")
+        bucket = s3.Bucket(bucket_name)
+        bucket.upload_file(file_path, file_key)
+        logger.info(f"Uploaded {file_path} to s3://{bucket_name}/{file_key}")
+    except Exception as e:
+        logger.error(f"Failed to upload {file_path} to S3: {e}")
+        raise