# Review notes (leading commentary; everything from the first "diff --git" header onwards is the unmodified patch).
#
# What this patch does
#   * backend/app/db/models: renames module uploaded_files.py -> uploaded_file.py and class
#     UploadedFiles -> UploadedFile (table name stays "uploaded_files"), sets autoincrement=True
#     on the BigInteger primary key, and renames the mapped attribute `source` -> `file_source`.
#   * backend/pashub_fetcher/core_files.py: adds CORE_TO_FILETYPE_MAP (nine CoreFiles members ->
#     FileTypeEnum `.value`s) and infer_file_type(filename), which returns the mapped value of the
#     first entry whose CoreFiles value is a prefix of `filename`, or None when nothing matches.
#   * backend/pashub_fetcher/handler/handler.py: renames upload_job_to_s3 ->
#     upload_job_to_s3_and_update_db; after each S3 upload it builds an UploadedFile row, and after
#     the loop it adds all rows in one db_session and commits. process_job calls the new name.
#
# Points to confirm before merging
#   * NOTE(review): `file_source = Column(...)` has no explicit column name, so the mapped column
#     name follows the attribute rename (source -> file_source). No migration is part of this
#     patch - confirm the table already has a `file_source` column.
#   * NOTE(review): UploadedFile is given `FileSourceEnum.PAS_HUB.value` and the `.value` strings
#     returned by infer_file_type, while both columns are declared as SqlEnum(<EnumClass>) without
#     `values_callable`. SQLAlchemy's Enum maps by member *name* by default - confirm these strings
#     round-trip (insert and read back) against the database enum labels.
#   * `int(uprn)` is evaluated after upload_file_to_s3 has already run for that file; a non-numeric
#     uprn raises ValueError with objects already in S3 and no rows written.
#   * The commit happens once, after every upload; nothing in this function removes the uploaded
#     objects if the commit fails. Presumably re-running a job would also insert duplicate rows
#     for the same key - verify against table constraints.
#   * The `pass` after the `with db_session()` block is a no-op and can be dropped.
#   * The comment `# add record of new file in db` in process_job follows the call that already
#     performs the insert; it looks stale.
#   * The `# TODO: create new bucket` marker is removed, but the bucket literal is still
#     "retrofit-energy-assessments-dev" - confirm this is intended.
#   * infer_file_type matches with a case-sensitive str.startswith and returns on the first hit in
#     dict order; if one CoreFiles value is a prefix of another, the earlier entry wins (not all
#     CoreFiles values are visible in this patch - TODO confirm there is no overlap).
#
diff --git a/backend/app/db/models/uploaded_files.py b/backend/app/db/models/uploaded_file.py similarity index 91% rename from backend/app/db/models/uploaded_files.py rename to backend/app/db/models/uploaded_file.py index d4bf48d8..726ed0a3 100644 --- a/backend/app/db/models/uploaded_files.py +++ b/backend/app/db/models/uploaded_file.py @@ -22,10 +22,10 @@ class FileSourceEnum(enum.Enum): HUBSPOT = "hubspot" -class UploadedFiles(Base): +class UploadedFile(Base): __tablename__ = "uploaded_files" - id = Column(BigInteger, primary_key=True) + id = Column(BigInteger, primary_key=True, autoincrement=True) s3_file_bucket = Column(Text, nullable=False) s3_file_key = Column(Text, nullable=False) @@ -39,6 +39,6 @@ class UploadedFiles(Base): SqlEnum(FileTypeEnum, name="file_type", create_type=False), nullable=True ) - source = Column( + file_source = Column( SqlEnum(FileSourceEnum, name="file_source", create_type=False), nullable=True ) diff --git a/backend/pashub_fetcher/core_files.py b/backend/pashub_fetcher/core_files.py index 82637f70..4da10661 100644 --- a/backend/pashub_fetcher/core_files.py +++ b/backend/pashub_fetcher/core_files.py @@ -1,4 +1,7 @@ from enum import Enum +from typing import Optional + +from backend.app.db.models.uploaded_file import FileTypeEnum class CoreFiles(Enum): @@ -11,3 +14,23 @@ class CoreFiles(Enum): PAR_PHOTOPACK = "PAR Photo Pack" PAS2023_PROPERTY = "PAS 2023 Property Assessment Report" PAS2023_OCCUPANCY = "PAS 2023 Occupancy Assessment Report" + + +CORE_TO_FILETYPE_MAP = { + CoreFiles.PHOTOPACK: FileTypeEnum.PHOTO_PACK.value, + CoreFiles.SITENOTE: FileTypeEnum.SITE_NOTE.value, + CoreFiles.RDSAP_SITENOTE: FileTypeEnum.RD_SAP_SITE_NOTE.value, + CoreFiles.PAS2023_VENTILATION: FileTypeEnum.PAS_2023_VENTILATION.value, + CoreFiles.PAS2023_CONDITION: FileTypeEnum.PAS_2023_CONDITION.value, + CoreFiles.PAS_SIGNIFICANCE: FileTypeEnum.PAS_SIGNIFICANCE.value, + CoreFiles.PAR_PHOTOPACK: FileTypeEnum.PAR_PHOTO_PACK.value, + CoreFiles.PAS2023_PROPERTY: 
FileTypeEnum.PAS_2023_PROPERTY.value, + CoreFiles.PAS2023_OCCUPANCY: FileTypeEnum.PAS_2023_OCCUPANCY.value, +} + + +def infer_file_type(filename: str) -> Optional[str]: + for core_file, file_type in CORE_TO_FILETYPE_MAP.items(): + if filename.startswith(core_file.value): + return file_type + return None diff --git a/backend/pashub_fetcher/handler/handler.py b/backend/pashub_fetcher/handler/handler.py index 6fa2a909..34f44589 100644 --- a/backend/pashub_fetcher/handler/handler.py +++ b/backend/pashub_fetcher/handler/handler.py @@ -1,8 +1,15 @@ +from datetime import datetime, timezone import os import re from typing import Any, Dict, List, Mapping, Optional from openpyxl import load_workbook +from backend.app.db.connection import db_session +from backend.app.db.models.uploaded_file import ( + FileSourceEnum, + UploadedFile, +) +from backend.pashub_fetcher.core_files import infer_file_type from backend.pashub_fetcher.job import Job from backend.pashub_fetcher.pashub_client import PashubClient, UnauthorizedError from backend.pashub_fetcher.sharepoint_subfolders import SharepointSubfolders @@ -88,17 +95,35 @@ def upload_job_to_sharepoint( ) -def upload_job_to_s3(job_files: List[str], uprn: str) -> None: - bucket = "retrofit-energy-assessments-dev" # TODO: create new bucket +def upload_job_to_s3_and_update_db(job_files: List[str], uprn: str) -> None: + bucket = "retrofit-energy-assessments-dev" base_path = f"documents/uprn/{uprn}" + uploaded_files: List[UploadedFile] = [] + for file_path in job_files: filename = os.path.basename(file_path) file_key = f"{base_path}/{filename}" upload_file_to_s3(file_path, bucket, file_key) + # load row to db + uploaded_files.append( + UploadedFile( + s3_file_bucket=bucket, + s3_file_key=file_key, + s3_upload_timestamp=datetime.now(timezone.utc), + uprn=int(uprn), + file_source=FileSourceEnum.PAS_HUB.value, + file_type=infer_file_type(filename), + ) + ) + + with db_session() as session: + session.add_all(uploaded_files) + session.commit() 
+ pass @@ -120,7 +145,9 @@ def process_job( if uprn: logger.info("Uploading files to s3") - upload_job_to_s3(job_files, uprn) + upload_job_to_s3_and_update_db(job_files, uprn) + + # add record of new file in db upload_job_to_sharepoint(sharepoint_client, base_path, job, job_files)