update db after loading file to s3

This commit is contained in:
Daniel Roth 2026-03-27 09:49:18 +00:00
parent a989700c9e
commit 68ecbe1109
3 changed files with 56 additions and 6 deletions

View file

@ -22,10 +22,10 @@ class FileSourceEnum(enum.Enum):
HUBSPOT = "hubspot" HUBSPOT = "hubspot"
class UploadedFiles(Base): class UploadedFile(Base):
__tablename__ = "uploaded_files" __tablename__ = "uploaded_files"
id = Column(BigInteger, primary_key=True) id = Column(BigInteger, primary_key=True, autoincrement=True)
s3_file_bucket = Column(Text, nullable=False) s3_file_bucket = Column(Text, nullable=False)
s3_file_key = Column(Text, nullable=False) s3_file_key = Column(Text, nullable=False)
@ -39,6 +39,6 @@ class UploadedFiles(Base):
SqlEnum(FileTypeEnum, name="file_type", create_type=False), nullable=True SqlEnum(FileTypeEnum, name="file_type", create_type=False), nullable=True
) )
source = Column( file_source = Column(
SqlEnum(FileSourceEnum, name="file_source", create_type=False), nullable=True SqlEnum(FileSourceEnum, name="file_source", create_type=False), nullable=True
) )

View file

@ -1,4 +1,7 @@
from enum import Enum from enum import Enum
from typing import Optional
from backend.app.db.models.uploaded_file import FileTypeEnum
class CoreFiles(Enum): class CoreFiles(Enum):
@ -11,3 +14,23 @@ class CoreFiles(Enum):
PAR_PHOTOPACK = "PAR Photo Pack" PAR_PHOTOPACK = "PAR Photo Pack"
PAS2023_PROPERTY = "PAS 2023 Property Assessment Report" PAS2023_PROPERTY = "PAS 2023 Property Assessment Report"
PAS2023_OCCUPANCY = "PAS 2023 Occupancy Assessment Report" PAS2023_OCCUPANCY = "PAS 2023 Occupancy Assessment Report"
# Lookup from each CoreFiles member to the string value of the FileTypeEnum
# member it corresponds to. Entry order is significant to callers that scan
# the mapping in order, so the pair table below keeps the established order.
CORE_TO_FILETYPE_MAP = {
    core_file: file_type.value
    for core_file, file_type in (
        (CoreFiles.PHOTOPACK, FileTypeEnum.PHOTO_PACK),
        (CoreFiles.SITENOTE, FileTypeEnum.SITE_NOTE),
        (CoreFiles.RDSAP_SITENOTE, FileTypeEnum.RD_SAP_SITE_NOTE),
        (CoreFiles.PAS2023_VENTILATION, FileTypeEnum.PAS_2023_VENTILATION),
        (CoreFiles.PAS2023_CONDITION, FileTypeEnum.PAS_2023_CONDITION),
        (CoreFiles.PAS_SIGNIFICANCE, FileTypeEnum.PAS_SIGNIFICANCE),
        (CoreFiles.PAR_PHOTOPACK, FileTypeEnum.PAR_PHOTO_PACK),
        (CoreFiles.PAS2023_PROPERTY, FileTypeEnum.PAS_2023_PROPERTY),
        (CoreFiles.PAS2023_OCCUPANCY, FileTypeEnum.PAS_2023_OCCUPANCY),
    )
}
def infer_file_type(filename: str) -> Optional[str]:
    """Return the file-type value whose core-file name prefixes *filename*.

    Entries of ``CORE_TO_FILETYPE_MAP`` are scanned in the mapping's own
    order and the first key whose ``.value`` is a prefix of *filename* wins.

    Args:
        filename: Name of the file to classify (compared with
            ``str.startswith``, so matching is case-sensitive).

    Returns:
        The mapped file-type value for the first matching entry, or ``None``
        when no entry's name prefixes *filename*.
    """
    matching_types = (
        mapped_type
        for core, mapped_type in CORE_TO_FILETYPE_MAP.items()
        if filename.startswith(core.value)
    )
    # ``next`` with a default yields the first hit, or None when nothing matched.
    return next(matching_types, None)

View file

@ -1,8 +1,15 @@
from datetime import datetime, timezone
import os import os
import re import re
from typing import Any, Dict, List, Mapping, Optional from typing import Any, Dict, List, Mapping, Optional
from openpyxl import load_workbook from openpyxl import load_workbook
from backend.app.db.connection import db_session
from backend.app.db.models.uploaded_file import (
FileSourceEnum,
UploadedFile,
)
from backend.pashub_fetcher.core_files import infer_file_type
from backend.pashub_fetcher.job import Job from backend.pashub_fetcher.job import Job
from backend.pashub_fetcher.pashub_client import PashubClient, UnauthorizedError from backend.pashub_fetcher.pashub_client import PashubClient, UnauthorizedError
from backend.pashub_fetcher.sharepoint_subfolders import SharepointSubfolders from backend.pashub_fetcher.sharepoint_subfolders import SharepointSubfolders
@ -88,17 +95,35 @@ def upload_job_to_sharepoint(
) )
def upload_job_to_s3_and_update_db(job_files: List[str], uprn: str) -> None:
    """Upload each job file to S3 and record one ``UploadedFile`` row per file.

    Every path in *job_files* is uploaded under ``documents/uprn/<uprn>/`` in
    the bucket named below. Once all uploads have been issued, the collected
    rows are added to a single ``db_session()`` and committed together.

    Args:
        job_files: Local paths of the files to upload; only the base name of
            each path is used in the S3 key.
        uprn: Property reference as a string; it must parse as an integer
            because it is stored as ``int(uprn)`` on each row.

    Raises:
        ValueError: If *uprn* is not a valid integer string. This is raised
            before any file is uploaded.
    """
    # Convert once, up front: doing this inside the loop (after the upload)
    # would leave an object in S3 with no database row when the UPRN is bad.
    uprn_value = int(uprn)

    bucket = "retrofit-energy-assessments-dev"
    base_path = f"documents/uprn/{uprn}"

    uploaded_files: List[UploadedFile] = []

    for file_path in job_files:
        filename = os.path.basename(file_path)
        file_key = f"{base_path}/{filename}"

        upload_file_to_s3(file_path, bucket, file_key)

        # Queue the row describing this upload; it is persisted after the loop.
        uploaded_files.append(
            UploadedFile(
                s3_file_bucket=bucket,
                s3_file_key=file_key,
                s3_upload_timestamp=datetime.now(timezone.utc),
                uprn=uprn_value,
                # NOTE(review): the column is declared with SqlEnum over the
                # enum class, yet the raw ``.value`` string is passed here (and
                # infer_file_type also returns ``.value`` strings) - confirm
                # these strings match the labels the database type expects.
                file_source=FileSourceEnum.PAS_HUB.value,
                file_type=infer_file_type(filename),
            )
        )

    # One session and one commit for the whole batch of rows.
    with db_session() as session:
        session.add_all(uploaded_files)
        session.commit()
@ -120,7 +145,9 @@ def process_job(
if uprn: if uprn:
logger.info("Uploading files to s3") logger.info("Uploading files to s3")
upload_job_to_s3(job_files, uprn) upload_job_to_s3_and_update_db(job_files, uprn)
# add record of new file in db
upload_job_to_sharepoint(sharepoint_client, base_path, job, job_files) upload_job_to_sharepoint(sharepoint_client, base_path, job, job_files)