update db after loading file to s3

This commit is contained in:
Daniel Roth 2026-03-27 09:49:18 +00:00
parent a989700c9e
commit 68ecbe1109
3 changed files with 56 additions and 6 deletions

View file

@ -22,10 +22,10 @@ class FileSourceEnum(enum.Enum):
HUBSPOT = "hubspot"
class UploadedFiles(Base):
class UploadedFile(Base):
__tablename__ = "uploaded_files"
id = Column(BigInteger, primary_key=True)
id = Column(BigInteger, primary_key=True, autoincrement=True)
s3_file_bucket = Column(Text, nullable=False)
s3_file_key = Column(Text, nullable=False)
@ -39,6 +39,6 @@ class UploadedFiles(Base):
SqlEnum(FileTypeEnum, name="file_type", create_type=False), nullable=True
)
source = Column(
file_source = Column(
SqlEnum(FileSourceEnum, name="file_source", create_type=False), nullable=True
)

View file

@ -1,4 +1,7 @@
from enum import Enum
from typing import Optional
from backend.app.db.models.uploaded_file import FileTypeEnum
class CoreFiles(Enum):
@ -11,3 +14,23 @@ class CoreFiles(Enum):
PAR_PHOTOPACK = "PAR Photo Pack"
PAS2023_PROPERTY = "PAS 2023 Property Assessment Report"
PAS2023_OCCUPANCY = "PAS 2023 Occupancy Assessment Report"
# Maps each CoreFiles member to the string value of its FileTypeEnum
# counterpart. Entry order matters: infer_file_type() tests the labels in
# this order and returns on the first prefix match.
CORE_TO_FILETYPE_MAP = {
    core_file: file_type.value
    for core_file, file_type in (
        (CoreFiles.PHOTOPACK, FileTypeEnum.PHOTO_PACK),
        (CoreFiles.SITENOTE, FileTypeEnum.SITE_NOTE),
        (CoreFiles.RDSAP_SITENOTE, FileTypeEnum.RD_SAP_SITE_NOTE),
        (CoreFiles.PAS2023_VENTILATION, FileTypeEnum.PAS_2023_VENTILATION),
        (CoreFiles.PAS2023_CONDITION, FileTypeEnum.PAS_2023_CONDITION),
        (CoreFiles.PAS_SIGNIFICANCE, FileTypeEnum.PAS_SIGNIFICANCE),
        (CoreFiles.PAR_PHOTOPACK, FileTypeEnum.PAR_PHOTO_PACK),
        (CoreFiles.PAS2023_PROPERTY, FileTypeEnum.PAS_2023_PROPERTY),
        (CoreFiles.PAS2023_OCCUPANCY, FileTypeEnum.PAS_2023_OCCUPANCY),
    )
}
def infer_file_type(filename: str) -> Optional[str]:
    """Return the file type whose core-file label is a prefix of *filename*.

    The labels are the values of the ``CoreFiles`` members keyed in
    ``CORE_TO_FILETYPE_MAP``. They are tried in the map's iteration order
    and the first label that prefixes *filename* decides the result.
    Returns ``None`` when no label matches.

    NOTE(review): because the first match wins, a label that is itself a
    prefix of another label would shadow it -- confirm that no two
    ``CoreFiles`` values overlap this way.
    """
    matching_types = (
        mapped_type
        for label, mapped_type in CORE_TO_FILETYPE_MAP.items()
        if filename.startswith(label.value)
    )
    # The generator is lazy, so scanning stops at the first matching label.
    return next(matching_types, None)

View file

@ -1,8 +1,15 @@
from datetime import datetime, timezone
import os
import re
from typing import Any, Dict, List, Mapping, Optional
from openpyxl import load_workbook
from backend.app.db.connection import db_session
from backend.app.db.models.uploaded_file import (
FileSourceEnum,
UploadedFile,
)
from backend.pashub_fetcher.core_files import infer_file_type
from backend.pashub_fetcher.job import Job
from backend.pashub_fetcher.pashub_client import PashubClient, UnauthorizedError
from backend.pashub_fetcher.sharepoint_subfolders import SharepointSubfolders
@ -88,17 +95,35 @@ def upload_job_to_sharepoint(
)
def upload_job_to_s3(job_files: List[str], uprn: str) -> None:
bucket = "retrofit-energy-assessments-dev" # TODO: create new bucket
def upload_job_to_s3_and_update_db(
    job_files: List[str],
    uprn: str,
    bucket: str = "retrofit-energy-assessments-dev",
) -> None:
    """Upload each job file to S3 and record the uploads in the database.

    Every file is stored in *bucket* under
    ``documents/uprn/<uprn>/<basename of the local path>``. Once all uploads
    have completed, one ``UploadedFile`` row per file is added and committed
    in a single database session.

    Args:
        job_files: Local paths of the files to upload. An empty list is a
            no-op.
        uprn: Property reference used in the S3 key; it must be parseable
            by ``int()`` because the database row stores it as an integer.
        bucket: Destination S3 bucket.

    Raises:
        ValueError: If *uprn* is not an integer string. Raised before any
            file is uploaded.
    """
    if not job_files:
        # Nothing to upload, so there is nothing to record either.
        return

    # Convert once, up front: a malformed UPRN must fail before anything is
    # written to S3, otherwise objects are left behind with no matching row.
    uprn_number = int(uprn)
    base_path = f"documents/uprn/{uprn}"

    uploaded_files: List[UploadedFile] = []
    for file_path in job_files:
        filename = os.path.basename(file_path)
        file_key = f"{base_path}/{filename}"
        upload_file_to_s3(file_path, bucket, file_key)

        # Build the row only after the upload call has returned, so every
        # row refers to an object that was actually sent.
        uploaded_files.append(
            UploadedFile(
                s3_file_bucket=bucket,
                s3_file_key=file_key,
                s3_upload_timestamp=datetime.now(timezone.utc),
                uprn=uprn_number,
                file_source=FileSourceEnum.PAS_HUB.value,
                file_type=infer_file_type(filename),
            )
        )

    with db_session() as session:
        session.add_all(uploaded_files)
        session.commit()
@ -120,7 +145,9 @@ def process_job(
if uprn:
logger.info("Uploading files to s3")
upload_job_to_s3(job_files, uprn)
upload_job_to_s3_and_update_db(job_files, uprn)
# add record of new file in db
upload_job_to_sharepoint(sharepoint_client, base_path, job, job_files)