load files to s3 and update db

This commit is contained in:
Daniel Roth 2026-04-01 16:24:00 +00:00
parent 32e37990d2
commit 231473ecba
2 changed files with 40 additions and 2 deletions

View file

@ -20,6 +20,7 @@ class FileSourceEnum(enum.Enum):
PAS_HUB = "pas hub"
SHAREPOINT = "sharepoint"
HUBSPOT = "hubspot"
ECMK = "ecmk"
class UploadedFile(Base):

View file

@ -1,3 +1,4 @@
from datetime import datetime, timezone
import os
from enum import Enum
import re
@ -10,7 +11,10 @@ from playwright.sync_api import (
TimeoutError as PlaywrightTimeoutError,
)
from backend.app.db.connection import db_session
from backend.app.db.models.uploaded_file import FileSourceEnum, UploadedFile
from utils.logger import setup_logger
from utils.s3 import upload_file_to_s3
from utils.sharepoint.domna_sharepoint_client import DomnaSharepointClient
from utils.sharepoint.domna_sites import DomnaSites
@ -181,6 +185,37 @@ def download_report_by_selector(page: Page, selector: str) -> str:
return save_path
def upload_job_to_s3_and_update_db(
    job_files: List[str],
    uprn: str,
    bucket: str = "retrofit-energy-assessments-dev",
) -> None:
    """Upload a job's files to S3 and record each one as an ``UploadedFile`` row.

    Every file is uploaded under ``documents/uprn/<uprn>/<basename>`` in
    ``bucket``. One ``UploadedFile`` row per file is then added and committed
    in a single database session.

    Args:
        job_files: Local paths of the files to upload. An empty list is a
            no-op: nothing is uploaded and no database session is opened.
        uprn: Property reference as text. It is used verbatim in the S3 key
            and converted with ``int()`` for the database row.
        bucket: Destination S3 bucket. Defaults to the bucket this function
            previously had hard-coded.

    Raises:
        ValueError: If ``uprn`` is not an integer string. This is raised
            before any file is uploaded.
    """
    if not job_files:
        return

    # Convert once, up front: a malformed UPRN must fail before anything is
    # written to S3, otherwise objects are left behind with no database row.
    try:
        uprn_value = int(uprn)
    except ValueError as err:
        raise ValueError(
            f"uprn must be an integer string, got {uprn!r}"
        ) from err

    base_path = f"documents/uprn/{uprn}"
    uploaded_files: List[UploadedFile] = []

    for file_path in job_files:
        filename = os.path.basename(file_path)
        file_key = f"{base_path}/{filename}"

        upload_file_to_s3(file_path, bucket, file_key)

        # Queue the row; all rows are written together after the uploads.
        uploaded_files.append(
            UploadedFile(
                s3_file_bucket=bucket,
                s3_file_key=file_key,
                s3_upload_timestamp=datetime.now(timezone.utc),
                uprn=uprn_value,
                file_source=FileSourceEnum.ECMK.value,
            )
        )

    # NOTE(review): if an upload fails part-way through the list, the files
    # already uploaded stay in S3 without rows - confirm this is acceptable.
    with db_session() as session:
        session.add_all(uploaded_files)
        session.commit()
def download_report() -> None:
username: str = ""
password: str = ""
@ -232,7 +267,7 @@ def download_report() -> None:
last_name: str = cells.nth(2).inner_text().strip()
address: str = cells.nth(5).inner_text().strip()
postcode: str = cells.nth(7).inner_text().strip()
# uprn: str = cells.nth(8).inner_text().strip()
uprn: str = cells.nth(8).inner_text().strip()
status: str = cells.nth(9).inner_text().strip()
if first_name == "Oliver" and last_name == "Stephens":
@ -268,7 +303,9 @@ def download_report() -> None:
sharepoint_path=f"{sharepoint_base_path}/{sharepoint_address}/1. Retrofit Assessment/A. Assessment",
file_name=os.path.basename(file_path),
)
# TODO: stick in s3
# TODO: could s3 load happen for all files at once to reduce db roundtrips?
if uprn:
upload_job_to_s3_and_update_db([file_path], uprn)
finally:
if os.path.exists(file_path):
os.remove(file_path)