mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
Load files to S3 and update the database
This commit is contained in:
parent
32e37990d2
commit
231473ecba
2 changed files with 40 additions and 2 deletions
|
|
@ -20,6 +20,7 @@ class FileSourceEnum(enum.Enum):
|
|||
PAS_HUB = "pas hub"
|
||||
SHAREPOINT = "sharepoint"
|
||||
HUBSPOT = "hubspot"
|
||||
ECMK = "ecmk"
|
||||
|
||||
|
||||
class UploadedFile(Base):
|
||||
|
|
|
|||
|
|
@ -1,3 +1,4 @@
|
|||
from datetime import datetime, timezone
|
||||
import os
|
||||
from enum import Enum
|
||||
import re
|
||||
|
|
@ -10,7 +11,10 @@ from playwright.sync_api import (
|
|||
TimeoutError as PlaywrightTimeoutError,
|
||||
)
|
||||
|
||||
from backend.app.db.connection import db_session
|
||||
from backend.app.db.models.uploaded_file import FileSourceEnum, UploadedFile
|
||||
from utils.logger import setup_logger
|
||||
from utils.s3 import upload_file_to_s3
|
||||
from utils.sharepoint.domna_sharepoint_client import DomnaSharepointClient
|
||||
from utils.sharepoint.domna_sites import DomnaSites
|
||||
|
||||
|
|
@ -181,6 +185,37 @@ def download_report_by_selector(page: Page, selector: str) -> str:
|
|||
return save_path
|
||||
|
||||
|
||||
def upload_job_to_s3_and_update_db(
    job_files: List[str],
    uprn: str,
    bucket: str = "retrofit-energy-assessments-dev",
) -> None:
    """Upload a job's files to S3 and record each upload in the database.

    Every file is uploaded under ``documents/uprn/<uprn>/<basename>`` in
    ``bucket``. One ``UploadedFile`` row is built per file, and all rows are
    added and committed through a single ``db_session()``.

    Args:
        job_files: Local paths of the files to upload. An empty list is a
            no-op.
        uprn: Property reference as text; it must be parseable by ``int()``.
        bucket: Destination S3 bucket. Defaults to the bucket this function
            previously hard-coded.

    Raises:
        ValueError: If ``job_files`` is non-empty and ``uprn`` is not a valid
            integer. This is raised before anything is uploaded.
    """
    if not job_files:
        # Nothing to upload, so skip UPRN parsing and the database session.
        return

    # Parse up front: converting inside the loop (after the upload) would
    # leave an object in S3 with no matching database row on a bad UPRN.
    uprn_value = int(uprn)
    base_path = f"documents/uprn/{uprn}"

    uploaded_files: List[UploadedFile] = []
    for file_path in job_files:
        file_key = f"{base_path}/{os.path.basename(file_path)}"
        upload_file_to_s3(file_path, bucket, file_key)

        # Queue the row; the timestamp is taken right after this file's upload.
        uploaded_files.append(
            UploadedFile(
                s3_file_bucket=bucket,
                s3_file_key=file_key,
                s3_upload_timestamp=datetime.now(timezone.utc),
                uprn=uprn_value,
                file_source=FileSourceEnum.ECMK.value,
            )
        )

    # Single session and commit for the whole batch of rows.
    with db_session() as session:
        session.add_all(uploaded_files)
        session.commit()
|
||||
|
||||
|
||||
def download_report() -> None:
|
||||
username: str = ""
|
||||
password: str = ""
|
||||
|
|
@ -232,7 +267,7 @@ def download_report() -> None:
|
|||
last_name: str = cells.nth(2).inner_text().strip()
|
||||
address: str = cells.nth(5).inner_text().strip()
|
||||
postcode: str = cells.nth(7).inner_text().strip()
|
||||
# uprn: str = cells.nth(8).inner_text().strip()
|
||||
uprn: str = cells.nth(8).inner_text().strip()
|
||||
status: str = cells.nth(9).inner_text().strip()
|
||||
|
||||
if first_name == "Oliver" and last_name == "Stephens":
|
||||
|
|
@ -268,7 +303,9 @@ def download_report() -> None:
|
|||
sharepoint_path=f"{sharepoint_base_path}/{sharepoint_address}/1. Retrofit Assessment/A. Assessment",
|
||||
file_name=os.path.basename(file_path),
|
||||
)
|
||||
# TODO: stick in s3
|
||||
# TODO: could s3 load happen for all files at once to reduce db roundtrips?
|
||||
if uprn:
|
||||
upload_job_to_s3_and_update_db([file_path], uprn)
|
||||
finally:
|
||||
if os.path.exists(file_path):
|
||||
os.remove(file_path)
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue