mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
234 lines
6.7 KiB
Python
234 lines
6.7 KiB
Python
from datetime import datetime, timezone
|
|
import json
|
|
import os
|
|
import re
|
|
from typing import Any, Dict, List, Mapping, Optional
|
|
from openpyxl import load_workbook
|
|
|
|
from backend.app.config import get_settings
|
|
from backend.app.db.connection import db_session
|
|
from backend.app.db.models.uploaded_file import (
|
|
FileSourceEnum,
|
|
UploadedFile,
|
|
)
|
|
from backend.pashub_fetcher.core_files import infer_file_type
|
|
|
|
from backend.pashub_fetcher.job import Job
|
|
from backend.pashub_fetcher.pashub_client import PashubClient, UnauthorizedError
|
|
from backend.pashub_fetcher.pashub_to_ara_trigger_request import (
|
|
PashubToAraTriggerRequest,
|
|
)
|
|
from backend.pashub_fetcher.sharepoint_subfolders import SharepointSubfolders
|
|
from backend.pashub_fetcher.token_getter import get_token_from_local_storage
|
|
from backend.utils.subtasks import task_handler
|
|
from utils.logger import setup_logger
|
|
from utils.s3 import upload_file_to_s3
|
|
from utils.sharepoint.domna_sharepoint_client import DomnaSharepointClient
|
|
from utils.sharepoint.domna_sites import DomnaSites
|
|
|
|
|
|
# Module-level logger used by the functions in this file.
logger = setup_logger()
|
|
|
|
|
|
def extract_jobs(filepath: str) -> List[Job]:
    """Read job ids and addresses from the "filtered" sheet of a workbook.

    Row 3 of the sheet is treated as the header row. Every later row that has
    both a "Name" and a "PasHub Link" value, and whose link contains
    ``/jobs/<id>/``, produces one job entry.

    Args:
        filepath: Path of the Excel workbook to read.

    Returns:
        One ``{"id": <id from the link>, "address": <name as str>}`` dict per
        usable row, in sheet order. Rows with a missing name or link, or with
        a link that does not match the pattern, are skipped.

    Raises:
        KeyError: If the "Name" or "PasHub Link" header is not present in the
            header row.
    """
    HEADER_ROW = 3
    REQUIRED_HEADERS = ("Name", "PasHub Link")
    job_id_pattern = re.compile(r"/jobs/([0-9a-fA-F\-]+)/")

    wb = load_workbook(filepath, data_only=True)
    # ws = wb["watford warm homes (wave 3) mai"]
    ws = wb["filtered"]

    headers: Dict[str, int] = {}
    for col in range(1, ws.max_column + 1):
        raw_header = ws.cell(row=HEADER_ROW, column=col).value
        # Check for None before converting: str(None) is the truthy string
        # "None", which would otherwise be registered as a real header.
        if raw_header is None:
            continue
        title = str(raw_header).strip()
        if title:
            headers[title] = col

    missing = [title for title in REQUIRED_HEADERS if title not in headers]
    if missing:
        raise KeyError(
            f"Missing header(s) {missing} in row {HEADER_ROW}; "
            f"found {sorted(headers)}"
        )

    name_col = headers["Name"]
    # link_col = headers["Pashub Link"]
    link_col = headers["PasHub Link"]

    jobs: List[Job] = []

    for row in range(HEADER_ROW + 1, ws.max_row + 1):
        name = ws.cell(row=row, column=name_col).value
        link = ws.cell(row=row, column=link_col).value

        if not name or not link:
            continue

        match = job_id_pattern.search(str(link))
        if not match:
            continue

        jobs.append(
            {
                "id": match.group(1),
                "address": str(name),
            }
        )

    return jobs
|
|
|
|
|
|
def get_pashub_client(email: str, password: str) -> PashubClient:
    """Build a PashubClient from a freshly obtained token.

    Args:
        email: Email passed to ``get_token_from_local_storage``.
        password: Password passed to ``get_token_from_local_storage``.

    Returns:
        A ``PashubClient`` constructed with the obtained token.
    """
    access_token = get_token_from_local_storage(email, password)
    logger.info("Token extracted successfully")

    client = PashubClient(token=access_token)
    return client
|
|
|
|
|
|
def upload_job_to_sharepoint(
    sharepoint_client: DomnaSharepointClient,
    sharepoint_link: str,
    job_files: List[str],
) -> None:
    """Upload each file in ``job_files`` to the job's assessment subfolder.

    The destination folder is
    ``<sharepoint_link>/<SharepointSubfolders.ASSESSMENT.value>``.

    Args:
        sharepoint_client: Client whose ``upload_file`` method performs the
            upload.
        sharepoint_link: Path of the job folder on SharePoint.
        job_files: Local paths of the files to upload.

    NOTE(review): this function creates no folders (the earlier ``makedir``
    calls for the job folder and its subfolders are disabled), so the
    assessment folder presumably has to exist already - confirm.
    """
    assessment_path = f"{sharepoint_link}/{SharepointSubfolders.ASSESSMENT.value}"

    for file_path in job_files:
        # os.path.basename matches how upload_job_to_s3_and_update_db derives
        # file names, and copes with the platform's path separator.
        filename = os.path.basename(file_path)

        sharepoint_client.upload_file(
            file_path,
            assessment_path,
            filename,
        )
|
|
|
|
|
|
def upload_job_to_s3_and_update_db(
    job_files: List[str],
    uprn: str,
    bucket: str = "retrofit-energy-assessments-dev",
) -> None:
    """Upload files to S3 under the UPRN prefix and record them in the DB.

    Each file is uploaded to ``documents/uprn/<uprn>/<basename>`` in
    ``bucket``. After all uploads, one ``UploadedFile`` row per file is added
    and committed in a single DB session.

    Args:
        job_files: Local paths of the files to upload. An empty list is a
            no-op.
        uprn: Property reference; used verbatim in the S3 key and converted
            to ``int`` for the DB row.
        bucket: Target S3 bucket. Defaults to the bucket this function has
            always written to.

    Raises:
        ValueError: If ``uprn`` cannot be converted to ``int``. This is raised
            before anything is uploaded.
    """
    # Convert up front so a non-numeric UPRN fails before any upload happens;
    # converting inside the loop would leave S3 objects with no DB row.
    uprn_number = int(uprn)

    if not job_files:
        return

    base_path = f"documents/uprn/{uprn}"

    uploaded_files: List[UploadedFile] = []

    for file_path in job_files:
        filename = os.path.basename(file_path)
        file_key = f"{base_path}/{filename}"

        upload_file_to_s3(file_path, bucket, file_key)

        # Build the DB row now; rows are persisted together after the loop.
        uploaded_files.append(
            UploadedFile(
                s3_file_bucket=bucket,
                s3_file_key=file_key,
                s3_upload_timestamp=datetime.now(timezone.utc),
                uprn=uprn_number,
                file_source=FileSourceEnum.PAS_HUB.value,
                file_type=infer_file_type(filename),
            )
        )

    with db_session() as session:
        session.add_all(uploaded_files)
        session.commit()
|
|
|
|
|
|
def process_job(
    job: PashubToAraTriggerRequest,
    pashub_client: PashubClient,
    sharepoint_client: DomnaSharepointClient,
) -> List[str]:
    """Fetch the core evidence files for one job and store them when possible.

    The UPRN comes from the request when set, otherwise it is looked up via
    ``pashub_client.get_uprn_by_job_id``. The files are always fetched; they
    are uploaded to S3 and recorded in the DB only when a UPRN is available.

    Args:
        job: Trigger request carrying ``pashub_job_id`` and an optional
            ``uprn``.
        pashub_client: Client used for the UPRN lookup and the file fetch.
        sharepoint_client: Currently unused; the SharePoint upload is
            disabled (see the note in the body).

    Returns:
        The file paths returned by
        ``pashub_client.get_core_evidence_files_by_job_id``.
    """
    job_id = job.pashub_job_id

    # Use the UPRN on the request if it is truthy; otherwise ask PasHub.
    uprn: Optional[str] = job.uprn
    if not uprn:
        uprn = pashub_client.get_uprn_by_job_id(job_id)

    if not uprn:
        logger.info(f"No UPRN found for job {job_id}")
    else:
        logger.info(f"Got UPRN {uprn} for job {job_id}")

    evidence_files: List[str] = pashub_client.get_core_evidence_files_by_job_id(
        job_id
    )

    if uprn:
        logger.info("Uploading files to s3")
        upload_job_to_s3_and_update_db(evidence_files, uprn)

    # SharePoint loading is commented out for now: the sharepoint link in
    # pas hub seems inconsistent in terms of whether it points to a property
    # or a project.
    #
    # if job.sharepoint_link:
    #     upload_job_to_sharepoint(sharepoint_client, job.sharepoint_link, job_files)

    return evidence_files
|
|
|
|
|
|
@task_handler()
def handler(event: Mapping[str, Any], context: Any) -> None:
    """Process every record in ``event`` by fetching and storing its job files.

    Each record's ``body`` is parsed as JSON, validated as a
    ``PashubToAraTriggerRequest`` and passed to ``process_job``. If
    ``process_job`` raises ``UnauthorizedError``, a new PasHub client is
    created and the record is retried once with it; the new client is also
    used for the remaining records.

    Args:
        event: Mapping whose optional ``"Records"`` entry is the list of
            records to process. Each record must have a ``"body"`` JSON string.
        context: Unused.

    Raises:
        ValueError: If the PasHub email or password setting is empty.

    NOTE(review): any other exception raised while handling a record is logged
    and swallowed, so this function does not fail because of a bad record -
    confirm that is what the caller of this handler expects.
    """
    logger.info("Received message")
    records = event.get("Records", [])
    logger.info(f"Number of events: {len(records)}")

    settings = get_settings()

    pas_hub_email = settings.PASHUB_EMAIL
    pas_hub_password = settings.PASHUB_PASSWORD

    if (not pas_hub_email) or (not pas_hub_password):
        raise ValueError("Pas Hub credentials not provided")

    pashub_client = get_pashub_client(pas_hub_email, pas_hub_password)

    sharepoint_client = DomnaSharepointClient(
        sharepoint_location=DomnaSites.SOCIAL_HOUSING_WAVE_3
    )

    saved_file_paths: List[str] = []
    failed_records = 0

    for record in records:
        try:
            body_dict = json.loads(record["body"])
            logger.debug("Validating request body")

            payload = PashubToAraTriggerRequest.model_validate(body_dict)

            logger.debug("Successfully validated request body")

            files: List[str]
            try:
                files = process_job(
                    payload,
                    pashub_client,
                    sharepoint_client,
                )
            except UnauthorizedError:
                logger.warning("Token expired - refreshing")

                pashub_client = get_pashub_client(
                    pas_hub_email,
                    pas_hub_password,
                )

                # Retry once. A second failure propagates to the outer
                # handler below and is counted as a failed record.
                files = process_job(
                    payload,
                    pashub_client,
                    sharepoint_client,
                )

            saved_file_paths.extend(files)

        except Exception as e:
            failed_records += 1
            logger.info("Handler exception")
            logger.error(f"Failed to process record: {e}")

    # Report what actually happened instead of an unconditional success line.
    logger.info(f"Processed {len(records)} records ({failed_records} failed)")

    logger.info(f"Saved {len(saved_file_paths)} files")
|
|
|
|
|
|
if __name__ == "__main__":
    # Local run: invoke the handler with a single record whose body is an
    # empty JSON object, and no context.
    empty_body_record = {"body": "{}"}
    event = {"Records": [empty_body_record]}
    handler(event, None)
|