diff --git a/backend/app/db/models/uploaded_file.py b/backend/app/db/models/uploaded_file.py index 5b34a752..a516a1df 100644 --- a/backend/app/db/models/uploaded_file.py +++ b/backend/app/db/models/uploaded_file.py @@ -38,6 +38,7 @@ class UploadedFile(Base): landlord_property_id = Column(Text, nullable=True) uprn = Column(BigInteger, nullable=True) hubspot_listing_id = Column(BigInteger, nullable=True) + hubspot_deal_id = Column(Text, nullable=True) file_type = Column( SqlEnum( diff --git a/backend/ecmk_fetcher/upload.py b/backend/ecmk_fetcher/upload.py index 8cb451b0..cc2c908d 100644 --- a/backend/ecmk_fetcher/upload.py +++ b/backend/ecmk_fetcher/upload.py @@ -40,6 +40,7 @@ def upload_excel_to_sharepoint( ) +# TODO: this should be moved to somewhere common and called by pashub fetcher def upload_file_to_s3_and_update_db( bucket: str, file_path: str, hubspot_listing_id: str, file_type: FileTypeEnum ) -> None: diff --git a/backend/pashub_fetcher/handler/handler.py b/backend/pashub_fetcher/handler/handler.py index 3689efe9..b66aca86 100644 --- a/backend/pashub_fetcher/handler/handler.py +++ b/backend/pashub_fetcher/handler/handler.py @@ -104,10 +104,17 @@ def upload_job_to_sharepoint( ) -def upload_job_to_s3_and_update_db(job_files: List[str], uprn: str) -> None: +def upload_job_to_s3_and_update_db( + job_files: List[str], uprn: Optional[str], hubspot_deal_id: Optional[str] +) -> None: bucket = "retrofit-energy-assessments-dev" - base_path = f"documents/uprn/{uprn}" + if not uprn and not hubspot_deal_id: + return + + base_path = ( + f"documents/uprn/{uprn}" if uprn else f"documents/uprn/{hubspot_deal_id}" + ) uploaded_files: List[UploadedFile] = [] @@ -118,12 +125,14 @@ def upload_job_to_s3_and_update_db(job_files: List[str], uprn: str) -> None: upload_file_to_s3(file_path, bucket, file_key) # load row to db + # TODO: use same upload_file_to_s3_and_update_db method as ecmk fetcher does uploaded_files.append( UploadedFile( s3_file_bucket=bucket, s3_file_key=file_key, s3_upload_timestamp=datetime.now(timezone.utc), - uprn=int(uprn), + uprn=int(uprn) if uprn else None, + hubspot_deal_id=hubspot_deal_id, file_source=FileSourceEnum.PAS_HUB.value, file_type=infer_file_type(filename), ) @@ -144,6 +153,7 @@ def process_job( job_id = job.pashub_job_id uprn: Optional[str] = job.uprn or pashub_client.get_uprn_by_job_id(job_id) + hubspot_deal_id: Optional[str] = job.hubspot_deal_id if uprn: logger.info(f"Got UPRN {uprn} for job {job_id}") @@ -152,9 +162,9 @@ def process_job( job_files: List[str] = pashub_client.get_core_evidence_files_by_job_id(job_id) - if uprn: + if uprn or hubspot_deal_id: logger.info("Uploading files to s3") - upload_job_to_s3_and_update_db(job_files, uprn) + upload_job_to_s3_and_update_db(job_files, uprn, hubspot_deal_id) # # Comment out sharepoint loading for now: # Seems like the sharepoint link in pas hub is inconsistent in terms @@ -166,7 +176,6 @@ def process_job( return job_files -@task_handler() def handler(event: Mapping[str, Any], context: Any) -> None: logger.info("Received message") logger.info(f"Number of events: {len(event.get('Records', []))}") diff --git a/backend/pashub_fetcher/trigger_lambda_from_file.py b/backend/pashub_fetcher/trigger_lambda_from_file.py index e69de29b..8763c5fe 100644 --- a/backend/pashub_fetcher/trigger_lambda_from_file.py +++ b/backend/pashub_fetcher/trigger_lambda_from_file.py @@ -0,0 +1,63 @@ +import json +import os +import re +from typing import Any, Dict, List + +from openpyxl import load_workbook + +from backend.pashub_fetcher.pashub_to_ara_trigger_request import ( + PashubToAraTriggerRequest, +) +from backend.pashub_fetcher.handler.handler import handler + + +if __name__ == "__main__": + BASE_DIR = os.path.dirname(os.path.dirname(__file__)) + filepath: str = os.path.join( + BASE_DIR, + "pashub_fetcher", + "The_Guinness_Partnership_AtkinsR_alis_Coordination_Design_Board_1774881298.xlsx", + ) + + wb = load_workbook(filepath, data_only=True) + ws = wb["filtered"] + + HEADER_ROW = 3 + + headers: Dict[str, int] = {} + for col in range(1, ws.max_column + 1): + value = str(ws.cell(row=HEADER_ROW, column=col).value) + if value: + headers[value.strip()] = col + + name_col = headers["Name"] + link_col = headers["PasHub Link"] + hubspot_deal_id_col = headers["HubSpot ID"] + + trigger_requests: List[PashubToAraTriggerRequest] = [] + + for row in range(HEADER_ROW + 1, ws.max_row + 1): + name = ws.cell(row=row, column=name_col).value + link = ws.cell(row=row, column=link_col).value + hubspot_deal_id = ws.cell(row=row, column=hubspot_deal_id_col).value + + if not name or not link or not hubspot_deal_id: + continue + + match = re.search(r"/jobs/([0-9a-fA-F\-]+)/", str(link)) + if not match: + continue + + trigger_requests.append( + PashubToAraTriggerRequest( + pashub_link=str(link), hubspot_deal_id=str(hubspot_deal_id) + ) + ) + + # ---- Build fake SQS event ---- + event: Dict[str, Any] = { + "Records": [{"body": json.dumps(req.model_dump())} for req in trigger_requests] + } + + context = None + handler(event, context)