diff --git a/backend/app/db/models/uploaded_file.py b/backend/app/db/models/uploaded_file.py index 5b34a752..a516a1df 100644 --- a/backend/app/db/models/uploaded_file.py +++ b/backend/app/db/models/uploaded_file.py @@ -38,6 +38,7 @@ class UploadedFile(Base): landlord_property_id = Column(Text, nullable=True) uprn = Column(BigInteger, nullable=True) hubspot_listing_id = Column(BigInteger, nullable=True) + hubspot_deal_id = Column(Text, nullable=True) file_type = Column( SqlEnum( diff --git a/backend/ecmk_fetcher/upload.py b/backend/ecmk_fetcher/upload.py index 8cb451b0..cc2c908d 100644 --- a/backend/ecmk_fetcher/upload.py +++ b/backend/ecmk_fetcher/upload.py @@ -40,6 +40,7 @@ def upload_excel_to_sharepoint( ) +# TODO: this should be moved to somewhere common and called by pashub fetcher def upload_file_to_s3_and_update_db( bucket: str, file_path: str, hubspot_listing_id: str, file_type: FileTypeEnum ) -> None: diff --git a/backend/pashub_fetcher/The_Guinness_Partnership_AtkinsR_alis_Coordination_Design_Board_1774881298.xlsx b/backend/pashub_fetcher/The_Guinness_Partnership_AtkinsR_alis_Coordination_Design_Board_1774881298.xlsx index a6478e3b..beb679c1 100644 Binary files a/backend/pashub_fetcher/The_Guinness_Partnership_AtkinsR_alis_Coordination_Design_Board_1774881298.xlsx and b/backend/pashub_fetcher/The_Guinness_Partnership_AtkinsR_alis_Coordination_Design_Board_1774881298.xlsx differ diff --git a/backend/pashub_fetcher/handler/Dockerfile b/backend/pashub_fetcher/handler/Dockerfile index d045becd..e450d340 100644 --- a/backend/pashub_fetcher/handler/Dockerfile +++ b/backend/pashub_fetcher/handler/Dockerfile @@ -22,5 +22,5 @@ ENTRYPOINT ["python", "-m", "awslambdaric"] # ----------------------------- # Lambda handler # ----------------------------- -CMD ["backend.pashub_fetcher.handler.test_handler.handler"] +CMD ["backend.pashub_fetcher.handler.handler"] # CMD ["backend.pashub_fetcher.handler.handler.handler"] \ No newline at end of file diff --git a/backend/pashub_fetcher/handler/handler.py b/backend/pashub_fetcher/handler/handler.py index 3689efe9..60b946c1 100644 --- a/backend/pashub_fetcher/handler/handler.py +++ b/backend/pashub_fetcher/handler/handler.py @@ -1,8 +1,7 @@ from datetime import datetime, timezone -import json import os import re -from typing import Any, Dict, List, Mapping, Optional +from typing import Any, Dict, List, Optional from openpyxl import load_workbook from backend.app.config import get_settings @@ -104,10 +103,19 @@ def upload_job_to_sharepoint( ) -def upload_job_to_s3_and_update_db(job_files: List[str], uprn: str) -> None: +def upload_job_to_s3_and_update_db( + job_files: List[str], uprn: Optional[str], hubspot_deal_id: Optional[str] +) -> None: bucket = "retrofit-energy-assessments-dev" - base_path = f"documents/uprn/{uprn}" + if not uprn and not hubspot_deal_id: + return + + base_path = ( + f"documents/uprn/{uprn}" + if uprn + else f"documents/hubspot_deal_id/{hubspot_deal_id}" + ) uploaded_files: List[UploadedFile] = [] @@ -118,12 +126,14 @@ def upload_job_to_s3_and_update_db(job_files: List[str], uprn: str) -> None: upload_file_to_s3(file_path, bucket, file_key) # load row to db + # TODO: use same upload_file_to_s3_and_update_db method as ecmk fetcher does uploaded_files.append( UploadedFile( s3_file_bucket=bucket, s3_file_key=file_key, s3_upload_timestamp=datetime.now(timezone.utc), - uprn=int(uprn), + uprn=int(uprn) if uprn else None, + hubspot_deal_id=hubspot_deal_id, file_source=FileSourceEnum.PAS_HUB.value, file_type=infer_file_type(filename), ) @@ -144,6 +154,7 @@ def process_job( job_id = job.pashub_job_id uprn: Optional[str] = job.uprn or pashub_client.get_uprn_by_job_id(job_id) + hubspot_deal_id: Optional[str] = job.hubspot_deal_id if uprn: logger.info(f"Got UPRN {uprn} for job {job_id}") @@ -152,9 +163,9 @@ def process_job( job_files: List[str] = pashub_client.get_core_evidence_files_by_job_id(job_id) - if uprn: + if uprn or hubspot_deal_id: logger.info("Uploading files to s3") - upload_job_to_s3_and_update_db(job_files, uprn) + upload_job_to_s3_and_update_db(job_files, uprn, hubspot_deal_id) # # Comment out sharepoint loading for now: # Seems like the sharepoint link in pas hub is inconsistent in terms @@ -167,9 +178,8 @@ def process_job( @task_handler() -def handler(event: Mapping[str, Any], context: Any) -> None: +def handler(body: Dict[str, Any], context: Any) -> List[str]: logger.info("Received message") - logger.info(f"Number of events: {len(event.get('Records', []))}") settings = get_settings() @@ -185,48 +195,34 @@ def handler(event: Mapping[str, Any], context: Any) -> None: sharepoint_location=DomnaSites.SOCIAL_HOUSING_WAVE_3 ) - saved_file_paths: List[str] = [] + logger.debug("Validating request body") + payload = PashubToAraTriggerRequest.model_validate(body) + logger.debug("Successfully validated request body") - for record in event.get("Records", []): - try: - body_dict = json.loads(record["body"]) - logger.debug("Validating request body") + try: + files: List[str] = process_job( + payload, + pashub_client, + sharepoint_client, + ) + except UnauthorizedError: + logger.warning("Token expired - refreshing") - payload = PashubToAraTriggerRequest.model_validate(body_dict) + pashub_client = get_pashub_client( + pas_hub_email, + pas_hub_password, + ) - logger.debug("Successfully validated request body") + # retry once + files = process_job( + payload, + pashub_client, + sharepoint_client, + ) - try: - files: List[str] = process_job( - payload, - pashub_client, - sharepoint_client, - ) - saved_file_paths.extend(files) + logger.info(f"Saved {len(files)} files") - except UnauthorizedError: - logger.warning("Token expired - refreshing") - - pashub_client = get_pashub_client( - pas_hub_email, - pas_hub_password, - ) - - # retry once - files: List[str] = process_job( - payload, - pashub_client, - sharepoint_client, - ) - saved_file_paths.extend(files) - - except Exception as e: - logger.info("Handler exception") - logger.error(f"Failed to process record: {e}") - - logger.info("Successfully loaded jobs from spreadsheet") - - logger.info(f"Saved {len(saved_file_paths)} files") + return files if __name__ == "__main__": diff --git a/backend/pashub_fetcher/local_handler/invoke_local_lambda.py b/backend/pashub_fetcher/local_handler/invoke_local_lambda.py index 463ef9d8..219446fd 100644 --- a/backend/pashub_fetcher/local_handler/invoke_local_lambda.py +++ b/backend/pashub_fetcher/local_handler/invoke_local_lambda.py @@ -12,7 +12,8 @@ payload = { { "body": json.dumps( { - "uprn": 123456, + "pashub_link": "https://pashub.net/jobs/00000000-0000-0000-0000-000000000000/details", + "uprn": "123456", } ) } diff --git a/backend/pashub_fetcher/pashub_to_ara_trigger_request.py b/backend/pashub_fetcher/pashub_to_ara_trigger_request.py index 2e4f8380..518a8dc3 100644 --- a/backend/pashub_fetcher/pashub_to_ara_trigger_request.py +++ b/backend/pashub_fetcher/pashub_to_ara_trigger_request.py @@ -12,6 +12,8 @@ class PashubToAraTriggerRequest(BaseModel): uprn: Optional[str] = None landlord_property_id: Optional[str] = None deal_stage: Optional[str] = None + hubspot_listing_id: Optional[int] = None + hubspot_deal_id: Optional[str] = None @property def pashub_job_id(self) -> str: diff --git a/backend/pashub_fetcher/trigger_lambda_from_file.py b/backend/pashub_fetcher/trigger_lambda_from_file.py new file mode 100644 index 00000000..fb9d1cbf --- /dev/null +++ b/backend/pashub_fetcher/trigger_lambda_from_file.py @@ -0,0 +1,63 @@ +import json +import os +import re +from typing import Any, Dict, List + +from openpyxl import load_workbook + +from backend.pashub_fetcher.pashub_to_ara_trigger_request import ( + PashubToAraTriggerRequest, +) +from backend.pashub_fetcher.handler.handler import handler + + +if __name__ == "__main__": + BASE_DIR = os.path.dirname(os.path.dirname(__file__)) + filepath: str = os.path.join( + BASE_DIR, + "pashub_fetcher", + "The_Guinness_Partnership_AtkinsR_alis_Coordination_Design_Board_1774881298.xlsx", + ) + + wb = load_workbook(filepath, data_only=True) + ws = wb["filtered_2"] + + HEADER_ROW = 3 + + headers: Dict[str, int] = {} + for col in range(1, ws.max_column + 1): + value = str(ws.cell(row=HEADER_ROW, column=col).value) + if value: + headers[value.strip()] = col + + name_col = headers["Name"] + link_col = headers["PasHub Link"] + hubspot_deal_id_col = headers["HubSpot ID"] + + trigger_requests: List[PashubToAraTriggerRequest] = [] + + for row in range(HEADER_ROW + 1, ws.max_row + 1): + name = ws.cell(row=row, column=name_col).value + link = ws.cell(row=row, column=link_col).value + hubspot_deal_id = ws.cell(row=row, column=hubspot_deal_id_col).value + + if not name or not link or not hubspot_deal_id: + continue + + match = re.search(r"/jobs/([0-9a-fA-F\-]+)/", str(link)) + if not match: + continue + + trigger_requests.append( + PashubToAraTriggerRequest( + pashub_link=str(link), hubspot_deal_id=str(hubspot_deal_id) + ) + ) + + # ---- Build fake SQS event ---- + event: Dict[str, Any] = { + "Records": [{"body": json.dumps(req.model_dump())} for req in trigger_requests] + } + + context = None + handler(event, context)