passing sheet count to lambda

2026-07-27 23:35:01 +00:00 · 2025-07-22 18:04:53 +01:00 · 2025-07-22 18:04:53 +01:00 · 38d0dcdb77
commit 38d0dcdb77
parent 8c5bd19992
1 changed files with 1 additions and 57 deletions
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -2,8 +2,6 @@ import boto3
 import json
 import math
 from datetime import datetime
-from openpyxl import load_workbook
-from io import BytesIO

 from fastapi import APIRouter, Depends
 from backend.app.dependencies import validate_token
@@ -15,53 +13,6 @@ from backend.app.db.connection import db_engine

 from backend.app.db.functions.recommendations_functions import create_scenario

-
-def read_excel_from_s3(bucket_name, file_key, header_row=0, drop_all_na=True, sheet_name=None):
-    """
-    Reads an Excel file from S3 and returns it as a list of dictionaries.
-
-    :param bucket_name: Name of the S3 bucket.
-    :param file_key: S3 key/path to the file.
-    :param header_row: Row number (0-indexed) to use as header.
-    :param drop_all_na: If True, drop columns where all values are None.
-    :param sheet_name: Name of the worksheet to read. Defaults to the first.
-    :return: List of dicts, one per row.
-    """
-    s3 = boto3.client("s3")
-    response = s3.get_object(Bucket=bucket_name, Key=file_key)
-    excel_buffer = BytesIO(response["Body"].read())
-
-    wb = load_workbook(filename=excel_buffer, data_only=True)
-    ws = wb[sheet_name] if sheet_name else wb.active
-
-    rows = list(ws.iter_rows(values_only=True))
-    if len(rows) <= header_row:
-        raise ValueError("Header row index is out of range.")
-
-    headers = [str(h).strip() if h is not None else f"__col_{i}" for i, h in enumerate(rows[header_row])]
-    data_rows = rows[header_row + 1:]
-
-    # Drop columns where all values are None if required
-    if drop_all_na:
-        # Transpose rows to get columns
-        col_data = list(zip(*data_rows))
-        keep_indices = [i for i, col in enumerate(col_data) if not all(v is None for v in col)]
-        headers = [h for i, h in enumerate(headers) if i in keep_indices]
-        data_rows = [
-            [row[i] for i in keep_indices]
-            for row in data_rows
-        ]
-
-    # Create list of dicts
-    result = [
-        {headers[i]: cell for i, cell in enumerate(row)}
-        for row in data_rows
-        if any(cell is not None for cell in row)  # skip fully empty rows
-    ]
-
-    return result
-
-
 logger = setup_logger()

 router = APIRouter(
@@ -93,14 +44,7 @@ async def trigger_plan_entrypoint(body: PlanTriggerRequest):
    if data.get("file_format") == "domna_asset_list" and data.get("file_type") == "xlsx":
        try:

-            input_data = read_excel_from_s3(
-                bucket_name=settings.PLAN_TRIGGER_BUCKET,
-                file_key=data.get("trigger_file_path"),
-                sheet_name=data.get("sheet_name"),
-                header_row=0,
-            )
-
-            total_rows = len(input_data)
+            total_rows = body.get("sheet_count", 0)
            chunk_size = 30
            total_chunks = math.ceil(total_rows / chunk_size)