From 38d0dcdb772e4d73e852af7e830196513c939d7e Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar
Date: Tue, 22 Jul 2025 18:04:53 +0100
Subject: [PATCH] passing sheet count to lambda

---
Review note: the row count is read from `data`, the dict every other lookup in
this handler uses (file_format, file_type, trigger_file_path, sheet_name),
instead of calling `.get()` on `body`. `body` is annotated PlanTriggerRequest;
assuming that is a Pydantic model (its definition is not part of this patch -
confirm), it has no `.get()` and the call would raise AttributeError.
`or 0` also covers an explicit null `sheet_count`, which would otherwise fail
in `math.ceil(total_rows / chunk_size)`.

 backend/app/plan/router.py | 58 +--------------------------------------
 1 file changed, 1 insertion(+), 57 deletions(-)

diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index a0eca27a..f059c568 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -2,8 +2,6 @@ import boto3
 import json
 import math
 from datetime import datetime
-from openpyxl import load_workbook
-from io import BytesIO
 
 from fastapi import APIRouter, Depends
 from backend.app.dependencies import validate_token
@@ -15,53 +13,6 @@ from backend.app.db.connection import db_engine
 from backend.app.db.functions.recommendations_functions import create_scenario
 
 
-
-def read_excel_from_s3(bucket_name, file_key, header_row=0, drop_all_na=True, sheet_name=None):
-    """
-    Reads an Excel file from S3 and returns it as a list of dictionaries.
-
-    :param bucket_name: Name of the S3 bucket.
-    :param file_key: S3 key/path to the file.
-    :param header_row: Row number (0-indexed) to use as header.
-    :param drop_all_na: If True, drop columns where all values are None.
-    :param sheet_name: Name of the worksheet to read. Defaults to the first.
-    :return: List of dicts, one per row.
-    """
-    s3 = boto3.client("s3")
-    response = s3.get_object(Bucket=bucket_name, Key=file_key)
-    excel_buffer = BytesIO(response["Body"].read())
-
-    wb = load_workbook(filename=excel_buffer, data_only=True)
-    ws = wb[sheet_name] if sheet_name else wb.active
-
-    rows = list(ws.iter_rows(values_only=True))
-    if len(rows) <= header_row:
-        raise ValueError("Header row index is out of range.")
-
-    headers = [str(h).strip() if h is not None else f"__col_{i}" for i, h in enumerate(rows[header_row])]
-    data_rows = rows[header_row + 1:]
-
-    # Drop columns where all values are None if required
-    if drop_all_na:
-        # Transpose rows to get columns
-        col_data = list(zip(*data_rows))
-        keep_indices = [i for i, col in enumerate(col_data) if not all(v is None for v in col)]
-        headers = [h for i, h in enumerate(headers) if i in keep_indices]
-        data_rows = [
-            [row[i] for i in keep_indices]
-            for row in data_rows
-        ]
-
-    # Create list of dicts
-    result = [
-        {headers[i]: cell for i, cell in enumerate(row)}
-        for row in data_rows
-        if any(cell is not None for cell in row)  # skip fully empty rows
-    ]
-
-    return result
-
-
 logger = setup_logger()
 
 router = APIRouter(
@@ -93,14 +44,7 @@ async def trigger_plan_entrypoint(body: PlanTriggerRequest):
 
     if data.get("file_format") == "domna_asset_list" and data.get("file_type") == "xlsx":
         try:
-            input_data = read_excel_from_s3(
-                bucket_name=settings.PLAN_TRIGGER_BUCKET,
-                file_key=data.get("trigger_file_path"),
-                sheet_name=data.get("sheet_name"),
-                header_row=0,
-            )
-
-            total_rows = len(input_data)
+            total_rows = data.get("sheet_count") or 0
             chunk_size = 30
             total_chunks = math.ceil(total_rows / chunk_size)
 