mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
passing sheet count to lambda
This commit is contained in:
parent
8c5bd19992
commit
38d0dcdb77
1 changed files with 1 additions and 57 deletions
|
|
@ -2,8 +2,6 @@ import boto3
|
|||
import json
|
||||
import math
|
||||
from datetime import datetime
|
||||
from openpyxl import load_workbook
|
||||
from io import BytesIO
|
||||
|
||||
from fastapi import APIRouter, Depends
|
||||
from backend.app.dependencies import validate_token
|
||||
|
|
@ -15,53 +13,6 @@ from backend.app.db.connection import db_engine
|
|||
|
||||
from backend.app.db.functions.recommendations_functions import create_scenario
|
||||
|
||||
|
||||
def _rows_to_records(rows, header_row, drop_all_na):
    """
    Convert raw worksheet rows into a list of dictionaries keyed by header.

    :param rows: List of row tuples (cell values only), header row included.
    :param header_row: Index (0-indexed) of the row within ``rows`` to use as header.
    :param drop_all_na: If True, drop columns whose data cells are all None.
    :return: List of dicts, one per data row that has at least one non-None cell.
    :raises ValueError: If ``header_row`` is negative or beyond the last row.
    """
    # Reject negative indices too: they would otherwise pick a header from the
    # end of the sheet and produce a wrong data slice.
    if header_row < 0 or header_row >= len(rows):
        raise ValueError("Header row index is out of range.")

    # Blank header cells get a positional placeholder name so they stay addressable.
    headers = [
        str(h).strip() if h is not None else f"__col_{i}"
        for i, h in enumerate(rows[header_row])
    ]
    data_rows = rows[header_row + 1:]

    # Drop columns where all values are None if required
    if drop_all_na:
        # Transpose rows to get columns
        col_data = list(zip(*data_rows))
        keep_indices = [
            i for i, col in enumerate(col_data)
            if any(v is not None for v in col)
        ]
        # Set membership keeps the header filter linear in the column count.
        kept = set(keep_indices)
        headers = [h for i, h in enumerate(headers) if i in kept]
        data_rows = [[row[i] for i in keep_indices] for row in data_rows]

    # Create list of dicts
    return [
        {headers[i]: cell for i, cell in enumerate(row)}
        for row in data_rows
        if any(cell is not None for cell in row)  # skip fully empty rows
    ]


def read_excel_from_s3(bucket_name, file_key, header_row=0, drop_all_na=True, sheet_name=None):
    """
    Reads an Excel file from S3 and returns it as a list of dictionaries.

    The whole object is downloaded into memory before parsing. Formula cells
    are read as their cached values (``data_only=True``).

    :param bucket_name: Name of the S3 bucket.
    :param file_key: S3 key/path to the file.
    :param header_row: Row number (0-indexed) to use as header.
    :param drop_all_na: If True, drop columns where all values are None.
    :param sheet_name: Name of the worksheet to read. Defaults to the active sheet.
    :return: List of dicts, one per row; fully empty rows are skipped.
    :raises ValueError: If ``header_row`` is negative or beyond the last row.
    """
    s3 = boto3.client("s3")
    response = s3.get_object(Bucket=bucket_name, Key=file_key)

    # Always release the streaming body so the HTTP connection goes back to the pool.
    body = response["Body"]
    try:
        excel_buffer = BytesIO(body.read())
    finally:
        body.close()

    wb = load_workbook(filename=excel_buffer, data_only=True)
    try:
        ws = wb[sheet_name] if sheet_name else wb.active
        rows = list(ws.iter_rows(values_only=True))
    finally:
        wb.close()

    return _rows_to_records(rows, header_row, drop_all_na)
|
||||
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
router = APIRouter(
|
||||
|
|
@ -93,14 +44,7 @@ async def trigger_plan_entrypoint(body: PlanTriggerRequest):
|
|||
if data.get("file_format") == "domna_asset_list" and data.get("file_type") == "xlsx":
|
||||
try:
|
||||
|
||||
input_data = read_excel_from_s3(
|
||||
bucket_name=settings.PLAN_TRIGGER_BUCKET,
|
||||
file_key=data.get("trigger_file_path"),
|
||||
sheet_name=data.get("sheet_name"),
|
||||
header_row=0,
|
||||
)
|
||||
|
||||
total_rows = len(input_data)
|
||||
total_rows = body.get("sheet_count", 0)
|
||||
chunk_size = 30
|
||||
total_chunks = math.ceil(total_rows / chunk_size)
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue