From c000ce4704322e33ee398dcf93d8388d2cf167c5 Mon Sep 17 00:00:00 2001 From: Jun-te kim Date: Wed, 20 Aug 2025 10:51:43 +0000 Subject: [PATCH] modified lambda job to have dynamic ip and file_uri --- .../lambda/extractor_and_loader/docker/app.py | 63 +++++++++++++------ 1 file changed, 45 insertions(+), 18 deletions(-) diff --git a/deployment/lambda/extractor_and_loader/docker/app.py b/deployment/lambda/extractor_and_loader/docker/app.py index 087896b..36cda55 100644 --- a/deployment/lambda/extractor_and_loader/docker/app.py +++ b/deployment/lambda/extractor_and_loader/docker/app.py @@ -166,27 +166,54 @@ def upload_json_to_s3(json_obj, dest_uri: str) -> str: # Return an HTTPS-style S3 URL (matches your input style) return f"https://{bucket}.s3.{aws_region}.amazonaws.com/{json_key}" +def get_file_uri(id): + with get_db_session() as session: + obj = ( + session + .query(uploaded_files) + .filter(uploaded_files.id == id) + .first() + ) + if obj is None: + raise RuntimeError(f"Failed to find uploaded_files record with id {id}") + + return obj.s3_file_uri + + def handler(event, context): try: - id_ = "aa0a22aa-2e08-40b9-8309-fa6df1efffbc" - file_uri = "https://retrofit-energy-assessments-dev.s3.eu-west-2.amazonaws.com/documents/10034911080/osmosis_condition_pas_2035_report/20250819_123831.pdf" - - local_path = download_private_s3_file(file_uri) - print(f"File saved to: {local_path}") - reader = pdfReaderToText(local_path) - print(reader.text_list) - obj = WarmHomesConditionReport(reader.text_list) - json_ = make_final_json(obj.master_obj[0], obj.master_obj[1]) - from pprint import pprint - pprint(json_) - json_uri = upload_json_to_s3(json_, file_uri) init_db() - with get_db_session() as session: - update_uploaded_file_json_uri_by_query( - session, - id_, - json_uri, - ) + for r in event.get("Records", []): + body = json.loads(r["body"]) + id_ = body.get("id") + if not id_: # covers None or empty string + raise ValueError(f"❌ Missing 'id' in SQS body: {body}") + + print(f"Retrieving file uri with id {id_}") + file_uri = get_file_uri(id_) + print(f"Retrieved file uri with {file_uri}") + + print("Downloading file locally for extraction...") + local_path = download_private_s3_file(file_uri) + + print("Extracting file...") + reader = pdfReaderToText(local_path) + obj = WarmHomesConditionReport(reader.text_list) + json_ = make_final_json(obj.master_obj[0], obj.master_obj[1]) + print("Extracted completed, made json") + + print("uploading json to s3 bucket...") + json_uri = upload_json_to_s3(json_, file_uri) + + print("Updating Database with json_uri") + with get_db_session() as session: + update_uploaded_file_json_uri_by_query( + session, + id_, + json_uri, + ) + print("job completed successfully") + except Exception as e: print(f"❌ Error: {e}") return {