modified lambda job to have dynamic ip and file_uri

This commit is contained in:
Jun-te kim 2025-08-20 10:51:43 +00:00
parent b790d650b0
commit c000ce4704

View file

@ -166,27 +166,54 @@ def upload_json_to_s3(json_obj, dest_uri: str) -> str:
# Return an HTTPS-style S3 URL (matches your input style)
return f"https://{bucket}.s3.{aws_region}.amazonaws.com/{json_key}"
def get_file_uri(id):
with get_db_session() as session:
obj = (
session
.query(uploaded_files)
.filter(uploaded_files.id == id)
.first()
)
if obj is None:
raise RuntimeError(f"Failed to find uploaded_files record with id {id}")
return obj.s3_file_uri
def handler(event, context):
try:
id_ = "aa0a22aa-2e08-40b9-8309-fa6df1efffbc"
file_uri = "https://retrofit-energy-assessments-dev.s3.eu-west-2.amazonaws.com/documents/10034911080/osmosis_condition_pas_2035_report/20250819_123831.pdf"
local_path = download_private_s3_file(file_uri)
print(f"File saved to: {local_path}")
reader = pdfReaderToText(local_path)
print(reader.text_list)
obj = WarmHomesConditionReport(reader.text_list)
json_ = make_final_json(obj.master_obj[0], obj.master_obj[1])
from pprint import pprint
pprint(json_)
json_uri = upload_json_to_s3(json_, file_uri)
init_db()
with get_db_session() as session:
update_uploaded_file_json_uri_by_query(
session,
id_,
json_uri,
)
for r in event.get("Records", []):
body = json.loads(r["body"])
id_ = body.get("id")
if not id_: # covers None or empty string
raise ValueError(f"❌ Missing 'id' in SQS body: {body}")
print(f"Retrieving file uri with id {id_}")
file_uri = get_file_uri(id_)
print(f"Retrieved file uri with {file_uri}")
print("Downloading file locally for extraction...")
local_path = download_private_s3_file(file_uri)
print("Extracting file...")
reader = pdfReaderToText(local_path)
obj = WarmHomesConditionReport(reader.text_list)
json_ = make_final_json(obj.master_obj[0], obj.master_obj[1])
print("Extracted completed, made json")
print("uploading json to s3 bucket...")
json_uri = upload_json_to_s3(json_, file_uri)
print("Updating Database with json_uri")
with get_db_session() as session:
update_uploaded_file_json_uri_by_query(
session,
id_,
json_uri,
)
print("job completed successfully")
except Exception as e:
print(f"❌ Error: {e}")
return {