mirror of
https://github.com/Hestia-Homes/survey-extraction.git
synced 2026-06-30 13:10:56 +00:00
modified lambda job to have dynamic ip and file_uri
This commit is contained in:
parent
b790d650b0
commit
c000ce4704
1 changed files with 45 additions and 18 deletions
|
|
@ -166,27 +166,54 @@ def upload_json_to_s3(json_obj, dest_uri: str) -> str:
|
|||
# Return an HTTPS-style S3 URL (matches your input style)
|
||||
return f"https://{bucket}.s3.{aws_region}.amazonaws.com/{json_key}"
|
||||
|
||||
def get_file_uri(id):
|
||||
with get_db_session() as session:
|
||||
obj = (
|
||||
session
|
||||
.query(uploaded_files)
|
||||
.filter(uploaded_files.id == id)
|
||||
.first()
|
||||
)
|
||||
if obj is None:
|
||||
raise RuntimeError(f"Failed to find uploaded_files record with id {id}")
|
||||
|
||||
return obj.s3_file_uri
|
||||
|
||||
|
||||
def handler(event, context):
|
||||
try:
|
||||
id_ = "aa0a22aa-2e08-40b9-8309-fa6df1efffbc"
|
||||
file_uri = "https://retrofit-energy-assessments-dev.s3.eu-west-2.amazonaws.com/documents/10034911080/osmosis_condition_pas_2035_report/20250819_123831.pdf"
|
||||
|
||||
local_path = download_private_s3_file(file_uri)
|
||||
print(f"File saved to: {local_path}")
|
||||
reader = pdfReaderToText(local_path)
|
||||
print(reader.text_list)
|
||||
obj = WarmHomesConditionReport(reader.text_list)
|
||||
json_ = make_final_json(obj.master_obj[0], obj.master_obj[1])
|
||||
from pprint import pprint
|
||||
pprint(json_)
|
||||
json_uri = upload_json_to_s3(json_, file_uri)
|
||||
init_db()
|
||||
with get_db_session() as session:
|
||||
update_uploaded_file_json_uri_by_query(
|
||||
session,
|
||||
id_,
|
||||
json_uri,
|
||||
)
|
||||
for r in event.get("Records", []):
|
||||
body = json.loads(r["body"])
|
||||
id_ = body.get("id")
|
||||
if not id_: # covers None or empty string
|
||||
raise ValueError(f"❌ Missing 'id' in SQS body: {body}")
|
||||
|
||||
print(f"Retrieving file uri with id {id_}")
|
||||
file_uri = get_file_uri(id_)
|
||||
print(f"Retrieved file uri with {file_uri}")
|
||||
|
||||
print("Downloading file locally for extraction...")
|
||||
local_path = download_private_s3_file(file_uri)
|
||||
|
||||
print("Extracting file...")
|
||||
reader = pdfReaderToText(local_path)
|
||||
obj = WarmHomesConditionReport(reader.text_list)
|
||||
json_ = make_final_json(obj.master_obj[0], obj.master_obj[1])
|
||||
print("Extracted completed, made json")
|
||||
|
||||
print("uploading json to s3 bucket...")
|
||||
json_uri = upload_json_to_s3(json_, file_uri)
|
||||
|
||||
print("Updating Database with json_uri")
|
||||
with get_db_session() as session:
|
||||
update_uploaded_file_json_uri_by_query(
|
||||
session,
|
||||
id_,
|
||||
json_uri,
|
||||
)
|
||||
print("job completed successfully")
|
||||
|
||||
except Exception as e:
|
||||
print(f"❌ Error: {e}")
|
||||
return {
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue