"""Waltham Forest ETL Lambda: reshape the houses asset workbook into JSON.

Each output record describes one address: its property-level fields plus
its asset elements bucketed by ``ELEMENT GROUP``.
"""

import pandas as pd
import json
from pprint import pprint

# NOTE(review): this looks like a developer-local path; confirm where the
# workbook actually lives inside the Lambda container image.
SOURCE_WORKBOOK_PATH = "../../home/Downloads/data.xlsx"
SOURCE_SHEET_NAME = "Houses Asset Data"

# Columns that describe a single asset element of a property.
ELEMENT_COLS = [
    "ELEMENT GROUP", "ELEMENT CODE", "ELEMENT CODE DESCRIPTION",
    "ATTRIBUTE CODE", "ATTRIBUTE CODE DESCRIPTION",
    "ELEMENT DATE VALUE", "ELEMENT NUMERIC VALUE",
    "ELEMENT TEXT VALUE", "QUANTITY",
    "INSTALL DATE", "REMAINING LIFE", "ELEMENT COMMENTS",
]

# Columns that describe the property itself (repeated on every element row).
PROPERTY_COLS = [
    "PROP REF", "Domna", "ADDRESS", "OWNERSHIP",
    "PROP STATUS", "PROP TYPE", "PROP SUB TYPE",
]


def _clean_value(value):
    """Return ``None`` for missing scalars (NaN/NaT/None), else *value*."""
    if pd.api.types.is_scalar(value) and pd.isna(value):
        return None
    return value


def _clean_record(record):
    """Return a copy of *record* with missing values replaced by ``None``."""
    return {key: _clean_value(value) for key, value in record.items()}


def build_property_records(df) -> list:
    """Group asset rows by address into nested, JSON-friendly records.

    Args:
        df: DataFrame holding every column in ``PROPERTY_COLS`` and
            ``ELEMENT_COLS``, one row per asset element.

    Returns:
        A list with one dict per distinct ``ADDRESS`` (sorted by address),
        each shaped as ``{"ADDRESS": ..., "property_info": {...},
        "elements_info": [{"ELEMENT GROUP": ..., "elements": [...]}, ...]}``.
        ``property_info`` is taken from the first row of the address.
        Missing cells are emitted as ``None`` so the result serializes to
        valid JSON (``NaN`` is not a JSON token).

    Raises:
        KeyError: if any expected column is absent from *df*.
    """
    missing = [col for col in PROPERTY_COLS + ELEMENT_COLS if col not in df.columns]
    if missing:
        raise KeyError(f"Workbook sheet is missing expected columns: {missing}")

    records = []
    # Iterate the GroupBy directly instead of GroupBy.apply: apply on the
    # grouping column is deprecated (pandas 2.2+), and each sub-frame here
    # still carries ADDRESS, which property_info needs.
    for address, property_rows in df.groupby("ADDRESS"):
        property_info = _clean_record(property_rows[PROPERTY_COLS].iloc[0].to_dict())

        elements_info = []
        for group_name, group_rows in property_rows[ELEMENT_COLS].groupby("ELEMENT GROUP"):
            element_dicts = group_rows.drop(columns=["ELEMENT GROUP"]).to_dict(orient="records")
            elements_info.append({
                "ELEMENT GROUP": group_name,
                "elements": [_clean_record(element) for element in element_dicts],
            })

        records.append({
            "ADDRESS": address,
            "property_info": property_info,
            "elements_info": elements_info,
        })
    return records


def handler(event, context):
    """Lambda entry point: load the houses sheet and print it as JSON.

    Args:
        event: Lambda invocation event (not read by this handler).
        context: Lambda runtime context (not read by this handler).

    Returns:
        None. The JSON document is written to standard output.
    """
    # read data for houses only
    df = pd.read_excel(SOURCE_WORKBOOK_PATH, sheet_name=SOURCE_SHEET_NAME)

    records = build_property_records(df)

    # default=str stringifies values json cannot encode natively
    # (e.g. pandas Timestamps from the date columns).
    json_output = json.dumps(records, ensure_ascii=False, default=str)
    # print, not pprint: pprint would emit the quoted/wrapped repr of the
    # string rather than the JSON document itself.
    print(json_output)
"waltham_forest_etl" { } # SQS trigger -resource "aws_lambda_event_source_mapping" "extractor_and_loader_trigger" { +resource "aws_lambda_event_source_mapping" "walthamforest_etl_trigger" { event_source_arn = aws_sqs_queue.walthamforest_etl_queue.arn function_name = aws_lambda_function.walthamforest_etl.arn batch_size = 1