mirror of
https://github.com/Hestia-Homes/survey-extraction.git
synced 2026-06-08 11:17:29 +00:00
50 lines
1.6 KiB
Python
50 lines
1.6 KiB
Python
import pandas as pd
|
|
import json
|
|
from pprint import pprint
|
|
|
|
def handler(event, context):
    """Print a fixed confirmation message and return ``None``.

    Args:
        event: Accepted but never read by this function.
        context: Accepted but never read by this function.

    Returns:
        Always ``None``.
    """
    # The original note here said "read data for houses only"; no data is
    # read before this function returns.
    confirmation = "waltham forest set up correctly"
    print(confirmation)
    return None
|
|
# Columns emitted for every row listed under "elements_info".
element_cols = [
    "ELEMENT GROUP", "ELEMENT CODE", "ELEMENT CODE DESCRIPTION",
    "ATTRIBUTE CODE", "ATTRIBUTE CODE DESCRIPTION",
    "ELEMENT DATE VALUE", "ELEMENT NUMERIC VALUE",
    "ELEMENT TEXT VALUE", "QUANTITY",
    "INSTALL DATE", "REMAINING LIFE", "ELEMENT COMMENTS",
]

# Columns copied into each address's "property_info".
property_cols = [
    "PROP REF", "Domna", "ADDRESS", "OWNERSHIP",
    "PROP STATUS", "PROP TYPE", "PROP SUB TYPE",
]


def _null_missing(record):
    """Return a copy of *record* with missing scalars (NaN/NaT/None) as ``None``.

    ``json.dumps`` writes a float NaN as the bare token ``NaN``, which is not
    valid JSON; ``None`` is written as ``null`` instead.
    """
    return {
        key: None if pd.api.types.is_scalar(value) and pd.isna(value) else value
        for key, value in record.items()
    }


def build_property_records(df):
    """Group the rows of *df* by ADDRESS into nested, JSON-friendly records.

    Args:
        df: DataFrame containing every column named in ``property_cols`` and
            ``element_cols``.

    Returns:
        A list with one dict per distinct ADDRESS (in sorted group order)::

            {
                "ADDRESS": <address>,
                "property_info": {<property column>: <value of first row>},
                "elements_info": [
                    {"ELEMENT GROUP": <name>, "elements": [<row dict>, ...]},
                    ...
                ],
            }

        Missing cells are returned as ``None``. An empty frame yields ``[]``.

    Raises:
        KeyError: If any required column is absent from *df*.
    """
    missing = [col for col in property_cols + element_cols if col not in df.columns]
    if missing:
        raise KeyError(f"missing required columns: {missing}")

    records = []
    # Iterate the groups directly rather than using groupby.apply: each
    # sub-frame then still carries the ADDRESS column that property_cols
    # needs (apply's handling of grouping columns is deprecated since
    # pandas 2.2).
    for address, group in df.groupby("ADDRESS"):
        # The first row of the group supplies the property-level values
        # (the same row drop_duplicates().iloc[0] would select).
        property_info = _null_missing(group[property_cols].iloc[0].to_dict())

        elements_info = []
        for eg_name, eg_df in group[element_cols].groupby("ELEMENT GROUP"):
            rows = eg_df.drop(columns=["ELEMENT GROUP"]).to_dict(orient="records")
            elements_info.append({
                "ELEMENT GROUP": eg_name,
                "elements": [_null_missing(row) for row in rows],
            })

        records.append({
            "ADDRESS": address,
            "property_info": property_info,
            "elements_info": elements_info,
        })
    return records


def main(path="../../home/Downloads/data.xlsx", sheet_name="Houses Asset Data"):
    """Read the houses sheet, build the nested records and print them as JSON.

    Args:
        path: Location of the Excel workbook.
        sheet_name: Name of the sheet to load.

    Returns:
        The JSON text that was printed.
    """
    df = pd.read_excel(path, sheet_name=sheet_name)
    records = build_property_records(df)
    # default=str stringifies values json cannot encode natively
    # (e.g. timestamps read from date cells).
    json_output = json.dumps(records, ensure_ascii=False, default=str)
    # pprint is handed a str, so it prints the (wrapped) repr of the JSON text.
    pprint(json_output)
    return json_output


if __name__ == "__main__":
    main()
|