mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
24 lines
785 B
Python
24 lines
785 B
Python
import msgpack
|
|
from etl.bill_savings.KwhData import KwhData
|
|
from utils.s3 import read_from_s3
|
|
|
|
|
|
def app(
    *,
    cleaned_bucket: str = "retrofit-data-dev",
    cleaned_key: str = "cleaned_epc_data/cleaned.bson",
    datalake_bucket: str = "retrofit-datalake-dev",
) -> None:
    """
    Given the files written in our datalake in s3, this application will collate the data into a single file
    and store it back in s3 for analysis.

    The steps performed here are:

    1. Download the object ``cleaned_key`` from ``cleaned_bucket`` and decode it with msgpack.
    2. Build a ``KwhData`` client for ``datalake_bucket`` and call ``combine()`` on it.
    3. Call ``transform()`` with the client's ``data`` attribute, the decoded cleaned data and
       ``save=True``.

    All parameters are keyword-only and default to the values that were previously hard-coded,
    so calling ``app()`` with no arguments behaves exactly as before.

    :param cleaned_bucket: name of the s3 bucket holding the cleaned data object
    :param cleaned_key: key of the cleaned data object inside ``cleaned_bucket``
    :param datalake_bucket: name of the s3 bucket handed to ``KwhData``
    :return: None
    """
    # NOTE(review): the default key ends in ".bson" but the payload is decoded with msgpack -
    # confirm that the writer of this object really serialises with msgpack.
    raw_cleaned = read_from_s3(
        s3_file_name=cleaned_key,
        bucket_name=cleaned_bucket,
    )

    # raw=False makes msgpack return str objects instead of bytes for string fields.
    cleaned = msgpack.unpackb(raw_cleaned, raw=False)

    # If there is any problematic data, it could be:
    # s3://retrofit-datalake-dev/energy_consumption_data/2024-08-10 18:48:06.866647.pkl
    kwh_data_client = KwhData(bucket=datalake_bucket)
    # combine() is called before kwh_data_client.data is read below; presumably it is what
    # populates that attribute - verify against KwhData.
    kwh_data_client.combine()
    kwh_data_client.transform(data=kwh_data_client.data, cleaned=cleaned, save=True)
|