import msgpack

from etl.bill_savings.KwhData import KwhData
from utils.s3 import read_from_s3


def app():
    """
    Collate the energy-consumption files held in the S3 datalake.

    Steps, in order:

    1. Fetch the cleaned EPC payload from S3 and decode it with msgpack.
    2. Ask a ``KwhData`` client (pointed at the datalake bucket) to combine
       its source files.
    3. Hand the combined data and the decoded EPC payload to
       ``KwhData.transform`` with ``save=True``.

    The stated intent is to produce a single file that is stored back in S3
    for analysis; the actual write presumably happens inside
    ``KwhData.transform`` when ``save=True`` -- confirm against that class.

    :return: None
    """
    # NOTE(review): the key ends in ".bson" but the payload is decoded as
    # msgpack -- confirm the extension is just a naming leftover.
    # The download is nested in the decode call so the raw bytes are not kept
    # alive once decoding has finished.
    epc_records = msgpack.unpackb(
        read_from_s3(
            s3_file_name="cleaned_epc_data/cleaned.bson",
            bucket_name="retrofit-data-dev",
        ),
        raw=False,  # decode msgpack raw values to str rather than bytes
    )

    # If there is any problematic data, it could be:
    # s3://retrofit-datalake-dev/energy_consumption_data/2024-08-10 18:48:06.866647.pkl
    collator = KwhData(bucket="retrofit-datalake-dev")

    # combine() must run before transform(): `collator.data` is read only
    # after combine() has returned (presumably combine() populates it).
    collator.combine()
    collator.transform(data=collator.data, cleaned=epc_records, save=True)