diff --git a/etl/epc/Pipeline.py b/etl/epc/Pipeline.py index c678c830..6abf05bd 100644 --- a/etl/epc/Pipeline.py +++ b/etl/epc/Pipeline.py @@ -1,5 +1,6 @@ import msgpack import pandas as pd +from datetime import datetime from typing import List from pathlib import Path @@ -82,9 +83,9 @@ class EPCPipeline: run_mode="training", epc_local_file="certificates.csv", epc_bucket_name="retrofit-data-dev", - epc_cleaning_dataset_key="sap_change_model/cleaning_dataset_rooms.parquet", - epc_all_equal_rows_key="sap_change_model/all_equal_rows_rooms.parquet", - epc_compiled_dataset_key="sap_change_model/dataset_rooms.parquet", + epc_cleaning_dataset_key="sap_change_model/{}/cleaning_dataset_rooms.parquet", + epc_all_equal_rows_key="sap_change_model/{}/all_equal_rows_rooms.parquet", + epc_compiled_dataset_key="sap_change_model/{}/dataset_rooms.parquet", use_parallel=False, ): """ @@ -107,10 +108,13 @@ class EPCPipeline: self.run_mode = run_mode self.epc_local_file = epc_local_file self.epc_bucket_name = epc_bucket_name - self.epc_cleaning_dataset_key = epc_cleaning_dataset_key - self.epc_all_equal_rows_key = epc_all_equal_rows_key - self.epc_compiled_dataset_key = epc_compiled_dataset_key + self.use_parallel = use_parallel + self.timeprefix = datetime.now().strftime("%Y-%m-%d-%H-%M-%S") + + self.epc_cleaning_dataset_key = epc_cleaning_dataset_key.format(self.timeprefix) + self.epc_all_equal_rows_key = epc_all_equal_rows_key.format(self.timeprefix) + self.epc_compiled_dataset_key = epc_compiled_dataset_key.format(self.timeprefix) def run(self): """ diff --git a/etl/epc/generate_scenarios_data.py b/etl/epc/generate_scenarios_data.py index 172e8a27..d5bece8b 100644 --- a/etl/epc/generate_scenarios_data.py +++ b/etl/epc/generate_scenarios_data.py @@ -20,6 +20,10 @@ from recommendations.Recommendations import Recommendations from utils.logger import setup_logger from utils.s3 import read_dataframe_from_s3_parquet, save_dataframe_to_s3_parquet +from datetime import datetime + +now = datetime.now().strftime("%d-%m-%Y-%H-%M-%S") + logger = setup_logger() logger.info("Connecting to db") @@ -132,7 +136,7 @@ for scenario_property in scenario_properties: p.get_components(cleaned, photo_supply_lookup, floor_area_decile_thresholds) recommender = Recommendations(property_instance=p, materials=materials) - property_recommendations = recommender.recommend() + property_recommendations = recommender.recommend("0") wall_recommendations = recommender.wall_recomender.recommendations loft_recommendations = recommender.roof_recommender.recommendations @@ -247,5 +251,5 @@ all_predictions = model_api.predict_all( save_dataframe_to_s3_parquet( recommendations_scoring_data, "retrofit-data-dev", - "scenario_data/recommendations_scoring_data.parquet", + f"scenario_data/{now}/recommendations_scoring_data.parquet", ) diff --git a/etl/epc/requirements.txt b/etl/epc/requirements.txt index 9f972bde..87148180 100644 --- a/etl/epc/requirements.txt +++ b/etl/epc/requirements.txt @@ -1,4 +1,5 @@ pandas==2.1.3 tqdm==4.66.1 msgpack==1.0.7 -boto3==1.29.6 \ No newline at end of file +boto3==1.29.6 +pyarrow==15.0.2 \ No newline at end of file