add requirements for pyarrow, add timestemp infromation to dataset for loose version control

This commit is contained in:
Michael Duong 2024-03-22 19:21:21 +00:00
parent fe40d1b1ec
commit 0ea8a40143
3 changed files with 18 additions and 9 deletions

View file

@ -1,5 +1,6 @@
import msgpack
import pandas as pd
from datetime import datetime
from typing import List
from pathlib import Path
@ -82,9 +83,9 @@ class EPCPipeline:
run_mode="training",
epc_local_file="certificates.csv",
epc_bucket_name="retrofit-data-dev",
epc_cleaning_dataset_key="sap_change_model/cleaning_dataset_rooms.parquet",
epc_all_equal_rows_key="sap_change_model/all_equal_rows_rooms.parquet",
epc_compiled_dataset_key="sap_change_model/dataset_rooms.parquet",
epc_cleaning_dataset_key="sap_change_model/{}/cleaning_dataset_rooms.parquet",
epc_all_equal_rows_key="sap_change_model/{}/all_equal_rows_rooms.parquet",
epc_compiled_dataset_key="sap_change_model/{}/dataset_rooms.parquet",
use_parallel=False,
):
"""
@ -107,10 +108,13 @@ class EPCPipeline:
self.run_mode = run_mode
self.epc_local_file = epc_local_file
self.epc_bucket_name = epc_bucket_name
self.epc_cleaning_dataset_key = epc_cleaning_dataset_key
self.epc_all_equal_rows_key = epc_all_equal_rows_key
self.epc_compiled_dataset_key = epc_compiled_dataset_key
self.use_parallel = use_parallel
self.timeprefix = datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
self.epc_cleaning_dataset_key = epc_cleaning_dataset_key.format(self.timeprefix)
self.epc_all_equal_rows_key = epc_all_equal_rows_key.format(self.timeprefix)
self.epc_compiled_dataset_key = epc_compiled_dataset_key.format(self.timeprefix)
def run(self):
"""

View file

@ -20,6 +20,10 @@ from recommendations.Recommendations import Recommendations
from utils.logger import setup_logger
from utils.s3 import read_dataframe_from_s3_parquet, save_dataframe_to_s3_parquet
from datetime import datetime
now = datetime.now().strftime("%d-%m-%Y-%H-%M-%S")
logger = setup_logger()
logger.info("Connecting to db")
@ -132,7 +136,7 @@ for scenario_property in scenario_properties:
p.get_components(cleaned, photo_supply_lookup, floor_area_decile_thresholds)
recommender = Recommendations(property_instance=p, materials=materials)
property_recommendations = recommender.recommend()
property_recommendations = recommender.recommend("0")
wall_recommendations = recommender.wall_recomender.recommendations
loft_recommendations = recommender.roof_recommender.recommendations
@ -247,5 +251,5 @@ all_predictions = model_api.predict_all(
save_dataframe_to_s3_parquet(
recommendations_scoring_data,
"retrofit-data-dev",
"scenario_data/recommendations_scoring_data.parquet",
f"scenario_data/{now}/recommendations_scoring_data.parquet",
)

View file

@ -1,4 +1,5 @@
pandas==2.1.3
tqdm==4.66.1
msgpack==1.0.7
boto3==1.29.6
boto3==1.29.6
pyarrow==15.0.2