mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
add requirements for pyarrow, add timestemp infromation to dataset for loose version control
This commit is contained in:
parent
fe40d1b1ec
commit
0ea8a40143
3 changed files with 18 additions and 9 deletions
|
|
@ -1,5 +1,6 @@
|
|||
import msgpack
|
||||
import pandas as pd
|
||||
from datetime import datetime
|
||||
|
||||
from typing import List
|
||||
from pathlib import Path
|
||||
|
|
@ -82,9 +83,9 @@ class EPCPipeline:
|
|||
run_mode="training",
|
||||
epc_local_file="certificates.csv",
|
||||
epc_bucket_name="retrofit-data-dev",
|
||||
epc_cleaning_dataset_key="sap_change_model/cleaning_dataset_rooms.parquet",
|
||||
epc_all_equal_rows_key="sap_change_model/all_equal_rows_rooms.parquet",
|
||||
epc_compiled_dataset_key="sap_change_model/dataset_rooms.parquet",
|
||||
epc_cleaning_dataset_key="sap_change_model/{}/cleaning_dataset_rooms.parquet",
|
||||
epc_all_equal_rows_key="sap_change_model/{}/all_equal_rows_rooms.parquet",
|
||||
epc_compiled_dataset_key="sap_change_model/{}/dataset_rooms.parquet",
|
||||
use_parallel=False,
|
||||
):
|
||||
"""
|
||||
|
|
@ -107,10 +108,13 @@ class EPCPipeline:
|
|||
self.run_mode = run_mode
|
||||
self.epc_local_file = epc_local_file
|
||||
self.epc_bucket_name = epc_bucket_name
|
||||
self.epc_cleaning_dataset_key = epc_cleaning_dataset_key
|
||||
self.epc_all_equal_rows_key = epc_all_equal_rows_key
|
||||
self.epc_compiled_dataset_key = epc_compiled_dataset_key
|
||||
|
||||
self.use_parallel = use_parallel
|
||||
self.timeprefix = datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
|
||||
|
||||
self.epc_cleaning_dataset_key = epc_cleaning_dataset_key.format(self.timeprefix)
|
||||
self.epc_all_equal_rows_key = epc_all_equal_rows_key.format(self.timeprefix)
|
||||
self.epc_compiled_dataset_key = epc_compiled_dataset_key.format(self.timeprefix)
|
||||
|
||||
def run(self):
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -20,6 +20,10 @@ from recommendations.Recommendations import Recommendations
|
|||
from utils.logger import setup_logger
|
||||
from utils.s3 import read_dataframe_from_s3_parquet, save_dataframe_to_s3_parquet
|
||||
|
||||
from datetime import datetime
|
||||
|
||||
now = datetime.now().strftime("%d-%m-%Y-%H-%M-%S")
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
logger.info("Connecting to db")
|
||||
|
|
@ -132,7 +136,7 @@ for scenario_property in scenario_properties:
|
|||
p.get_components(cleaned, photo_supply_lookup, floor_area_decile_thresholds)
|
||||
|
||||
recommender = Recommendations(property_instance=p, materials=materials)
|
||||
property_recommendations = recommender.recommend()
|
||||
property_recommendations = recommender.recommend("0")
|
||||
|
||||
wall_recommendations = recommender.wall_recomender.recommendations
|
||||
loft_recommendations = recommender.roof_recommender.recommendations
|
||||
|
|
@ -247,5 +251,5 @@ all_predictions = model_api.predict_all(
|
|||
save_dataframe_to_s3_parquet(
|
||||
recommendations_scoring_data,
|
||||
"retrofit-data-dev",
|
||||
"scenario_data/recommendations_scoring_data.parquet",
|
||||
f"scenario_data/{now}/recommendations_scoring_data.parquet",
|
||||
)
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
pandas==2.1.3
|
||||
tqdm==4.66.1
|
||||
msgpack==1.0.7
|
||||
boto3==1.29.6
|
||||
boto3==1.29.6
|
||||
pyarrow==15.0.2
|
||||
Loading…
Add table
Reference in a new issue