From 1db02fc4f49c5b4a72ea4054cbf424f0ae2477fa Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 10 Oct 2023 13:29:05 +0800 Subject: [PATCH] debugging prediction process in backend --- backend/app/plan/router.py | 19 ++++++++++--------- backend/ml_models/sap_change_model/api.py | 2 +- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index 05377bec..d7af58ac 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -183,9 +183,13 @@ async def trigger_plan(body: PlanTriggerRequest): logger.info("Preparing data for scoring in sap change api") recommendations_scoring_data = pd.DataFrame(recommendations_scoring_data) - # Merge the cleaning data onto recommendations_scoring_data - # Perform the same cleaning as in the model + recommendations_scoring_data = DataProcessor.apply_averages_cleaning( + data_to_clean=recommendations_scoring_data, + cleaning_data=cleaning_data, + cols_to_merge_on=COLUMNS_TO_MERGE_ON + ["LOCAL_AUTHORITY"] + ).drop(columns=["LOCAL_AUTHORITY"]) + recommendations_scoring_data = DataProcessor.clean_missings_after_description_process( recommendations_scoring_data, [ c for c in recommendations_scoring_data.columns if @@ -193,12 +197,6 @@ async def trigger_plan(body: PlanTriggerRequest): ] ) - recommendations_scoring_data = DataProcessor.apply_averages_cleaning( - data_to_clean=recommendations_scoring_data, - cleaning_data=cleaning_data, - cols_to_merge_on=COLUMNS_TO_MERGE_ON + ["LOCAL_AUTHORITY"] - ).drop(columns=["LOCAL_AUTHORITY"]) - sap_change_model_api = SAPChangeModelAPI(portfolio_id=body.portfolio_id, timestamp=created_at) file_location = sap_change_model_api.upload_scoring_data( df=recommendations_scoring_data, bucket=get_settings().DATA_BUCKET @@ -209,7 +207,10 @@ async def trigger_plan(body: PlanTriggerRequest): # Retrieve the predictions predictions = pd.DataFrame( - read_csv_from_s3(bucket_name=get_settings().PREDICTIONS_BUCKET, filepath=response["storage_filepath"]) + read_csv_from_s3( + bucket_name=get_settings().PREDICTIONS_BUCKET, + filepath=response["storage_filepath"].split(get_settings().PREDICTIONS_BUCKET + "/")[1] + ) ) predictions["RDSAP_CHANGE"] = predictions["RDSAP_CHANGE"].astype(float).round(1) diff --git a/backend/ml_models/sap_change_model/api.py b/backend/ml_models/sap_change_model/api.py index fd15ccd1..0db5a05d 100644 --- a/backend/ml_models/sap_change_model/api.py +++ b/backend/ml_models/sap_change_model/api.py @@ -62,7 +62,7 @@ class SAPChangeModelAPI: logger.info("Making request to sap change api") url = f"{self.base_url}/sapmodel/predict" payload = { - "file_location": f"s3://retrofit-data-dev/{file_location}", + "file_location": file_location, "property_id": "", # This should get removed "portfolio_id": self.portfolio_id, "created_at": self.timestamp