add identifier column to datasets

This commit is contained in:
Michael Duong 2023-10-03 22:29:55 +00:00
parent 6b7171adc0
commit 961773f58a
2 changed files with 14 additions and 6 deletions

View file

@@ -6,7 +6,7 @@ Once we have the features, we build a model
import os
import yaml
import pandas as pd
from typing import Union
from typing import Union, List
from pathlib import Path
from core.Logger import logger
from core.interface.InterfaceMetrics import MLMetrics
@@ -31,6 +31,9 @@ generate_metrics_params = settings.generate_metrics
model_type = build_model_params["model_type"]
target = feature_process_params["feature_processor_config"]["target"]
identifier_columns = feature_process_params["feature_processor_config"][
"identifier_columns"
]
model_save_location = build_model_params["model_save_filepath"]
model_hyperparameters = build_model_params[model_type]
train_filepath = prepare_data_params["output_train_filepath"]
@@ -62,6 +65,7 @@ def build_model(
model: MLModel,
metrics: MLMetrics,
target: str,
identifier_columns: List[str],
model_save_location: str,
model_hyperparameters: dict,
fit_metrics_filepath: str,
@@ -89,18 +93,20 @@ def build_model(
logger.info("--- Training model ---")
logger.info("----------------------")
model_train_data = train_data.drop(columns=identifier_columns)
model.train_model(
data=train_data, target=target, model_hyperparameters=model_hyperparameters
data=model_train_data,
target=target,
model_hyperparameters=model_hyperparameters,
)
logger.info("----------------------------------")
logger.info("--- Generating fit predictions ---")
logger.info("----------------------------------")
prediction_data = train_data.drop(columns=target)
fit_predictions = model.predict(
data=prediction_data, post_prediction_logic=post_prediction_logic
data=train_data, post_prediction_logic=post_prediction_logic
)
logger.info("------------------------------")
@@ -142,6 +148,7 @@ if __name__ == "__main__":
model=model,
metrics=metrics,
target=target,
identifier_columns=identifier_columns,
model_save_location=model_save_location,
model_hyperparameters=model_hyperparameters,
train_filepath=train_filepath,

View file

@@ -32,7 +32,8 @@ default:
subsample_amount: null
subsample_seed: 0
target: SAP_ENDING
drop_columns: ["UPRN", "HEAT_DEMAND_CHANGE", "CARBON_CHANGE", "RDSAP_CHANGE", "HEAT_DEMAND_ENDING", "CARBON_ENDING"]
identifier_columns: ["UPRN"]
drop_columns: ["HEAT_DEMAND_CHANGE", "CARBON_CHANGE", "RDSAP_CHANGE", "HEAT_DEMAND_ENDING", "CARBON_ENDING"]
# retain_features: ["SAP_STARTING", "TOTAL_FLOOR_AREA_DIFF"]
retain_features: null