mirror of
https://github.com/Hestia-Homes/ML.git
synced 2026-06-08 11:17:25 +00:00
add identifier column to datasets
This commit is contained in:
parent
6b7171adc0
commit
961773f58a
2 changed files with 14 additions and 6 deletions
|
|
@@ -6,7 +6,7 @@ Once we have the features, we build a model
|
|||
import os
|
||||
import yaml
|
||||
import pandas as pd
|
||||
from typing import Union
|
||||
from typing import Union, List
|
||||
from pathlib import Path
|
||||
from core.Logger import logger
|
||||
from core.interface.InterfaceMetrics import MLMetrics
|
||||
|
|
@@ -31,6 +31,9 @@ generate_metrics_params = settings.generate_metrics
|
|||
|
||||
model_type = build_model_params["model_type"]
|
||||
target = feature_process_params["feature_processor_config"]["target"]
|
||||
identifier_columns = feature_process_params["feature_processor_config"][
|
||||
"identifier_columns"
|
||||
]
|
||||
model_save_location = build_model_params["model_save_filepath"]
|
||||
model_hyperparameters = build_model_params[model_type]
|
||||
train_filepath = prepare_data_params["output_train_filepath"]
|
||||
|
|
@@ -62,6 +65,7 @@ def build_model(
|
|||
model: MLModel,
|
||||
metrics: MLMetrics,
|
||||
target: str,
|
||||
identifier_columns: List[str],
|
||||
model_save_location: str,
|
||||
model_hyperparameters: dict,
|
||||
fit_metrics_filepath: str,
|
||||
|
|
@@ -89,18 +93,20 @@ def build_model(
|
|||
logger.info("--- Training model ---")
|
||||
logger.info("----------------------")
|
||||
|
||||
model_train_data = train_data.drop(columns=identifier_columns)
|
||||
|
||||
model.train_model(
|
||||
data=train_data, target=target, model_hyperparameters=model_hyperparameters
|
||||
data=model_train_data,
|
||||
target=target,
|
||||
model_hyperparameters=model_hyperparameters,
|
||||
)
|
||||
|
||||
logger.info("----------------------------------")
|
||||
logger.info("--- Generating fit predictions ---")
|
||||
logger.info("----------------------------------")
|
||||
|
||||
prediction_data = train_data.drop(columns=target)
|
||||
|
||||
fit_predictions = model.predict(
|
||||
data=prediction_data, post_prediction_logic=post_prediction_logic
|
||||
data=train_data, post_prediction_logic=post_prediction_logic
|
||||
)
|
||||
|
||||
logger.info("------------------------------")
|
||||
|
|
@@ -142,6 +148,7 @@ if __name__ == "__main__":
|
|||
model=model,
|
||||
metrics=metrics,
|
||||
target=target,
|
||||
identifier_columns=identifier_columns,
|
||||
model_save_location=model_save_location,
|
||||
model_hyperparameters=model_hyperparameters,
|
||||
train_filepath=train_filepath,
|
||||
|
|
|
|||
|
|
@@ -32,7 +32,8 @@ default:
|
|||
subsample_amount: null
|
||||
subsample_seed: 0
|
||||
target: SAP_ENDING
|
||||
drop_columns: ["UPRN", "HEAT_DEMAND_CHANGE", "CARBON_CHANGE", "RDSAP_CHANGE", "HEAT_DEMAND_ENDING", "CARBON_ENDING"]
|
||||
identifier_columns: ["UPRN"]
|
||||
drop_columns: ["HEAT_DEMAND_CHANGE", "CARBON_CHANGE", "RDSAP_CHANGE", "HEAT_DEMAND_ENDING", "CARBON_ENDING"]
|
||||
# retain_features: ["SAP_STARTING", "TOTAL_FLOOR_AREA_DIFF"]
|
||||
retain_features: null
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue