ML/modules/ml-pipeline/src/training/core/DataClient.py
2023-09-08 18:57:06 +01:00

66 lines
2 KiB
Python

"""
Implementations of the DataClient Protocol
"""
import pandas as pd
from typing import List
from core.interface.InterfaceDataClient import DataClient
def dataclient_factory(dataclient_type: str) -> DataClient:
    """
    Build the data client registered under ``dataclient_type``.

    The registry maps names to client classes rather than to ready-made
    instances, so only the requested client is constructed, and nothing is
    constructed at all when the requested type is unknown.

    Args:
        dataclient_type: Registry key of the client to create (e.g. "minio").

    Returns:
        A newly constructed client instance.

    Raises:
        ValueError: If ``dataclient_type`` is not a registered key.
    """
    dataclients = {
        "minio": MinioClient,
        # ADD MORE DATACLIENTS HERE
    }
    if dataclient_type not in dataclients:
        raise ValueError("Dataclient type specified is not in factory")
    # Instantiate lazily: only the selected class is called.
    return dataclients[dataclient_type]()
def validate_dict_keys(keys_1: List[str], keys_2: List[str], config_type: str):
    """
    Ensure every entry of ``keys_1`` also appears in ``keys_2``.

    Args:
        keys_1: Keys that were supplied.
        keys_2: Keys that are accepted.
        config_type: Label inserted into the error message.

    Raises:
        ValueError: If ``keys_1`` contains a key that is absent from ``keys_2``.
    """
    unexpected = set(keys_1) - set(keys_2)
    if unexpected:
        raise ValueError(f"Incorrect {config_type} keys specified")
class MinioClient:
    """
    Data client built around the Minio s3 client, meant for local testing
    """

    # Keys permitted in the dict passed to ingest_configurations
    ACCEPTED_CONFIG_KEYS = ["aws_access_key_id", "aws_secret_access_key", "endpoint_url"]
    # Both lists are empty, so no load/save option keys are accepted
    ACCEPTED_LOAD_CONFIG_KEYS = []
    ACCEPTED_SAVE_CONFIG_KEYS = []

    def ingest_configurations(self, config: dict) -> None:
        """
        Check the configuration keys, then keep the dict on the instance (self.config)
        """
        supplied_keys = list(config.keys())
        validate_dict_keys(
            keys_1=supplied_keys,
            keys_2=self.ACCEPTED_CONFIG_KEYS,
            config_type="config",
        )
        self.config = config

    def establish_client(self) -> None:
        """
        Placeholder for opening the connection to the client (self.client); no work is done here
        """
        return None

    def load_data(self, load_config: dict) -> pd.DataFrame:
        """
        Check the load options, then hand back a DataFrame (currently always empty)
        """
        supplied_keys = list(load_config.keys())
        validate_dict_keys(
            keys_1=supplied_keys,
            keys_2=self.ACCEPTED_LOAD_CONFIG_KEYS,
            config_type="load_config",
        )
        empty_frame = pd.DataFrame()
        return empty_frame

    def save_data(self, obj: object, save_config: dict) -> None:
        """
        Check the save options; ``obj`` is not used by this method
        """
        supplied_keys = list(save_config.keys())
        validate_dict_keys(
            keys_1=supplied_keys,
            keys_2=self.ACCEPTED_SAVE_CONFIG_KEYS,
            config_type="save_config",
        )