diff --git a/modules/ml-pipeline/.gitignore b/modules/ml-pipeline/.gitignore
index e4d8729..2a3b661 100644
--- a/modules/ml-pipeline/.gitignore
+++ b/modules/ml-pipeline/.gitignore
@@ -1 +1,3 @@
 .dev_env/
+data/
+__pycache__/
diff --git a/modules/ml-pipeline/README.MD b/modules/ml-pipeline/README.MD
index 080838e..cf9316a 100644
--- a/modules/ml-pipeline/README.MD
+++ b/modules/ml-pipeline/README.MD
@@ -11,3 +11,11 @@ Within `src` folder, the structure is as follows:
     - i.e. for a product, we might require multuple pipelines do deliver a result
     - i.e. multiple models
 - these models can be all tracked within the same gto model registry
+
+To enable the virtual environment created in VS Code:
+- Open settings
+- Search 'env'
+- Under the extensions tab, there will be **Venv path**
+- Copy the path of the '.dev_env' folder into there.
+- When you select a kernel, click through 'Create environment' and refresh
+- The virtual environment should now appear in the list
diff --git a/modules/ml-pipeline/src/pipeline/training/configs/prepare_data.yaml b/modules/ml-pipeline/src/pipeline/training/configs/prepare_data.yaml
index 1fd65b3..17b36ce 100644
--- a/modules/ml-pipeline/src/pipeline/training/configs/prepare_data.yaml
+++ b/modules/ml-pipeline/src/pipeline/training/configs/prepare_data.yaml
@@ -1,4 +1,4 @@
-dataclient: minio
+dataclient_type: minio
 data_location: s3://dev_bucket
 train_proportion: 0.8
 output_location: ./data/prepared_data/
diff --git a/modules/ml-pipeline/src/pipeline/training/prepare_data.py b/modules/ml-pipeline/src/pipeline/training/prepare_data.py
index 08e84d6..d25efd9 100644
--- a/modules/ml-pipeline/src/pipeline/training/prepare_data.py
+++ b/modules/ml-pipeline/src/pipeline/training/prepare_data.py
@@ -69,7 +69,7 @@ def prepare_data(
     # TODO: REPLACE WITH CLIENT
     output_path = Path(output_location)
     if not output_path.exists():
-        os.mkdir(output_path)
+        os.makedirs(output_path)
 
     logger.info("--- Outputting train and test data ---")
     train.to_csv(output_path / output_train_filename, index=False)