mirror of
https://github.com/Hestia-Homes/ML.git
synced 2026-06-08 11:17:25 +00:00
add gitignore
This commit is contained in:
parent
8d2e96bdba
commit
14887054be
4 changed files with 12 additions and 2 deletions
2
modules/ml-pipeline/.gitignore
vendored
2
modules/ml-pipeline/.gitignore
vendored
|
|
@ -1 +1,3 @@
|
|||
.dev_env/
|
||||
data/
|
||||
__pycache__/
|
||||
|
|
|
|||
|
|
@ -11,3 +11,11 @@ Within `src` folder, the structure is as follows:
|
|||
- i.e. for a product, we might require multiple pipelines to deliver a result
|
||||
- i.e. multiple models
|
||||
- these models can be all tracked within the same gto model registry
|
||||
|
||||
To enable the virtual environment created in VS Code:
|
||||
- Open settings
|
||||
- Search 'env'
|
||||
- Under the extensions tab, there will be **Venv path**
|
||||
- Copy the path of the '.dev_env' folder into there.
|
||||
- When you select a kernel, click through create environment and refresh
|
||||
- The virtual environment should be there
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
dataclient: minio
|
||||
dataclient_type: minio
|
||||
data_location: s3://dev_bucket
|
||||
train_proportion: 0.8
|
||||
output_location: ./data/prepared_data/
|
||||
|
|
|
|||
|
|
@ -69,7 +69,7 @@ def prepare_data(
|
|||
# TODO: REPLACE WITH CLIENT
|
||||
output_path = Path(output_location)
|
||||
if not output_path.exists():
|
||||
os.mkdir(output_path)
|
||||
os.makedirs(output_path)
|
||||
|
||||
logger.info("--- Outputting train and test data ---")
|
||||
train.to_csv(output_path / output_train_filename, index=False)
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue