Use DVC to version data and store it remotely

This commit is contained in:
Michael Duong 2023-09-04 16:26:21 +01:00
parent 713a29cfc6
commit 8b37b906a5
9 changed files with 12 additions and 1 deletion

View file

@@ -0,0 +1,4 @@
['remote "build-data-remote"']
url = s3://retrofit-data-dev/dvc/etl_data/change_data/
['remote "etl-data-remote"']
url = s3://retrofit-data-dev/dvc/model_build_data/change_data/rdsap/

1
.gitignore vendored
View file

@@ -258,3 +258,4 @@ model_data/simulation_system/.idea/
model_data/simulation_system/data*
model_data/simulation_system/model_directory/
model_data/simulation_system/predictions/
model_data/simulation_system/s3-mock/

View file

@@ -7,7 +7,7 @@ services:
- "9000:9000"
- "9001:9001"
volumes:
-- ./data:/data
+- ./s3-mock:/data
environment:
MINIO_ROOT_USER: &MINIO_USER admin
MINIO_ROOT_PASSWORD: &MINIO_PASS password

View file

@@ -0,0 +1 @@
/dataset.parquet

View file

@@ -0,0 +1,5 @@
outs:
- md5: 17f01a663bbdfb3a6bca734f347431c0
size: 35369443
hash: md5
path: dataset.parquet