diff --git a/.dvc/config b/.dvc/config index e69de29b..684207dc 100644 --- a/.dvc/config +++ b/.dvc/config @@ -0,0 +1,4 @@ +['remote "build-data-remote"'] + url = s3://retrofit-data-dev/dvc/model_build_data/change_data/rdsap/ +['remote "etl-data-remote"'] + url = s3://retrofit-data-dev/dvc/etl_data/change_data/ diff --git a/.gitignore b/.gitignore index 4e43467e..4163716e 100644 --- a/.gitignore +++ b/.gitignore @@ -258,3 +258,4 @@ model_data/simulation_system/.idea/ model_data/simulation_system/data* model_data/simulation_system/model_directory/ model_data/simulation_system/predictions/ +model_data/simulation_system/s3-mock/ diff --git a/model_data/simulation_system/docker-compose.yml b/model_data/simulation_system/docker-compose.yml index 058dc062..b14efeed 100644 --- a/model_data/simulation_system/docker-compose.yml +++ b/model_data/simulation_system/docker-compose.yml @@ -7,7 +7,7 @@ services: - "9000:9000" - "9001:9001" volumes: - - ./data:/data + - ./s3-mock:/data environment: MINIO_ROOT_USER: &MINIO_USER admin MINIO_ROOT_PASSWORD: &MINIO_PASS password diff --git a/model_data/simulation_system/preprocessed_data/dataset.parquet b/model_data/simulation_system/preprocessed_data/dataset.parquet deleted file mode 100644 index 4b6247d6..00000000 Binary files a/model_data/simulation_system/preprocessed_data/dataset.parquet and /dev/null differ diff --git a/model_data/simulation_system/versioned_data/etl_data/change_data/.gitignore b/model_data/simulation_system/versioned_data/etl_data/change_data/.gitignore new file mode 100644 index 00000000..9c97d002 --- /dev/null +++ b/model_data/simulation_system/versioned_data/etl_data/change_data/.gitignore @@ -0,0 +1 @@ +/dataset.parquet diff --git a/model_data/simulation_system/versioned_data/etl_data/change_data/dataset.parquet.dvc b/model_data/simulation_system/versioned_data/etl_data/change_data/dataset.parquet.dvc new file mode 100644 index 00000000..dd27aa0d --- /dev/null +++ 
b/model_data/simulation_system/versioned_data/etl_data/change_data/dataset.parquet.dvc @@ -0,0 +1,5 @@ +outs: +- md5: 17f01a663bbdfb3a6bca734f347431c0 + size: 35369443 + hash: md5 + path: dataset.parquet diff --git a/model_data/simulation_system/model_build_data/change_data/rdsap_full/.gitignore b/model_data/simulation_system/versioned_data/model_build_data/change_data/.gitignore similarity index 100% rename from model_data/simulation_system/model_build_data/change_data/rdsap_full/.gitignore rename to model_data/simulation_system/versioned_data/model_build_data/change_data/.gitignore diff --git a/model_data/simulation_system/model_build_data/change_data/rdsap_full/test_data.parquet.dvc b/model_data/simulation_system/versioned_data/model_build_data/change_data/test_data.parquet.dvc similarity index 100% rename from model_data/simulation_system/model_build_data/change_data/rdsap_full/test_data.parquet.dvc rename to model_data/simulation_system/versioned_data/model_build_data/change_data/test_data.parquet.dvc diff --git a/model_data/simulation_system/model_build_data/change_data/rdsap_full/train_validation_data.parquet.dvc b/model_data/simulation_system/versioned_data/model_build_data/change_data/train_validation_data.parquet.dvc similarity index 100% rename from model_data/simulation_system/model_build_data/change_data/rdsap_full/train_validation_data.parquet.dvc rename to model_data/simulation_system/versioned_data/model_build_data/change_data/train_validation_data.parquet.dvc