diff --git a/etl/README.md b/etl/README.md index d8e0725..666519c 100644 --- a/etl/README.md +++ b/etl/README.md @@ -16,7 +16,7 @@ Definition of one place: Add a local file: - [x] mount a local folder directory wiht what Khalim sharepoint he has shared -- [x] REad files and do something with it +- [x] Read files and their file paths Once I have sharepoint api working: @@ -24,9 +24,26 @@ Once I have sharepoint api working: - [] once validated, produce a csv file - [] show some cool productivity metric - +Currently working on: +- [] Validator + - [x] check names + - [in progress, blocked until SharePoint access works. Easy to add] check it has dates +- [] Useful file reader: + - [] Khalim showed me a useful PDF that I should try to extract some information from - With Khalim: - [] Check if I have access to sharepoint - [] Try and get his client API working and see if I can read files + +MVP: + Script we can run that will + Go to SharePoint and fetch all the data + provide some form of output + that shows the number of surveys done + +Flat table +
+ +Billing: +Billing table, left join \ No newline at end of file diff --git a/etl/src/etl/main.py b/etl/src/etl/main.py index 282f405..588f4ef 100644 --- a/etl/src/etl/main.py +++ b/etl/src/etl/main.py @@ -2,17 +2,18 @@ import os from validator.retrohome import RetroHomeFileStructureValidator DATA_LOC = "/workspaces/survey-extraction/data/" +INTERESTING_FILE_LOC = "/workspaces/survey-extraction/data/first last/Submission 03.03.25/customer/10 Sandbeck Lane DN21 3LZ/PRE SITE NOTES.pdf" def main(): RetroHomeFileStructureValidator(DATA_LOC) + if __name__ == "__main__": main() -# Make a file checker to see if retrohomes as sumbitted the correct structure # Read file from local file path directory # proof of concept of some validator # proof of concept of something i do with a particular flie # the important file at the moment is "Pre site notes" -# ask khalim how sharepoint is going \ No newline at end of file +# Ask Khalim how SharePoint is going \ No newline at end of file diff --git a/etl/src/etl/validator/retrohome.py b/etl/src/etl/validator/retrohome.py index b6b8eac..6aa0b7a 100644 --- a/etl/src/etl/validator/retrohome.py +++ b/etl/src/etl/validator/retrohome.py @@ -19,9 +19,7 @@ class RetroHomeFileStructureValidator(): self.logger.warning(f"Found a file when expecting directory. Ignoring file {filepath}") self.logger.info(self.innocent) - self.valid_name() - self.valid_file_structure() def valid_name(self):