From bb53d35faba6b8ff2d28fdcba8d4e457530b1501 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Thu, 13 Mar 2025 06:57:03 +0000 Subject: [PATCH] make logging less verbose --- etl/daily_script.py | 43 ++++++++++++++---------------- etl/utils/sharepoint/sharepoint.py | 10 +++---- run_daily_script.sh | 2 ++ 3 files changed, 27 insertions(+), 28 deletions(-) create mode 100644 run_daily_script.sh diff --git a/etl/daily_script.py b/etl/daily_script.py index 1e63b5c..57f634f 100644 --- a/etl/daily_script.py +++ b/etl/daily_script.py @@ -7,37 +7,34 @@ from etl.utils.logger import Logger from etl.validator.validator import DomnaSharePointValidator logger = Logger(name="main.py", level=logging.DEBUG).get_logger() -DATA_LOC_1 = "/tmp/sharepoint/Abdul Koddus/W.C. 03.03.2025/Southern Housing/10 Turnberry Close TN38 0WL/PRE SITE NOTES.pdf" -DATA_LOC_2 = "/tmp/sharepoint/Abdul Koddus/W.C. 03.03.2025/Southern Housing/16 Sunningdale Drive TN38 0WB/PRE SITE NOTES.pdf" - -pdfReader = pdfReaderToText(DATA_LOC_1) -doc2 = pdfReader.get_reader() -pdfReader2 = pdfReaderToText(DATA_LOC_2) -doc1 = pdfReader2.get_reader() -vars(doc1) def main(): - # POC PDF Reader - # list_ = pdfReaderToText(INTERESTING_FILE_LOC).get_list_of_test() - # pprint(list_) + """ + This script returns a list of names that is misformatted in the sharepoint + """ - POC Scraper -> This part of the code get ths names of wrong format + # Correct dates format checker south_coast_scraper = SharePointScraper(SharePointInstaller.SOUTH_COAST_INSULATION_SERVICE) - south_coast_scraper = SharePointScraper(SharePointInstaller.JJC_SERVICE) - list_of_names = south_coast_scraper.list_of_names_that_has_the_wrong_date_format() - logger.info(pformat(list_of_names)) + south_coast_names = south_coast_scraper.list_of_names_that_has_the_wrong_date_format() - # # POC Scraper -> This part of the code gets every variation of housing_assocation names - # south_coast_scraper = SharePointScraper(SharePointInstaller.SOUTH_COAST_INSULATION_SERVICE) - # list_of_house_ass_names = south_coast_scraper.get_housing_association_names() - # logger.info(pformat(list_of_house_ass_names)) + jjc_scraper = SharePointScraper(SharePointInstaller.JJC_SERVICE) + jjc_names = jjc_scraper.list_of_names_that_has_the_wrong_date_format() - # POC of downloading each file - south_coast_scraper = SharePointScraper(SharePointInstaller.SOUTH_COAST_INSULATION_SERVICE) - south_coast_scraper.download_file_for_each_address() + logger.info("Good morning Cyrus") + if south_coast_names: + logger.info("South Coast with wrong date format:") + logger.info(pformat(south_coast_names)) + + if jjc_names: + logger.info("JJC with wrong date format") + logger.info(pformat(jjc_names)) - # POC of pdf reader + # Make a quick script that checks if the Pictures folder exists in a certain fail directory + + # Make a cron job in github runner for Cyrus for this + + logger.info("Hope this helps! <3") if __name__ == "__main__": diff --git a/etl/utils/sharepoint/sharepoint.py b/etl/utils/sharepoint/sharepoint.py index b649997..1d1de17 100644 --- a/etl/utils/sharepoint/sharepoint.py +++ b/etl/utils/sharepoint/sharepoint.py @@ -74,7 +74,7 @@ def api_call_decorator(func): # Check and refresh the access token if needed if self.is_access_token_expired(): self.retrieve_access_token() - logger.info("Access token refreshed.") + logger.debug("Access token refreshed.") # Get the HTTP method, URL, and optionally data from the function http_method, url, data = func(self, *args, **kwargs) @@ -201,7 +201,7 @@ class SharePointClient: access_token_request_timestamp = datetime.now() if refresh: - logger.info("Forcing refresh of access token.") + logger.debug("Forcing refresh of access token.") token = app.acquire_token_for_client(scopes=scope) else: # Check if a token is already cached @@ -225,7 +225,7 @@ class SharePointClient: 'Authorization': f"Bearer {self.access_token['access_token']}" } - logger.info("Access token retrieved successfully.") + logger.debug("Access token retrieved successfully.") @api_call_decorator def get_documents_drive(self): @@ -234,7 +234,7 @@ class SharePointClient: :return: Tuple containing HTTP method, URL, and None for data. """ url = f"https://graph.microsoft.com/v1.0/sites/{self.site_id}/drive" - logger.info(f"Getting document drive from URL: {url}") + logger.debug(f"Getting document drive from URL: {url}") return 'GET', url, None @api_call_decorator @@ -247,7 +247,7 @@ class SharePointClient: :return: Tuple containing HTTP method, URL, and None for data. """ url = f"https://graph.microsoft.com/v1.0/drives/{self.document_drive_id}/root:/{folder_path}:/children?$top={page_size}" - logger.info(f"Listing folder contents from URL: {url}") + logger.debug(f"Listing folder contents from URL: {url}") return 'GET', url, None @staticmethod diff --git a/run_daily_script.sh b/run_daily_script.sh new file mode 100644 index 0000000..d33ac3b --- /dev/null +++ b/run_daily_script.sh @@ -0,0 +1,2 @@ +# Example of how to run python code in this environment +poetry run python etl/daily_script.py --debug \ No newline at end of file