make logging less verbose

This commit is contained in:
Jun-te Kim 2025-03-13 06:57:03 +00:00
parent 8d87acbcfa
commit bb53d35fab
3 changed files with 27 additions and 28 deletions

View file

@ -7,37 +7,34 @@ from etl.utils.logger import Logger
from etl.validator.validator import DomnaSharePointValidator
logger = Logger(name="main.py", level=logging.DEBUG).get_logger()
DATA_LOC_1 = "/tmp/sharepoint/Abdul Koddus/W.C. 03.03.2025/Southern Housing/10 Turnberry Close TN38 0WL/PRE SITE NOTES.pdf"
DATA_LOC_2 = "/tmp/sharepoint/Abdul Koddus/W.C. 03.03.2025/Southern Housing/16 Sunningdale Drive TN38 0WB/PRE SITE NOTES.pdf"
pdfReader = pdfReaderToText(DATA_LOC_1)
doc2 = pdfReader.get_reader()
pdfReader2 = pdfReaderToText(DATA_LOC_2)
doc1 = pdfReader2.get_reader()
vars(doc1)
def main():
# POC PDF Reader
# list_ = pdfReaderToText(INTERESTING_FILE_LOC).get_list_of_test()
# pprint(list_)
"""
This script returns a list of names that are misformatted in SharePoint.
"""
POC Scraper -> This part of the code gets the names that have the wrong format
# Correct dates format checker
south_coast_scraper = SharePointScraper(SharePointInstaller.SOUTH_COAST_INSULATION_SERVICE)
south_coast_scraper = SharePointScraper(SharePointInstaller.JJC_SERVICE)
list_of_names = south_coast_scraper.list_of_names_that_has_the_wrong_date_format()
logger.info(pformat(list_of_names))
south_coast_names = south_coast_scraper.list_of_names_that_has_the_wrong_date_format()
# # POC Scraper -> This part of the code gets every variation of housing association names
# south_coast_scraper = SharePointScraper(SharePointInstaller.SOUTH_COAST_INSULATION_SERVICE)
# list_of_house_ass_names = south_coast_scraper.get_housing_association_names()
# logger.info(pformat(list_of_house_ass_names))
jjc_scraper = SharePointScraper(SharePointInstaller.JJC_SERVICE)
jjc_names = jjc_scraper.list_of_names_that_has_the_wrong_date_format()
# POC of downloading each file
south_coast_scraper = SharePointScraper(SharePointInstaller.SOUTH_COAST_INSULATION_SERVICE)
south_coast_scraper.download_file_for_each_address()
logger.info("Good morning Cyrus")
if south_coast_names:
logger.info("South Coast with wrong date format:")
logger.info(pformat(south_coast_names))
if jjc_names:
logger.info("JJC with wrong date format")
logger.info(pformat(jjc_names))
# POC of pdf reader
# Make a quick script that checks if the Pictures folder exists in a certain fail directory
# Make a cron job in github runner for Cyrus for this
logger.info("Hope this helps! <3")
if __name__ == "__main__":

View file

@ -74,7 +74,7 @@ def api_call_decorator(func):
# Check and refresh the access token if needed
if self.is_access_token_expired():
self.retrieve_access_token()
logger.info("Access token refreshed.")
logger.debug("Access token refreshed.")
# Get the HTTP method, URL, and optionally data from the function
http_method, url, data = func(self, *args, **kwargs)
@ -201,7 +201,7 @@ class SharePointClient:
access_token_request_timestamp = datetime.now()
if refresh:
logger.info("Forcing refresh of access token.")
logger.debug("Forcing refresh of access token.")
token = app.acquire_token_for_client(scopes=scope)
else:
# Check if a token is already cached
@ -225,7 +225,7 @@ class SharePointClient:
'Authorization': f"Bearer {self.access_token['access_token']}"
}
logger.info("Access token retrieved successfully.")
logger.debug("Access token retrieved successfully.")
@api_call_decorator
def get_documents_drive(self):
@ -234,7 +234,7 @@ class SharePointClient:
:return: Tuple containing HTTP method, URL, and None for data.
"""
url = f"https://graph.microsoft.com/v1.0/sites/{self.site_id}/drive"
logger.info(f"Getting document drive from URL: {url}")
logger.debug(f"Getting document drive from URL: {url}")
return 'GET', url, None
@api_call_decorator
@ -247,7 +247,7 @@ class SharePointClient:
:return: Tuple containing HTTP method, URL, and None for data.
"""
url = f"https://graph.microsoft.com/v1.0/drives/{self.document_drive_id}/root:/{folder_path}:/children?$top={page_size}"
logger.info(f"Listing folder contents from URL: {url}")
logger.debug(f"Listing folder contents from URL: {url}")
return 'GET', url, None
@staticmethod

2
run_daily_script.sh Normal file
View file

@ -0,0 +1,2 @@
# Example of how to run python code in this environment
poetry run python etl/daily_script.py --debug