mirror of
https://github.com/Hestia-Homes/survey-extraction.git
synced 2026-06-08 11:17:29 +00:00
make logging less verbose
This commit is contained in:
parent
8d87acbcfa
commit
bb53d35fab
3 changed files with 27 additions and 28 deletions
|
|
@ -7,37 +7,34 @@ from etl.utils.logger import Logger
|
|||
from etl.validator.validator import DomnaSharePointValidator
|
||||
|
||||
logger = Logger(name="main.py", level=logging.DEBUG).get_logger()
|
||||
DATA_LOC_1 = "/tmp/sharepoint/Abdul Koddus/W.C. 03.03.2025/Southern Housing/10 Turnberry Close TN38 0WL/PRE SITE NOTES.pdf"
|
||||
DATA_LOC_2 = "/tmp/sharepoint/Abdul Koddus/W.C. 03.03.2025/Southern Housing/16 Sunningdale Drive TN38 0WB/PRE SITE NOTES.pdf"
|
||||
|
||||
pdfReader = pdfReaderToText(DATA_LOC_1)
|
||||
doc2 = pdfReader.get_reader()
|
||||
pdfReader2 = pdfReaderToText(DATA_LOC_2)
|
||||
doc1 = pdfReader2.get_reader()
|
||||
vars(doc1)
|
||||
|
||||
def main():
|
||||
# POC PDF Reader
|
||||
# list_ = pdfReaderToText(INTERESTING_FILE_LOC).get_list_of_test()
|
||||
# pprint(list_)
|
||||
"""
|
||||
This script returns a list of names that are misformatted in SharePoint
|
||||
"""
|
||||
|
||||
POC Scraper -> This part of the code gets the names with the wrong format
|
||||
# Correct dates format checker
|
||||
south_coast_scraper = SharePointScraper(SharePointInstaller.SOUTH_COAST_INSULATION_SERVICE)
|
||||
south_coast_scraper = SharePointScraper(SharePointInstaller.JJC_SERVICE)
|
||||
list_of_names = south_coast_scraper.list_of_names_that_has_the_wrong_date_format()
|
||||
logger.info(pformat(list_of_names))
|
||||
south_coast_names = south_coast_scraper.list_of_names_that_has_the_wrong_date_format()
|
||||
|
||||
# # POC Scraper -> This part of the code gets every variation of housing_association names
|
||||
# south_coast_scraper = SharePointScraper(SharePointInstaller.SOUTH_COAST_INSULATION_SERVICE)
|
||||
# list_of_house_ass_names = south_coast_scraper.get_housing_association_names()
|
||||
# logger.info(pformat(list_of_house_ass_names))
|
||||
jjc_scraper = SharePointScraper(SharePointInstaller.JJC_SERVICE)
|
||||
jjc_names = jjc_scraper.list_of_names_that_has_the_wrong_date_format()
|
||||
|
||||
# POC of downloading each file
|
||||
south_coast_scraper = SharePointScraper(SharePointInstaller.SOUTH_COAST_INSULATION_SERVICE)
|
||||
south_coast_scraper.download_file_for_each_address()
|
||||
logger.info("Good morning Cyrus")
|
||||
if south_coast_names:
|
||||
logger.info("South Coast with wrong date format:")
|
||||
logger.info(pformat(south_coast_names))
|
||||
|
||||
if jjc_names:
|
||||
logger.info("JJC with wrong date format")
|
||||
logger.info(pformat(jjc_names))
|
||||
|
||||
# POC of pdf reader
|
||||
# Make a quick script that checks if the Pictures folder exists in a certain fail directory
|
||||
|
||||
|
||||
# Make a cron job in github runner for Cyrus for this
|
||||
|
||||
logger.info("Hope this helps! <3")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
|
|||
|
|
@ -74,7 +74,7 @@ def api_call_decorator(func):
|
|||
# Check and refresh the access token if needed
|
||||
if self.is_access_token_expired():
|
||||
self.retrieve_access_token()
|
||||
logger.info("Access token refreshed.")
|
||||
logger.debug("Access token refreshed.")
|
||||
|
||||
# Get the HTTP method, URL, and optionally data from the function
|
||||
http_method, url, data = func(self, *args, **kwargs)
|
||||
|
|
@ -201,7 +201,7 @@ class SharePointClient:
|
|||
access_token_request_timestamp = datetime.now()
|
||||
|
||||
if refresh:
|
||||
logger.info("Forcing refresh of access token.")
|
||||
logger.debug("Forcing refresh of access token.")
|
||||
token = app.acquire_token_for_client(scopes=scope)
|
||||
else:
|
||||
# Check if a token is already cached
|
||||
|
|
@ -225,7 +225,7 @@ class SharePointClient:
|
|||
'Authorization': f"Bearer {self.access_token['access_token']}"
|
||||
}
|
||||
|
||||
logger.info("Access token retrieved successfully.")
|
||||
logger.debug("Access token retrieved successfully.")
|
||||
|
||||
@api_call_decorator
|
||||
def get_documents_drive(self):
|
||||
|
|
@ -234,7 +234,7 @@ class SharePointClient:
|
|||
:return: Tuple containing HTTP method, URL, and None for data.
|
||||
"""
|
||||
url = f"https://graph.microsoft.com/v1.0/sites/{self.site_id}/drive"
|
||||
logger.info(f"Getting document drive from URL: {url}")
|
||||
logger.debug(f"Getting document drive from URL: {url}")
|
||||
return 'GET', url, None
|
||||
|
||||
@api_call_decorator
|
||||
|
|
@ -247,7 +247,7 @@ class SharePointClient:
|
|||
:return: Tuple containing HTTP method, URL, and None for data.
|
||||
"""
|
||||
url = f"https://graph.microsoft.com/v1.0/drives/{self.document_drive_id}/root:/{folder_path}:/children?$top={page_size}"
|
||||
logger.info(f"Listing folder contents from URL: {url}")
|
||||
logger.debug(f"Listing folder contents from URL: {url}")
|
||||
return 'GET', url, None
|
||||
|
||||
@staticmethod
|
||||
|
|
|
|||
2
run_daily_script.sh
Normal file
2
run_daily_script.sh
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
# Example of how to run python code in this environment
|
||||
poetry run python etl/daily_script.py --debug
|
||||
Loading…
Add table
Reference in a new issue