From 371d6cd3ed1b65b9e69aed4a9ba6c1d93320a2fe Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Mon, 17 Mar 2025 21:25:17 +0000 Subject: [PATCH] make output nicer --- etl/pdfReader/sitenotes.py | 1 - etl/scraper/scraper.py | 2 ++ etl/validator/validator.py | 7 +++++-- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/etl/pdfReader/sitenotes.py b/etl/pdfReader/sitenotes.py index d53764f..4161395 100644 --- a/etl/pdfReader/sitenotes.py +++ b/etl/pdfReader/sitenotes.py @@ -201,7 +201,6 @@ class QuidosSiteNotesExtractor(SiteNotesExtractor): door = self.get_door() windows = self.get_windows() - print(windows["main_property"]) # Section 12 ventilationAndCooling = self.get_ventilation_and_cooling() diff --git a/etl/scraper/scraper.py b/etl/scraper/scraper.py index 1f4334a..cfa5481 100644 --- a/etl/scraper/scraper.py +++ b/etl/scraper/scraper.py @@ -216,6 +216,7 @@ class SharePointScraper(): if any(file["name"].endswith(ext) for ext in only_pdf): file_names_to_download.update({file["name"]: file['@microsoft.graph.downloadUrl']}) for file_name, url in file_names_to_download.items(): + print(pformat(file_names_to_download)) content = self.get_file_content(url) path = self.create_temp_file(content, f"{name}/{WEEK_COMMENCING}/{house_ass}/{address}/{file_name}") if DomnaSharePointValidator.is_quidos_presite(path): @@ -224,6 +225,7 @@ class SharePointScraper(): else: self.surveyor_work_completed.update({name: 1}) break + print("trololol") return self.surveyor_work_completed diff --git a/etl/validator/validator.py b/etl/validator/validator.py index 760a663..4a5ce3a 100644 --- a/etl/validator/validator.py +++ b/etl/validator/validator.py @@ -13,6 +13,8 @@ class DomnaSharePointValidator(): def __init__(self): self.logger = Logger(name='DomnaSharePointValidator', level=logging.DEBUG).get_logger() + + @staticmethod def valid_dates(list_of_dates_to_check): # Patten Nic wants: W.C. DD.MM.YYYY # TODO: Ideally split the date and W.C. and use a datatime library so its more standardised @@ -23,14 +25,15 @@ class DomnaSharePointValidator(): if not re.match(pattern, date): return False return True - + + @staticmethod def is_quidos_presite(file_path): file = pdfReaderToText(file_path) type = file.get_file_type() - print(type) return type == ReportType.QUIDOS_PRESITE_NOTE + @staticmethod def valid_social_housing_associations(list_): """ Nic gave me a list of housing association names, will most likely use in future so leaving it here