run it live and see what happens

This commit is contained in:
Jun-te Kim 2025-03-17 21:09:29 +00:00
parent 5e4790e723
commit 7c887d8531
5 changed files with 59 additions and 15 deletions

View file

@ -34,8 +34,10 @@ def main():
total_dict = dict(Counter(south_coast_submissions) + Counter(jjc_coast_submission) + Counter(sgec_submission) + Counter(BAXTER_KELLY_submissions))
logger.info("-------------------------------------------")
logger.info("Good morning Cyrus")
logger.info("-------------------------------------------")
logger.info("-------------WRONG DATE FORMAT-------------")
logger.info("-------------------------------------------")
if south_coast_names:
logger.info("South Coast with wrong date format:")
logger.info(pformat(south_coast_names))
@ -53,15 +55,21 @@ def main():
logger.info("Baxter Kelly with wrong date format")
logger.info(pformat(b_names))
logger.info("-------------------------------------------")
logger.info(f"For week commencing: {WEEK_COMMENCING}")
logger.info(f"Submissions: {pformat(total_dict)}")
logger.info("------EACH PRE SITE NOTES SUBMISSIONS------")
logger.info("-------------------------------------------")
logger.info(f"For week commencing: {WEEK_COMMENCING}")
logger.info(f"South Coast Submissions: {pformat(south_coast_submissions)}")
logger.info(f"JJC: {pformat(jjc_coast_submission)}")
logger.info(f"SGEC Submissions: {pformat(sgec_submission)}")
logger.info(f"Baxter Kelly: {pformat(BAXTER_KELLY_submissions)}")
logger.info("-------------------------------------------")
logger.info("-----TOTAL PRE SITE NOTES SUBMISSIONS------")
logger.info("-------------------------------------------")
logger.info(f"For week commencing: {WEEK_COMMENCING}")
logger.info(f"Total Submissions: {pformat(total_dict)}")
logger.info("-------------------------------------------")
logger.info("---BROUGHT TO YOU BY THE DOMNA TECH TEAM---")
logger.info("-------------------------------------------")
# Make a quick script that checks if the Pictures folder exists in a certain fail directory
# Make a cron job in github runner for Cyrus for this
logger.info("Hope this helps! <3")
if __name__ == "__main__":

View file

@ -30,10 +30,11 @@ class pdfReaderToText():
def get_file_type(self):
if len(self.text_list) > 1:
if "Quidos Ltd using Argyle software BRE approved calculator".lower() in self.text_list[0].lower():
self.type = ReportType.QUIDOS_SITE_NOTE
self.type = ReportType.QUIDOS_PRESITE_NOTE
return self.type
else:
raise NotImplementedError("New type of file - please contact Jun-te Kim")
return None
# raise NotImplementedError("New type of file - please contact Jun-te Kim")
def get_reader(self):
self.get_file_type()

View file

@ -2,4 +2,4 @@ from enum import Enum
class ReportType(Enum):
QUIDOS_SITE_NOTE = 1
QUIDOS_PRESITE_NOTE = 1

View file

@ -193,12 +193,37 @@ class SharePointScraper():
def get_number_of_surverys_completed(self):
for name in self.surveyor_names:
if name in self.surveyor_to_housing_assosications:
for house_assosication in self.surveyor_to_housing_assosications[name]:
address_folders = self.get_folders_in_path(f"/{name}/{WEEK_COMMENCING}/{house_assosication}")
for house_ass in self.surveyor_to_housing_assosications[name]:
address_folders = self.get_folders_in_path(f"/{name}/{WEEK_COMMENCING}/{house_ass}")
if 'value' not in address_folders:
raise RuntimeError("Failed to get address folders")
else:
self.surveyor_work_completed.update({name: len(address_folders['value'])})
allAddress = []
for address in address_folders['value']:
if 'file' not in address:
allAddress.append(address['name'])
for address in allAddress:
path = f"/{name}/{WEEK_COMMENCING}/{house_ass}/{address}"
files_to_download_sharepoint_info = self.get_folders_in_path(path)
if 'value' not in files_to_download_sharepoint_info:
raise RuntimeError("Failed to get files to download")
else:
file_names_to_download = {}
only_pdf = [".pdf"]
for file in files_to_download_sharepoint_info['value']:
if 'file' in file:
if any(file["name"].endswith(ext) for ext in only_pdf):
file_names_to_download.update({file["name"]: file['@microsoft.graph.downloadUrl']})
for file_name, url in file_names_to_download.items():
content = self.get_file_content(url)
path = self.create_temp_file(content, f"{name}/{WEEK_COMMENCING}/{house_ass}/{address}/{file_name}")
if DomnaSharePointValidator.is_quidos_presite(path):
if name in self.surveyor_work_completed:
self.surveyor_work_completed[name] += 1
else:
self.surveyor_work_completed.update({name: 1})
break
return self.surveyor_work_completed

View file

@ -2,6 +2,9 @@ import os
import logging
from etl.utils.logger import Logger
import re
from etl.pdfReader.pdfReaderToText import pdfReaderToText
from etl.pdfReader.reportType import ReportType
class DomnaSharePointValidator():
"""
@ -21,6 +24,13 @@ class DomnaSharePointValidator():
return False
return True
def is_quidos_presite(file_path):
    """
    Report whether the PDF at ``file_path`` is a Quidos pre-site note.

    The path is handed to ``pdfReaderToText`` and the resulting reader is
    asked for its file type via ``get_file_type()``; that value is compared
    against ``ReportType.QUIDOS_PRESITE_NOTE``.

    :param file_path: Path of the PDF to classify (presumably a local file
        already downloaded by the caller - confirm against the call site).
    :return: True when the detected type is ``ReportType.QUIDOS_PRESITE_NOTE``,
        False for any other value returned by ``get_file_type()``.
    """
    reader = pdfReaderToText(file_path)
    # The debug print of the detected type was removed, and the result is no
    # longer bound to a local called `type`, which shadowed the builtin.
    detected_type = reader.get_file_type()
    return detected_type == ReportType.QUIDOS_PRESITE_NOTE
def valid_social_housing_associations(list_):
"""
Nic gave me a list of housing association names, will most likely use in future so leaving it here