mirror of
https://github.com/Hestia-Homes/survey-extraction.git
synced 2026-06-08 11:17:29 +00:00
run it live and see what happens
This commit is contained in:
parent
5e4790e723
commit
7c887d8531
5 changed files with 59 additions and 15 deletions
|
|
@ -34,8 +34,10 @@ def main():
|
|||
|
||||
total_dict = dict(Counter(south_coast_submissions) + Counter(jjc_coast_submission) + Counter(sgec_submission) + Counter(BAXTER_KELLY_submissions))
|
||||
|
||||
logger.info("-------------------------------------------")
|
||||
logger.info("Good morning Cyrus")
|
||||
logger.info("-------------------------------------------")
|
||||
logger.info("-------------WRONG DATE FORMAT-------------")
|
||||
logger.info("-------------------------------------------")
|
||||
if south_coast_names:
|
||||
logger.info("South Coast with wrong date format:")
|
||||
logger.info(pformat(south_coast_names))
|
||||
|
|
@ -53,15 +55,21 @@ def main():
|
|||
logger.info("Baxter Kelly with wrong date format")
|
||||
logger.info(pformat(b_names))
|
||||
logger.info("-------------------------------------------")
|
||||
logger.info(f"For week commencing: {WEEK_COMMENCING}")
|
||||
logger.info(f"Submissions: {pformat(total_dict)}")
|
||||
logger.info("------EACH PRE SITE NOTES SUBMISSIONS------")
|
||||
logger.info("-------------------------------------------")
|
||||
logger.info(f"For week commencing: {WEEK_COMMENCING}")
|
||||
logger.info(f"South Coast Submissions: {pformat(south_coast_submissions)}")
|
||||
logger.info(f"JJC: {pformat(jjc_coast_submission)}")
|
||||
logger.info(f"SGEC Submissions: {pformat(sgec_submission)}")
|
||||
logger.info(f"Baxter Kelly: {pformat(BAXTER_KELLY_submissions)}")
|
||||
logger.info("-------------------------------------------")
|
||||
logger.info("-----TOTAL PRE SITE NOTES SUBMISSIONS------")
|
||||
logger.info("-------------------------------------------")
|
||||
logger.info(f"For week commencing: {WEEK_COMMENCING}")
|
||||
logger.info(f"Total Submissions: {pformat(total_dict)}")
|
||||
logger.info("-------------------------------------------")
|
||||
logger.info("---BROUGHT TO YOU BY THE DOMNA TECH TEAM---")
|
||||
logger.info("-------------------------------------------")
|
||||
# Make a quick script that checks if the Pictures folder exists in a certain fail directory
|
||||
|
||||
|
||||
# Make a cron job in github runner for Cyrus for this
|
||||
|
||||
logger.info("Hope this helps! <3")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
|
|||
|
|
@ -30,10 +30,11 @@ class pdfReaderToText():
|
|||
def get_file_type(self):
|
||||
if len(self.text_list) > 1:
|
||||
if "Quidos Ltd using Argyle software BRE approved calculator".lower() in self.text_list[0].lower():
|
||||
self.type = ReportType.QUIDOS_SITE_NOTE
|
||||
self.type = ReportType.QUIDOS_PRESITE_NOTE
|
||||
return self.type
|
||||
else:
|
||||
raise NotImplementedError("New type of file - please contact Jun-te Kim")
|
||||
return None
|
||||
# raise NotImplementedError("New type of file - please contact Jun-te Kim")
|
||||
|
||||
def get_reader(self):
|
||||
self.get_file_type()
|
||||
|
|
|
|||
|
|
@ -2,4 +2,4 @@ from enum import Enum
|
|||
|
||||
|
||||
class ReportType(Enum):
|
||||
QUIDOS_SITE_NOTE = 1
|
||||
QUIDOS_PRESITE_NOTE = 1
|
||||
|
|
@ -193,12 +193,37 @@ class SharePointScraper():
|
|||
def get_number_of_surverys_completed(self):
|
||||
for name in self.surveyor_names:
|
||||
if name in self.surveyor_to_housing_assosications:
|
||||
for house_assosication in self.surveyor_to_housing_assosications[name]:
|
||||
address_folders = self.get_folders_in_path(f"/{name}/{WEEK_COMMENCING}/{house_assosication}")
|
||||
for house_ass in self.surveyor_to_housing_assosications[name]:
|
||||
address_folders = self.get_folders_in_path(f"/{name}/{WEEK_COMMENCING}/{house_ass}")
|
||||
if 'value' not in address_folders:
|
||||
raise RuntimeError("Failed to get address folders")
|
||||
else:
|
||||
self.surveyor_work_completed.update({name: len(address_folders['value'])})
|
||||
allAddress = []
|
||||
for address in address_folders['value']:
|
||||
if 'file' not in address:
|
||||
allAddress.append(address['name'])
|
||||
|
||||
for address in allAddress:
|
||||
path = f"/{name}/{WEEK_COMMENCING}/{house_ass}/{address}"
|
||||
files_to_download_sharepoint_info = self.get_folders_in_path(path)
|
||||
if 'value' not in files_to_download_sharepoint_info:
|
||||
raise RuntimeError("Failed to get files to download")
|
||||
else:
|
||||
file_names_to_download = {}
|
||||
only_pdf = [".pdf"]
|
||||
for file in files_to_download_sharepoint_info['value']:
|
||||
if 'file' in file:
|
||||
if any(file["name"].endswith(ext) for ext in only_pdf):
|
||||
file_names_to_download.update({file["name"]: file['@microsoft.graph.downloadUrl']})
|
||||
for file_name, url in file_names_to_download.items():
|
||||
content = self.get_file_content(url)
|
||||
path = self.create_temp_file(content, f"{name}/{WEEK_COMMENCING}/{house_ass}/{address}/{file_name}")
|
||||
if DomnaSharePointValidator.is_quidos_presite(path):
|
||||
if name in self.surveyor_work_completed:
|
||||
self.surveyor_work_completed[name] += 1
|
||||
else:
|
||||
self.surveyor_work_completed.update({name: 1})
|
||||
break
|
||||
|
||||
return self.surveyor_work_completed
|
||||
|
||||
|
|
|
|||
|
|
@ -2,6 +2,9 @@ import os
|
|||
import logging
|
||||
from etl.utils.logger import Logger
|
||||
import re
|
||||
from etl.pdfReader.pdfReaderToText import pdfReaderToText
|
||||
from etl.pdfReader.reportType import ReportType
|
||||
|
||||
|
||||
class DomnaSharePointValidator():
|
||||
"""
|
||||
|
|
@ -21,6 +24,13 @@ class DomnaSharePointValidator():
|
|||
return False
|
||||
return True
|
||||
|
||||
@staticmethod
def is_quidos_presite(file_path):
    """Report whether the PDF at ``file_path`` is a Quidos pre-site note.

    The file is wrapped in ``pdfReaderToText`` and the value returned by its
    ``get_file_type()`` is compared with ``ReportType.QUIDOS_PRESITE_NOTE``.

    Declared as a static method because it takes no ``self``; calling it on
    the class (``DomnaSharePointValidator.is_quidos_presite(path)``) keeps
    working, and calling it on an instance no longer passes the instance as
    ``file_path``.

    Args:
        file_path: Path passed unchanged to ``pdfReaderToText``.

    Returns:
        bool: ``True`` only when the detected type equals
        ``ReportType.QUIDOS_PRESITE_NOTE``; any other result gives ``False``.
    """
    # Named ``report_type`` so the builtin ``type`` is not shadowed.
    report_type = pdfReaderToText(file_path).get_file_type()
    # Debug-level log instead of a bare print, so routine runs stay quiet
    # while the detected type is still available when diagnosing a file.
    logging.getLogger(__name__).debug(
        "Detected report type for %s: %s", file_path, report_type
    )
    return report_type == ReportType.QUIDOS_PRESITE_NOTE
|
||||
|
||||
|
||||
def valid_social_housing_associations(list_):
|
||||
"""
|
||||
Nic gave me a list of housing association names, will most likely use in future so leaving it here
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue