import datetime
import logging
import os
import re

from etl.utils.logger import Logger
from etl.pdfReader.pdfReaderToText import pdfReaderToText
from etl.pdfReader.reportType import ReportType

# Required "week commencing" format: ``W.C. DD.MM.YYYY``.
# Used with ``fullmatch`` so that nothing (not even a trailing newline) may
# follow the year; ``[0-9]`` instead of ``\d`` keeps the year ASCII-only.
_WEEK_COMMENCING_RE = re.compile(
    r"W\.C\. (0[1-9]|[12][0-9]|3[01])\.(0[1-9]|1[0-2])\.([0-9]{4})"
)

# Housing association names accepted by
# ``DomnaSharePointValidator.valid_social_housing_associations``.
# NOTE(review): entries are compared exactly after upper-casing the input;
# spellings such as "MIDLANDS HEART" and "LIVE WEST" are kept as supplied --
# confirm they match the names actually used in SharePoint.
_HOUSING_ORGANISATIONS = frozenset({
    "SOUTHERN HOUSING",
    "UNITAS",
    "SETTLE",
    "PLUS DANE",
    "MIDLANDS HEART",
    "EASTLIGHT",
    "ROOFTOP",
    "RMG GROUP",
    "BROMFORD",
    "PLACES FOR PEOPLE",
    "SOUTHEND-ON-SEA COMMUNITY HOUSING",
    "THRIVE HOUSING",
    "ANCHOR GROUP",
    "LAMBETH COUNCIL",
    "ACIS GROUP",
    "WATFORD HOUSING",
    "ASPIRE",
    "BROADLANDS",
    "LIVE WEST",
    "GUINNESS",
    "SOVEREIGN",
    "WHITE HORSE HOUSING",
    "PRIVATE HOUSING",
    "MUIR",
    "TOWER HAMLETS",
    "FOR HOUSING",
    "CAMBRIDGE",
    "PAPWORTH TRUST",
})


def _is_week_commencing_date(value) -> bool:
    """Return True if *value* is a ``W.C. DD.MM.YYYY`` string for a real date."""
    if not isinstance(value, str):
        # e.g. None from an empty cell: invalid rather than a TypeError.
        return False

    match = _WEEK_COMMENCING_RE.fullmatch(value)
    if match is None:
        return False

    day, month, year = (int(part) for part in match.groups())
    try:
        # The regex only checks digit ranges; constructing a date rejects
        # impossible combinations such as 31.02.2024 or year 0000.
        datetime.date(year, month, day)
    except ValueError:
        return False
    return True


class DomnaSharePointValidator:
    """
    A simple class to check certain things are in certain format in Domna
    sharepoint with surveyors.
    """

    def __init__(self) -> None:
        self.logger = Logger(
            name='DomnaSharePointValidator', level=logging.DEBUG
        ).get_logger()

    @staticmethod
    def valid_dates(list_of_dates_to_check) -> bool:
        """
        Check that every entry is a valid "week commencing" date.

        The pattern Nic wants is ``W.C. DD.MM.YYYY``. Each entry must match
        that format exactly and must also name a date that exists in the
        calendar.

        :param list_of_dates_to_check: iterable of values to validate.
        :return: True if all entries are valid (also True for an empty
            iterable), False as soon as one entry is not.
        """
        return all(
            _is_week_commencing_date(entry) for entry in list_of_dates_to_check
        )

    @staticmethod
    def is_quidos_presite(file_path) -> bool:
        """
        Tell whether the PDF at *file_path* is a Quidos pre-site note.

        :param file_path: path handed to ``pdfReaderToText``.
        :return: True if the reader's ``get_file_type()`` equals
            ``ReportType.QUIDOS_PRESITE_NOTE``.
        """
        reader = pdfReaderToText(file_path)
        return reader.get_file_type() == ReportType.QUIDOS_PRESITE_NOTE

    @staticmethod
    def valid_social_housing_associations(list_) -> bool:
        """
        Check that every name is a known housing association.

        Nic gave me a list of housing association names, will most likely use
        in future so leaving it here. Names are compared case-insensitively
        (the input is upper-cased) but otherwise exactly -- surrounding
        whitespace is not stripped.

        :param list_: iterable of names to validate.
        :return: True if all names are in the known list (also True for an
            empty iterable), False as soon as one is not.
        """
        return all(
            isinstance(name, str) and name.upper() in _HOUSING_ORGANISATIONS
            for name in list_
        )