mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
adding file detection for elmhurst project handover
This commit is contained in:
parent
2cfc881044
commit
3cd9670d1a
2 changed files with 49 additions and 1 deletions
|
|
@ -98,7 +98,8 @@ def handler():
|
|||
"osmosis condition report": OsmosisConditionReportParser,
|
||||
"elmhurst evidence report": None,
|
||||
"full sap xml": FullSapParser,
|
||||
"pulse air permeability": file_extraction_tools.PulseAirPermeabilityExtractor
|
||||
"pulse air permeability": file_extraction_tools.PulseAirPermeabilityExtractor,
|
||||
"elmhurst project handover": file_extraction_tools.ElmhurstProjectHandoverExtractor,
|
||||
}
|
||||
|
||||
extracted = []
|
||||
|
|
@ -159,6 +160,32 @@ def handler():
|
|||
|
||||
extracted_contents[report_type] = file_extractor(filepath).extract()
|
||||
|
||||
lodgement_folder = os.path.join(
|
||||
property_folder_path, [f for f in subfolders if "TrustMark Lodgement" in f][0]
|
||||
)
|
||||
# Within the lodgement folder, we want the required documents sub-folder
|
||||
lodgement_subfolders = [
|
||||
file for file in os.listdir(lodgement_folder) if os.path.isdir(os.path.join(lodgement_folder, file))
|
||||
]
|
||||
required_documents_folder = os.path.join(
|
||||
lodgement_folder, [f for f in lodgement_subfolders if "required documents" in f.lower()][0]
|
||||
)
|
||||
# List the contents
|
||||
required_documents_contents = [
|
||||
file for file in os.listdir(required_documents_folder) if
|
||||
os.path.isfile(os.path.join(required_documents_folder, file))
|
||||
]
|
||||
|
||||
# There are only a few file types we actually want to process in here for the moment
|
||||
for filename in required_documents_contents:
|
||||
filepath = os.path.join(required_documents_folder, filename)
|
||||
if file_extraction_tools.is_pdf(filepath):
|
||||
report_type = file_extraction_tools.detect_pdf_report_type(pdf_path=filepath)
|
||||
if report_type != "elmhurst project handover":
|
||||
continue
|
||||
blah
|
||||
file_extractor = extractors[report_type]
|
||||
|
||||
output_row_data = output_template.copy()
|
||||
|
||||
# dict_keys([ 'City/County', 'District/Town',
|
||||
|
|
|
|||
|
|
@ -50,6 +50,13 @@ def is_pulse_air_permeability(text):
|
|||
return text.startswith("Air Permeability Test Report @O PULSE")
|
||||
|
||||
|
||||
def is_elmhurst_project_handover(text):
    """
    Report whether text carries an Elmhurst Project Handover title marker.

    :param text: Text to search; the visible caller passes the first page text of a PDF.
    :return: True if either marker occurs anywhere in text, otherwise False.
    """
    # Markers are matched case-sensitively, in underscore and space-separated forms.
    handover_markers = ("Retrofit_Project_Handover", "Retrofit Project Handover")
    return any(marker in text for marker in handover_markers)
|
||||
|
||||
|
||||
def detect_pdf_report_type(pdf_path):
|
||||
"""
|
||||
Detects the type of report based on content or filename.
|
||||
|
|
@ -78,6 +85,8 @@ def detect_pdf_report_type(pdf_path):
|
|||
return "elmhurst evidence report"
|
||||
elif is_pulse_air_permeability(first_page_text):
|
||||
return "pulse air permeability"
|
||||
elif is_elmhurst_project_handover(first_page_text):
|
||||
return "elmhurst project handover"
|
||||
|
||||
return None
|
||||
|
||||
|
|
@ -1057,3 +1066,15 @@ class PulseAirPermeabilityExtractor:
|
|||
}
|
||||
|
||||
return data
|
||||
|
||||
|
||||
class ElmhurstProjectHandoverExtractor:
    """
    Extractor for the Elmhurst Project Handover document.

    Extraction is not implemented yet: extract() is a placeholder that returns None.
    """

    def __init__(self, file_path):
        """
        Remember which document this extractor is bound to.

        :param file_path: Location of the handover document; stored unchanged.
        """
        self.file_path = file_path

    def extract(self):
        """
        Placeholder for the extraction step.

        :return: Always None; nothing is read from file_path.
        """
        return None
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue