mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
Refactoring structure of extraction code
This commit is contained in:
parent
7513e475d3
commit
0332c77098
1 changed files with 57 additions and 23 deletions
|
|
@ -43,6 +43,42 @@ def extract_summary_report(pdf_path):
|
|||
return data
|
||||
|
||||
|
||||
def extract_retrofit_assessment_folder(retrofit_folder_path):
    """
    Extract summary-report data from a retrofit assessment folder.

    Looks in ``retrofit_folder_path`` for a file whose name contains "summary" and
    ends with ".pdf" (both checks are case-insensitive) and hands its full path to
    ``extract_summary_report``.

    Args:
        retrofit_folder_path: Path of the retrofit assessment folder to scan.

    Returns:
        The value returned by ``extract_summary_report`` for the chosen PDF, or
        ``None`` when the folder holds no matching PDF.

    Raises:
        OSError: If the folder cannot be listed (propagated from ``os.listdir``).
    """
    # os.listdir() returns entries in arbitrary order; sort so that the same file is
    # chosen on every run when several candidates exist.
    retrofit_files = sorted(os.listdir(retrofit_folder_path))

    # Find the summary report in the retrofit folder. The name is lower-cased once so
    # that files such as "Summary Report.PDF" are matched as well.
    summary_report = next(
        (
            name
            for name in retrofit_files
            if "summary" in name.lower() and name.lower().endswith(".pdf")
        ),
        None,
    )

    if summary_report is None:
        return None  # If no relevant PDF is found

    pdf_path = os.path.join(retrofit_folder_path, summary_report)
    return extract_summary_report(pdf_path)
|
||||
|
||||
|
||||
def extract_from_survey_folder_files(survey_folder_path):
    """
    Extract summary-report data from files placed directly in a survey folder.

    Used when no 'retrofit assessment' folder exists (or it yielded nothing). Looks in
    ``survey_folder_path`` for a file whose name contains "summary" and ends with
    ".pdf" (both checks are case-insensitive) and hands its full path to
    ``extract_summary_report``.

    Args:
        survey_folder_path: Path of the survey folder to scan.

    Returns:
        The value returned by ``extract_summary_report`` for the chosen PDF, or
        ``None`` when the folder holds no matching PDF.

    Raises:
        OSError: If the folder cannot be listed (propagated from ``os.listdir``).
    """
    # os.listdir() returns entries in arbitrary order; sort so that the same file is
    # chosen on every run when several candidates exist.
    survey_files = sorted(os.listdir(survey_folder_path))

    # Look for a summary report directly in the survey folder. The name is lower-cased
    # once so that files such as "Summary Report.PDF" are matched as well.
    summary_report = next(
        (
            name
            for name in survey_files
            if "summary" in name.lower() and name.lower().endswith(".pdf")
        ),
        None,
    )

    if summary_report is None:
        return None  # If no relevant PDF is found

    pdf_path = os.path.join(survey_folder_path, summary_report)
    return extract_summary_report(pdf_path)
|
||||
|
||||
|
||||
def main():
|
||||
"""
|
||||
This code prepares the data for the Warm Homes: Social Housing Fund Wave 3, for Stonewater.
|
||||
|
|
@ -52,40 +88,38 @@ def main():
|
|||
|
||||
extracted_data = []
|
||||
for survey_folder in survey_folders:
|
||||
survey_folder_path = os.path.join(FILE_PATH, survey_folder)
|
||||
|
||||
# List the folders inside of the survey folder
|
||||
survey_subfolders = [name for name in os.listdir(os.path.join(FILE_PATH, survey_folder))
|
||||
if os.path.isdir(os.path.join(FILE_PATH, survey_folder, name))]
|
||||
survey_subfolders = [name for name in os.listdir(survey_folder_path)
|
||||
if os.path.isdir(os.path.join(survey_folder_path, name))]
|
||||
|
||||
if not survey_subfolders:
|
||||
continue
|
||||
|
||||
# Check for a folder inside of the survey_subfolders containing the phrase "retrofit assessment:
|
||||
# If it exists, we will use the data from that folder
|
||||
# Check if there's a "retrofit assessment" folder
|
||||
retrofit_folder = next((name for name in survey_subfolders if "retrofit assessment" in name.lower()), None)
|
||||
|
||||
# List contents of the retrofit folder
|
||||
retrofit_files = os.listdir(os.path.join(FILE_PATH, survey_folder, retrofit_folder))
|
||||
# If retrofit assessment folder exists, check if it has content
|
||||
if retrofit_folder:
|
||||
retrofit_folder_path = os.path.join(survey_folder_path, retrofit_folder)
|
||||
if os.listdir(retrofit_folder_path): # If not empty
|
||||
summary_data = extract_retrofit_assessment_folder(retrofit_folder_path)
|
||||
if summary_data:
|
||||
summary_data = {
|
||||
"survey_folder": survey_folder,
|
||||
**summary_data
|
||||
}
|
||||
extracted_data.append(summary_data)
|
||||
continue
|
||||
|
||||
if not retrofit_files:
|
||||
continue
|
||||
|
||||
# We now look for specific files:
|
||||
# 1) Check the summary report.- the title will contain the word "summary" (lowercase) and the file extension is
|
||||
# .pdf
|
||||
summary_report = next(
|
||||
(name for name in retrofit_files if "summary" in name.lower() and name.endswith(".pdf")), None
|
||||
)
|
||||
if summary_report is not None:
|
||||
pdf_path = os.path.join(FILE_PATH, survey_folder, retrofit_folder, summary_report)
|
||||
summary_data = extract_summary_report(pdf_path)
|
||||
# If no retrofit folder or it was empty, check files in survey_folder
|
||||
summary_data = extract_from_survey_folder_files(survey_folder_path)
|
||||
if summary_data:
|
||||
summary_data = {
|
||||
"survey_folder": survey_folder,
|
||||
**summary_data
|
||||
}
|
||||
extracted_data.append(summary_data)
|
||||
continue
|
||||
|
||||
raise NotImplementedError("IMPLEMENT ME!")
|
||||
print("Extracted Data:", extracted_data)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue