diff --git a/etl/lodgement/app.py b/etl/lodgement/app.py index b8b7e393..4ff8bdf1 100644 --- a/etl/lodgement/app.py +++ b/etl/lodgement/app.py @@ -126,7 +126,7 @@ def handler(): file_extractor = extractors.get(report_type) if file_extractor is None: continue - + extracted_contents[report_type] = file_extractor(filepath).extract() if file_extraction_tools.is_xml(filepath): @@ -136,6 +136,7 @@ def handler(): file_extractor = extractors.get(xml_type) if file_extractor is None: continue + extracted_contents[xml_type] = file_extractor(filepath).extract() output_row_data = output_template.copy() @@ -144,10 +145,12 @@ def handler(): # 'Local Authority', # 'Trustmark Lodgement ID', # 'Certificate Number', 'EWI UMR', 'Loft UMR', 'Windows UMR', - # 'Doors UMR', 'Measure Lodgement Date', 'Full Lodgement Date', 'Name', 'Phone', 'Email', 'Secondary Contact + # 'Doors UMR', 'Measure Lodgement Date', 'Full Lodgement Date', + # 'Name', 'Phone', 'Email', (owner) + # 'Secondary Contact # Name', 'Secondary Contact Phone', 'Trustmark Licence Number', 'Retrofit Assessment Date', 'Company Name', # 'Retrofit Designer Name', , 'No. of Bedrooms', - # , 'Pre Heat Transfer', 'Pre Total Floor Area', 'Pre Heat Demand', + # , # 'Pre Air Tightness', 'SAP Rating Post (from EPC)', 'Post Heat Transfer', 'Post Total Floor Area', # 'Post Heat Demand', 'Post Air Tightness', 'Number of Eligible Measures Installed', 'Total Cost of Works', # 'Annual Fuel Saving (MTP)', 'Work Type ID', 'Measure Category', 'Installer', 'Operative Name', 'Operative @@ -159,7 +162,12 @@ def handler(): total_floor_area = sum( [x["Floor Area (m2)"] for x in extracted_contents["elmhurst epr"]["Building Parts"]] + # Get the conservatory floor area - extracted_contents["elmhurst epr"]["Conservatory"]["Floor Area (m2)"] + [extracted_contents["elmhurst epr"]["Conservatory"]["Conservatory Floor Area"]] + ) + + pre_heat_transfer = extracted_contents["elmhurst epr"]["Primary Energy Use Intensity (kWh/m2/yr)"] + pre_heat_demand = ( + extracted_contents["elmhurst epr"]["Primary Energy Use Intensity (kWh/m2/yr)"] * total_floor_area ) to_insert = { @@ -172,8 +180,9 @@ def handler(): "Local Authority": None, 'Property Age': extracted_contents["elmhurst epr"]["Property Age"], 'SAP Rating Pre (from IMA)': extracted_contents["elmhurst epr"]["Current SAP Rating"], - 'Pre Heat Transfer': extracted_contents["elmhurst epr"][ - "Primary Energy Use Intensity (kWh/m2/yr)"] * total_floor_area, + 'Pre Heat Transfer': pre_heat_transfer, + 'Pre Total Floor Area': total_floor_area, + 'Pre Heat Demand': pre_heat_demand, } output_row_data["Property Address"] = property_folder.split(")")[1].strip() diff --git a/utils/file_data_extraction.py b/utils/file_data_extraction.py index f0d341c6..ae75735b 100644 --- a/utils/file_data_extraction.py +++ b/utils/file_data_extraction.py @@ -387,6 +387,8 @@ class ElmhurstEprExtractor: reader = PyPDF2.PdfReader(file) text = "".join(page.extract_text() for page in reader.pages) + data["Assessor Name"] = re.search(r"Created by:\s*(.*?)\n", text).group(1).strip() + # Extracting individual components address_match = re.search(r"ENERGY REPORT\nDwelling Address\s*(.*?)\s*\nReference", text, re.DOTALL) if not address_match: