mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
extending extraction
This commit is contained in:
parent
749faaebca
commit
63521dd1e3
2 changed files with 17 additions and 6 deletions
|
|
@ -126,7 +126,7 @@ def handler():
|
|||
file_extractor = extractors.get(report_type)
|
||||
if file_extractor is None:
|
||||
continue
|
||||
|
||||
|
||||
extracted_contents[report_type] = file_extractor(filepath).extract()
|
||||
|
||||
if file_extraction_tools.is_xml(filepath):
|
||||
|
|
@ -136,6 +136,7 @@ def handler():
|
|||
file_extractor = extractors.get(xml_type)
|
||||
if file_extractor is None:
|
||||
continue
|
||||
|
||||
extracted_contents[xml_type] = file_extractor(filepath).extract()
|
||||
|
||||
output_row_data = output_template.copy()
|
||||
|
|
@ -144,10 +145,12 @@ def handler():
|
|||
# 'Local Authority',
|
||||
# 'Trustmark Lodgement ID',
|
||||
# 'Certificate Number', 'EWI UMR', 'Loft UMR', 'Windows UMR',
|
||||
# 'Doors UMR', 'Measure Lodgement Date', 'Full Lodgement Date', 'Name', 'Phone', 'Email', 'Secondary Contact
|
||||
# 'Doors UMR', 'Measure Lodgement Date', 'Full Lodgement Date',
|
||||
# 'Name', 'Phone', 'Email', (owner)
|
||||
# 'Secondary Contact Name',
|
||||
# 'Secondary Contact Phone', 'Trustmark Licence Number', 'Retrofit Assessment Date', 'Company Name',
|
||||
# 'Retrofit Designer Name', 'No. of Bedrooms',
|
||||
# 'Pre Heat Transfer', 'Pre Total Floor Area', 'Pre Heat Demand',
|
||||
# ,
|
||||
# 'Pre Air Tightness', 'SAP Rating Post (from EPC)', 'Post Heat Transfer', 'Post Total Floor Area',
|
||||
# 'Post Heat Demand', 'Post Air Tightness', 'Number of Eligible Measures Installed', 'Total Cost of Works',
|
||||
# 'Annual Fuel Saving (MTP)', 'Work Type ID', 'Measure Category', 'Installer', 'Operative Name', 'Operative
|
||||
|
|
@ -159,7 +162,12 @@ def handler():
|
|||
total_floor_area = sum(
|
||||
[x["Floor Area (m2)"] for x in extracted_contents["elmhurst epr"]["Building Parts"]] +
|
||||
# Get the conservatory floor area
|
||||
extracted_contents["elmhurst epr"]["Conservatory"]["Floor Area (m2)"]
|
||||
[extracted_contents["elmhurst epr"]["Conservatory"]["Conservatory Floor Area"]]
|
||||
)
|
||||
|
||||
pre_heat_transfer = extracted_contents["elmhurst epr"]["Primary Energy Use Intensity (kWh/m2/yr)"]
|
||||
pre_heat_demand = (
|
||||
extracted_contents["elmhurst epr"]["Primary Energy Use Intensity (kWh/m2/yr)"] * total_floor_area
|
||||
)
|
||||
|
||||
to_insert = {
|
||||
|
|
@ -172,8 +180,9 @@ def handler():
|
|||
"Local Authority": None,
|
||||
'Property Age': extracted_contents["elmhurst epr"]["Property Age"],
|
||||
'SAP Rating Pre (from IMA)': extracted_contents["elmhurst epr"]["Current SAP Rating"],
|
||||
'Pre Heat Transfer': extracted_contents["elmhurst epr"][
|
||||
"Primary Energy Use Intensity (kWh/m2/yr)"] * total_floor_area,
|
||||
'Pre Heat Transfer': pre_heat_transfer,
|
||||
'Pre Total Floor Area': total_floor_area,
|
||||
'Pre Heat Demand': pre_heat_demand,
|
||||
}
|
||||
|
||||
output_row_data["Property Address"] = property_folder.split(")")[1].strip()
|
||||
|
|
|
|||
|
|
@ -387,6 +387,8 @@ class ElmhurstEprExtractor:
|
|||
reader = PyPDF2.PdfReader(file)
|
||||
text = "".join(page.extract_text() for page in reader.pages)
|
||||
|
||||
data["Assessor Name"] = re.search(r"Created by:\s*(.*?)\n", text).group(1).strip()
|
||||
|
||||
# Extracting individual components
|
||||
address_match = re.search(r"ENERGY REPORT\nDwelling Address\s*(.*?)\s*\nReference", text, re.DOTALL)
|
||||
if not address_match:
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue