mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
extracting dimensions from epr
This commit is contained in:
parent
48369ae150
commit
5af1836aa7
1 changed file with 78 additions and 4 deletions
|
|
@ -16,6 +16,7 @@ def extract_summary_report(pdf_path):
|
|||
- Fuel Bill
|
||||
- Address
|
||||
"""
|
||||
blah
|
||||
data = {
|
||||
"Address": None,
|
||||
"Postcode": None,
|
||||
|
|
@ -56,8 +57,8 @@ def extract_summary_report(pdf_path):
|
|||
data["Number of Storeys"] = int(storeys_match.group(1))
|
||||
|
||||
# Extract Carbon Emissions
|
||||
carbon_match = re.search(r"Emissions \(t/year\):\s*([\d.]+)\s*tonnes", text)
|
||||
data["Carbon Emissions (t/year)"] = float(carbon_match.group(1))
|
||||
# carbon_match = re.search(r"Emissions \(t/year\):\s*([\d.]+)\s*tonnes", text)
|
||||
# data["Carbon Emissions (t/year)"] = float(carbon_match.group(1))
|
||||
|
||||
# Extract Fuel Bill
|
||||
fuel_bill_match = re.search(r"Fuel Bill:\s*£(\d+)", text)
|
||||
|
|
@ -204,6 +205,69 @@ def extract_window_age_description(windows_text):
|
|||
}
|
||||
|
||||
|
||||
def extract_building_parts_epr(text):
    """
    Aggregates building dimensions from the text of an Energy Report (EPR).

    The text is scanned for "Construction details: Building part: ..." sections.
    Within each section, every "Lowest floor" / "First floor" row supplies four
    numbers: floor area, room height, perimeter and party wall length.

    Args:
        text: Report text to search (presumably the text already extracted from
            the EPR PDF by the caller - this function does not open any file).

    Returns:
        A dict with the keys:
            "Total Floor Area (m2)": sum of the floor area of every parsed row.
            "Total Ground Floor Area": sum of the floor area of the
                "Lowest floor" rows only.
            "RIR Floor Area": always 0; it is not extracted by this function.
            "Main Building Wall Area (m2)": sum of perimeter * room height over
                rows whose building part name contains "Main".
            "First Extension Wall Area (m2)": the same for parts whose name
                contains "1st Extension".
        Every sum is 0 when no matching rows are found.

    Raises:
        ValueError: if a matched numeric token is not a valid float (e.g. "1.2.3").
    """
    # Pattern to locate each "Building part" section: group 1 is the part name,
    # group 2 is everything up to the next section, "Data inputs" or end of text.
    building_part_pattern = re.compile(
        r"Construction details: Building part: (.*?)\nFloor Area \[m2\] Room Height \[m\] Perimeter \[m\] Party "
        r"Wall Length \[m\]\n(.*?)(?=Construction details|Data inputs|$)",
        re.DOTALL
    )

    # Pattern to match each floor's measurements. Compiled once, outside the
    # loop, because it does not depend on the building part being processed.
    # NOTE(review): only "Lowest floor" and "First floor" rows are recognised;
    # confirm whether reports can contain further floor labels.
    floor_pattern = re.compile(
        r"(Lowest floor|First floor)\s+([\d.]+)\s+([\d.]+)\s+([\d.]+)\s+([\d.]+)"
    )

    data = []

    # Extract each building part
    for match in building_part_pattern.finditer(text):
        # Clean up building part name to keep only the descriptor
        # (e.g., "Main" or "1st Extension")
        cleaned_part_name = re.sub(r" - built in.*", "", match.group(1).strip())

        # Extract floor details for each building part
        for floor_match in floor_pattern.finditer(match.group(2)):
            floor_level, *measurements = floor_match.groups()
            floor_area, room_height, perimeter, party_wall_length = map(float, measurements)

            data.append({
                "Building Part": cleaned_part_name,
                "Floor Level": floor_level,
                "Floor Area (m2)": floor_area,
                "Room Height (m)": room_height,
                "Perimeter (m)": perimeter,
                # Parsed for completeness; not used in the aggregates below.
                "Party Wall Length (m)": party_wall_length,
            })

    # We now extract out the aggregated data
    main_building = [part for part in data if "Main" in part["Building Part"]]
    first_extension = [part for part in data if "1st Extension" in part["Building Part"]]

    dimensions = {
        "Total Floor Area (m2)": sum(part["Floor Area (m2)"] for part in data),
        "Total Ground Floor Area": sum(
            part["Floor Area (m2)"] for part in data if "Lowest floor" in part["Floor Level"]
        ),
        "RIR Floor Area": 0,
        "Main Building Wall Area (m2)": sum(
            part["Perimeter (m)"] * part["Room Height (m)"] for part in main_building
        ),
        # sum() over an empty sequence is already 0, so no special case is needed
        # when the report has no first extension.
        "First Extension Wall Area (m2)": sum(
            part["Perimeter (m)"] * part["Room Height (m)"] for part in first_extension
        ),
    }

    return dimensions
|
||||
|
||||
|
||||
def extract_epr(pdf_path):
|
||||
"""
|
||||
Extracts specific data from an Energy Report (EPR) PDF file.
|
||||
|
|
@ -212,6 +276,7 @@ def extract_epr(pdf_path):
|
|||
"Address": None,
|
||||
"Postcode": None,
|
||||
"Current SAP Rating": None,
|
||||
"Primary Energy Use (kWh/yr)": None,
|
||||
"Primary Energy Use Intensity (kWh/m2/yr)": None,
|
||||
"Number of Storeys": None,
|
||||
"Fuel Bill": None,
|
||||
|
|
@ -232,6 +297,11 @@ def extract_epr(pdf_path):
|
|||
"Existing Secondary Heating % of Heat": None,
|
||||
"Secondary Heating Code": None,
|
||||
"Water Heating Code": None,
|
||||
'Total Floor Area (m2)': None,
|
||||
'Total Ground Floor Area': None,
|
||||
'RIR Floor Area': None,
|
||||
'Main Building Wall Area (m2)': None,
|
||||
'First Extension Wall Area (m2)': None
|
||||
}
|
||||
|
||||
with open(pdf_path, "rb") as file:
|
||||
|
|
@ -336,6 +406,9 @@ def extract_epr(pdf_path):
|
|||
window_data = extract_window_age_description(windows_text)
|
||||
data.update(window_data)
|
||||
|
||||
building_parts = extract_building_parts_epr(text)
|
||||
data.update(building_parts)
|
||||
|
||||
return data
|
||||
|
||||
|
||||
|
|
@ -465,7 +538,7 @@ def main():
|
|||
if summary_data:
|
||||
summary_data = {
|
||||
"survey_folder": survey_folder,
|
||||
**summary_data
|
||||
**summary_data,
|
||||
}
|
||||
extracted_data.append(summary_data)
|
||||
continue
|
||||
|
|
@ -474,11 +547,12 @@ def main():
|
|||
continue
|
||||
|
||||
# If no retrofit folder or it was empty, check files in survey_folder
|
||||
|
||||
summary_data = extract_retrofit_pdfs(data_folder_path=survey_folder_path)
|
||||
if summary_data:
|
||||
summary_data = {
|
||||
"survey_folder": survey_folder,
|
||||
**summary_data
|
||||
**summary_data,
|
||||
}
|
||||
extracted_data.append(summary_data)
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue