mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-30 13:10:47 +00:00
extracting heating systems from summary report
This commit is contained in:
parent
b7f402ba9d
commit
753bda6cb0
1 changed files with 84 additions and 2 deletions
|
|
@ -19,10 +19,26 @@ def extract_summary_report(pdf_path):
|
||||||
data = {
|
data = {
|
||||||
"Address": None,
|
"Address": None,
|
||||||
"Current SAP Rating": None,
|
"Current SAP Rating": None,
|
||||||
"Number of Storeys": None,
|
"Space Heating": None,
|
||||||
|
"Water Heating": None,
|
||||||
"Fuel Bill": None,
|
"Fuel Bill": None,
|
||||||
"Window Age Description": None,
|
"Window Age Description": None,
|
||||||
"Window Age Description Proportion (%)": None,
|
"Window Age Description Proportion (%)": None,
|
||||||
|
"Secondary Window Age Description": None,
|
||||||
|
"Secondary Window Age Description Proportion (%)": None,
|
||||||
|
"Number of Windows": None,
|
||||||
|
"Total Number of Doors": None,
|
||||||
|
"Number of Insulated Doors": None,
|
||||||
|
"Existing Primary Heating System": None,
|
||||||
|
"Existing Primary Heating PCDF Reference": None,
|
||||||
|
"Existing Primary Heating Controls": None,
|
||||||
|
"Existing Primary Heating % of Heat": None,
|
||||||
|
"Existing Secondary Heating System": None,
|
||||||
|
"Existing Secondary Heating PCDF Reference": None,
|
||||||
|
"Existing Secondary Heating Controls": None,
|
||||||
|
"Existing Secondary Heating % of Heat": None,
|
||||||
|
"Secondary Heating Code": None,
|
||||||
|
"Water Heating Code": None,
|
||||||
}
|
}
|
||||||
|
|
||||||
with open(pdf_path, "rb") as file:
|
with open(pdf_path, "rb") as file:
|
||||||
|
|
@ -39,6 +55,10 @@ def extract_summary_report(pdf_path):
|
||||||
storeys_match = re.search(r"Number of Storeys:\s*(\d+)", text)
|
storeys_match = re.search(r"Number of Storeys:\s*(\d+)", text)
|
||||||
data["Number of Storeys"] = int(storeys_match.group(1))
|
data["Number of Storeys"] = int(storeys_match.group(1))
|
||||||
|
|
||||||
|
# Extract Carbon Emissions
|
||||||
|
carbon_match = re.search(r"Emissions \(t/year\):\s*([\d.]+)\s*tonnes", text)
|
||||||
|
data["Carbon Emissions (t/year)"] = float(carbon_match.group(1))
|
||||||
|
|
||||||
# Extract Fuel Bill
|
# Extract Fuel Bill
|
||||||
fuel_bill_match = re.search(r"Fuel Bill:\s*£(\d+)", text)
|
fuel_bill_match = re.search(r"Fuel Bill:\s*£(\d+)", text)
|
||||||
data["Fuel Bill"] = f"£{fuel_bill_match.group(1)}"
|
data["Fuel Bill"] = f"£{fuel_bill_match.group(1)}"
|
||||||
|
|
@ -66,12 +86,58 @@ def extract_summary_report(pdf_path):
|
||||||
|
|
||||||
# Join non-empty parts with a comma
|
# Join non-empty parts with a comma
|
||||||
data["Address"] = ", ".join([part for part in address_parts if part])
|
data["Address"] = ", ".join([part for part in address_parts if part])
|
||||||
|
data["Postcode"] = postcode.group(1).strip()
|
||||||
|
|
||||||
windows_section = re.search(r"Windows\s*(.*?)\s*Draught Proofing", text, re.DOTALL)
|
windows_section = re.search(r"Windows\s*(.*?)\s*Draught Proofing", text, re.DOTALL)
|
||||||
windows_text = windows_section.group(1)
|
windows_text = windows_section.group(1)
|
||||||
window_data = extract_window_age_description(windows_text)
|
window_data = extract_window_age_description(windows_text)
|
||||||
data.update(window_data)
|
data.update(window_data)
|
||||||
|
|
||||||
|
# Extract Total Number of Doors
|
||||||
|
total_doors_match = re.search(r"Total Number of Doors\s*(\d+)", text)
|
||||||
|
data["Total Number of Doors"] = int(total_doors_match.group(1))
|
||||||
|
|
||||||
|
# Extract Number of Insulated Doors
|
||||||
|
insulated_doors_match = re.search(r"Number of Insulated Doors\s*(\d+)", text)
|
||||||
|
data["Number of Insulated Doors"] = int(insulated_doors_match.group(1))
|
||||||
|
|
||||||
|
# Extract heating system
|
||||||
|
# Extract Primary Heating Data
|
||||||
|
# Extract Primary Heating Section
|
||||||
|
primary_heating_section = re.search(r"Main\s*Heating1\s*(.*?)\s*Main\s*Heating2", text, re.DOTALL)
|
||||||
|
primary_text = primary_heating_section.group(1)
|
||||||
|
|
||||||
|
data["Existing Primary Heating System"] = re.search(r"Main Heating Code\s*(.*?)\n", primary_text).group(
|
||||||
|
1).strip()
|
||||||
|
data["Existing Primary Heating PCDF Reference"] = re.search(r"PCDF boiler Reference\s*(\d+)",
|
||||||
|
primary_text).group(1)
|
||||||
|
data["Existing Primary Heating Controls"] = re.search(r"Main Heating Controls\s*(.*?)\n", primary_text).group(
|
||||||
|
1).strip()
|
||||||
|
data["Existing Primary Heating % of Heat"] = int(
|
||||||
|
re.search(r"Percentage of Heat\s*(\d+)\s*%", primary_text).group(1)
|
||||||
|
)
|
||||||
|
|
||||||
|
# Extract Secondary Heating Section
|
||||||
|
secondary_heating_section = re.search(r"Main\s*Heating2\s*(.*?)\s*Water\s*Heating", text, re.DOTALL)
|
||||||
|
secondary_text = secondary_heating_section.group(1)
|
||||||
|
|
||||||
|
data["Existing Secondary Heating System"] = re.search(r"Main Heating Code\s*(.*?)\n", secondary_text).group(
|
||||||
|
1).strip()
|
||||||
|
data["Existing Secondary Heating PCDF Reference"] = re.search(r"PCDF boiler Reference\s*(\d+)",
|
||||||
|
secondary_text).group(1)
|
||||||
|
data["Existing Secondary Heating Controls"] = re.search(r"Main Heating Controls\s*(.*?)\n",
|
||||||
|
secondary_text).group(1).strip()
|
||||||
|
data["Existing Secondary Heating % of Heat"] = int(
|
||||||
|
re.search(r"Percentage of Heat\s*(\d+)\s*%", secondary_text).group(1)
|
||||||
|
)
|
||||||
|
|
||||||
|
# Extract Secondary Heating and Water Heating Codes
|
||||||
|
secondary_heating_code_match = re.search(r"Secondary Heating Code\s*(.*?)\n", text)
|
||||||
|
water_heating_code_match = re.search(r"Water Heating Code\s*(.*?)\n", text)
|
||||||
|
|
||||||
|
data["Secondary Heating Code"] = secondary_heating_code_match.group(1).strip()
|
||||||
|
data["Water Heating Code"] = water_heating_code_match.group(1).strip()
|
||||||
|
|
||||||
return data
|
return data
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -111,9 +177,20 @@ def extract_window_age_description(windows_text):
|
||||||
most_common_description, window_count = description_counts.most_common(1)[0]
|
most_common_description, window_count = description_counts.most_common(1)[0]
|
||||||
window_proportion = window_count / sum(description_counts.values()) * 100
|
window_proportion = window_count / sum(description_counts.values()) * 100
|
||||||
|
|
||||||
|
# Get the second most common and the proportion
|
||||||
|
if window_proportion == 100:
|
||||||
|
second_most_common_description = None
|
||||||
|
second_most_common_proportion = 0
|
||||||
|
else:
|
||||||
|
second_most_common_description, second_window_count = description_counts.most_common(2)[1]
|
||||||
|
second_most_common_proportion = second_window_count / sum(description_counts.values()) * 100
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"Window Age Description": most_common_description,
|
"Window Age Description": most_common_description,
|
||||||
"Window Age Description Proportion (%)": window_proportion
|
"Window Age Description Proportion (%)": window_proportion,
|
||||||
|
"Secondary Window Age Description": second_most_common_description,
|
||||||
|
"Secondary Window Age Description Proportion (%)": second_most_common_proportion,
|
||||||
|
"Number of Windows": sum(description_counts.values())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -129,6 +206,11 @@ def extract_epr(pdf_path):
|
||||||
"Fuel Bill": None,
|
"Fuel Bill": None,
|
||||||
"Window Age Description": None,
|
"Window Age Description": None,
|
||||||
"Window Age Description Proportion (%)": None,
|
"Window Age Description Proportion (%)": None,
|
||||||
|
"Secondary Window Age Description": None,
|
||||||
|
"Secondary Window Age Description Proportion (%)": None,
|
||||||
|
"Number of Windows": None,
|
||||||
|
"Total Number of Doors": None,
|
||||||
|
"Number of Insulated Doors": None,
|
||||||
}
|
}
|
||||||
|
|
||||||
with open(pdf_path, "rb") as file:
|
with open(pdf_path, "rb") as file:
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue