mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
implementing summary report extraction
This commit is contained in:
parent
8b875cbccf
commit
5a2ffe646c
2 changed files with 71 additions and 31 deletions
|
|
@@ -170,8 +170,8 @@ def handler():
|
|||
|
||||
epr_to_insert = {
|
||||
"Postcode": extracted_contents["elmhurst epr"]["Postcode"],
|
||||
"City/County": None,
|
||||
"District/Town": None,
|
||||
"City/County": extracted_contents["elmhurst epr"]["County"],
|
||||
"District/Town": extracted_contents["elmhurst epr"]["Town"],
|
||||
"Local Authority": None,
|
||||
'SAP Rating Pre (from IMA)': extracted_contents["elmhurst epr"]["Current SAP Rating"],
|
||||
'Pre Heat Transfer': pre_heat_transfer,
|
||||
|
|
@@ -207,6 +207,35 @@ def handler():
|
|||
cr_to_insert
|
||||
)
|
||||
|
||||
if extracted_contents.get("elmhurst summary report"):
|
||||
total_floor_area = sum(
|
||||
[x["Floor Area (m2)"] for x in extracted_contents["elmhurst summary report"]["Building Parts"]] +
|
||||
# Get the conservatory floor area
|
||||
[extracted_contents["elmhurst summary report"]["Conservatory"]["Conservatory Floor Area"]]
|
||||
)
|
||||
|
||||
pre_heat_transfer = (
|
||||
extracted_contents["elmhurst summary report"]["Primary Energy Use Intensity (kWh/m2/yr)"]
|
||||
)
|
||||
pre_heat_demand = None # Don't have this
|
||||
|
||||
summary_to_insert = {
|
||||
"Postcode": extracted_contents["elmhurst summary report"]["Postcode"],
|
||||
"City/County": extracted_contents["elmhurst summary report"]["County"],
|
||||
"District/Town": extracted_contents["elmhurst summary report"]["Town"],
|
||||
'SAP Rating Pre (from IMA)': extracted_contents["elmhurst summary report"]["Current SAP Rating"],
|
||||
'Pre Heat Transfer': pre_heat_transfer,
|
||||
'Pre Total Floor Area': total_floor_area,
|
||||
'Pre Heat Demand': pre_heat_demand,
|
||||
"R. Assessor - Name": extracted_contents["elmhurst summary report"]["Assessor Name"],
|
||||
"Retrofit Assessment Date": extracted_contents["elmhurst summary report"]["Assessment Date"],
|
||||
}
|
||||
|
||||
update_dictionary_with_check(
|
||||
output_row_data,
|
||||
summary_to_insert
|
||||
)
|
||||
|
||||
extracted.append(output_row_data)
|
||||
|
||||
extracted_df = pd.DataFrame(extracted)
|
||||
|
|
|
|||
|
|
@@ -398,6 +398,15 @@ class ElmhurstEprExtractor:
|
|||
data["Address"] = address_match.group(1).strip()
|
||||
data["Postcode"] = data["Address"].split(",")[-1].strip()
|
||||
|
||||
# TODO:
|
||||
data["Region"] = None
|
||||
data["House Name"] = None
|
||||
data["House No"] = None
|
||||
data["Street"] = None
|
||||
data["Locality"] = None
|
||||
data["Town"] = None
|
||||
data["County"] = None
|
||||
|
||||
sap_match = re.search(r"GG \(1-20\)\s*(\d{1,2})\s*(\d{1,2})", text)
|
||||
if not sap_match:
|
||||
logger.error("Failed to extract SAP rating.")
|
||||
|
|
@@ -657,26 +666,7 @@ class ElmhurstSummaryReportExtractor:
|
|||
}
|
||||
)
|
||||
|
||||
# Calculate aggregated dimensions
|
||||
main_property = [part for part in data if "Main Property" in part["Building Part"]]
|
||||
first_extensions = [part for part in data if "1st Extension" in part["Building Part"]]
|
||||
dimensions = {
|
||||
"Total Floor Area (m2)": sum([part["Floor Area (m2)"] for part in data]),
|
||||
"Total Ground Floor Area (m2)": sum(
|
||||
[part["Floor Area (m2)"] for part in data if "Lowest Floor" in part["Floor Level"]]
|
||||
),
|
||||
"RIR Floor Area": sum(
|
||||
[part["Floor Area (m2)"] for part in data if "Room in Roof" in part["Floor Level"]]
|
||||
),
|
||||
"Main Building Wall Area (m2)": sum([x["Perimeter (m)"] * x["Room Height (m)"] for x in main_property if
|
||||
x["Perimeter (m)"] and x["Room Height (m)"]]),
|
||||
"First Extension Wall Area (m2)": sum(
|
||||
[x["Perimeter (m)"] * x["Room Height (m)"] for x in first_extensions if
|
||||
x["Perimeter (m)"] and x["Room Height (m)"]]
|
||||
),
|
||||
}
|
||||
|
||||
return dimensions
|
||||
return data
|
||||
|
||||
@staticmethod
|
||||
def extract_roof_details(text):
|
||||
|
|
@@ -869,7 +859,6 @@ class ElmhurstSummaryReportExtractor:
|
|||
"""
|
||||
|
||||
data = {}
|
||||
|
||||
with (open(self.file_path, "rb") as file):
|
||||
reader = PyPDF2.PdfReader(file)
|
||||
text = ""
|
||||
|
|
@@ -885,29 +874,51 @@ class ElmhurstSummaryReportExtractor:
|
|||
|
||||
# Address and postcode
|
||||
postcode = re.search(r"Postcode:\s*(.*?)\nRegion:", text)
|
||||
postcode = postcode.group(1).strip() if postcode else ""
|
||||
|
||||
region = re.search(r"Region:\s*(.*?)\nHouse Name:", text)
|
||||
region = region.group(1).strip() if region else ""
|
||||
|
||||
house_name = re.search(r"House Name:\s*(.*?)\nHouse No:", text)
|
||||
house_name = house_name.group(1).strip() if house_name else ""
|
||||
|
||||
house_no = re.search(r"House No:\s*(.*?)\nStreet:", text)
|
||||
house_no = house_no.group(1).strip() if house_no else ""
|
||||
|
||||
street = re.search(r"Street:\s*(.*?)\nLocality:", text)
|
||||
street = street.group(1).strip() if street else ""
|
||||
|
||||
locality = re.search(r"Locality:\s*(.*?)\nTown:", text)
|
||||
locality = locality.group(1).strip() if locality else ""
|
||||
|
||||
town = re.search(r"Town:\s*(.*?)\nCounty:", text)
|
||||
town = town.group(1).strip() if town else ""
|
||||
|
||||
county = re.search(r"County:\s*(.*?)\nProperty Tenure:", text)
|
||||
county = county.group(1).strip() if county else ""
|
||||
|
||||
# Clean extracted values and remove any prefixes
|
||||
address_parts = [
|
||||
house_no.group(1).strip() if house_no else "",
|
||||
house_name.group(1).strip() if house_name else "",
|
||||
street.group(1).strip() if street else "",
|
||||
locality.group(1).strip() if locality else "",
|
||||
town.group(1).strip() if town else "",
|
||||
county.group(1).strip() if county else "",
|
||||
region.group(1).strip() if region else "",
|
||||
postcode.group(1).strip() if postcode else ""
|
||||
house_no,
|
||||
house_name,
|
||||
street,
|
||||
locality,
|
||||
town,
|
||||
county,
|
||||
region,
|
||||
postcode
|
||||
]
|
||||
|
||||
# Join non-empty parts with a comma
|
||||
data["Address"] = ", ".join([part for part in address_parts if part])
|
||||
data["Postcode"] = postcode.group(1).strip()
|
||||
data["Region"] = region
|
||||
data["House Name"] = house_name
|
||||
data["House No"] = house_no
|
||||
data["Street"] = street
|
||||
data["Locality"] = locality
|
||||
data["Town"] = town
|
||||
data["County"] = county
|
||||
|
||||
# Extract Current SAP rating
|
||||
sap_match = re.search(r"Current SAP rating:\s*([A-Z] \d+)", text)
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue