done with stonewater for now

This commit is contained in:
Khalim Conn-Kowlessar 2024-10-31 12:03:17 +00:00
parent 7e26fb4b86
commit a9ea89d2ae

View file

@ -76,10 +76,13 @@ def extract_summary_report(pdf_path):
'First Extension Wall Area (m2)': None,
"Number of Light Fittings": None,
"Number of LEL Fittings": None,
"Number of fittings needing LEL": None
"Number of fittings needing LEL": None,
"Main Roof Type": None,
"Main Roof Insulation": None,
"Main Roof Insulation Thickness": None,
}
with open(pdf_path, "rb") as file:
with (open(pdf_path, "rb") as file):
reader = PyPDF2.PdfReader(file)
text = ""
for page in reader.pages:
@ -205,6 +208,27 @@ def extract_summary_report(pdf_path):
data["Number of LEL Fittings"] = int(re.search(r"Total number of L.E.L. fittings\s*(\d+)", text).group(1))
data["Number of fittings needing LEL"] = data["Number of Light Fittings"] - data["Number of LEL Fittings"]
roof_section = re.search(r"8\.0 Roofs:\n(.*?)\n9\.0 Floors:", text, re.DOTALL)
roof_text = roof_section.group(1).strip()
roof_type_match = re.search(r"Type\s*([A-Za-z0-9\s]+)", roof_text)
data["Main Roof Type"] = roof_type_match.group(1).strip() if roof_type_match else None
# Check if "Insulation" exists between Type and Insulation Thickness
insulation_search = re.search(
r"Type\s+.*?\n(Insulation\s+(.*?)\n)?(Insulation Thickness\s+(.*?)\n)", roof_text, re.DOTALL
)
if insulation_search:
# Insulation match will be present if it exists, otherwise it will be None
insulation_match = insulation_search.group(2) # Optional group for Insulation
insulation_thickness_match = insulation_search.group(4) # Required group for Insulation Thickness
# Populate insulation fields
data["Main Roof Insulation"] = insulation_match.strip() if insulation_match else None
data["Main Roof Insulation Thickness"] = (
insulation_thickness_match.strip() if insulation_thickness_match else None
)
return data
@ -434,6 +458,49 @@ def extract_building_parts_summary(text):
return dimensions
import re
def extract_roof_details_epr(text):
    """
    Extracts roof type, insulation, and insulation thickness for each building part
    in the provided EPR PDF text.

    Each building part is introduced by a line starting with
    "Construction details: Building part: "; its details run until the next
    occurrence of "Conservatory" or "Construction details", or the end of the text.

    Args:
        text: Text extracted from the EPR PDF.

    Returns:
        A list with one dict per building part found, in document order, with keys
        "Building Part", "Roof Type", "Roof Insulation" and
        "Roof Insulation Thickness". A field is None when its label does not appear
        in that part, and an empty string when the label appears with nothing after
        it on the same line. An empty list is returned when no building part is found.
    """
    # Group 1 is the remainder of the header line (the part name); group 2 is
    # everything after it up to the next section marker.
    building_part_pattern = re.compile(
        r"Construction details: Building part: (.*?)\n(.*?)(?=Conservatory|Construction details|$)",
        re.DOTALL,
    )

    def _line_value(label, section):
        """Return the stripped text following "<label>:" on the same line, or None."""
        # Only non-newline whitespace may follow the colon. Using \s* here would
        # skip over the line break when the value is blank and wrongly capture
        # the next line (typically the next label) as this field's value.
        found = re.search(re.escape(label) + r":[^\S\n]*([^\n]*)", section)
        return found.group(1).strip() if found else None

    roof_data = []
    for match in building_part_pattern.finditer(text):
        part_name = match.group(1).strip()
        # Drop trailing build-date / room-in-roof annotations from the part name
        cleaned_part_name = re.sub(
            r" - built in.*|Room\(s\) in Roof area:.*", "", part_name
        ).strip()
        part_details = match.group(2)

        roof_data.append({
            "Building Part": cleaned_part_name,
            "Roof Type": _line_value("Roof Type", part_details),
            "Roof Insulation": _line_value("Roof Insulation", part_details),
            "Roof Insulation Thickness": _line_value("Roof Insulation Thickness", part_details),
        })

    return roof_data
def extract_epr(pdf_path):
"""
Extracts specific data from an Energy Report (EPR) PDF file.
@ -471,7 +538,10 @@ def extract_epr(pdf_path):
'First Extension Wall Area (m2)': None,
"Number of Light Fittings": None,
"Number of LEL Fittings": None,
"Number of fittings needing LEL": None
"Number of fittings needing LEL": None,
"Main Roof Type": None,
"Main Roof Insulation": None,
"Main Roof Insulation Thickness": None,
}
with open(pdf_path, "rb") as file:
@ -590,6 +660,13 @@ def extract_epr(pdf_path):
data["Number of LEL Fittings"] = int(lel_fittings_match.group(1))
data["Number of fittings needing LEL"] = data["Number of Light Fittings"] - data["Number of LEL Fittings"]
roof_details = extract_roof_details_epr(text)
# Get from the main building
main_roof_details = [r for r in roof_details if "Main" in r["Building Part"]]
data["Main Roof Type"] = main_roof_details[0]["Roof Type"]
data["Main Roof Insulation"] = main_roof_details[0]["Roof Insulation"]
data["Main Roof Insulation Thickness"] = main_roof_details[0]["Roof Insulation Thickness"]
return data
@ -1077,13 +1154,11 @@ def main():
# Save cost sheet - ideally this will be used as a secondary sheet for Stonewater
cost_sheet.to_excel(CUSTOMER_FOLDER_PATH + "/Stonewater - cost sheet.xlsx", index=False)
stonewater_data["Room in Roof"].value_counts()
# stonewater_data[~pd.isnull(stonewater_data["Room in Roof"])]["survey_folder"].values
create_proposed_wave_3_bid(
costed_packages_filepath=os.path.join(
CUSTOMER_FOLDER_PATH, "Stonewater - Costed Retrofit Packages 20241030 (WIP).xlsx"
CUSTOMER_FOLDER_PATH, "Stonewater - Costed Retrofit Packages 20241030 (WIP) V2.xlsx"
),
archetypes_sheet_filepath=os.path.join(
CUSTOMER_FOLDER_PATH, "Stonewater SHDF_3_0_Board Triage 22.05.24 - Archetyped V3.1.xlsx"
@ -1098,11 +1173,30 @@ def create_proposed_wave_3_bid(costed_packages_filepath, archetypes_sheet_filepa
archetypes_to_cost = costed_packages[
[
"Name", "Address ID", "Archetype ID", "Current SAP Rating", "Current EPC Band", "Modelled SAP Band",
"Modelled SAP Rating", 'Total Cost of Measures', 'Contingency Cost',
'Total Cost of Measures inc Contingency'
"Modelled SAP Rating", "Package Ref", 'Total Cost of Measures', 'Contingency Cost',
'Total Cost of Measures inc Contingency', 'Main Roof Type', 'Main Roof Insulation',
'Main Roof Insulation Thickness', 'Existing Primary Heating System',
'Existing Primary Heating PCDF Reference'
]
].copy()
# Combine 'Main Roof Type', 'Main Roof Insulation', 'Main Roof Insulation Thickness', separating by colons!
archetypes_to_cost['Surveyed Main Roof'] = (
archetypes_to_cost['Main Roof Type'] + ': ' + archetypes_to_cost['Main Roof Insulation'] + ': ' +
archetypes_to_cost['Main Roof Insulation Thickness'].astype(str)
)
# Combine the heating systems, separating by colons!
archetypes_to_cost['Surveyed Main Heating'] = (
archetypes_to_cost['Existing Primary Heating System'] + ': code - ' + archetypes_to_cost[
'Existing Primary Heating PCDF Reference'].astype(str)
)
archetypes_to_cost = archetypes_to_cost.drop(
columns=['Main Roof Type', 'Main Roof Insulation', 'Main Roof Insulation Thickness',
'Existing Primary Heating System',
'Existing Primary Heating PCDF Reference'])
# We take properties that are EPC D and below (61% of units)
archetypes_to_cost = archetypes_to_cost[archetypes_to_cost["Current EPC Band"].isin(["D", "E", "F", "G"])]
@ -1139,7 +1233,19 @@ def create_proposed_wave_3_bid(costed_packages_filepath, archetypes_sheet_filepa
match_classification = []
for _, home in tqdm(proposed_sample.iterrows(), total=len(proposed_sample)):
surveyed = archetypes_to_cost[archetypes_to_cost["Archetype ID"] == home["Archetype ID"]]
surveyed = archetypes_to_cost[archetypes_to_cost["Archetype ID"] == home["Archetype ID"]].copy()
surveyed["Package Ref"] = surveyed["Package Ref"].astype(str)
package = " or ".join(sorted([x for x in surveyed["Package Ref"].unique() if x.strip()]))
package = package.replace("\n", "")
surveyed_roofs = " or ".join(sorted([x for x in surveyed["Surveyed Main Roof"].unique() if x.strip()]))
surveyed_roofs = surveyed_roofs.replace("\n", "")
surveyed_heating = " or ".join(sorted([x for x in surveyed["Surveyed Main Heating"].unique() if x.strip()]))
surveyed_heating = surveyed_heating.replace("\n", "")
# We now check if we have a perfect match
surveyed = surveyed[
(surveyed["Property Type"] == home["Property Type"]) &
@ -1149,17 +1255,33 @@ def create_proposed_wave_3_bid(costed_packages_filepath, archetypes_sheet_filepa
]
if surveyed.empty:
if package == "2B2A":
raise Exception("Fix me")
match_classification.append(
{
"Address ID": home["Address ID"],
"Match to Surveyed": "Approximate"
"Match to Surveyed": "Approximate",
"Proposed Package Ref": package,
"Surveyed Archetype Roofs": surveyed_roofs,
"Surveyed Archetype Heating": surveyed_heating
}
)
continue
# Re-do
package = " or ".join(sorted([x for x in surveyed["Package Ref"].unique() if x.strip()]))
package = package.replace("\n", "")
surveyed_roofs = " or ".join(sorted([x for x in surveyed["Surveyed Main Roof"].unique() if x.strip()]))
surveyed_roofs = surveyed_roofs.replace("\n", "")
surveyed_heating = " or ".join(sorted([x for x in surveyed["Surveyed Main Heating"].unique() if x.strip()]))
surveyed_heating = surveyed_heating.replace("\n", "")
match_classification.append(
{
"Address ID": home["Address ID"],
"Match to Surveyed": "Exact"
"Match to Surveyed": "Exact",
"Proposed Package Ref": package,
"Surveyed Archetype Roofs": surveyed_roofs,
"Surveyed Archetype Heating": surveyed_heating
}
)