mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
done with stonewater for now
This commit is contained in:
parent
7e26fb4b86
commit
a9ea89d2ae
1 changed file with 133 additions and 11 deletions
|
|
@ -76,10 +76,13 @@ def extract_summary_report(pdf_path):
|
|||
'First Extension Wall Area (m2)': None,
|
||||
"Number of Light Fittings": None,
|
||||
"Number of LEL Fittings": None,
|
||||
"Number of fittings needing LEL": None
|
||||
"Number of fittings needing LEL": None,
|
||||
"Main Roof Type": None,
|
||||
"Main Roof Insulation": None,
|
||||
"Main Roof Insulation Thickness": None,
|
||||
}
|
||||
|
||||
with open(pdf_path, "rb") as file:
|
||||
with (open(pdf_path, "rb") as file):
|
||||
reader = PyPDF2.PdfReader(file)
|
||||
text = ""
|
||||
for page in reader.pages:
|
||||
|
|
@ -205,6 +208,27 @@ def extract_summary_report(pdf_path):
|
|||
data["Number of LEL Fittings"] = int(re.search(r"Total number of L.E.L. fittings\s*(\d+)", text).group(1))
|
||||
data["Number of fittings needing LEL"] = data["Number of Light Fittings"] - data["Number of LEL Fittings"]
|
||||
|
||||
roof_section = re.search(r"8\.0 Roofs:\n(.*?)\n9\.0 Floors:", text, re.DOTALL)
|
||||
roof_text = roof_section.group(1).strip()
|
||||
roof_type_match = re.search(r"Type\s*([A-Za-z0-9\s]+)", roof_text)
|
||||
data["Main Roof Type"] = roof_type_match.group(1).strip() if roof_type_match else None
|
||||
|
||||
# Check if "Insulation" exists between Type and Insulation Thickness
|
||||
insulation_search = re.search(
|
||||
r"Type\s+.*?\n(Insulation\s+(.*?)\n)?(Insulation Thickness\s+(.*?)\n)", roof_text, re.DOTALL
|
||||
)
|
||||
|
||||
if insulation_search:
|
||||
# Insulation match will be present if it exists, otherwise it will be None
|
||||
insulation_match = insulation_search.group(2) # Optional group for Insulation
|
||||
insulation_thickness_match = insulation_search.group(4) # Required group for Insulation Thickness
|
||||
|
||||
# Populate insulation fields
|
||||
data["Main Roof Insulation"] = insulation_match.strip() if insulation_match else None
|
||||
data["Main Roof Insulation Thickness"] = (
|
||||
insulation_thickness_match.strip() if insulation_thickness_match else None
|
||||
)
|
||||
|
||||
return data
|
||||
|
||||
|
||||
|
|
@ -434,6 +458,49 @@ def extract_building_parts_summary(text):
|
|||
return dimensions
|
||||
|
||||
|
||||
import re
|
||||
|
||||
|
||||
def extract_roof_details_epr(text):
    """
    Extracts roof type, insulation, and insulation thickness for each building part
    in the provided EPR PDF text.

    A building part section starts at a "Construction details: Building part: <name>"
    line and runs until the next "Construction details" heading, the word
    "Conservatory", or the end of the text, whichever comes first.

    Args:
        text: Text extracted from the EPR PDF. Empty or None yields an empty list.

    Returns:
        A list with one dict per building part, in document order, with the keys
        "Building Part", "Roof Type", "Roof Insulation" and
        "Roof Insulation Thickness". A field is None when its label is absent from
        the section or when nothing follows the label on the same line.
    """
    if not text:
        return []

    # Locate each building part section: group 1 is the heading line, group 2 the details.
    building_part_pattern = re.compile(
        r"Construction details: Building part: (.*?)\n(.*?)(?=Conservatory|Construction details|$)",
        re.DOTALL
    )

    # Only spaces/tabs may separate a label from its value. Using \s here would
    # also swallow the line break after an empty value and capture the *next*
    # line (e.g. "Roof Insulation Thickness: 100 mm") as this field's value.
    field_patterns = (
        ("Roof Type", re.compile(r"Roof Type:[ \t]*([^\n]*)")),
        ("Roof Insulation", re.compile(r"Roof Insulation:[ \t]*([^\n]*)")),
        ("Roof Insulation Thickness", re.compile(r"Roof Insulation Thickness:[ \t]*([^\n]*)")),
    )

    roof_data = []
    for section in building_part_pattern.finditer(text):
        # Drop the build-date / room-in-roof suffixes so only the part name remains.
        part_name = re.sub(
            r" - built in.*|Room\(s\) in Roof area:.*", "", section.group(1).strip()
        ).strip()
        part_details = section.group(2)

        entry = {"Building Part": part_name}
        for key, pattern in field_patterns:
            found = pattern.search(part_details)
            # A label with a blank value is reported as None, same as a missing label.
            entry[key] = (found.group(1).strip() or None) if found else None
        roof_data.append(entry)

    return roof_data
|
||||
|
||||
|
||||
def extract_epr(pdf_path):
|
||||
"""
|
||||
Extracts specific data from an Energy Report (EPR) PDF file.
|
||||
|
|
@ -471,7 +538,10 @@ def extract_epr(pdf_path):
|
|||
'First Extension Wall Area (m2)': None,
|
||||
"Number of Light Fittings": None,
|
||||
"Number of LEL Fittings": None,
|
||||
"Number of fittings needing LEL": None
|
||||
"Number of fittings needing LEL": None,
|
||||
"Main Roof Type": None,
|
||||
"Main Roof Insulation": None,
|
||||
"Main Roof Insulation Thickness": None,
|
||||
}
|
||||
|
||||
with open(pdf_path, "rb") as file:
|
||||
|
|
@ -590,6 +660,13 @@ def extract_epr(pdf_path):
|
|||
data["Number of LEL Fittings"] = int(lel_fittings_match.group(1))
|
||||
data["Number of fittings needing LEL"] = data["Number of Light Fittings"] - data["Number of LEL Fittings"]
|
||||
|
||||
roof_details = extract_roof_details_epr(text)
|
||||
# Get from the main building
|
||||
main_roof_details = [r for r in roof_details if "Main" in r["Building Part"]]
|
||||
data["Main Roof Type"] = main_roof_details[0]["Roof Type"]
|
||||
data["Main Roof Insulation"] = main_roof_details[0]["Roof Insulation"]
|
||||
data["Main Roof Insulation Thickness"] = main_roof_details[0]["Roof Insulation Thickness"]
|
||||
|
||||
return data
|
||||
|
||||
|
||||
|
|
@ -1077,13 +1154,11 @@ def main():
|
|||
# Save cost sheet - ideally this will be used as a secondary sheet for Stonewater
|
||||
cost_sheet.to_excel(CUSTOMER_FOLDER_PATH + "/Stonewater - cost sheet.xlsx", index=False)
|
||||
|
||||
stonewater_data["Room in Roof"].value_counts()
|
||||
|
||||
# stonewater_data[~pd.isnull(stonewater_data["Room in Roof"])]["survey_folder"].values
|
||||
|
||||
create_proposed_wave_3_bid(
|
||||
costed_packages_filepath=os.path.join(
|
||||
CUSTOMER_FOLDER_PATH, "Stonewater - Costed Retrofit Packages 20241030 (WIP).xlsx"
|
||||
CUSTOMER_FOLDER_PATH, "Stonewater - Costed Retrofit Packages 20241030 (WIP) V2.xlsx"
|
||||
),
|
||||
archetypes_sheet_filepath=os.path.join(
|
||||
CUSTOMER_FOLDER_PATH, "Stonewater SHDF_3_0_Board Triage 22.05.24 - Archetyped V3.1.xlsx"
|
||||
|
|
@ -1098,11 +1173,30 @@ def create_proposed_wave_3_bid(costed_packages_filepath, archetypes_sheet_filepa
|
|||
archetypes_to_cost = costed_packages[
|
||||
[
|
||||
"Name", "Address ID", "Archetype ID", "Current SAP Rating", "Current EPC Band", "Modelled SAP Band",
|
||||
"Modelled SAP Rating", 'Total Cost of Measures', 'Contingency Cost',
|
||||
'Total Cost of Measures inc Contingency'
|
||||
"Modelled SAP Rating", "Package Ref", 'Total Cost of Measures', 'Contingency Cost',
|
||||
'Total Cost of Measures inc Contingency', 'Main Roof Type', 'Main Roof Insulation',
|
||||
'Main Roof Insulation Thickness', 'Existing Primary Heating System',
|
||||
'Existing Primary Heating PCDF Reference'
|
||||
]
|
||||
].copy()
|
||||
|
||||
# Combine 'Main Roof Type', 'Main Roof Insulation', 'Main Roof Insulation Thickness', separating by colons!
|
||||
archetypes_to_cost['Surveyed Main Roof'] = (
|
||||
archetypes_to_cost['Main Roof Type'] + ': ' + archetypes_to_cost['Main Roof Insulation'] + ': ' +
|
||||
archetypes_to_cost['Main Roof Insulation Thickness'].astype(str)
|
||||
)
|
||||
|
||||
# Combine the heating systems, separating by colons!
|
||||
archetypes_to_cost['Surveyed Main Heating'] = (
|
||||
archetypes_to_cost['Existing Primary Heating System'] + ': code - ' + archetypes_to_cost[
|
||||
'Existing Primary Heating PCDF Reference'].astype(str)
|
||||
)
|
||||
|
||||
archetypes_to_cost = archetypes_to_cost.drop(
|
||||
columns=['Main Roof Type', 'Main Roof Insulation', 'Main Roof Insulation Thickness',
|
||||
'Existing Primary Heating System',
|
||||
'Existing Primary Heating PCDF Reference'])
|
||||
|
||||
# We take properties that are EPC D and below (61% of units)
|
||||
archetypes_to_cost = archetypes_to_cost[archetypes_to_cost["Current EPC Band"].isin(["D", "E", "F", "G"])]
|
||||
|
||||
|
|
@ -1139,7 +1233,19 @@ def create_proposed_wave_3_bid(costed_packages_filepath, archetypes_sheet_filepa
|
|||
|
||||
match_classification = []
|
||||
for _, home in tqdm(proposed_sample.iterrows(), total=len(proposed_sample)):
|
||||
surveyed = archetypes_to_cost[archetypes_to_cost["Archetype ID"] == home["Archetype ID"]]
|
||||
|
||||
surveyed = archetypes_to_cost[archetypes_to_cost["Archetype ID"] == home["Archetype ID"]].copy()
|
||||
surveyed["Package Ref"] = surveyed["Package Ref"].astype(str)
|
||||
|
||||
package = " or ".join(sorted([x for x in surveyed["Package Ref"].unique() if x.strip()]))
|
||||
package = package.replace("\n", "")
|
||||
|
||||
surveyed_roofs = " or ".join(sorted([x for x in surveyed["Surveyed Main Roof"].unique() if x.strip()]))
|
||||
surveyed_roofs = surveyed_roofs.replace("\n", "")
|
||||
|
||||
surveyed_heating = " or ".join(sorted([x for x in surveyed["Surveyed Main Heating"].unique() if x.strip()]))
|
||||
surveyed_heating = surveyed_heating.replace("\n", "")
|
||||
|
||||
# We now check if we have a perfect match
|
||||
surveyed = surveyed[
|
||||
(surveyed["Property Type"] == home["Property Type"]) &
|
||||
|
|
@ -1149,17 +1255,33 @@ def create_proposed_wave_3_bid(costed_packages_filepath, archetypes_sheet_filepa
|
|||
]
|
||||
|
||||
if surveyed.empty:
|
||||
if package == "2B2A":
|
||||
raise Exception("Fix me")
|
||||
match_classification.append(
|
||||
{
|
||||
"Address ID": home["Address ID"],
|
||||
"Match to Surveyed": "Approximate"
|
||||
"Match to Surveyed": "Approximate",
|
||||
"Proposed Package Ref": package,
|
||||
"Surveyed Archetype Roofs": surveyed_roofs,
|
||||
"Surveyed Archetype Heating": surveyed_heating
|
||||
}
|
||||
)
|
||||
continue
|
||||
# Re-do
|
||||
package = " or ".join(sorted([x for x in surveyed["Package Ref"].unique() if x.strip()]))
|
||||
package = package.replace("\n", "")
|
||||
surveyed_roofs = " or ".join(sorted([x for x in surveyed["Surveyed Main Roof"].unique() if x.strip()]))
|
||||
surveyed_roofs = surveyed_roofs.replace("\n", "")
|
||||
surveyed_heating = " or ".join(sorted([x for x in surveyed["Surveyed Main Heating"].unique() if x.strip()]))
|
||||
surveyed_heating = surveyed_heating.replace("\n", "")
|
||||
|
||||
match_classification.append(
|
||||
{
|
||||
"Address ID": home["Address ID"],
|
||||
"Match to Surveyed": "Exact"
|
||||
"Match to Surveyed": "Exact",
|
||||
"Proposed Package Ref": package,
|
||||
"Surveyed Archetype Roofs": surveyed_roofs,
|
||||
"Surveyed Archetype Heating": surveyed_heating
|
||||
}
|
||||
)
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue