updating stonewater to extract wall

This commit is contained in:
Khalim Conn-Kowlessar 2024-11-13 23:26:04 +00:00
parent b01635ddd6
commit 8da1aecb55
5 changed files with 190 additions and 5 deletions

2
.idea/Model.iml generated
View file

@ -7,7 +7,7 @@
<sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
</content>
<orderEntry type="jdk" jdkName="Fastapi-backend" jdkType="Python SDK" />
<orderEntry type="jdk" jdkName="Stonewater-wave-3" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
<component name="PyNamespacePackagesService">

2
.idea/misc.xml generated
View file

@ -3,7 +3,7 @@
<component name="Black">
<option name="sdkName" value="Python 3.10 (backend)" />
</component>
<component name="ProjectRootManager" version="2" project-jdk-name="Fastapi-backend" project-jdk-type="Python SDK" />
<component name="ProjectRootManager" version="2" project-jdk-name="Stonewater-wave-3" project-jdk-type="Python SDK" />
<component name="PyCharmProfessionalAdvertiser">
<option name="shown" value="true" />
</component>

View file

@ -349,3 +349,32 @@ def caha():
"exclusions": ["boiler_upgrade"]
}
print(body2)
#
asset_list3 = [
{
"address": "10b Forest Gardens", "postcode": "N17 6XA", "uprn": 100021180197
}
]
filename3 = f"{USER_ID}/{CAHA_PORTFOLIO_ID}/asset_list3.csv"
save_csv_to_s3(
dataframe=pd.DataFrame(asset_list3),
bucket_name="retrofit-plan-inputs-dev",
file_name=filename3
)
body3 = {
"portfolio_id": str(119),
"housing_type": "Social",
"goal": "Increasing EPC",
"goal_value": "C",
"trigger_file_path": filename3,
"already_installed_file_path": "",
"patches_file_path": "",
"non_invasive_recommendations_file_path": "",
"valuation_file_path": "",
"scenario_name": "Wave 3 Packages",
"multi_plan": True,
"budget": None,
"exclusions": ["boiler_upgrade"]
}
print(body3)

View file

@ -37,6 +37,68 @@ def sap_to_epc(sap_points: int | float):
return "G"
# Captures everything between the "7.0 Walls:" heading and the "8.0 Roofs:" heading.
_SUMMARY_WALL_SECTION_RE = re.compile(r"7\.0 Walls:\n(.*?)\n8\.0 Roofs:", re.DOTALL)

# One building part's wall entry. Named groups are used so that the optional
# dry-lining wrappers cannot shift the numbering of the fields that follow them.
_SUMMARY_WALL_PART_RE = re.compile(
    r"(?P<label>Main Property|1st Extension|2nd Extension|[\w\s]+)\n"  # Building part label
    r"Type\s+(?P<type>.*?)\n"  # Main wall type
    r"Insulation\s+(?P<insulation>.*?)\n"  # Main wall insulation
    r"(?:Dry-lining\s+(?P<dry_lining>.*?)\n)?"  # Optional main wall dry-lining
    r"Wall Thickness Unknown\s+(?P<thickness_unknown>.*?)\n"  # Main wall thickness-unknown flag
    r"Wall Thickness \[mm\]\s+(?P<thickness>\d+)"  # Main wall thickness
    r"(?:\nAlternative Wall Area.*?\n"  # Optional alternative wall sub-section
    r"Alternative Type\s+(?P<alt_type>.*?)\n"  # Alternative wall type
    r"Alternative Insulation\s+(?P<alt_insulation>.*?)\n"  # Alternative wall insulation
    r"(?:Alternative Dry-lining\s+(?P<alt_dry_lining>.*?)\n)?"  # Optional alternative dry-lining
    r"Alternative Wall Thickness Unknown\s+(?P<alt_thickness_unknown>.*?)\n"  # Alt thickness-unknown
    r"Alternative Wall Thickness\s+(?P<alt_thickness>\d+))?",  # Alternative wall thickness
    re.DOTALL,
)


def _summary_wall_value(value, default=None):
    """Return the stripped capture, or ``default`` when nothing (or an empty string) was captured."""
    return value.strip() if value else default


def extract_wall_details_summary(text: str) -> list[dict]:
    """
    Extracts wall type, insulation, dry-lining, and thickness for each building part,
    including any alternative wall details within the 7.0 Walls section of the summary PDF text.

    :param text: Text of the summary PDF.
    :return: One dictionary per building part found between the "7.0 Walls:" and
        "8.0 Roofs:" headings. Main wall dry-lining defaults to "N/A" when absent;
        every alternative wall field defaults to None when absent. An empty list is
        returned when the 7.0 Walls section cannot be located.
    """
    # Locate the entire 7.0 Walls section; without it there is nothing to parse
    section_match = _SUMMARY_WALL_SECTION_RE.search(text)
    if section_match is None:
        return []
    wall_section = section_match.group(1)

    wall_data = []
    for match in _SUMMARY_WALL_PART_RE.finditer(wall_section):
        fields = match.groupdict()
        alt_thickness = fields["alt_thickness"]
        wall_data.append({
            "Building Part": fields["label"].strip(),
            "Wall Type": fields["type"].strip(),
            "Wall Insulation": fields["insulation"].strip(),
            "Wall Dry-lining": _summary_wall_value(fields["dry_lining"], "N/A"),
            "Wall Thickness Unknown": fields["thickness_unknown"].strip(),
            "Wall Thickness (mm)": int(fields["thickness"]),
            "Alternative Wall Type": _summary_wall_value(fields["alt_type"]),
            "Alternative Wall Insulation": _summary_wall_value(fields["alt_insulation"]),
            "Alternative Wall Dry-lining": _summary_wall_value(fields["alt_dry_lining"]),
            "Alternative Wall Thickness Unknown": _summary_wall_value(fields["alt_thickness_unknown"]),
            "Alternative Wall Thickness (mm)": int(alt_thickness) if alt_thickness else None,
        })
    return wall_data
def extract_summary_report(pdf_path):
"""
Extracts specific data from the provided PDF file.
@ -80,6 +142,14 @@ def extract_summary_report(pdf_path):
"Main Roof Type": None,
"Main Roof Insulation": None,
"Main Roof Insulation Thickness": None,
"Main Wall Type": None,
"Main Wall Insulation": None,
"Main Wall Dry-lining": None,
"Main Wall Thickness": None,
"Main Building Alternative Wall Type": None,
"Main Building Alternative Wall Insulation": None,
"Main Building Alternative Wall Dry-lining": None,
"Main Building Alternative Wall Thickness": None,
}
with (open(pdf_path, "rb") as file):
@ -229,6 +299,18 @@ def extract_summary_report(pdf_path):
insulation_thickness_match.strip() if insulation_thickness_match else None
)
walls_data = extract_wall_details_summary(text)
# Get the main building wall data
main_building_walls = [wall for wall in walls_data if "Main" in wall["Building Part"]][0]
data["Main Wall Type"] = main_building_walls["Wall Type"]
data["Main Wall Insulation"] = main_building_walls["Wall Insulation"]
data["Main Wall Dry-lining"] = main_building_walls["Wall Dry-lining"]
data["Main Wall Thickness"] = main_building_walls["Wall Thickness (mm)"]
data["Main Building Alternative Wall Type"] = main_building_walls["Alternative Wall Type"]
data["Main Building Alternative Wall Insulation"] = main_building_walls["Alternative Wall Insulation"]
data["Main Building Alternative Wall Dry-lining"] = main_building_walls["Alternative Wall Dry-lining"]
data["Main Building Alternative Wall Thickness"] = main_building_walls["Alternative Wall Thickness (mm)"]
return data
@ -498,10 +580,64 @@ def extract_roof_details_epr(text):
return roof_data
# Splits the EPR text into building parts: group 1 is the rest of the heading line,
# group 2 is everything up to the next "Conservatory"/"Construction details" or end of text.
_EPR_WALL_BUILDING_PART_RE = re.compile(
    r"Construction details: Building part: (.*?)\n(.*?)(?=Conservatory|Construction details|$)",
    re.DOTALL,
)

# Trailing text removed from the building part heading to leave just its name.
_EPR_WALL_PART_NAME_NOISE_RE = re.compile(r" - built in.*|Room\(s\) in Roof area:.*")

# The lookbehind stops a main-wall label from matching inside the corresponding
# "Alternative Wall ..." line, which would otherwise report the alternative
# wall's value as the main wall's whenever the main field is absent.
_EPR_MAIN_WALL_PREFIX = r"(?<!Alternative )Wall "
_EPR_ALT_WALL_PREFIX = r"Alternative Wall "


def _epr_wall_text(details, prefix, field):
    """Return the stripped text following ``<prefix><field>:`` in ``details``, or None if absent."""
    found = re.search(prefix + field + r":\s*(.*?)(?=\n|$)", details)
    return found.group(1).strip() if found else None


def _epr_wall_number(details, prefix, field):
    """Return the integer following ``<prefix><field>:`` in ``details``, or None if absent."""
    found = re.search(prefix + field + r":\s*(\d+)(?=\n|$)", details)
    return int(found.group(1)) if found else None


def extract_wall_details_epr(text: str) -> list[dict]:
    """
    Extracts wall type, insulation, dry-lining, and thickness for each building part
    in the provided EPR PDF text.

    :param text: Text of the EPR PDF.
    :return: One dictionary per "Construction details: Building part:" section, holding
        the main wall fields and the alternative wall fields. Any field whose label is
        not found in the section is None. Main wall fields are never taken from an
        "Alternative Wall ..." line.
    """
    wall_data = []
    for match in _EPR_WALL_BUILDING_PART_RE.finditer(text):
        # Clean up the building part name
        part_name = _EPR_WALL_PART_NAME_NOISE_RE.sub("", match.group(1).strip()).strip()
        details = match.group(2)
        wall_data.append({
            "Building Part": part_name,
            "Wall Type": _epr_wall_text(details, _EPR_MAIN_WALL_PREFIX, "Type"),
            "Wall Insulation": _epr_wall_text(details, _EPR_MAIN_WALL_PREFIX, "Insulation"),
            "Wall Dry-lining": _epr_wall_text(details, _EPR_MAIN_WALL_PREFIX, "Dry-lining"),
            "Wall Thickness": _epr_wall_number(details, _EPR_MAIN_WALL_PREFIX, "Thickness"),
            "Alternative Wall Type": _epr_wall_text(details, _EPR_ALT_WALL_PREFIX, "Type"),
            "Alternative Wall Insulation": _epr_wall_text(details, _EPR_ALT_WALL_PREFIX, "Insulation"),
            "Alternative Wall Dry-lining": _epr_wall_text(details, _EPR_ALT_WALL_PREFIX, "Dry-lining"),
            "Alternative Wall Thickness": _epr_wall_number(details, _EPR_ALT_WALL_PREFIX, "Thickness"),
        })
    return wall_data
def extract_epr(pdf_path):
"""
Extracts specific data from an Energy Report (EPR) PDF file.
"""
data = {
"Address": None,
"Postcode": None,
@ -539,6 +675,14 @@ def extract_epr(pdf_path):
"Main Roof Type": None,
"Main Roof Insulation": None,
"Main Roof Insulation Thickness": None,
"Main Wall Type": None,
"Main Wall Insulation": None,
"Main Wall Dry-lining": None,
"Main Wall Thickness": None,
"Main Building Alternative Wall Type": None,
"Main Building Alternative Wall Insulation": None,
"Main Building Alternative Wall Dry-lining": None,
"Main Building Alternative Wall Thickness": None,
}
with open(pdf_path, "rb") as file:
@ -664,6 +808,17 @@ def extract_epr(pdf_path):
data["Main Roof Insulation"] = main_roof_details[0]["Roof Insulation"]
data["Main Roof Insulation Thickness"] = main_roof_details[0]["Roof Insulation Thickness"]
wall_details = extract_wall_details_epr(text)
main_wall_details = [w for w in wall_details if "Main" in w["Building Part"]][0]
data["Main Wall Type"] = main_wall_details["Wall Type"]
data["Main Wall Insulation"] = main_wall_details["Wall Insulation"]
data["Main Wall Dry-lining"] = main_wall_details["Wall Dry-lining"]
data["Main Wall Thickness"] = main_wall_details["Wall Thickness"]
data["Main Building Alternative Wall Type"] = main_wall_details["Alternative Wall Type"]
data["Main Building Alternative Wall Insulation"] = main_wall_details["Alternative Wall Insulation"]
data["Main Building Alternative Wall Dry-lining"] = main_wall_details["Alternative Wall Dry-lining"]
data["Main Building Alternative Wall Thickness"] = main_wall_details["Alternative Wall Thickness"]
return data
@ -1425,6 +1580,7 @@ def append_stonewater_id():
)
model_proposed_sample = model_proposed_sample[~pd.isnull(model_proposed_sample["Address ID"])]
model_proposed_sample["Address ID"] = model_proposed_sample["Address ID"].astype(int)
z = model_proposed_sample["Archetype ID"].drop_duplicates().sort_values()
original_archetypes = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Stonewater SHDF_3_0_Board Triage 22.05.24 "

View file

@ -10,10 +10,10 @@ class SecondaryHeating:
"""
# The list of existing heating systems that are accepted
ACCEPTED_MAINHEAT_DESCRIPTIONS = ["Boiler and radiators, mains gas"]
ACCEPTED_SECONDHEAT_DESCRIPTIONS = ["Room heaters, electric"]
ACCEPTED_MAINHEAT_DESCRIPTIONS = ["Boiler and radiators, mains gas", "Electric storage heaters"]
ACCEPTED_SECONDHEAT_DESCRIPTIONS = ["Room heaters, electric", 'Portable electric heaters (assumed)']
# These are the heaters where works are required to remove them
FIXED_HEATER_DESCRIPTIONS = ["Room heaters, electric", 'Portable electric heaters (assumed)']
FIXED_HEATER_DESCRIPTIONS = ["Room heaters, electric"]
def __init__(self, property_instance: Property):
self.property = property_instance