# Isolates the text between the "7.0 Walls:" and "8.0 Roofs:" headings.
_SUMMARY_WALL_SECTION = re.compile(r"7\.0 Walls:\n(.*?)\n8\.0 Roofs:", re.DOTALL)

# One building part's wall entry. Named groups are used on purpose: the optional
# dry-lining wrappers used to be capturing groups, which shifted every numbered
# group after them and made the alternative-wall fields read the wrong capture.
_SUMMARY_BUILDING_PART = re.compile(
    r"(?P<label>Main Property|1st Extension|2nd Extension|[\w\s]+)\n"  # building part label
    r"Type\s+(?P<type>.*?)\n"  # main wall type
    r"Insulation\s+(?P<insulation>.*?)\n"  # main wall insulation
    r"(?:Dry-lining\s+(?P<dry_lining>.*?)\n)?"  # optional main dry-lining
    r"Wall Thickness Unknown\s+(?P<thickness_unknown>.*?)\n"
    r"Wall Thickness \[mm\]\s+(?P<thickness>\d+)"
    r"(?:\nAlternative Wall Area.*?\n"  # optional alternative wall sub-section
    r"Alternative Type\s+(?P<alt_type>.*?)\n"
    r"Alternative Insulation\s+(?P<alt_insulation>.*?)\n"
    r"(?:Alternative Dry-lining\s+(?P<alt_dry_lining>.*?)\n)?"  # optional alt dry-lining
    r"Alternative Wall Thickness Unknown\s+(?P<alt_thickness_unknown>.*?)\n"
    r"Alternative Wall Thickness\s+(?P<alt_thickness>\d+))?",
    re.DOTALL,
)


def _stripped_or(value, default=None):
    """Return ``value`` stripped, or ``default`` when it is ``None`` or empty."""
    return value.strip() if value else default


def extract_wall_details_summary(text):
    """
    Extract wall details for each building part from summary-report text.

    Only the text between the "7.0 Walls:" and "8.0 Roofs:" headings is parsed.
    Each entry found there yields one dictionary with the main wall's type,
    insulation, dry-lining, "thickness unknown" flag and thickness, plus the
    same fields for the alternative wall when that sub-section is present.

    :param text: Text extracted from the summary PDF.
    :return: List of dictionaries, one per building part, in document order.
        Main dry-lining is "N/A" when the line is absent; all alternative wall
        values are None when no alternative wall is listed. An empty list is
        returned when the 7.0 Walls section cannot be located.
    """
    section = _SUMMARY_WALL_SECTION.search(text)
    if section is None:
        # No walls section in this document: nothing to extract.
        return []

    wall_data = []
    for match in _SUMMARY_BUILDING_PART.finditer(section.group(1)):
        alt_thickness = match.group("alt_thickness")
        wall_data.append({
            "Building Part": match.group("label").strip(),
            "Wall Type": match.group("type").strip(),
            "Wall Insulation": match.group("insulation").strip(),
            "Wall Dry-lining": _stripped_or(match.group("dry_lining"), "N/A"),
            "Wall Thickness Unknown": match.group("thickness_unknown").strip(),
            "Wall Thickness (mm)": int(match.group("thickness")),
            "Alternative Wall Type": _stripped_or(match.group("alt_type")),
            "Alternative Wall Insulation": _stripped_or(match.group("alt_insulation")),
            "Alternative Wall Dry-lining": _stripped_or(match.group("alt_dry_lining")),
            "Alternative Wall Thickness Unknown": _stripped_or(match.group("alt_thickness_unknown")),
            "Alternative Wall Thickness (mm)": int(alt_thickness) if alt_thickness else None,
        })

    return wall_data
# Splits the EPR text into building parts: group 1 is the remainder of the
# "Construction details: Building part:" heading line, group 2 is everything up to
# the next "Conservatory" / "Construction details" marker or the end of the text.
_EPR_BUILDING_PART = re.compile(
    r"Construction details: Building part: (.*?)\n(.*?)(?=Conservatory|Construction details|$)",
    re.DOTALL,
)

# Trailing text removed from the heading to leave just the building part name.
_EPR_PART_NAME_NOISE = re.compile(r" - built in.*|Room\(s\) in Roof area:.*")


def _epr_wall_field(label, value_pattern, alternative):
    """Compile the pattern for one "<label>: <value>" line of an EPR building part."""
    # Main-wall labels are a suffix of the "Alternative ..." labels, so without
    # the lookbehind a missing main field would pick up the alternative's value.
    prefix = "Alternative " if alternative else "(?<!Alternative )"
    return re.compile(prefix + label + r":\s*" + value_pattern + r"(?=\n|$)")


# Output key suffix -> (main wall pattern, alternative wall pattern, converter).
_EPR_WALL_FIELDS = tuple(
    (label, _epr_wall_field(label, value, False), _epr_wall_field(label, value, True), convert)
    for label, value, convert in (
        ("Wall Type", r"(.*?)", str.strip),
        ("Wall Insulation", r"(.*?)", str.strip),
        ("Wall Dry-lining", r"(.*?)", str.strip),
        ("Wall Thickness", r"(\d+)", int),
    )
)


def extract_wall_details_epr(text):
    """
    Extract wall details for each building part from EPR text.

    The text is split on "Construction details: Building part:" headings. For
    every part the wall type, insulation, dry-lining and thickness are read
    from their "<label>: <value>" lines, together with the matching
    "Alternative ..." lines when present.

    :param text: Text extracted from the EPR PDF.
    :return: List of dictionaries, one per building part, in document order.
        Any field whose line is not found is None; thickness values are ints.
        A main-wall field is never filled from an "Alternative ..." line.
    """
    wall_data = []

    for part in _EPR_BUILDING_PART.finditer(text):
        # Drop the build-date / room-in-roof suffix from the heading.
        part_name = _EPR_PART_NAME_NOISE.sub("", part.group(1).strip()).strip()
        details = part.group(2)

        main_values = {}
        alternative_values = {}
        for label, main_pattern, alt_pattern, convert in _EPR_WALL_FIELDS:
            main_match = main_pattern.search(details)
            alt_match = alt_pattern.search(details)
            main_values[label] = convert(main_match.group(1)) if main_match else None
            alternative_values["Alternative " + label] = (
                convert(alt_match.group(1)) if alt_match else None
            )

        # Main fields first, then alternative fields, keeping the original key order.
        wall_data.append({"Building Part": part_name, **main_values, **alternative_values})

    return wall_data
""" + data = { "Address": None, "Postcode": None, @@ -539,6 +675,14 @@ def extract_epr(pdf_path): "Main Roof Type": None, "Main Roof Insulation": None, "Main Roof Insulation Thickness": None, + "Main Wall Type": None, + "Main Wall Insulation": None, + "Main Wall Dry-lining": None, + "Main Wall Thickness": None, + "Main Building Alternative Wall Type": None, + "Main Building Alternative Wall Insulation": None, + "Main Building Alternative Wall Dry-lining": None, + "Main Building Alternative Wall Thickness": None, } with open(pdf_path, "rb") as file: @@ -664,6 +808,17 @@ def extract_epr(pdf_path): data["Main Roof Insulation"] = main_roof_details[0]["Roof Insulation"] data["Main Roof Insulation Thickness"] = main_roof_details[0]["Roof Insulation Thickness"] + wall_details = extract_wall_details_epr(text) + main_wall_details = [w for w in wall_details if "Main" in w["Building Part"]][0] + data["Main Wall Type"] = main_wall_details["Wall Type"] + data["Main Wall Insulation"] = main_wall_details["Wall Insulation"] + data["Main Wall Dry-lining"] = main_wall_details["Wall Dry-lining"] + data["Main Wall Thickness"] = main_wall_details["Wall Thickness"] + data["Main Building Alternative Wall Type"] = main_wall_details["Alternative Wall Type"] + data["Main Building Alternative Wall Insulation"] = main_wall_details["Alternative Wall Insulation"] + data["Main Building Alternative Wall Dry-lining"] = main_wall_details["Alternative Wall Dry-lining"] + data["Main Building Alternative Wall Thickness"] = main_wall_details["Alternative Wall Thickness"] + return data @@ -1425,6 +1580,7 @@ def append_stonewater_id(): ) model_proposed_sample = model_proposed_sample[~pd.isnull(model_proposed_sample["Address ID"])] model_proposed_sample["Address ID"] = model_proposed_sample["Address ID"].astype(int) + z = model_proposed_sample["Archetype ID"].drop_duplicates().sort_values() original_archetypes = pd.read_excel( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Stonewater 
SHDF_3_0_Board Triage 22.05.24 " diff --git a/recommendations/SecondaryHeating.py b/recommendations/SecondaryHeating.py index 931dbff0..a9d5de04 100644 --- a/recommendations/SecondaryHeating.py +++ b/recommendations/SecondaryHeating.py @@ -10,10 +10,10 @@ class SecondaryHeating: """ # The list of existing heating systems that are accepted - ACCEPTED_MAINHEAT_DESCRIPTIONS = ["Boiler and radiators, mains gas"] - ACCEPTED_SECONDHEAT_DESCRIPTIONS = ["Room heaters, electric"] + ACCEPTED_MAINHEAT_DESCRIPTIONS = ["Boiler and radiators, mains gas", "Electric storage heaters"] + ACCEPTED_SECONDHEAT_DESCRIPTIONS = ["Room heaters, electric", 'Portable electric heaters (assumed)'] # These are the heaters where works are required to remove them - FIXED_HEATER_DESCRIPTIONS = ["Room heaters, electric", 'Portable electric heaters (assumed)'] + FIXED_HEATER_DESCRIPTIONS = ["Room heaters, electric"] def __init__(self, property_instance: Property): self.property = property_instance