def extract_sharepoint_url(x):
    """Return the last two path segments of the SharePoint URL held in a cell.

    The cell is expected to look like ``"<label> - http(s)://host/path/..."``;
    a cell that is just a URL is accepted as well. The result is used as a
    ``folder_path`` key (e.g. ``"11. CCS Dorset/Kittiwake Close 35"``).

    :param x: Raw value of the "Sharepoint Link" column (string or null).
    :return: ``"<parent>/<leaf>"`` built from the final two segments of the
        URL path with ``%20`` turned back into spaces, or ``""`` when the cell
        is null, is not text, or contains no URL.
    """
    # Null cells (None / NaN / pd.NA) and any other non-text value carry no link.
    if not isinstance(x, str):
        return ""

    pieces = x.split(" - http")
    if len(pieces) > 1:
        # The split swallowed the "http" prefix; put it back so that urlparse
        # sees a real scheme and netloc and the host can never end up in the
        # returned path (this used to happen for short plain-http links).
        url = "http" + pieces[1]
    elif x.strip().startswith(("http://", "https://")):
        # The cell holds only the URL, without the "<label> - " prefix.
        url = x.strip()
    else:
        # No recognisable URL: behave like a null cell instead of raising
        # IndexError halfway through a DataFrame.apply.
        return ""

    # Only %20 is decoded, exactly as before, so existing folder keys built
    # from these paths keep matching.
    decoded_path = parse.urlparse(url).path.replace("%20", " ")
    return "/".join(decoded_path.split("/")[-2:])
Window Age Description', + 'Secondary Window Age Description Proportion (%)', 'Number of Windows', + 'Total Number of Doors', 'Number of Insulated Doors', + 'Existing Primary Heating System', + 'Existing Primary Heating PCDF Reference', + 'Existing Primary Heating Controls', + 'Existing Primary Heating % of Heat', + 'Existing Secondary Heating System', + 'Existing Secondary Heating PCDF Reference', + 'Existing Secondary Heating Controls', + 'Existing Secondary Heating % of Heat', 'Secondary Heating Code', + 'Water Heating Code', 'Total Floor Area (m2)', + 'Total Ground Floor Area (m2)', 'RIR Floor Area', + 'Main Building Wall Area (m2)', 'First Extension Wall Area (m2)', + 'Number of Light Fittings', 'Number of LEL Fittings', + 'Number of fittings needing LEL', 'Main Roof Type', + 'Main Roof Insulation', 'Main Roof Insulation Thickness', + 'Main Wall Type', 'Main Wall Insulation', 'Main Wall Dry-lining', + 'Main Wall Thickness', 'Main Building Alternative Wall Type', + 'Main Building Alternative Wall Insulation', + 'Main Building Alternative Wall Dry-lining', + 'Main Building Alternative Wall Thickness', 'Main Fuel' + ] + # For the columns in retrofit_assessments_data_columns, prefix all of them with Survey: + retrofit_assessments_data_columns_prefixed = ["Survey: " + x for x in retrofit_assessments_data_columns] + rename_dict = dict(zip(retrofit_assessments_data_columns, retrofit_assessments_data_columns_prefixed)) + retrofit_assessment_data = retrofit_assessment_data.rename(columns=rename_dict) + retrofit_assessment_data["Survey: Current EPC Band"] = ( + retrofit_assessment_data["Survey: Current SAP Rating"].apply(lambda x: sap_to_epc(x)) + ) + # We can read in the data as needed # Next Step: Read in the coordinated measures and match to the extracted data @@ -3134,14 +3180,6 @@ def revised_model(): ccs_coordination_sheet = ccs_coordination_sheet.head(87) ccs_coordination = pd.concat([ccs_coordination_removed_from_programme, ccs_coordination_sheet]) - from urllib 
import parse - def extract_sharepoint_url(x): - if pd.isnull(x): - return "" - return "/".join(parse.urlparse( - x.split(" - http")[1] - ).path.replace("%20", " ").split("/")[-2:]) - ccs_coordination["folder_path"] = ccs_coordination["Sharepoint Link"].apply(lambda x: extract_sharepoint_url(x)) ############################################################ @@ -3224,8 +3262,6 @@ def revised_model(): lambda x: extract_sharepoint_url(x) ) - # Combine the data back - ############################################################ # NEW 450 COORDINATED RETROFIT ASSESSMENTS ############################################################# @@ -3352,7 +3388,6 @@ def revised_model(): ) ccs_coordination = ccs_coordination[~pd.isnull(ccs_coordination["Postcode"])] ccs_coordination = ccs_coordination[ccs_coordination["Retrofit Assessment"] != "Outstanding"] - from fuzzywuzzy import fuzz ccs_manual_filters = { "35 Kittiwake Close": "Wave 2.1 Surveys/11. CCS Dorset/Kittiwake Close 35" @@ -3596,6 +3631,17 @@ def revised_model(): matching_lookup, how="left", on="Name" ) + # We now map the retrofit assessment data to the coordinated packages + wates_coordination = wates_coordination.merge( + retrofit_assessment_data.drop(columns=["Postcode"]), how="left", on="survey_folder" + ) + ccs_coordination = ccs_coordination.merge( + retrofit_assessment_data.drop(columns=["Postcode"]), how="left", on="survey_folder" + ) + retrofit_packages_board = retrofit_packages_board.merge( + retrofit_assessment_data.drop(columns=["Postcode"]), how="left", on="survey_folder" + ) + # We have 4 properties in the Wates coordination board, that we want to remove from the retrofit packages board to_remove = wates_coordination[ wates_coordination["Asset ID_x"].astype(int).isin(retrofit_packages_board["Organisation Reference"]) @@ -3617,8 +3663,8 @@ def revised_model(): 'Flat Roof', 'Room in Roof', 'Window Upgrade', 'Door Upgrade', 'Ventilation', 'Main Heating', 'Water Heating', 'Heating Controls', 'Solar PV', 'Other 
measures', 'Organisation Reference', - ] - ], + ] + retrofit_assessments_data_columns_prefixed + ], ccs_coordination[ [ # We don't have secondary wall insulation, Flat Roof, RIR, Heating Controls, @@ -3627,8 +3673,8 @@ def revised_model(): 'SAP Band Install Package', 'Package Approved (Client)', 'Wall Insulation', 'Loft Insulation', 'Windows Upgrade', 'Ext. Doors Upgrade', 'Ventilation', 'Heating', 'Other Measures', "Asset ID.1_y", - ] - ].rename( + ] + retrofit_assessments_data_columns_prefixed + ].rename( columns={ "SAP Band Pre": "Actual SAP Band", "SAP Rating Pre": "Actual SAP Rating", @@ -3651,8 +3697,8 @@ def revised_model(): 'Wall Insulation', 'Loft Insulation', 'Windows Upgrade', 'Ext. Doors Upgrade', 'Ventilation', 'Heating', 'Other Measures', 'Asset ID_x' - ] - ].rename( + ] + retrofit_assessments_data_columns_prefixed + ].rename( columns={ "SAP Band Pre": "Actual SAP Band", "SAP Rating Pre": "Actual SAP Rating", @@ -3681,24 +3727,8 @@ def revised_model(): on="Organisation Reference" ) - # We match the properties to their closest match - # We clean up the SAP ratings in the coordinated packages - def sap_to_number(x): - try: - return int(x) - except: - if x[-1] in ["A", "B", "C", "D", "E", "F"]: - return int(x[:-1]) - - if x[0] in ["A", "B", "C", "D", "E", "F"]: - return int(x[1:]) - - coordinated_packages = coordinated_packages[~pd.isnull(coordinated_packages["Actual SAP Band"])] - coordinated_packages = coordinated_packages[~pd.isnull(coordinated_packages["Actual SAP Rating"])] - - coordinated_packages["Actual SAP Rating"] = coordinated_packages["Actual SAP Rating"].apply( - lambda x: sap_to_number(x) - ) + coordinated_packages = coordinated_packages[~pd.isnull(coordinated_packages["Survey: Current EPC Band"])] + coordinated_packages = coordinated_packages[~pd.isnull(coordinated_packages["Survey: Current SAP Rating"])] # We need the features pertaining to these priority postcodes @@ -3721,6 +3751,11 @@ def revised_model(): if not match.empty: return 
match + # Finally, we search for a property in the same Archetype + match = coordinated_packages[coordinated_packages["Archetype ID"] == home["Archetype ID"]] + if not match.empty: + return match + return None # No match found coordinated_packages["Postal Region"] = coordinated_packages["Postcode"].str.split(" ").str[0].str.strip() @@ -3732,6 +3767,12 @@ def revised_model(): coordinated_packages["Primary Property Type"] = coordinated_packages["Property Type"].str.split(":").str[0] new_priority_postcodes["Primary Property Type"] = new_priority_postcodes["Property Type"].str.split(":").str[0] + coordinated_packages = coordinated_packages.merge( + new_priority_postcodes[["Organisation Reference", "Archetype ID"]], + how="left", + on="Organisation Reference" + ) + # For every property in the priority postcodes data, we look for a most appropriate matching property no_match = [] matches = [] @@ -3759,16 +3800,17 @@ def revised_model(): no_match_summary = no_match_summary.sort_values("Organisation Reference", ascending=False) # len(no_match) - # 8764, 5607, 5646 + # 8764, 5607, 5646, 5071 # no_match_summary.shape - # (3953, 6), (2948, 6), (2969, 7) + # (3953, 6), (2948, 6), (2969, 7), (2575, 7) matches_df = pd.DataFrame(matches) matches_df = matches_df.merge( - coordinated_packages[["Organisation Reference", "Actual SAP Band", "Actual SAP Rating"]], + coordinated_packages[["Organisation Reference", "Survey: Current EPC Band", "Survey: Current SAP Rating"]], left_on="Best Match Organisation Reference", right_on="Organisation Reference", suffixes=("", " - Closest Match") ) + # We want to aggregate the matches, when we have multiple aggregated_matches_df = [] for org_ref, mapped_matches in matches_df.groupby("Organisation Reference"): @@ -3778,19 +3820,21 @@ def revised_model(): "Organisation Reference": org_ref, "Number of matches": 1, "Proportion": 100, - "Estimated SAP Rating": mapped_matches["Actual SAP Rating"].values[0], - "Estimated EPC Rating": 
def remove_leading_zero(address):
    """Strip a single leading zero from a one-digit house number.

    ``"01 Mason Road"`` becomes ``"1 Mason Road"``. Only the exact shape
    "0", one digit 1-9, then a space at the start of the string is rewritten;
    anything else (``"10 ..."``, ``"001 ..."``, ``"00 ..."``) is returned as is.

    :param address: First line of an address, or a missing value.
    :return: The address without the leading zero. Non-string input (for
        example the NaN produced for a blank address) is returned unchanged
        rather than raising ``TypeError`` inside ``Series.apply``.
    """
    if not isinstance(address, str):
        # re.sub only accepts str/bytes; let missing values flow through.
        return address
    return re.sub(r"^0([1-9]) ", r"\1 ", address)
mapped_priority_list["address1"] + ) + + mapped_priority_list = mapped_priority_list.rename( + columns={"UPRN": "uprn"} + ) + mapped_priority_list["row_id"] = mapped_priority_list["Organisation Reference"] + + # Let's get the newest EPC data for these properties + # We merge on UPRN, when we have it + # from etl.route_march_data_pull.app import get_data + # epc_data, errors, nodata = get_data( + # asset_list=mapped_priority_list, + # fulladdress_column="Address", + # address1_column="address1", + # postcode_column="Postcode", + # manual_uprn_map={}, + # epc_api_only=True + # ) + # + # epc_df = pd.DataFrame(epc_data) + # epc_df.to_csv( + # os.path.join(CUSTOMER_FOLDER_PATH, "Jan 2025 Project", "full_epc_data.csv"), index=False + # ) + epc_df = pd.read_csv(os.path.join(CUSTOMER_FOLDER_PATH, "Jan 2025 Project", "full_epc_data.csv")) + epc_df = epc_df.rename(columns={"row_id": "Organisation Reference"}) + + # We now package up the data + + # Sheet 1 is the base coordination data + output_coordination_sheet = coordinated_packages[ + [ + "Name", "Postcode", 'Organisation Reference', 'Package Ref', + 'Main Wall Insulation', 'Secondary Wall Insulation', 'Loft insulation', + 'Flat Roof', 'Room in Roof', 'Window Upgrade', 'Door Upgrade', + 'Ventilation', 'Main Heating', 'Water Heating', 'Heating Controls', + 'Solar PV', 'Other measures', 'Survey: Current SAP Rating', 'Survey: Current EPC Band', + 'Survey: Primary Energy Use (kWh/yr)', + 'Survey: Primary Energy Use Intensity (kWh/m2/yr)', + 'Survey: Number of Storeys', 'Survey: Fuel Bill', + 'Survey: Window Age Description', + 'Survey: Window Age Description Proportion (%)', + 'Survey: Secondary Window Age Description', + 'Survey: Secondary Window Age Description Proportion (%)', + 'Survey: Number of Windows', 'Survey: Total Number of Doors', + 'Survey: Number of Insulated Doors', + 'Survey: Existing Primary Heating System', + 'Survey: Existing Primary Heating PCDF Reference', + 'Survey: Existing Primary Heating Controls', + 
'Survey: Existing Primary Heating % of Heat', + 'Survey: Existing Secondary Heating System', + 'Survey: Existing Secondary Heating PCDF Reference', + 'Survey: Existing Secondary Heating Controls', + 'Survey: Existing Secondary Heating % of Heat', + 'Survey: Secondary Heating Code', 'Survey: Water Heating Code', + 'Survey: Total Floor Area (m2)', 'Survey: Total Ground Floor Area (m2)', + 'Survey: RIR Floor Area', 'Survey: Main Building Wall Area (m2)', + 'Survey: First Extension Wall Area (m2)', + 'Survey: Number of Light Fittings', 'Survey: Number of LEL Fittings', + 'Survey: Number of fittings needing LEL', 'Survey: Main Roof Type', + 'Survey: Main Roof Insulation', + 'Survey: Main Roof Insulation Thickness', 'Survey: Main Wall Type', + 'Survey: Main Wall Insulation', 'Survey: Main Wall Dry-lining', + 'Survey: Main Wall Thickness', + 'Survey: Main Building Alternative Wall Type', + 'Survey: Main Building Alternative Wall Insulation', + 'Survey: Main Building Alternative Wall Dry-lining', + 'Survey: Main Building Alternative Wall Thickness', + 'Survey: Main Fuel', + 'Walls', 'Roofs', 'Heating', 'Main Fuel', 'Age', 'Property Type' + ] + ].rename( + columns={ + 'Walls': "Parity - Walls", + 'Roofs': "Parity - Roof", + 'Heating': "Parity - Heating", + 'Main Fuel': "Parity - Fuel", + 'Age': "Parity - Age Band", + 'Property Type': "Parity - Property Type" + } + ) + + # Sheet 2 is the lookup table which maps the properties to their closest match + # We need to bring in the parity attributes between the mapped properties so we can see side-by-side + mapped_lookup = matches_df[ + [ + 'Organisation Reference', + 'Best Match Organisation Reference', + 'Survey: Current EPC Band', + 'Survey: Current SAP Rating' + ] + ].rename( + columns={ + 'Best Match Organisation Reference': "Best Match - Organisation Reference", + "Survey: Current EPC Band": "Best Match - Survey: Current EPC Band", + 'Survey: Current SAP Rating': "Best Match - Survey: Current SAp Rating" + } + ).merge( + 
features[["Organisation Reference", "Walls", "Roofs", "Heating", "Main Fuel", "Age", "Property Type"]], + how="left", + on="Organisation Reference" + ).merge( + features[["Organisation Reference", "Walls", "Roofs", "Heating", "Main Fuel", "Age", "Property Type"]].rename( + columns={ + "Organisation Reference": "Best Match - Organisation Reference", + "Walls": "Best Match - Walls", + "Roofs": "Best Match - Roof", + "Heating": "Best Match - Heating", + "Main Fuel": "Best Match - Main Fuel", + "Age": "Best Match - Age", + "Property Type": "Best Match - Property Type" + } + ), + how="left", + on="Best Match - Organisation Reference" + ).merge( + coordinated_packages[ + [ + "Organisation Reference", 'Survey: Main Wall Type', 'Survey: Main Wall Insulation', + 'Survey: Main Roof Type', 'Survey: Main Roof Insulation', 'Survey: Main Roof Insulation Thickness', + 'Survey: Existing Primary Heating System', + ] + ].rename( + columns={ + "Organisation Reference": "Best Match - Organisation Reference", + 'Survey: Main Wall Type': 'Best Match - Survey: Main Wall Type', + 'Survey: Main Wall Insulation': 'Best Match - Survey: Main Wall Insulation', + 'Survey: Main Roof Type': 'Best Match - Survey: Main Roof Type', + 'Survey: Main Roof Insulation': 'Best Match - Survey: Main Roof Insulation', + 'Survey: Main Roof Insulation Thickness': 'Best Match - Survey: Main Roof Insulation Thickness', + 'Survey: Existing Primary Heating System': 'Best Match - Survey: Existing Primary Heating System', + } + ), + how="left", + on="Best Match - Organisation Reference" + ) + + # Finally, we have the property, against the mapped home with the estimate SAP scores and the EPC data + worksheet = mapped_priority_list[ + [ + 'Organisation Reference', 'Address', 'Postcode', 'Address ID', 'uprn', 'Archetype ID', + 'SAP', 'SAP Band', "Property Type", "Walls", "Roofs", 'Glazing', + 'Heating', 'Main Fuel', 'Hot Water', 'Estimated SAP Rating', 'Estimated EPC Rating' + ] + ].rename( + columns={ + "SAP": "Parity 
- SAP Rating", + "SAP Band": "Parity - EPC Rating", + "Property Type": "Parity - Property Type", + "Walls": "Parity - Walls", + "Roofs": "Parity - Roofs", + 'Glazing': "Parity - Glazing", + 'Heating': 'Parity - Heating', + 'Main Fuel': 'Parity - Main Fuel', + 'Hot Water': 'Parity - Hot Water', + } + ).merge( + epc_df[ + [ + "Organisation Reference", + "uprn", + "current-energy-efficiency", + "current-energy-rating", + "lodgement-date", + "construction-age-band", + "walls-description", + "roof-description", + "mainheat-description", + "windows-description", + "hotwater-description", + "main-fuel", + "total-floor-area", + ] + ].rename( + columns={ + "uprn": "Last EPC - uprn", + "current-energy-efficiency": "Last EPC - SAP Score", + "current-energy-rating": "Last EPC - EPC Rating", + "lodgement-date": "Last EPC - Date Lodged", + "construction-age-band": "Last EPC - Age Band", + "walls-description": "Last EPC - Walls", + "roof-description": "Last EPC - Roof", + "mainheat-description": "Last EPC - Heating", + "windows-description": "Last EPC - Windows", + "hotwater-description": "Last EPC - Hot Water", + "main-fuel": "Last EPC - Main Fuel", + "total-floor-area": "Last EPC - Total Floor Area" + } + ), + how="left", + on='Organisation Reference' + ) + + worksheet["Years Since Last EPC"] # if __name__ == "__main__": # main() diff --git a/etl/route_march_data_pull/app.py b/etl/route_march_data_pull/app.py index 247ce98c..3432b744 100644 --- a/etl/route_march_data_pull/app.py +++ b/etl/route_march_data_pull/app.py @@ -20,7 +20,7 @@ load_dotenv(dotenv_path="backend/.env") EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN") -def get_data(asset_list, fulladdress_column, address1_column, postcode_column, manual_uprn_map): +def get_data(asset_list, fulladdress_column, address1_column, postcode_column, manual_uprn_map, epc_api_only=True): epc_data = [] errors = [] no_epc = [] @@ -33,6 +33,11 @@ def get_data(asset_list, fulladdress_column, address1_column, postcode_column, m if house_no 
is None: house_no = house_number uprn = manual_uprn_map.get(full_address, None) + if uprn is None and home.get("uprn"): + uprn = home["uprn"] + + if pd.isnull(uprn): + uprn = None searcher = SearchEpc( address1=str(house_no), @@ -88,6 +93,15 @@ def get_data(asset_list, fulladdress_column, address1_column, postcode_column, m no_epc.append(home["row_id"]) continue + if epc_api_only: + epc = { + "row_id": home["row_id"], + **searcher.newest_epc.copy() + } + + epc_data.append(epc) + continue + # Look for EPC recommendatons try: property_recommendations = searcher.client.domestic.recommendations(searcher.newest_epc["lmk-key"]) diff --git a/survey_report/app.py b/survey_report/app.py index 87ce7864..be31bd52 100644 --- a/survey_report/app.py +++ b/survey_report/app.py @@ -1,6 +1,9 @@ import os import PyPDF2 from string import Template + +import pandas as pd + from survey_report.extraction.detect_report_type import detect_report_type from survey_report.extraction.quidos import SiteNotesExtractor, EPRExtractor @@ -34,44 +37,54 @@ def handle(): :return: """ - data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/example_data/Flat 2" + folders = [ + "/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/example_data/Flat 1", + "/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/example_data/Flat 2", + "/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/example_data/Flat 3", + "/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/example_data/Flat 4", + "/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report/example_data/Flat 5", + ] + data = [] + for data_folder in folders: - folder_contents = os.listdir(data_folder) - # We look for the following files: - # Site notes - file_mapping = {} - for file in folder_contents: - # Check if it's a pdf file - if not file.endswith(".pdf"): - continue - filepath = os.path.join(data_folder, file) - with (open(filepath, "rb") as f): - pdf = 
PyPDF2.PdfReader(f) - first_page = pdf.pages[0].extract_text() - text = "" - for page in pdf.pages: - text += page.extract_text() + folder_contents = os.listdir(data_folder) + # We look for the following files: + # Site notes + file_mapping = {} + for file in folder_contents: + # Check if it's a pdf file + if not file.endswith(".pdf"): + continue + filepath = os.path.join(data_folder, file) + with (open(filepath, "rb") as f): + pdf = PyPDF2.PdfReader(f) + first_page = pdf.pages[0].extract_text() + text = "" + for page in pdf.pages: + text += page.extract_text() - # Check the report type - report_type = detect_report_type(first_page) - if report_type is not None: - file_mapping[report_type] = text + # Check the report type + report_type = detect_report_type(first_page) + if report_type is not None: + file_mapping[report_type] = text - # This is only set up to work with quido site notes so we must have it - site_notes_extractor = SiteNotesExtractor(file_mapping["quidos_site_notes"]) - site_notes = site_notes_extractor.extract_all() + # This is only set up to work with quido site notes so we must have it + site_notes_extractor = SiteNotesExtractor(file_mapping["quidos_site_notes"]) + site_notes = site_notes_extractor.extract_all() - # We also must have an EPR - epr_extractor = EPRExtractor(file_mapping["quidos_epr"]) - epr = epr_extractor.extract_all() + # We also must have an EPR + epr_extractor = EPRExtractor(file_mapping["quidos_epr"]) + epr = epr_extractor.extract_all() - # We now produce the combined data sheet which is the starting figure: - data_sheet = {**epr, **site_notes} - del data_sheet['Building Dimensions'] - # We unnest the Total Building Dimensions - data_sheet["Total Building Floor Area (m2)"] = data_sheet["Total Building Dimensions"]["floor_area"] - data_sheet["Total Building Heat Loss Area (m2)"] = data_sheet["Total Building Dimensions"]["heat_loss_area"] - del data_sheet["Total Building Dimensions"] + # We now produce the combined data sheet which 
is the starting figure: + data_sheet = {**epr, **site_notes} + del data_sheet['Building Dimensions'] + # We unnest the Total Building Dimensions + data_sheet["Total Building Floor Area (m2)"] = data_sheet["Total Building Dimensions"]["floor_area"] + data_sheet["Total Building Heat Loss Area (m2)"] = data_sheet["Total Building Dimensions"]["heat_loss_area"] + del data_sheet["Total Building Dimensions"] + data.append(data_sheet) + data = pd.DataFrame(data) # Generate the HTML report # Placeholder locations