import os import json import pandas as pd from asset_list.AssetList import AssetList from asset_list.mappings.property_type import PROPERTY_MAPPING from asset_list.mappings.built_form import BUILT_FORM_MAPPINGS from asset_list.mappings.walls import WALL_CONSTRUCTION_MAPPINGS from asset_list.mappings.heating_systems import HEATING_MAPPINGS from asset_list.mappings.exising_pv import EXISTING_PV_MAPPINGS from asset_list.mappings.roof import ROOF_CONSTRUCTION_MAPPINGS from asset_list.utils import get_data from dotenv import load_dotenv from backend.SearchEpc import SearchEpc load_dotenv(dotenv_path="backend/.env") EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN") def extract_address1(asset_list, full_address_col, postcode_col, method="first_two_words"): if method == "first_two_words": asset_list["address1_extracted"] = asset_list[full_address_col].str.split(" ").str[:2].str.join(" ") return asset_list if method == "first_word": asset_list["address1_extracted"] = asset_list[full_address_col].str.split(" ").str[0] return asset_list if method == "house_number_extraction": asset_list["address1_extracted"] = asset_list.apply( lambda x: SearchEpc.get_house_number(address=x[full_address_col], postcode=x[postcode_col]), axis=1 ) return asset_list raise ValueError(f"Method {method} not recognized") def app(): """ This app is EPC pulling data for some properties owned by Livewest Data request contents: Date of last EPC Reason for EPC SAP score on register Property Type Property Area Property Age Any Dimensions (HLP,PW,RH) Property Wall Construction Heating Type Secondary Heating Loft Insulation Depth Additional if possible: Heat loss calculations EPC recommendations Property UPRN """ # Stonewater Solar data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/October 2025 Solar" data_filename = "Copy of AP Stonewater Ammended address list - PV AM Amended - Khalim initial review.xlsx" sheet_name = "Proposed Sheet" postcode_column = 'Postcode' address1_column = None address1_method = "house_number_extraction" fulladdress_column = "Address" address_cols_to_concat = [] missing_postcodes_method = None landlord_year_built = None landlord_os_uprn = None landlord_property_type = "Property Type" landlord_built_form = "Property Type" landlord_wall_construction = "Walls" landlord_roof_construction = "Roofs" landlord_heating_system = "Heating" landlord_existing_pv = None landlord_property_id = "Asset Id" landlord_sap = "SAP" outcomes_filename = None outcomes_sheetname = None outcomes_postcode = None outcomes_houseno = None outcomes_id = None outcomes_address = None master_filepaths = [] master_id_colnames = [] master_to_asset_list_filepath = None phase = False ecosurv_landlords = None asset_list_header = 0 landlord_block_reference = None # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Cambridge/" data_filename = "22.10_Cambridge_west addresses.xlsx" sheet_name = "Asset List" postcode_column = 'Postcode' address1_column = None address1_method = "house_number_extraction" fulladdress_column = "Full Address" address_cols_to_concat = [] missing_postcodes_method = None landlord_year_built = None landlord_os_uprn = None landlord_property_type = None landlord_built_form = None landlord_wall_construction = None landlord_roof_construction = None landlord_heating_system = None landlord_existing_pv = None landlord_property_id = "id" landlord_sap = None outcomes_filename = None outcomes_sheetname = None outcomes_postcode = None outcomes_houseno = None outcomes_id = None outcomes_address = None master_filepaths = [] master_id_colnames = [] master_to_asset_list_filepath = None phase = False ecosurv_landlords = None asset_list_header = 0 landlord_block_reference = None # Property Box data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/NRLA/Property Box" data_filename = "Property Box Finance Portfolio.xlsx" sheet_name = "Sheet1" postcode_column = 'Postcode' address1_column = None address1_method = "house_number_extraction" fulladdress_column = "Address 1" address_cols_to_concat = [] missing_postcodes_method = None landlord_year_built = None landlord_os_uprn = None landlord_property_type = None landlord_built_form = None landlord_wall_construction = None landlord_roof_construction = None landlord_heating_system = None landlord_existing_pv = None landlord_property_id = "row_id" landlord_sap = None outcomes_filename = None outcomes_sheetname = None outcomes_postcode = None outcomes_houseno = None outcomes_id = None outcomes_address = None master_filepaths = [] master_id_colnames = [] master_to_asset_list_filepath = None phase = False ecosurv_landlords = None asset_list_header = 0 landlord_block_reference = "block_id" # CDS - able-to-pay data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/CDS/Able to pay" data_filename = "CDS_ASSET LIST_(2314).xlsx" sheet_name = "Sheet1" postcode_column = 'Property Address - Postcode' address1_column = "Property Address - Line 1" address1_method = None fulladdress_column = "Property Address - Line 1" address_cols_to_concat = [] missing_postcodes_method = None landlord_year_built = None landlord_os_uprn = None landlord_property_type = None landlord_built_form = None landlord_wall_construction = None landlord_roof_construction = None landlord_heating_system = None landlord_existing_pv = None landlord_property_id = "row_id" landlord_sap = None outcomes_filename = None outcomes_sheetname = None outcomes_postcode = None outcomes_houseno = None outcomes_id = None outcomes_address = None master_filepaths = [] master_id_colnames = [] master_to_asset_list_filepath = None phase = False ecosurv_landlords = None asset_list_header = 0 landlord_block_reference = None # Hyde - solar data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Hyde/Solar" data_filename = "Domna Property Analysis HYDE (Chichester Removed)V2-Completed.xlsx" sheet_name = "Electric Property Inspections" postcode_column = 'Postcode' address1_column = None # Is only patchily populated so we create it address1_method = 'house_number_extraction' fulladdress_column = "Address" address_cols_to_concat = [] missing_postcodes_method = None landlord_year_built = None landlord_os_uprn = None landlord_property_type = "Property Type" landlord_built_form = "Property Type" landlord_wall_construction = "Walls " landlord_roof_construction = "Roofs" landlord_heating_system = "Heating" landlord_existing_pv = None landlord_property_id = "Address ID" landlord_sap = "SAP" outcomes_filename = None outcomes_sheetname = None outcomes_postcode = None outcomes_houseno = None outcomes_id = None outcomes_address = None master_filepaths = [] master_id_colnames = [] master_to_asset_list_filepath = None phase = False ecosurv_landlords = None asset_list_header = 0 landlord_block_reference = None # Hyde cavity data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Hyde/Cavity" data_filename = "Domna Property Analysis HYDE (Chichester Removed)V2-Completed.xlsx" sheet_name = "Cavity Inspections" postcode_column = 'Postcode' address1_column = None # Is only patchily populated so we create it address1_method = 'house_number_extraction' fulladdress_column = "Address" address_cols_to_concat = [] missing_postcodes_method = None landlord_year_built = None landlord_os_uprn = None landlord_property_type = "Property Type" landlord_built_form = "Property Type" landlord_wall_construction = "Walls " landlord_roof_construction = "Roofs" landlord_heating_system = "Heating" landlord_existing_pv = None landlord_property_id = "Address ID" landlord_sap = "SAP" outcomes_filename = None outcomes_sheetname = None outcomes_postcode = None outcomes_houseno = None outcomes_id = None outcomes_address = None master_filepaths = [] master_id_colnames = [] master_to_asset_list_filepath = None phase = False ecosurv_landlords = None asset_list_header = 0 landlord_block_reference = None # CDS - Sept 2025 data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/CDS/September 2025 Programme" data_filename = "Founder Estates CDS.xlsx" sheet_name = "Combined List" postcode_column = 'Postcode' address1_column = None # Is only patchily populated so we create it address1_method = 'house_number_extraction' fulladdress_column = "Address" address_cols_to_concat = [] missing_postcodes_method = None landlord_year_built = None landlord_os_uprn = None landlord_property_type = "Property Type" landlord_built_form = None landlord_wall_construction = None landlord_roof_construction = None landlord_heating_system = "Heating Type" landlord_existing_pv = None landlord_property_id = "(Do Not Modify) Property" landlord_sap = None outcomes_filename = None outcomes_sheetname = None outcomes_postcode = None outcomes_houseno = None outcomes_id = None outcomes_address = None master_filepaths = [] master_id_colnames = [] master_to_asset_list_filepath = None phase = False ecosurv_landlords = None asset_list_header = 0 landlord_block_reference = None # Project from Nick data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/sfr/October 2025 AL portfolio" data_filename = "22.10 AL Portfolio.xlsx" sheet_name = "22.10 AL Portfolio" postcode_column = 'Postcode' address1_column = None address1_method = 'house_number_extraction' fulladdress_column = "Address" address_cols_to_concat = [] missing_postcodes_method = None landlord_year_built = None landlord_os_uprn = None landlord_property_type = None landlord_built_form = None landlord_wall_construction = None landlord_roof_construction = None landlord_heating_system = None landlord_existing_pv = None landlord_property_id = "Row ID" landlord_sap = None outcomes_filename = None outcomes_sheetname = None outcomes_postcode = None outcomes_houseno = None outcomes_id = None outcomes_address = None master_filepaths = [] master_id_colnames = [] master_to_asset_list_filepath = None phase = False ecosurv_landlords = None asset_list_header = 0 landlord_block_reference = None # Lambeth data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Lambeth" data_filename = "LAMBETH Asset List ( Incomplete).xlsx" sheet_name = "Green properties" postcode_column = 'SX3 Postcode' address1_column = "SX3 Short Address" address1_method = None fulladdress_column = None address_cols_to_concat = ["SX3 Short Address"] missing_postcodes_method = None landlord_year_built = None landlord_os_uprn = None landlord_property_type = "Property Type" landlord_built_form = None landlord_wall_construction = None landlord_roof_construction = None landlord_heating_system = None landlord_existing_pv = None landlord_property_id = "row_id" landlord_sap = None outcomes_filename = None outcomes_sheetname = None outcomes_postcode = None outcomes_houseno = None outcomes_id = None outcomes_address = None master_filepaths = [] master_id_colnames = [] master_to_asset_list_filepath = None phase = False ecosurv_landlords = None asset_list_header = 0 landlord_block_reference = None # # Colchester # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Colchester/Aug2025 202 inspections" # data_filename = "Colchester Borough Homes - Inspections - Additional 202 Addresses JW 280725 copy.xlsx" # sheet_name = "Extra 202 Colchester Addresses" # postcode_column = 'domna_postcode' # address1_column = "domna_address_1" # address1_method = None # fulladdress_column = "domna_full_address" # address_cols_to_concat = [] # missing_postcodes_method = None # landlord_year_built = None # landlord_os_uprn = None # landlord_property_type = "landlord_property_type" # landlord_built_form = "landlord_built_form" # landlord_wall_construction = None # landlord_roof_construction = None # landlord_heating_system = None # landlord_existing_pv = None # landlord_property_id = "landlord_property_id" # landlord_sap = None # outcomes_filename = None # outcomes_sheetname = None # outcomes_postcode = None # outcomes_houseno = None # outcomes_id = None # outcomes_address = None # master_filepaths = [] # master_id_colnames = [] # master_to_asset_list_filepath = None # phase = False # ecosurv_landlords = None # asset_list_header = 0 # landlord_block_reference = "landlord_block_reference" # # Abri # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Abri/Post Inspections" # data_filename = "Desktop ABRI data - Standardised After Programmes (2).xlsx" # sheet_name = "Reviewed List" # postcode_column = 'domna_postcode' # address1_column = "domna_address_1" # address1_method = None # fulladdress_column = "domna_full_address" # address_cols_to_concat = [] # missing_postcodes_method = None # landlord_year_built = "landlord_year_built" # landlord_os_uprn = None # landlord_property_type = "PropertyType_original_from_landlord" # landlord_built_form = "BuildForm_original_from_landlord" # landlord_wall_construction = "Wall Construction_original_from_landlord" # landlord_roof_construction = None # landlord_heating_system = "HeatingType_original_from_landlord" # landlord_existing_pv = None # landlord_property_id = "landlord_property_id" # landlord_sap = None # outcomes_filename = None # outcomes_sheetname = None # outcomes_postcode = None # outcomes_houseno = None # outcomes_id = None # outcomes_address = None # master_filepaths = [] # master_id_colnames = [] # master_to_asset_list_filepath = None # phase = False # ecosurv_landlords = None # asset_list_header = 0 # landlord_block_reference = None # Freebridge data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Freebridge/Aug2025 programme" data_filename = "Domna - FCH property data May 25 copy.xlsx" sheet_name = "EPC Data" postcode_column = 'Post Code' address1_column = "Address 1" address1_method = None fulladdress_column = None address_cols_to_concat = ["Address 1", "Address 4"] missing_postcodes_method = None landlord_year_built = "Build Date" landlord_os_uprn = None landlord_property_type = "Property Type" landlord_built_form = None landlord_wall_construction = "Walls Description" landlord_heating_system = "Heating Type" landlord_existing_pv = None landlord_property_id = "Place Ref" landlord_roof_construction = "Roof Description" landlord_sap = "Current SAP" outcomes_filename = [] outcomes_sheetname = [] outcomes_postcode = [] outcomes_houseno = [] outcomes_address = [] outcomes_id = [] master_filepaths = [] master_to_asset_list_filepath = None asset_list_header = 0 landlord_block_reference = None master_id_colnames = [] phase = False # Inspections not complete, produce a partial view ecosurv_landlords = None # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Broadlands" # data_filename = "Broadlands Asset List.xlsx" # sheet_name = "Assets" # postcode_column = 'POSTCODE' # fulladdress_column = None # address1_column = "Address1" # address1_method = None # address_cols_to_concat = ["Address1"] # missing_postcodes_method = None # landlord_year_built = "DATEBUILT" # landlord_os_uprn = None # landlord_property_type = "PropertyType" # landlord_built_form = "PropertyType" # landlord_wall_construction = None # landlord_heating_system = "Heating Fuel" # landlord_existing_pv = None # landlord_property_id = "Row ID" # outcomes_filename = [os.path.join(data_folder, "outcomes.xlsx")] # outcomes_sheetname = ["Sheet1"] # outcomes_postcode = ["Postcode"] # outcomes_houseno = ["No."] # outcomes_address = ["Address"] # outcomes_id = [None] # master_filepaths = [ # os.path.join(data_folder, "eco3 submissions.csv"), # os.path.join(data_folder, "eco4 submissions.csv"), # ] # master_to_asset_list_filepath = None # asset_list_header = 0 # landlord_block_reference = None # master_id_colnames = [None, None] # landlord_roof_construction = None # phase = False # landlord_sap = None # ecosurv_landlords = "broadland" # # # # # Community: # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Community Housing/New Programme" # data_filename = "SUB EPC C to DOMNA - 24.07.25.xlsx" # sheet_name = "Sheet1" # postcode_column = 'POSTCODE' # fulladdress_column = "ADDRESS" # address1_column = None # address1_method = "house_number_extraction" # address_cols_to_concat = [] # missing_postcodes_method = None # landlord_year_built = "BUILD DATE" # landlord_os_uprn = None # landlord_property_type = "PROPERTY TYPE" # landlord_built_form = "Archetype" # Using the inspections archetype # landlord_wall_construction = "CONSTRUCTION TYPE" # landlord_roof_construction = None # landlord_heating_system = None # landlord_existing_pv = None # landlord_property_id = "UPRN" # landlord_sap = None # outcomes_filename = [] # outcomes_sheetname = [] # outcomes_postcode = [] # outcomes_houseno = [] # outcomes_id = [] # outcomes_address = [] # master_filepaths = [] # master_to_asset_list_filepath = None # phase = False # ecosurv_landlords = None # asset_list_header = 1 # landlord_block_reference = None # master_id_colnames = [] # # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Ealing/Programme Analysis" # data_filename = "EalingProjectRebuildJW210725.xlsx" # sheet_name = "Refine & Houses" # postcode_column = 'Postcode' # fulladdress_column = "Address" # address1_column = None # address1_method = "house_number_extraction" # address_cols_to_concat = [] # missing_postcodes_method = None # landlord_year_built = None # landlord_os_uprn = None # landlord_property_type = None # Using the inspections property type # landlord_built_form = None # landlord_wall_construction = None # landlord_roof_construction = None # landlord_heating_system = None # landlord_existing_pv = None # landlord_property_id = "Property ref" # landlord_sap = None # outcomes_filename = [] # outcomes_sheetname = [] # outcomes_postcode = [] # outcomes_houseno = [] # outcomes_id = [] # outcomes_address = [] # master_filepaths = [] # master_to_asset_list_filepath = None # phase = False # ecosurv_landlords = None # asset_list_header = 0 # landlord_block_reference = "Block Reference" # master_id_colnames = [] # # # TODO: Delete me # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/NRLA/" # data_filename = "20250716 Asset List.xlsx" # sheet_name = "Sheet 1" # postcode_column = 'Postcode' # fulladdress_column = "Full Address" # address1_column = None # address1_method = "house_number_extraction" # address_cols_to_concat = [] # missing_postcodes_method = None # landlord_year_built = None # landlord_os_uprn = None # landlord_property_type = None # landlord_built_form = None # landlord_wall_construction = None # landlord_heating_system = None # landlord_existing_pv = None # landlord_property_id = "Row ID" # outcomes_filename = [] # outcomes_sheetname = [] # outcomes_postcode = [] # outcomes_houseno = [] # outcomes_address = [] # outcomes_id = [] # master_filepaths = [] # master_to_asset_list_filepath = None # asset_list_header = 0 # landlord_block_reference = None # master_id_colnames = [] # landlord_roof_construction = None # phase = False # landlord_sap = None # ecosurv_landlords = None # # # Southend # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Southend/July 2025 Programme" # data_filename = "SOUTHEND - RYAN.xlsx" # sheet_name = "July 2025 Surveys" # postcode_column = 'Postcode' # fulladdress_column = "Full postal address" # address1_column = None # address1_method = "house_number_extraction" # address_cols_to_concat = [] # missing_postcodes_method = None # landlord_year_built = "Property age" # landlord_os_uprn = None # landlord_property_type = "Property type" # landlord_built_form = "Property type" # landlord_wall_construction = None # landlord_heating_system = None # landlord_existing_pv = None # landlord_property_id = "ID" # outcomes_filename = [] # outcomes_sheetname = [] # outcomes_postcode = [] # outcomes_houseno = [] # outcomes_address = [] # outcomes_id = [] # master_filepaths = [] # master_to_asset_list_filepath = None # asset_list_header = 0 # landlord_block_reference = None # master_id_colnames = [] # landlord_roof_construction = None # phase = False # landlord_sap = None # ecosurv_landlords = None # # # For Rooftop # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Rooftop" # data_filename = "Rooftop Asset List - July 2025.xlsx" # sheet_name = "Sheet1" # postcode_column = 'post_code' # fulladdress_column = None # address1_column = "add_1" # address1_method = None # address_cols_to_concat = [ # "add_1", "add_2", "add_3", "add_4" # ] # missing_postcodes_method = None # landlord_year_built = "date_built" # landlord_os_uprn = None # landlord_property_type = "ConstructionStyle" # landlord_built_form = "ConstructionStyle" # landlord_wall_construction = None # landlord_heating_system = "Description" # landlord_existing_pv = None # landlord_property_id = "PropertyCode" # outcomes_filename = [os.path.join(data_folder, "Rooftop_Outcomes.xlsx")] # outcomes_sheetname = ["OUTCOMES"] # outcomes_postcode = ["POSTCODE"] # outcomes_houseno = ["NO"] # outcomes_address = ["ADDRESS"] # outcomes_id = [None] # master_filepaths = [os.path.join(data_folder, "Master.csv")] # master_to_asset_list_filepath = None # asset_list_header = 1 # landlord_block_reference = "bl_rec_ref" # master_id_colnames = [None] # landlord_roof_construction = None # phase = False # landlord_sap = None # ecosurv_landlords = "rooftop" # # # For Housing # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/For Housing/New Programme July 2025" # data_filename = "FOR HOUSING Asset List (Combined).xlsx" # sheet_name = "Asset List" # postcode_column = 'Postcode' # fulladdress_column = "Address" # address1_column = None # address1_method = "house_number_extraction" # address_cols_to_concat = [] # missing_postcodes_method = None # landlord_year_built = None # landlord_os_uprn = None # landlord_property_type = "Type" # landlord_built_form = "Type" # landlord_wall_construction = None # landlord_heating_system = "Heating - full" # landlord_existing_pv = None # landlord_property_id = "UPRN" # outcomes_filename = [os.path.join(data_folder, "Khalim Combined - for analysis.xlsx")] # outcomes_sheetname = ["Sheet1"] # outcomes_postcode = ["POSTCODE"] # outcomes_houseno = ["NO"] # outcomes_address = ["ADDRESS"] # outcomes_id = [None] # master_filepaths = [os.path.join(data_folder, "submissions.csv")] # master_to_asset_list_filepath = None # asset_list_header = 0 # landlord_block_reference = None # master_id_colnames = [None] # landlord_roof_construction = None # phase = False # landlord_sap = "SAP" # ecosurv_landlords = "for housing" # # # CDS # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/CDS" # data_filename = "Founder Estates - Asset List.xlsx" # sheet_name = "Combined" # postcode_column = 'Postcode' # fulladdress_column = "Address" # address1_column = None # address1_method = "house_number_extraction" # address_cols_to_concat = [] # missing_postcodes_method = None # landlord_year_built = None # landlord_os_uprn = None # landlord_property_type = None # landlord_built_form = None # landlord_wall_construction = None # landlord_heating_system = "Heating Type" # landlord_existing_pv = None # landlord_property_id = "Row ID" # outcomes_filename = [] # outcomes_sheetname = [] # outcomes_postcode = [] # outcomes_houseno = [] # outcomes_address = [] # outcomes_id = [] # master_filepaths = [os.path.join(data_folder, "submissions.csv")] # master_to_asset_list_filepath = None # asset_list_header = 0 # landlord_block_reference = None # master_id_colnames = [None] # landlord_roof_construction = None # phase = False # landlord_sap = None # ecosurv_landlords = "cds" # # # Plus Dane # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Plus Dane/New Programme July 2025/" # data_filename = "20250711 Plus Dane Asset List.xlsx" # sheet_name = "Sheet1" # postcode_column = 'Postcode' # fulladdress_column = "Address" # address1_column = None # address1_method = "house_number_extraction" # address_cols_to_concat = [] # missing_postcodes_method = None # landlord_year_built = "Property Age" # landlord_os_uprn = None # landlord_property_type = "Property Type" # landlord_built_form = "Built Form" # landlord_wall_construction = "Wall Construction" # landlord_heating_system = "Full Heating System" # landlord_existing_pv = None # landlord_property_id = "UPRN" # outcomes_filename = [ # os.path.join(data_folder, "Outcomes - Plus Dane_CWI_2024.xlsx"), # os.path.join(data_folder, "Outcomes - Plus Dane_CWI_2025.xlsx"), # os.path.join(data_folder, "Outcomes - Plus Dane_PV_2025.xlsx"), # ] # outcomes_sheetname = [ # "CWI & LI - 2024", "2025 - CWI", "PV - 2025", # ] # outcomes_postcode = ["Postcode", "Postcode", "Postcode"] # outcomes_houseno = ["No.", "No", "No"] # outcomes_address = ["Address", "Address", "Address"] # outcomes_id = ["Asset Reference", "LL UPRN", "LL UPRN"] # master_filepaths = [ # os.path.join(data_folder, "submissions/JJC-Table 1.csv"), # os.path.join(data_folder, "submissions/SCIS-Table 1.csv") # ] # master_to_asset_list_filepath = None # asset_list_header = 1 # landlord_block_reference = None # master_id_colnames = [None, None] # landlord_roof_construction = None # phase = False # landlord_sap = "SAP Rating" # ecosurv_landlords = "plus dane" # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Brentwood/July 2025 New Programme" # data_filename = "20250710 Asset List Brentwood.xlsx" # sheet_name = "Sheet1" # postcode_column = 'Postcode' # fulladdress_column = None # address1_column = "House Number" # address1_method = None # address_cols_to_concat = ["House Number", "Address Line 1", "Address Line 2", "Address Line 3"] # missing_postcodes_method = None # landlord_year_built = "Year Built" # landlord_os_uprn = None # landlord_property_type = "Dwelling" # landlord_built_form = None # landlord_wall_construction = None # landlord_heating_system = "Heating" # landlord_existing_pv = None # landlord_property_id = "UPRN" # outcomes_filename = [os.path.join(data_folder, "Brentwood - outcomes for analysis.xlsx")] # outcomes_sheetname = ["OUTCOMES"] # outcomes_postcode = ["POSTCODE"] # outcomes_houseno = [None] # outcomes_address = ["ADDRESS"] # outcomes_id = [None] # master_filepaths = [os.path.join(data_folder, "Submissions.csv")] # master_to_asset_list_filepath = None # asset_list_header = 1 # landlord_block_reference = None # master_id_colnames = [None] # landlord_roof_construction = None # phase = False # landlord_sap = None # ecosurv_landlords = "brentwood" # Brentwood # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Brentwood/July 2025 New Programme" # data_filename = "20250710 Asset List Brentwood.xlsx" # sheet_name = "Sheet1" # postcode_column = 'Postcode' # fulladdress_column = None # address1_column = "House Number" # address1_method = None # address_cols_to_concat = ["House Number", "Address Line 1", "Address Line 2", "Address Line 3"] # missing_postcodes_method = None # landlord_year_built = "Year Built" # landlord_os_uprn = None # landlord_property_type = "Dwelling" # landlord_built_form = None # landlord_wall_construction = None # landlord_heating_system = "Heating" # landlord_existing_pv = None # landlord_property_id = "UPRN" # outcomes_filename = [os.path.join(data_folder, "Brentwood - outcomes for analysis.xlsx")] # outcomes_sheetname = ["OUTCOMES"] # outcomes_postcode = ["POSTCODE"] # outcomes_houseno = [None] # outcomes_address = ["ADDRESS"] # outcomes_id = [None] # master_filepaths = [os.path.join(data_folder, "Submissions.csv")] # master_to_asset_list_filepath = None # asset_list_header = 1 # landlord_block_reference = None # master_id_colnames = [None] # landlord_roof_construction = None # phase = False # landlord_sap = None # ecosurv_landlords = "brentwood" # # # Eastlight # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Eastlight/New Programme" # data_filename = "INSPECTIONS MASTER Non Tech.xlsx" # sheet_name = "EASTLIGHT CW" # postcode_column = 'Postcode' # fulladdress_column = None # address1_column = "HouseName" # address1_method = None # address_cols_to_concat = ["HouseName", "Block", "Address1", "Address2", "Address3"] # missing_postcodes_method = None # landlord_year_built = "Built In Year" # landlord_os_uprn = None # landlord_property_type = "AssetType" # landlord_built_form = "Archetype" # Using inspections archetype # landlord_wall_construction = None # landlord_roof_construction = None # landlord_heating_system = "Main Heating Source" # landlord_existing_pv = None # landlord_property_id = "UPRN" # landlord_sap = "SAP Score" # outcomes_filename = [ # os.path.join(data_folder, "Eastlight_CWI_JJC_2025.xlsx"), # os.path.join(data_folder, "Eastlight_CWI_SCIS_2025.xlsx"), # ] # outcomes_sheetname = ["Outcomes", "Feedback"] # outcomes_postcode = ["Postcode", "Postcode"] # outcomes_houseno = ["No", "No."] # outcomes_id = [None, None] # outcomes_address = ["Address", "Address"] # master_filepaths = [ # os.path.join(data_folder, "ECO 3-Table 1.csv"), # os.path.join(data_folder, "ECO 4-Table 1.csv"), # ] # master_to_asset_list_filepath = None # phase = False # ecosurv_landlords = "eastlight" # asset_list_header = 0 # landlord_block_reference = None # master_id_colnames = [None, None] # landlord_sap = None # Pickering and Ferens # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Pickering & Ferens" # data_filename = "SAP 9 vs SAP 10 Sava Intelligent Energy - Property List (190625).xlsx" # sheet_name = "Sava Intelligent Energy - Prope" # postcode_column = 'Postcode' # fulladdress_column = 'Address' # address1_column = None # address1_method = "house_number_extraction" # address_cols_to_concat = [] # missing_postcodes_method = None # landlord_year_built = None # landlord_os_uprn = None # landlord_property_type = "Property Type" # Using the inspections property type # landlord_built_form = "Archetype 2" # landlord_wall_construction = None # landlord_roof_construction = None # landlord_heating_system = None # landlord_existing_pv = None # landlord_property_id = "UPRN" # landlord_sap = "SAP Rating (RdSAP 10)" # outcomes_filename = [] # outcomes_sheetname = [] # outcomes_postcode = [] # outcomes_houseno = [] # outcomes_id = [] # outcomes_address = [] # master_filepaths = [ # os.path.join(data_folder, "PICKERING & FERENS ROLLING MASTER SHEET HEDGEFUND - 26.7.24 - K.csv"), # os.path.join(data_folder, "PICKERING & FERENS NEW MASTER GBIS UPDATED 21.8.24 - M - For Analysis.csv"), # ] # master_to_asset_list_filepath = None # phase = False # ecosurv_landlords = "pickering" # asset_list_header = 0 # landlord_block_reference = None # master_id_colnames = [None, None] # Colchester # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Colchester" # data_filename = "Warmfront data- Colchester Borough Homes (Complete).xlsx" # sheet_name = "Sheet1" # postcode_column = 'Full Address.1' # fulladdress_column = "Full Address" # address1_column = None # address1_method = "first_word" # address_cols_to_concat = [] # missing_postcodes_method = None # landlord_year_built = "Build Date" # landlord_os_uprn = None # landlord_property_type = "Property Type" # landlord_wall_construction = "Wallinsul" # landlord_heating_system = "HeatSorc" # landlord_existing_pv = None # landlord_property_id = "Property Reference" # outcomes_filename = [] # outcomes_sheetname = [] # outcomes_postcode = [] # outcomes_houseno = [] # outcomes_id = [] # outcomes_address = [] # master_filepaths = [] # master_to_asset_list_filepath = None # asset_list_header = 0 # landlord_built_form = None # landlord_roof_construction = None # landlord_sap = None # landlord_block_reference = None # phase = False # ecosurv_landlords = None # master_id_colnames = [] # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Ealing/Hubspot" # data_filename = "EalingFlats.xlsx" # sheet_name = "Sheet1" # postcode_column = 'Postcode' # fulladdress_column = "Address" # address1_column = None # address1_method = "house_number_extraction" # address_cols_to_concat = [] # missing_postcodes_method = None # landlord_year_built = None # landlord_os_uprn = None # landlord_property_type = None # Using the inspections property type # landlord_built_form = None # landlord_wall_construction = None # landlord_roof_construction = None # landlord_heating_system = None # landlord_existing_pv = None # landlord_property_id = "Property ref" # landlord_sap = None # outcomes_filename = [] # outcomes_sheetname = [] # outcomes_postcode = [] # outcomes_houseno = [] # outcomes_id = [] # outcomes_address = [] # master_filepaths = [] # master_to_asset_list_filepath = None # phase = False # ecosurv_landlords = None # asset_list_header = 0 # landlord_block_reference = "Block Ref" # master_id_colnames = [] # Southern - Jan list # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Southern/New Programme/Jan 2025 List" # data_filename = "SOUTHERN ASSETS January 2025 Additions Query 21.03.2025.xlsx" # sheet_name = "Jan 2025 additions" # postcode_column = 'Post Code' # fulladdress_column = None # address1_column = "NO." # address1_method = None # address_cols_to_concat = ["NO.", "Street / Block Name", "Town/Area"] # missing_postcodes_method = None # landlord_year_built = None # landlord_os_uprn = None # landlord_property_type = None # Using the inspections property type # landlord_built_form = None # landlord_wall_construction = None # landlord_roof_construction = None # landlord_heating_system = None # landlord_existing_pv = None # landlord_property_id = "SH Property Reference" # landlord_sap = None # outcomes_filename = [ # os.path.join(data_folder, "RT - Southern Housing Group - JJC.xlsx"), # os.path.join(data_folder, "RT - SOUTHERN OUTCOMES - SCIS Merged.xlsx"), # ] # outcomes_sheetname = ["Feedback", "Collated"] # outcomes_postcode = ["Poscode", "Postcode"] # outcomes_houseno = ["No.", "No"] # outcomes_id = ["UPRNs", None] # outcomes_address = ["Address", "Address"] # master_filepaths = [ # os.path.join(data_folder, "southern_submissions/CAVITY'S - DECEMBER 2018-Table 1.csv"), # os.path.join(data_folder, "southern_submissions/CAVITY'S 2019-Table 1.csv"), # os.path.join(data_folder, "southern_submissions/CAVITY'S ECO4-Table 1.csv"), # os.path.join(data_folder, "southern_submissions/LOFT'S-Table 1.csv"), # ] # master_to_asset_list_filepath = None # phase = False # ecosurv_landlords = "southern" # asset_list_header = 0 # landlord_block_reference = None # master_id_colnames = [None, None, None, None] # NCHA # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/NCHA" # data_filename = "Energy Information MASTER June 2025.xlsx" # sheet_name = "Data" # postcode_column = 'Postcode' # fulladdress_column = "Address" # address1_column = None # address1_method = "house_number_extraction" # address_cols_to_concat = [] # missing_postcodes_method = None # landlord_year_built = "Build Date (HAR10)" # landlord_os_uprn = None # landlord_property_type = "Property Type (HAR10)" # landlord_built_form = "Build Form (EPC)" # landlord_wall_construction = "Wall Description" # landlord_roof_construction = None # landlord_heating_system = "HEAT Code" # landlord_existing_pv = None # landlord_property_id = "Place ref" # landlord_sap = "EPC SAP" # outcomes_filename = None # outcomes_sheetname = None # outcomes_postcode = None # outcomes_houseno = None # outcomes_id = None # outcomes_address = None # master_filepaths = [] # master_to_asset_list_filepath = None # phase = False # ecosurv_landlords = None # asset_list_header = 0 # landlord_block_reference = None # master_id_colnames = [] # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Calico" # data_filename = "07.04 CALICO - Final List.xlsx" # asset_list_header = 2 # sheet_name = "Final List" # postcode_column = 'Postcode' # fulladdress_column = None # address1_column = "Property Number / Name" # address1_method = None # address_cols_to_concat = [ # "Property Number / Name", # "Street", # "Town" # ] # missing_postcodes_method = None # landlord_year_built = "NROSH Estimated Build Date" # landlord_os_uprn = None # landlord_property_type = "Asset Type" # landlord_built_form = None # landlord_wall_construction = "Wall Type" # landlord_heating_system = "Boiler Type" # landlord_existing_pv = None # landlord_property_id = "Asset Reference" # outcomes_filename = [] # outcomes_sheetname = [] # outcomes_postcode = [] # outcomes_houseno = [] # outcomes_id = [] # outcomes_address = [] # master_filepaths = [] # master_id_colnames = [] # master_to_asset_list_filepath = None # landlord_roof_construction = None # landlord_block_reference = None # landlord_sap = "Current Efficiency Rating - Score" # phase = None # ecosurv_landlords = None # data_folder = ( # "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Livewest/Programme Update - March 2025/2018 Asset # List" # ) # data_filename = "LIVEWEST STOCK - 23rd October 2018.xlsx" # sheet_name = "Assets" # postcode_column = 'Postcode' # fulladdress_column = "Address" # address1_column = None # address1_method = "house_number_extraction" # address_cols_to_concat = [] # missing_postcodes_method = None # landlord_year_built = "Build Year" # landlord_os_uprn = None # landlord_property_type = "Property Archetype" # landlord_built_form = None # landlord_wall_construction = None # landlord_heating_system = "Heating Fuel Type" # landlord_existing_pv = None # landlord_property_id = "Uprn - DO NOT DELETE" # outcomes_filename = [ # os.path.join(data_folder, "RT - LiveWest.xlsx") # ] # outcomes_sheetname = ["Feedback"] # outcomes_postcode = ["Poscode"] # outcomes_houseno = ["No."] # outcomes_id = ["UPRN"] # outcomes_address = ["Address"] # master_filepaths = [ # "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Livewest/Programme Update - March 2025/Rolling # Master " # "- redacted for analysis/CAVITY-Table 1.csv" # ] # master_id_colnames = [None] # master_to_asset_list_filepath = None # landlord_roof_construction = None # landlord_block_reference = None # landlord_sap = None # phase = None # ecosurv_landlords = "livewest|live west" # data_folder = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Livewest/Programme Update - March " # "2025/Livewest Asset List (Original) - csv") # data_filename = "Report-Table 1.csv" # sheet_name = None # postcode_column = 'Postcode' # fulladdress_column = "T1_Address" # address1_column = None # address1_method = "house_number_extraction" # address_cols_to_concat = [] # missing_postcodes_method = None # landlord_year_built = "Build Yr" # landlord_os_uprn = None # landlord_property_type = "T1_AssetType" # landlord_built_form = "T1_AssetType" # landlord_wall_construction = "Wall Type Cavity" # landlord_heating_system = "Heating Fuel" # landlord_existing_pv = None # landlord_property_id = "T1_UPRN" # outcomes_filename = [ # os.path.join(data_folder, "RT - LiveWest.xlsx") # ] # outcomes_address = ["Address"] # outcomes_sheetname = ["Feedback"] # outcomes_postcode = ["Poscode"] # outcomes_houseno = ["No."] # outcomes_id = ["UPRN"] # master_filepaths = [ # "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Livewest/Programme Update - March 2025/Rolling # Master " # "- redacted for analysis/CAVITY-Table 1.csv" # ] # master_id_colnames = [None] # master_to_asset_list_filepath = None # landlord_roof_construction = None # landlord_block_reference = None # landlord_sap = None # phase = None # ecosurv_landlords = "livewest|live west" # Stori # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Storicymru" # data_filename = "Asset list - for analysis.xlsx" # sheet_name = "SAP and Costs Calculations" # postcode_column = 'Postcode' # fulladdress_column = "Address1" # address1_column = None # address1_method = "house_number_extraction" # address_cols_to_concat = [] # missing_postcodes_method = None # landlord_year_built = "Age" # landlord_os_uprn = None # landlord_property_type = "TYPE" # landlord_built_form = "AGE / DETACHMENT" # landlord_wall_construction = "WALL" # landlord_roof_construction = "LOFT INSULATION" # landlord_heating_system = "BOILER" # landlord_existing_pv = "SOLAR PV" # landlord_property_id = "UPRN" # landlord_sap = "Current SAP Rating" # landlord_block_reference = None # outcomes_filename = [] # outcomes_sheetname = [] # outcomes_postcode = [] # outcomes_houseno = [] # outcomes_id = [] # outcomes_address = [] # master_filepaths = [] # master_to_asset_list_filepath = None # master_id_colnames = [] # phase = False # ecosurv_landlords = None # Thrive - reconciliation # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation" # data_filename = "Thrive Asset List - Complete - Updated May 2025.xlsx" # sheet_name = "Sheet1" # postcode_column = 'postcode' # fulladdress_column = "full_address" # address1_column = "address_line_1" # address1_method = None # address_cols_to_concat = [] # missing_postcodes_method = None # landlord_year_built = "age_band_calculated" # landlord_os_uprn = None # landlord_property_type = "property_type" # landlord_built_form = "build_form" # landlord_wall_construction = None # landlord_roof_construction = "assumed_loft_insulation_thickness_updated" # landlord_heating_system = "heating_type_updated" # landlord_existing_pv = None # landlord_property_id = "thrive_property_id" # landlord_sap = "sap_rating_updated" # landlord_block_reference = "block_reference" # outcomes_filename = [ # os.path.join(data_folder, "Thrive - Outcomes - April 24-March25 - Corrected.xlsx") # ] # outcomes_sheetname = ["Sheet1"] # outcomes_postcode = ["postcode"] # outcomes_houseno = ["No."] # outcomes_id = ["thrive_property_id"] # outcomes_address = ["address"] # master_filepaths = [ # os.path.join(data_folder, "Thrive Submissions ECO3 - with IDS.csv"), # os.path.join(data_folder, "Thrive Submissions ECO4 - with IDS.csv"), # ] # master_to_asset_list_filepath = None # master_id_colnames = ["thrive_property_id", "thrive_property_id"] # phase = False # ecosurv_landlords = "thrive" # Southern Midlands # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Southern/Midlands Properties - Apr 2025" # data_filename = "Southern Housing Midlands Property List - combined.xlsx" # sheet_name = "Sheet 1" # postcode_column = 'Post Code' # fulladdress_column = "Address" # address1_column = None # address1_method = "house_number_extraction" # address_cols_to_concat = [] # missing_postcodes_method = None # landlord_year_built = "Age_1" # landlord_os_uprn = None # landlord_property_type = "Prop_Type" # landlord_built_form = "Prop_Type" # landlord_wall_construction = "Walls_P" # landlord_heating_system = "Heating System" # landlord_existing_pv = None # landlord_property_id = "AssetID" # outcomes_filename = None # outcomes_sheetname = None # outcomes_postcode = None # outcomes_houseno = None # outcomes_id = None # outcomes_address = None # master_filepaths = [] # master_to_asset_list_filepath = None # Maps addresses to uprn in problematic cases manual_uprn_map = {} asset_list = AssetList( local_filepath=os.path.join(data_folder, data_filename), header=asset_list_header, sheet_name=sheet_name, address1_colname=address1_column, postcode_colname=postcode_column, landlord_property_id=landlord_property_id, full_address_colname=fulladdress_column, full_address_cols_to_concat=address_cols_to_concat, missing_postcodes_method=missing_postcodes_method, address1_extraction_method=address1_method, landlord_year_built=landlord_year_built, landlord_uprn=landlord_os_uprn, landlord_property_type=landlord_property_type, landlord_built_form=landlord_built_form, landlord_wall_construction=landlord_wall_construction, landlord_roof_construction=landlord_roof_construction, landlord_heating_system=landlord_heating_system, landlord_existing_pv=landlord_existing_pv, landlord_sap=landlord_sap, landlord_block_reference=landlord_block_reference, phase=phase ) asset_list.init_standardise() # We produce the new maps, which can be saved for future useage new_property_type_map = { k: v for k, v in ( asset_list.variable_mappings[asset_list.landlord_property_type] if asset_list.landlord_property_type else {} ).items() if k not in PROPERTY_MAPPING } new_built_form_map = { k: v for k, v in ( asset_list.variable_mappings[asset_list.landlord_built_form] if asset_list.landlord_built_form else {} ).items() if k not in BUILT_FORM_MAPPINGS } new_wall_map = { k: v for k, v in ( asset_list.variable_mappings[asset_list.landlord_wall_construction] if asset_list.landlord_wall_construction else {} ).items() if k not in WALL_CONSTRUCTION_MAPPINGS } new_heating_map = { k: v for k, v in ( asset_list.variable_mappings[asset_list.landlord_heating_system] if asset_list.landlord_heating_system else {} ).items() if k not in HEATING_MAPPINGS } new_existing_pv_map = { k: v for k, v in ( asset_list.variable_mappings[asset_list.landlord_existing_pv] if asset_list.landlord_existing_pv else {} ).items() if k not in EXISTING_PV_MAPPINGS } new_roof_construction_map = { k: v for k, v in ( asset_list.variable_mappings[asset_list.landlord_roof_construction] if asset_list.landlord_roof_construction else {} ).items() if k not in ROOF_CONSTRUCTION_MAPPINGS } asset_list.apply_standardiation() # We now flag properties that have been treated under existing programmes asset_list.flag_outcomes( outcomes_filepaths=outcomes_filename, outcomes_sheetname=outcomes_sheetname, outcomes_address=outcomes_address, outcomes_postcode=outcomes_postcode, outcomes_houseno=outcomes_houseno, outcomes_id=outcomes_id ) asset_list.flag_survey_master( master_filepaths=master_filepaths, master_to_asset_list_filepath=master_to_asset_list_filepath, master_id_colnames=master_id_colnames, ) asset_list.flag_ecosurv(ecosurv_landlords) ### We retrieve the EPC data # We chunk up this data into 5000 rows at a time # Create the chunks directory epc_api_only = False force_retrieve_data = False skip = None # Used to skip already completed chunks chunk_size = 2000 filename = "Chunk {i}.csv" download_folder = os.path.join(data_folder, "Chunks") if not os.path.exists(download_folder): os.makedirs(download_folder) chunk_indexes = list(range(0, len(asset_list.standardised_asset_list), chunk_size)) downloaded_files = {filename.format(i=i) for i in chunk_indexes} # We check if we have files associated to these files already and if we do, and we do not want to force the # fetching of the data, we skip folder_contents = os.listdir(download_folder) if all(x in folder_contents for x in downloaded_files): skip = max(chunk_indexes) if any(x in folder_contents for x in downloaded_files): skip = max([i for i in chunk_indexes if filename.format(i=i) in folder_contents]) for i in range(0, len(asset_list.standardised_asset_list), chunk_size): print(f"Processing chunk {i} to {i + chunk_size}") if skip is not None and not force_retrieve_data: if i <= skip: continue chunk = asset_list.standardised_asset_list[i:i + chunk_size] epc_data_chunk, errors_chunk, no_epc_chunk = get_data( df=chunk, row_id_name=asset_list.DOMNA_PROPERTY_ID, uprn_column=AssetList.STANDARD_UPRN, fulladdress_column=AssetList.STANDARD_FULL_ADDRESS, address1_column=AssetList.STANDARD_ADDRESS_1, postcode_column=AssetList.STANDARD_POSTCODE, property_type_column=AssetList.STANDARD_PROPERTY_TYPE, built_form_column=AssetList.STANDARD_BUILT_FORM, manual_uprn_map=manual_uprn_map, epc_api_only=epc_api_only, epc_auth_token=EPC_AUTH_TOKEN ) # We now retrieve any failed properties chunk_failed = chunk[chunk[asset_list.DOMNA_PROPERTY_ID].isin(errors_chunk)] epc_data_failed, _, _ = get_data( df=chunk_failed, row_id_name=asset_list.DOMNA_PROPERTY_ID, uprn_column=AssetList.STANDARD_UPRN, fulladdress_column=AssetList.STANDARD_FULL_ADDRESS, address1_column=AssetList.STANDARD_ADDRESS_1, postcode_column=AssetList.STANDARD_POSTCODE, property_type_column=AssetList.STANDARD_PROPERTY_TYPE, built_form_column=AssetList.STANDARD_BUILT_FORM, manual_uprn_map=manual_uprn_map, epc_api_only=epc_api_only, epc_auth_token=EPC_AUTH_TOKEN ) epc_data_chunk.extend(epc_data_failed) # Append the failed data to the main data # Store the chunk locally as a csv pd.DataFrame(epc_data_chunk).to_csv(os.path.join(data_folder, f"Chunks/Chunk {i}.csv"), index=False) # Store the errors and no-data locally with open(os.path.join(data_folder, f"Chunks/Chunk {i} errors.json"), "w") as f: json.dump(errors_chunk, f) with open(os.path.join(data_folder, f"Chunks/Chunk {i} nodata.csv"), "w") as f: json.dump(no_epc_chunk, f) # We read in and concatenate the created created chunks # List the contents epc_data = [] for file in downloaded_files: csv_data = pd.read_csv(os.path.join(download_folder, file)) # We need to convert the recommendations back to a list csv_data["recommendations"] = csv_data["recommendations"].apply(eval) # We don't have this if we didn't run the pulling from find my epc if "find_my_epc_data" in csv_data.columns: csv_data["find_my_epc_data"] = csv_data["find_my_epc_data"].apply(eval) epc_data.append(csv_data) epc_df = pd.concat(epc_data) if "estimated" not in epc_df.columns: epc_df["estimated"] = False epc_df["estimated"] = epc_df["estimated"].fillna(False) # We expand out the recommendations recommendations_df = epc_df[[asset_list.DOMNA_PROPERTY_ID, "recommendations"]] unique_recommendations = set() for _, row in recommendations_df.iterrows(): unique_recommendations.update([rec["improvement-summary-text"] for rec in row["recommendations"]]) columns = [asset_list.DOMNA_PROPERTY_ID] + list(unique_recommendations) transformed_data = [] for _, row in recommendations_df.iterrows(): # Initialize a dictionary for this row with False for all recommendations row_data = {col: False for col in columns} row_data[asset_list.DOMNA_PROPERTY_ID] = row[asset_list.DOMNA_PROPERTY_ID] # Set True for each recommendation present in this row for rec in row["recommendations"]: recommendation_text = rec["improvement-summary-text"] row_data[recommendation_text] = True # Append the row data to transformed_data transformed_data.append(row_data) transformed_df = pd.DataFrame(transformed_data) for col in [ "Floor insulation (solid floor)", "Floor insulation", "Floor insulation (suspended floor)" ]: if col not in transformed_df.columns: transformed_df[col] = False transformed_df = transformed_df[ [ asset_list.DOMNA_PROPERTY_ID, "Floor insulation (solid floor)", "Floor insulation", "Floor insulation (suspended floor)" ] ] transformed_df["epc_has_floor_recommendation"] = ( transformed_df["Floor insulation (solid floor)"] | transformed_df["Floor insulation"] | transformed_df["Floor insulation (suspended floor)"] ) # Get the find my epc data if "find_my_epc_data" not in epc_df.columns: epc_df["find_my_epc_data"] = None find_my_epc_data = [] for _, x in epc_df.iterrows(): if x["find_my_epc_data"]: find_my_epc_data.append( { asset_list.DOMNA_PROPERTY_ID: x[asset_list.DOMNA_PROPERTY_ID], **x["find_my_epc_data"] } ) else: find_my_epc_data.append( { asset_list.DOMNA_PROPERTY_ID: x[asset_list.DOMNA_PROPERTY_ID] } ) find_my_epc_data = pd.DataFrame(find_my_epc_data) find_my_epc_data = find_my_epc_data.merge( transformed_df[[asset_list.DOMNA_PROPERTY_ID, "epc_has_floor_recommendation"]], how="left", on=asset_list.DOMNA_PROPERTY_ID ) # We check if we get the solar pv column: if "Solar photovoltaics" not in find_my_epc_data.columns: find_my_epc_data["Solar photovoltaics"] = False # Retrieve just the data we need epc_df = epc_df[ [asset_list.DOMNA_PROPERTY_ID] + list(asset_list.EPC_API_DATA_NAMES.keys()) ].rename( columns=asset_list.EPC_API_DATA_NAMES ) # Look for columns not in the find my EPC data, which will have happened if we didn't # retrieve it in the first place missed_find_epc_cols = [c for c in list(asset_list.FIND_EPC_DATA_NAMES.keys()) if c not in find_my_epc_data.columns] if missed_find_epc_cols: for c in missed_find_epc_cols: find_my_epc_data[c] = None epc_df = epc_df.merge( find_my_epc_data[ [asset_list.DOMNA_PROPERTY_ID, "epc_has_floor_recommendation"] + list(asset_list.FIND_EPC_DATA_NAMES.keys()) ] .rename(columns=asset_list.FIND_EPC_DATA_NAMES), how="left", on=asset_list.DOMNA_PROPERTY_ID ) asset_list.merge_data(epc_df) asset_list.extract_attributes() asset_list.identify_worktypes() # We now flag the status of the property asset_list.label_property_status() asset_list.analyse_geographies() asset_list.get_work_figures() # Store as an excel filename = os.path.join(data_folder, ".".join(data_filename.split(".")[:-1])) + " - Standardised.xlsx" # Store the data in two tabs. One for the asset list with the EPC data and the second with the flat data # Determine inspections priority # solar_jobs = asset_list.standardised_asset_list[~pd.isnull(asset_list.standardised_asset_list["solar_reason"])][ # "domna_postcode"].unique() # asset_list.standardised_asset_list["in_solar_area"] = asset_list.standardised_asset_list["domna_postcode"].isin( # solar_jobs # ) # # Same for cav # cavity_jobs = asset_list.standardised_asset_list[ # ~pd.isnull(asset_list.standardised_asset_list["cavity_reason"]) # ]["domna_postcode"].unique() # asset_list.standardised_asset_list["in_cavity_area"] = asset_list.standardised_asset_list["domna_postcode"].isin( # cavity_jobs # ) # # We prioritise properties that are in solar areas and cavity areas # import numpy as np # asset_list.standardised_asset_list["inspection_priority"] = np.where( # asset_list.standardised_asset_list["in_solar_area"] | asset_list.standardised_asset_list["in_cavity_area"], # 1, 2 # ) with pd.ExcelWriter(filename) as writer: asset_list.standardised_asset_list.to_excel(writer, sheet_name="Standardised Asset List", index=False) if asset_list.block_analysis_df is not None: asset_list.block_analysis_df.to_excel(writer, sheet_name="Block Analysis", index=False) # If we have outcomes, we add a tab with the outcomes if not asset_list.outcomes_for_output.empty: asset_list.outcomes_for_output.to_excel(writer, sheet_name="Outcomes", index=False) if not asset_list.unmatched_submissions.empty: asset_list.unmatched_submissions.to_excel(writer, sheet_name="Unmatched Submissions", index=False) if not asset_list.outcomes_no_match.empty: asset_list.outcomes_no_match.to_excel(writer, sheet_name="Unmatched Outcomes", index=False) if not asset_list.ecosurv_no_match.empty: asset_list.ecosurv_no_match.to_excel(writer, sheet_name="Unmatched Ecosurv", index=False) if not asset_list.geographical_areas.empty: asset_list.geographical_areas.to_excel(writer, sheet_name="Geographical Areas", index=False)