import json
import os
from pprint import pprint

import msgpack
import pandas as pd
from dotenv import load_dotenv

from asset_list.AssetList import AssetList
from asset_list.mappings.built_form import BUILT_FORM_MAPPINGS
from asset_list.mappings.exising_pv import EXISTING_PV_MAPPINGS
from asset_list.mappings.heating_systems import HEATING_MAPPINGS
from asset_list.mappings.property_type import PROPERTY_MAPPING
from asset_list.mappings.roof import ROOF_CONSTRUCTION_MAPPINGS
from asset_list.mappings.walls import WALL_CONSTRUCTION_MAPPINGS
from asset_list.utils import get_data
from backend.SearchEpc import SearchEpc
from utils.s3 import read_from_s3

load_dotenv(dotenv_path="backend/.env")

EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")

# EPC recommendation texts that indicate a floor insulation recommendation
_FLOOR_RECOMMENDATION_NAMES = [
    "Floor insulation (solid floor)",
    "Floor insulation",
    "Floor insulation (suspended floor)",
]


def extract_address1(asset_list, full_address_col, postcode_col, method="first_two_words"):
    """
    Add an "address1_extracted" column to asset_list, derived from the full address column.

    :param asset_list: DataFrame holding the asset list. It is modified in place and also returned.
    :param full_address_col: Name of the column holding the full address.
    :param postcode_col: Name of the postcode column (only read by "house_number_extraction").
    :param method: One of "first_two_words", "first_word" or "house_number_extraction".
    :return: The same DataFrame with the "address1_extracted" column added.
    :raises ValueError: If method is not one of the recognised options.
    """
    if method == "first_two_words":
        asset_list["address1_extracted"] = asset_list[full_address_col].str.split(" ").str[:2].str.join(" ")
        return asset_list

    if method == "first_word":
        asset_list["address1_extracted"] = asset_list[full_address_col].str.split(" ").str[0]
        return asset_list

    if method == "house_number_extraction":
        asset_list["address1_extracted"] = asset_list.apply(
            lambda x: SearchEpc.get_house_number(address=x[full_address_col], postcode=x[postcode_col]),
            axis=1
        )
        return asset_list

    raise ValueError(f"Method {method} not recognized")


def _new_mappings(asset_list, landlord_column, known_mapping):
    """Return the entries of the column's variable mapping whose keys are not in known_mapping."""
    if not landlord_column:
        return {}
    return {
        k: v for k, v in asset_list.variable_mappings[landlord_column].items()
        if k not in known_mapping
    }


def _completed_chunk_indexes(chunk_indexes, folder_contents, filename_template):
    """Return the set of chunk start indexes whose chunk file is listed in folder_contents."""
    existing_files = set(folder_contents)
    return {i for i in chunk_indexes if filename_template.format(i=i) in existing_files}


def _floor_recommendation_flags(epc_df, id_column):
    """
    Build one row per EPC record flagging which floor insulation recommendations it carries.

    The three floor recommendation columns are always present, even when no record has that
    recommendation, so the combined "epc_has_floor_recommendation" flag can always be computed.
    """
    rows = []
    for property_id, recommendations in zip(epc_df[id_column], epc_df["recommendations"]):
        present = {rec["improvement-summary-text"] for rec in recommendations}
        row = {id_column: property_id}
        for name in _FLOOR_RECOMMENDATION_NAMES:
            row[name] = name in present
        rows.append(row)

    flags = pd.DataFrame(rows, columns=[id_column] + _FLOOR_RECOMMENDATION_NAMES)
    flags["epc_has_floor_recommendation"] = flags[_FLOOR_RECOMMENDATION_NAMES].any(axis=1)
    return flags


def _compare_with_manual_sheet(asset_list, workbook_path, sheet_name, reason_columns):
    """Left-join a manually prepared sheet onto the automated reasons and print the cavity reason counts."""
    manual = pd.read_excel(workbook_path, sheet_name=sheet_name)
    manual = manual.merge(
        asset_list.standardised_asset_list[[asset_list.STANDARD_LANDLORD_PROPERTY_ID] + reason_columns],
        how="left",
        left_on=asset_list.landlord_property_id,
        right_on=asset_list.STANDARD_LANDLORD_PROPERTY_ID
    )
    manual["cavity_reason"] = manual["cavity_reason"].fillna("Not identified")
    print(manual["cavity_reason"].value_counts())
    return manual


def app():
    """
    Pull EPC data for a landlord asset list, identify work types and write the outputs.

    The function standardises the configured asset list, flags properties already covered by
    outcomes / survey masters / Ecosurv, retrieves EPC data in chunks (cached as files in a
    "Chunks" folder next to the asset list), merges it back onto the asset list and writes a
    standardised Excel workbook plus a Hubspot export csv.

    Originally written to pull EPC data for some properties owned by Livewest.

    Data request contents:
        Date of last EPC
        Reason for EPC
        SAP score on register
        Property Type
        Property Area
        Property Age
        Any Dimensions (HLP,PW,RH)
        Property Wall Construction
        Heating Type
        Secondary Heating
        Loft Insulation Depth

    Additional if possible:
        Heat loss calculations
        EPC recommendations
        Property UPRN
    """

    # TODO:
    # For cavity work:
    # - Flag any entries that have a different wall type between non-intrusive data against EPC
    # - Worth double checking entries that have a difference in wall construction
    # - Look at anything that is flagged as an empty cavity but the EPC data says it’s a filled cavity
    # - Look at the current EPC scores - Anything that is C75 or above, especially if it’s assumed no insulation
    # - By postcode, we can try and deduce if all of the addresses are a flats and then estimate if 50% of the flats
    #   are less than C75
    # - Flag anything pre SAP2012
    # - Flag anything over 5 years old
    # - Look at year built vs age band
    #
    # For Solar:
    # - Discount any that have solar PV - based on non-intrusives and from the inspections team
    # - In the heating, discount anything that isn’t ashp, ghsp, hhrs, electric storage - possibly homes with
    #   electric room heaters but it might need to be an EPC E
    # - Fabric - check the floor, wall and roof:
    #   - Filled or empty cavity is good
    #   - Insulated solid/timber/system built is good
    #   - SCIS/CEG needs solid floors
    #   - JJC don’t care
    #   - Anything with a loft 200 or below
    # - Anything C75 and above won’t qualify
    # - Insulated loft = 200mm
    # - We want: fully insulated property (all wall types), EPC D or below (floors should be solid)
    # - Or the insulation required is loft/cavity (floors should be solid)

    # NOTE(review): the landlord configurations below are plain sequential assignments, so only
    # the LAST uncommented block is effective. Any variable that the last block does not assign
    # keeps its value from an earlier block. With "Plus Dane" last, that currently means:
    # landlord_built_form and outcomes_id come from Westward; landlord_roof_construction,
    # landlord_sap, phase and outcomes_address were last set by Torus (or a later block for
    # outcomes_address); ecosurv_landlords comes from Bromford. Confirm this is intended.

    # Bromford
    data_folder = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme "
                   "Rebuild/Prepared data/")
    data_filename = "asset_list.xlsx"
    sheet_name = "Sheet1"
    postcode_column = 'PostCode'
    fulladdress_column = "FullAddress"
    address1_column = None
    address1_method = "house_number_extraction"
    address_cols_to_concat = []
    missing_postcodes_method = None
    landlord_year_built = "ConYear"
    landlord_os_uprn = None
    landlord_property_type = "AssetTypeDesc"
    landlord_built_form = "PropTypeDesc"
    landlord_wall_construction = "Construction type"
    landlord_roof_construction = None
    landlord_heating_system = "Heating Type"
    landlord_existing_pv = None
    landlord_property_id = "Asset"
    landlord_sap = None

    outcomes_filename = "outcomes.xlsx"
    outcomes_sheetname = "Sheet1"
    outcomes_postcode = "Postcode"
    outcomes_houseno = "No"
    outcomes_id = None
    outcomes_address = "Address"

    master_filepaths = [
        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/Prepared data/ECO "
        "3 submissions.csv",
        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/Prepared data/ECO "
        "4 submissions.csv",
    ]
    master_to_asset_list_filepath = None
    phase = False
    ecosurv_landlords = "paul butler|bromford"

    # Torus
    data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Torus/Phase 1"
    data_filename = "Torus Property Asset List - Phase 1.xlsx"
    sheet_name = "TORUS"
    postcode_column = 'Postcode'
    fulladdress_column = None
    address1_column = "AddressLine1"
    address1_method = None
    address_cols_to_concat = ["AddressLine1", "AddressLine2", "AddressLine3"]
    missing_postcodes_method = None
    landlord_year_built = "Property Age"
    landlord_os_uprn = "NatUPRN"
    landlord_property_type = "Property Type"
    landlord_built_form = "Built Form"
    landlord_wall_construction = "Wall Construction"
    landlord_roof_construction = "Roof Construction"
    landlord_heating_system = "Space Heating Source"
    landlord_existing_pv = "Low Carbon Technology (Solar PV)"
    landlord_property_id = "UPRN"
    landlord_sap = "SAP Score"

    outcomes_filename = None
    outcomes_sheetname = None
    outcomes_postcode = None
    outcomes_houseno = None
    outcomes_id = None
    outcomes_address = None

    master_filepaths = []
    master_to_asset_list_filepath = None
    phase = True

    # Ealing - houses
    data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Ealing"
    data_filename = "Ealing_rechecked_cleaned_05042025.csv"
    sheet_name = None
    postcode_column = 'Postcode'
    fulladdress_column = "Address"
    address1_column = None
    address1_method = "house_number_extraction"
    address_cols_to_concat = []
    missing_postcodes_method = None
    landlord_year_built = "Year Built"
    landlord_os_uprn = None
    landlord_property_type = "Property Type Code"
    landlord_built_form = None
    landlord_wall_construction = None
    landlord_heating_system = None
    landlord_existing_pv = None
    landlord_property_id = "Property ref"

    outcomes_filename = None
    outcomes_sheetname = None
    outcomes_postcode = None
    outcomes_houseno = None
    outcomes_id = None
    outcomes_address = None

    master_filepaths = []
    master_to_asset_list_filepath = None

    # Southern Midlands
    data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Southern/Midlands Properties - Apr 2025"
    data_filename = "Southern Housing Midlands Property List - combined.xlsx"
    sheet_name = "Sheet 1"
    postcode_column = 'Post Code'
    fulladdress_column = "Address"
    address1_column = None
    address1_method = "house_number_extraction"
    address_cols_to_concat = []
    missing_postcodes_method = None
    landlord_year_built = "Age_1"
    landlord_os_uprn = None
    landlord_property_type = "Prop_Type"
    landlord_built_form = "Prop_Type"
    landlord_wall_construction = "Walls_P"
    landlord_heating_system = "Heating System"
    landlord_existing_pv = None
    landlord_property_id = "AssetID"

    outcomes_filename = None
    outcomes_sheetname = None
    outcomes_postcode = None
    outcomes_houseno = None
    outcomes_id = None
    outcomes_address = None

    master_filepaths = []
    master_to_asset_list_filepath = None

    # Live West (2018 Asset list)
    data_folder = (
        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Livewest/Programme Update - March 2025/2018 Asset List"
    )
    data_filename = "LIVEWEST STOCK - 23rd October 2018.xlsx"
    sheet_name = "Assets"
    postcode_column = 'Postcode'
    fulladdress_column = "Address"
    address1_column = None
    address1_method = "house_number_extraction"
    address_cols_to_concat = []
    missing_postcodes_method = None
    landlord_year_built = "Build Year"
    landlord_os_uprn = None
    landlord_property_type = "Property Archetype"
    landlord_built_form = None
    landlord_wall_construction = None
    landlord_heating_system = "Heating Fuel Type"
    landlord_existing_pv = None
    landlord_property_id = "Uprn - DO NOT DELETE"

    outcomes_filename = "RT - LiveWest.xlsx"
    outcomes_sheetname = "Feedback"
    outcomes_postcode = "Poscode"
    outcomes_houseno = "No."
    outcomes_id = "UPRN"

    master_filepaths = [
        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Livewest/Programme Update - March 2025/Rolling Master "
        "- redacted for analysis/CAVITY-Table 1.csv"
    ]
    master_to_asset_list_filepath = None

    # Live West (South West asset list)
    data_folder = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Livewest/Programme Update - March "
                   "2025/Livewest Asset List (Original) - csv")
    data_filename = "Report-Table 1.csv"
    sheet_name = None
    postcode_column = 'Postcode'
    fulladdress_column = "T1_Address"
    address1_column = None
    address1_method = "house_number_extraction"
    address_cols_to_concat = []
    missing_postcodes_method = None
    landlord_year_built = "Build Yr"
    landlord_os_uprn = None
    landlord_property_type = "T1_AssetType"
    landlord_built_form = "T1_AssetType"
    landlord_wall_construction = "Wall Type Cavity"
    landlord_heating_system = "Heating Fuel"
    landlord_existing_pv = None
    landlord_property_id = "T1_UPRN"

    outcomes_filename = "RT - LiveWest.xlsx"
    outcomes_sheetname = "Feedback"
    outcomes_postcode = "Poscode"
    outcomes_houseno = "No."
    outcomes_id = "UPRN"

    master_filepaths = [
        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Livewest/Programme Update - March 2025/Rolling Master "
        "- redacted for analysis/CAVITY-Table 1.csv"
    ]
    master_to_asset_list_filepath = None

    # PFP London
    data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/London"
    data_filename = "PFP AREAS SURROUNDING LONDON - JAY, RUTH & LANE.xlsx"
    sheet_name = "PFP SURROUNDING LONDON"
    postcode_column = 'Postcode'
    fulladdress_column = None
    address1_column = "AddressLine1"
    address1_method = None
    address_cols_to_concat = ["AddressLine1", "AddressLine2", "AddressLine3"]
    missing_postcodes_method = None
    landlord_year_built = None
    landlord_os_uprn = None
    landlord_property_type = "Archetype (PFP)"
    landlord_built_form = "Archetype (PFP)"
    landlord_wall_construction = None
    landlord_heating_system = None
    landlord_existing_pv = None
    landlord_property_id = "Uprn"

    outcomes_filename = None
    outcomes_sheetname = None
    outcomes_postcode = None
    outcomes_houseno = None
    outcomes_id = None

    master_filepaths = []
    master_to_asset_list_filepath = None

    # PFP North-West
    data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/North-West"
    data_filename = "Places for People NORTH WEST - INSPECTIONS MASTER - UPDATE.xlsx"
    sheet_name = "CHECKED"
    postcode_column = 'Postcode'
    fulladdress_column = None
    address1_column = "AddressLine1"
    address1_method = None
    address_cols_to_concat = ["AddressLine1", "AddressLine2", "AddressLine3"]
    missing_postcodes_method = None
    landlord_year_built = None
    landlord_os_uprn = None
    landlord_property_type = "Archetype (PFP)"
    landlord_built_form = "Archetype (PFP)"
    landlord_wall_construction = None
    landlord_heating_system = None
    landlord_existing_pv = None
    landlord_property_id = "Uprn"

    outcomes_filename = None
    outcomes_sheetname = None
    outcomes_postcode = None
    outcomes_houseno = None
    outcomes_id = None

    master_filepaths = []
    master_to_asset_list_filepath = None

    # PFP North-East
    data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/North-East"
    data_filename = "Places for People NORTH EAST - INSPECTIONS MASTER.xlsx"
    sheet_name = "CHECKED"
    postcode_column = 'Postcode'
    fulladdress_column = None
    address1_column = "AddressLine1"
    address1_method = None
    address_cols_to_concat = ["AddressLine1", "AddressLine2", "AddressLine3"]
    missing_postcodes_method = None
    landlord_year_built = None
    landlord_os_uprn = None
    landlord_property_type = "Archetype (PFP)"
    landlord_built_form = "Archetype (PFP)"
    landlord_wall_construction = None
    landlord_heating_system = None
    landlord_existing_pv = None
    landlord_property_id = "Uprn"

    outcomes_filename = None
    outcomes_sheetname = None
    outcomes_postcode = None
    outcomes_houseno = None
    outcomes_id = None

    master_filepaths = []
    master_to_asset_list_filepath = None

    # PFP East
    data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/East"
    data_filename = "PFP EAST - Master - DN LN NG NR PE POSTCODES.xlsx"
    sheet_name = "PFP EAST"
    postcode_column = 'Postcode'
    fulladdress_column = None
    address1_column = "AddressLine1"
    address1_method = None
    address_cols_to_concat = ["AddressLine1", "AddressLine2", "AddressLine3"]
    missing_postcodes_method = None
    landlord_year_built = None
    landlord_os_uprn = None
    landlord_property_type = "Archetype (PFP)"
    landlord_built_form = "Archetype (PFP)"
    landlord_wall_construction = None
    landlord_heating_system = None
    landlord_existing_pv = None
    landlord_property_id = "Uprn"

    outcomes_filename = None
    outcomes_sheetname = None
    outcomes_postcode = None
    outcomes_houseno = None
    outcomes_id = None

    master_filepaths = []
    master_to_asset_list_filepath = None

    # Wates
    data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Wates - "
    data_filename = "ECO 4 Wates.xlsx"
    sheet_name = "Roadmap Homes"
    postcode_column = 'Postcode'
    fulladdress_column = None
    address1_column = "Address Line 1"
    address1_method = None
    address_cols_to_concat = ["Address Line 1", "Address Line 2", "Address Line 3"]
    missing_postcodes_method = None
    landlord_year_built = "Build Year"
    landlord_os_uprn = None
    landlord_property_type = "Archetype"
    landlord_built_form = "Archetype"
    landlord_wall_construction = "Wall"
    landlord_heating_system = "Heating Type"
    landlord_existing_pv = None
    landlord_property_id = "UPRN"

    outcomes_filename = None
    outcomes_sheetname = None
    outcomes_postcode = None
    outcomes_houseno = None

    master_filepaths = []
    master_to_asset_list_filepath = None

    # Ealing
    # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Ealing/Programme data - 04032025"
    # data_filename = "Ealing BC - Property Plus Tenure 25.02.2025.xlsx"
    # sheet_name = "IGNORE - FULL MAIN"
    # postcode_column = 'Postcode'
    # fulladdress_column = "Address"
    # address1_column = None
    # address1_method = "first_word"
    # address_cols_to_concat = []
    # missing_postcodes_method = None
    # landlord_year_built = "Year Built"
    # landlord_os_uprn = None
    # landlord_property_type = "Property Type Code"
    # landlord_wall_construction = None
    # landlord_heating_system = None
    # landlord_existing_pv = None
    # landlord_property_id = "Property ref"

    # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Colchester"
    # data_filename = "Warmfront data- Colchester Borough Homes (Complete).xlsx"
    # sheet_name = "Sheet1"
    # postcode_column = 'Full Address.1'
    # fulladdress_column = "Full Address"
    # address1_column = None
    # address1_method = "first_word"
    # address_cols_to_concat = []
    # missing_postcodes_method = None
    # landlord_year_built = "Build Date"
    # landlord_os_uprn = None
    # landlord_property_type = "Property Type"
    # landlord_wall_construction = "Wallinsul"
    # landlord_heating_system = "HeatSorc"
    # landlord_existing_pv = None
    # landlord_property_id = "Property Reference"
    # outcomes_filename = None
    # outcomes_sheetname = None
    # outcomes_postcode = None
    # outcomes_houseno = None
    # master_filepaths = []
    # master_to_asset_list_filepath = None

    # For Westward
    data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Westward"
    data_filename = "WESTWARD - completed list - 20.03.2025.xlsx"
    sheet_name = "Sheet1"
    postcode_column = "WFT EDIT Postcode"
    fulladdress_column = "Address"
    address1_column = None
    address1_method = "house_number_extraction"
    address_cols_to_concat = []
    missing_postcodes_method = None
    landlord_year_built = "Build date"
    landlord_os_uprn = "UPRN"
    landlord_property_type = "Location type"
    landlord_built_form = None
    landlord_wall_construction = "Wall Construction (EPC)"
    landlord_heating_system = "Heat Source"
    landlord_existing_pv = "PV (Y/N)"
    landlord_property_id = "Place ref"
    outcomes_filename = None
    outcomes_sheetname = None
    outcomes_postcode = None
    outcomes_houseno = None
    master_filepaths = []
    master_to_asset_list_filepath = None
    outcomes_id = None

    # For ACIS - programme re-build
    # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/ACIS/ACIS Full Programme Review March 2025"
    # data_filename = "ACIS asset list.xlsx"
    # sheet_name = "Assets"
    # address1_column = "House No"
    # postcode_column = "Postcode"
    # landlord_property_id = "UPRN"
    # fulladdress_column = None
    # address_cols_to_concat = ["House No", "Street", "Town"]
    # missing_postcodes_method = None
    # address1_method = None
    # landlord_year_built = "YEAR BUILT"
    # landlord_os_uprn = None
    # landlord_property_type = "Property type"
    # landlord_built_form = None
    # landlord_wall_construction = "Wall Constuction"
    # landlord_heating_system = "Heating"
    # landlord_existing_pv = None
    # outcomes_filename = "ACIS Group - 25.11.2024 - outcomes.xlsx"
    # outcomes_sheetname = "Feedback"
    # outcomes_postcode = "Postcode"
    # outcomes_houseno = "No"
    # master_filepaths = [
    #     os.path.join(data_folder, "ECO 3 -Table 1.csv"),
    #     os.path.join(data_folder, "ECO 4 -Table 1.csv"),
    # ]
    # master_to_asset_list_filepath = None

    # For plus dane
    data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Plus Dane"
    data_filename = "PLUS DANE Asset List - for analysis.xlsx"
    sheet_name = "Asset List"
    address1_column = " Address"
    postcode_column = " Postcode"
    landlord_property_id = "UPRN"
    fulladdress_column = " Address"
    address_cols_to_concat = []
    missing_postcodes_method = None
    address1_method = None
    landlord_year_built = "Property Age"
    landlord_os_uprn = None
    landlord_property_type = "Property Type"
    landlord_wall_construction = "Landlord Wall Full"
    landlord_heating_system = "Landlord Heating"
    landlord_existing_pv = None
    outcomes_filename = "plus dane outcomes.xlsx"
    outcomes_sheetname = "EVERYTHING"
    outcomes_postcode = "Post Code"
    outcomes_houseno = "Numb."
    master_filepaths = [
        os.path.join(data_folder, "JJC Rolling Master.csv"),
        os.path.join(data_folder, "SCIS Rolling Master.csv"),
    ]
    master_to_asset_list_filepath = os.path.join(data_folder, "surveys_to_assets.csv")

    # Maps addresses to uprn in problematic cases
    manual_uprn_map = {}

    asset_list = AssetList(
        local_filepath=os.path.join(data_folder, data_filename),
        header=0,
        sheet_name=sheet_name,
        address1_colname=address1_column,
        postcode_colname=postcode_column,
        landlord_property_id=landlord_property_id,
        full_address_colname=fulladdress_column,
        full_address_cols_to_concat=address_cols_to_concat,
        missing_postcodes_method=missing_postcodes_method,
        address1_extraction_method=address1_method,
        landlord_year_built=landlord_year_built,
        landlord_uprn=landlord_os_uprn,
        landlord_property_type=landlord_property_type,
        landlord_built_form=landlord_built_form,
        landlord_wall_construction=landlord_wall_construction,
        landlord_roof_construction=landlord_roof_construction,
        landlord_heating_system=landlord_heating_system,
        landlord_existing_pv=landlord_existing_pv,
        landlord_sap=landlord_sap,
        phase=phase
    )
    asset_list.init_standardise()

    # We produce the new maps, which can be saved for future usage. They are not used further
    # in this function; they are kept as locals so they can be inspected / saved manually.
    new_property_type_map = _new_mappings(asset_list, asset_list.landlord_property_type, PROPERTY_MAPPING)
    new_built_form_map = _new_mappings(asset_list, asset_list.landlord_built_form, BUILT_FORM_MAPPINGS)
    new_wall_map = _new_mappings(asset_list, asset_list.landlord_wall_construction, WALL_CONSTRUCTION_MAPPINGS)
    new_heating_map = _new_mappings(asset_list, asset_list.landlord_heating_system, HEATING_MAPPINGS)
    new_existing_pv_map = _new_mappings(asset_list, asset_list.landlord_existing_pv, EXISTING_PV_MAPPINGS)
    new_roof_construction_map = _new_mappings(
        asset_list, asset_list.landlord_roof_construction, ROOF_CONSTRUCTION_MAPPINGS
    )

    asset_list.apply_standardiation()

    # We now flag properties that have been treated under existing programmes
    asset_list.flag_outcomes(
        outcomes_filepath=os.path.join(data_folder, outcomes_filename) if outcomes_filename else None,
        outcomes_sheetname=outcomes_sheetname,
        outcomes_address=outcomes_address,
        outcomes_postcode=outcomes_postcode,
        outcomes_houseno=outcomes_houseno,
        outcomes_id=outcomes_id
    )

    asset_list.flag_survey_master(
        master_filepaths=master_filepaths, master_to_asset_list_filepath=master_to_asset_list_filepath
    )

    asset_list.flag_ecosurv(ecosurv_landlords)

    ### We retrieve the EPC data
    # We chunk up this data into chunk_size rows at a time
    epc_api_only = False
    force_retrieve_data = False
    chunk_size = 1000
    chunk_filename = "Chunk {i}.csv"

    # Create the chunks directory
    download_folder = os.path.join(data_folder, "Chunks")
    os.makedirs(download_folder, exist_ok=True)

    chunk_indexes = list(range(0, len(asset_list.standardised_asset_list), chunk_size))
    # Kept as an ordered list so the chunks are later concatenated in a deterministic order
    downloaded_files = [chunk_filename.format(i=i) for i in chunk_indexes]

    # A chunk whose csv already exists is skipped unless we force the fetching of the data.
    # This is checked per chunk, so a missing chunk in the middle of the range is re-fetched
    # rather than being skipped because a later chunk happens to exist.
    if force_retrieve_data:
        completed_chunks = set()
    else:
        completed_chunks = _completed_chunk_indexes(chunk_indexes, os.listdir(download_folder), chunk_filename)

    # The arguments shared by the first attempt and the retry of failed properties
    get_data_kwargs = dict(
        row_id_name=asset_list.DOMNA_PROPERTY_ID,
        uprn_column=AssetList.STANDARD_UPRN,
        fulladdress_column=AssetList.STANDARD_FULL_ADDRESS,
        address1_column=AssetList.STANDARD_ADDRESS_1,
        postcode_column=AssetList.STANDARD_POSTCODE,
        property_type_column=AssetList.STANDARD_PROPERTY_TYPE,
        built_form_column=AssetList.STANDARD_BUILT_FORM,
        manual_uprn_map=manual_uprn_map,
        epc_api_only=epc_api_only,
        epc_auth_token=EPC_AUTH_TOKEN
    )

    for i in chunk_indexes:
        if i in completed_chunks:
            continue

        print(f"Processing chunk {i} to {i + chunk_size}")
        chunk = asset_list.standardised_asset_list.iloc[i:i + chunk_size]

        epc_data_chunk, errors_chunk, no_epc_chunk = get_data(df=chunk, **get_data_kwargs)

        # We now retrieve any failed properties (one retry, only when something failed)
        chunk_failed = chunk[chunk[asset_list.DOMNA_PROPERTY_ID].isin(errors_chunk)]
        if not chunk_failed.empty:
            epc_data_failed, _, _ = get_data(df=chunk_failed, **get_data_kwargs)
            epc_data_chunk.extend(epc_data_failed)  # Append the failed data to the main data

        # Store the chunk locally as a csv
        pd.DataFrame(epc_data_chunk).to_csv(os.path.join(download_folder, chunk_filename.format(i=i)), index=False)
        # Store the errors and no-data locally
        with open(os.path.join(download_folder, f"Chunk {i} errors.json"), "w") as f:
            json.dump(errors_chunk, f)

        # NOTE(review): the content written here is JSON even though the extension is .csv.
        # The filename is left unchanged in case anything else reads it.
        with open(os.path.join(download_folder, f"Chunk {i} nodata.csv"), "w") as f:
            json.dump(no_epc_chunk, f)

    # We read in and concatenate the created chunks
    epc_data = []
    for file in downloaded_files:
        csv_data = pd.read_csv(os.path.join(download_folder, file))
        # We need to convert the recommendations back to a list
        # NOTE(review): eval executes whatever text is stored in the csv cell. This is only
        # acceptable because the chunk files are written by this script; consider
        # ast.literal_eval if the stored values are always plain literals.
        csv_data["recommendations"] = csv_data["recommendations"].apply(eval)
        # We don't have this if we didn't run the pulling from find my epc
        if "find_my_epc_data" in csv_data.columns:
            csv_data["find_my_epc_data"] = csv_data["find_my_epc_data"].apply(eval)
        epc_data.append(csv_data)

    epc_df = pd.concat(epc_data)
    epc_df["estimated"] = epc_df["estimated"].fillna(False)

    # We expand out the recommendations into the floor insulation flags
    transformed_df = _floor_recommendation_flags(epc_df, asset_list.DOMNA_PROPERTY_ID)

    # Get the find my epc data
    if "find_my_epc_data" not in epc_df.columns:
        epc_df["find_my_epc_data"] = None

    find_my_epc_rows = []
    for _, x in epc_df.iterrows():
        row = {asset_list.DOMNA_PROPERTY_ID: x[asset_list.DOMNA_PROPERTY_ID]}
        details = x["find_my_epc_data"]
        # Only unpack real dicts: rows from chunks without this column come through as NaN
        # after the concat, which is truthy but cannot be unpacked
        if isinstance(details, dict):
            row.update(details)
        find_my_epc_rows.append(row)

    find_my_epc_data = pd.DataFrame(find_my_epc_rows)
    find_my_epc_data = find_my_epc_data.merge(
        transformed_df[[asset_list.DOMNA_PROPERTY_ID, "epc_has_floor_recommendation"]],
        how="left",
        on=asset_list.DOMNA_PROPERTY_ID
    )

    # We check if we get the solar pv column:
    if "Solar photovoltaics" not in find_my_epc_data.columns:
        find_my_epc_data["Solar photovoltaics"] = False

    # Retrieve just the data we need
    epc_df = epc_df[
        [asset_list.DOMNA_PROPERTY_ID] + list(asset_list.EPC_API_DATA_NAMES.keys())
    ].rename(
        columns=asset_list.EPC_API_DATA_NAMES
    )

    # Look for columns not in the find my EPC data, which will have happened if we didn't
    # retrieve it in the first place
    for c in asset_list.FIND_EPC_DATA_NAMES:
        if c not in find_my_epc_data.columns:
            find_my_epc_data[c] = None

    epc_df = epc_df.merge(
        find_my_epc_data[
            [asset_list.DOMNA_PROPERTY_ID, "epc_has_floor_recommendation"] +
            list(asset_list.FIND_EPC_DATA_NAMES.keys())
        ].rename(columns=asset_list.FIND_EPC_DATA_NAMES),
        how="left",
        on=asset_list.DOMNA_PROPERTY_ID
    )

    asset_list.merge_data(epc_df)

    asset_list.extract_attributes()

    cleaned = read_from_s3(
        s3_file_name="cleaned_epc_data/cleaned.bson", bucket_name="retrofit-data-dev"
    )
    cleaned = msgpack.unpackb(cleaned, raw=False)

    asset_list.identify_worktypes(cleaned)

    pprint(asset_list.work_type_figures)

    asset_list.flat_analysis()

    ################################################################
    # WESTWARD - comparison between Kieran's method & automated
    ################################################################
    # NOTE(review): this section reads the Westward workbook from data_folder, which at this
    # point holds whichever landlord configuration was assigned last above.
    westward_workbook = os.path.join(data_folder, "WESTWARD - Route March Prep.xlsx")

    # Check 1)
    cavity_fills = _compare_with_manual_sheet(
        asset_list, westward_workbook, sheet_name="Straight Fill", reason_columns=["cavity_reason"]
    )
    # Didn't identify 3 properties because they're bedsits
    # 4 properties were identified, not based on the non-intrusives but instead because
    # Westward said they were built in 2003/2007. Have adjusted this to use the age from the
    # epc as well, as EPC says 1975 and they look like 1975 properties
    # 37 properties flagged as already having solar - these are all because the landlord said they have solar
    # e.g.
    # https://earth.google.com/web/search/11+Winsland+Avenue+TOTNES+TQ9+5FT/@50.43354465,-3.71318276,46.57468503a,
    # 59.14004365d,35y,0h,0t,
    # 0r/data=CpABGmISXAolMHg0ODZkMWQxOGE4NWRiZjdkOjB4YjBhM2E5M2Q3YWVlMWEwYhlZYgp7fzdJQCHFfC9027QNwCohMTEgV2luc2xhbmQgQXZlbnVlIFRPVE5FUyBUUTkgNUZUGAIgASImCiQJbxsQEoo3SUARXQcp_HE3SUAZBmiZGJ6yDcAhCA0fqq63DcBCAggBOgMKATBCAggASg0I____________ARAA
    # https://earth.google.com/web/search/15+St+Anne%27s+Ct,+Newton+Abbot+TQ12+1TL/@50.53068337,-3.61611128,
    # 11.74908956a,135.73212429d,35y,0h,0t,
    # 0r/data=CpUBGmcSYQolMHg0ODZkMDVkMjFhODhjZjgxOjB4MjBmMzE2Zjc3MGI2NGMwYxlCxHLw8UNJQCFZqyzALe4MwComMTUgU3QgQW5uZSdzIEN0LCBOZXd0b24gQWJib3QgVFExMiAxVEwYAiABIiYKJAm-r6U2iDdJQBHS5ICRdDdJQBmYGVpmiLINwCG8wcrtqbYNwEICCAE6AwoBMEICCABKDQj___________8BEAA

    # Check 2)
    cavity_fills_with_solar = _compare_with_manual_sheet(
        asset_list, westward_workbook, sheet_name="Solar PV - Straight Fill", reason_columns=["cavity_reason"]
    )
    # 203 properties total
    # 140 properties were flagged up based on non-intrusives (Non-Intrusive Data Showed Empty Cavity)
    # 63 property already has solar

    # Check 3) RDF
    rdf = _compare_with_manual_sheet(
        asset_list, westward_workbook, sheet_name="RDF CIGA checks", reason_columns=["cavity_reason", "solar_reason"]
    )
    # 264 properties are not identified, 261 of which are due to the fact they contain materials
    # The other 3 were determined to be eligible for solar instead
    # Many of these units that were identified for rdf works could be solar jobs

    rdf_with_solar = _compare_with_manual_sheet(
        asset_list,
        westward_workbook,
        sheet_name="Solar PV - RDF CIGA Checks",
        reason_columns=["cavity_reason", "solar_reason"]
    )
    # All others identified - some flagged as empties due to EPC or landlord data suggesting as much
    # 5 not identified due to containing COMPACTED BEAD

    # NOTE(review): this indexes the DataFrame with the landlord property id column itself
    # rather than with a boolean mask, so pandas will treat the ids as column labels. It looks
    # like an unfinished filter (e.g. a missing .isin(...)) - confirm the intended condition.
    asset_list.standardised_asset_list = asset_list.standardised_asset_list[
        asset_list.standardised_asset_list[asset_list.landlord_property_id]
    ]

    asset_list.load_contact_details(
        local_filepath=os.path.join(data_folder, "Full property list wth D&V report V look up 12.2.25.xlsx"),
        sheet_name="Report 1",
        landlord_property_id=asset_list.landlord_property_id,
        phone_number_column='Property Current Tel. Number',
        fullname_column='Proeprty Current Occupant',
        firstname_column=None,
        lastname_column=None,
        email_column=None,  # TODO - we need this
    )

    # Convert to a format suitable for CRM
    # TODO: TEMP
    assigned_surveyors = pd.DataFrame(
        [
            {
                asset_list.landlord_property_id: "02610001",
                "week_commencing": "10/10/2025",
                "surveyor_name": "Khalim Conn-Kowlessar",
                "surveyor_email": "khalim@domna.homes",
            }
        ]
    )

    # TODO: Sort the output by postcode
    company_domain = "ealing.gov.uk"
    crm_pipeline_name = "Survey Management"
    first_dealstage = "READY TO BEGIN SCHEDULING"
    # TODO - temp, upload to either SharePoint or AWS
    asset_list.prepare_for_crm(
        assigned_surveyors=assigned_surveyors,
        company_domain=company_domain,
        crm_pipeline_name=crm_pipeline_name,
        first_dealstage=first_dealstage
    )

    hubspot_data = asset_list.hubspot_data

    # Store as an excel, named after the asset list file without its extension
    output_filepath = os.path.join(data_folder, os.path.splitext(data_filename)[0]) + " - Standardised.xlsx"

    # Store the data in two tabs. One for the asset list with the EPC data and the second with the flat data
    with pd.ExcelWriter(output_filepath) as writer:
        asset_list.standardised_asset_list.to_excel(writer, sheet_name="Standardised Asset List", index=False)
        asset_list.flat_data.to_excel(writer, sheet_name="Flat Data", index=False)

        # If we have outcomes or unmatched records, we add a tab for each non-empty table
        optional_sheets = [
            (asset_list.outcomes_for_output, "Outcomes"),
            (asset_list.unmatched_submissions, "Unmatched Submissions"),
            (asset_list.outcomes_no_match, "Unmatched Outcomes"),
            (asset_list.ecosurv_no_match, "Unmatched Ecosurv"),
        ]
        for table, tab_name in optional_sheets:
            if not table.empty:
                table.to_excel(writer, sheet_name=tab_name, index=False)

    # Store the Hubspot export as a csv
    hubspot_data.to_csv(os.path.join(data_folder, "Hubspot Export.csv"), index=False)