mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
1225 lines
46 KiB
Python
1225 lines
46 KiB
Python
import os
|
|
import json
|
|
import pandas as pd
|
|
from asset_list.AssetList import AssetList
|
|
from asset_list.mappings.property_type import PROPERTY_MAPPING
|
|
from asset_list.mappings.built_form import BUILT_FORM_MAPPINGS
|
|
from asset_list.mappings.walls import WALL_CONSTRUCTION_MAPPINGS
|
|
from asset_list.mappings.heating_systems import HEATING_MAPPINGS
|
|
from asset_list.mappings.exising_pv import EXISTING_PV_MAPPINGS
|
|
from asset_list.mappings.roof import ROOF_CONSTRUCTION_MAPPINGS
|
|
from asset_list.utils import get_data
|
|
|
|
from dotenv import load_dotenv
|
|
from backend.SearchEpc import SearchEpc
|
|
|
|
load_dotenv(dotenv_path="backend/.env")
|
|
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
|
|
|
|
|
|
def extract_address1(asset_list, full_address_col, postcode_col, method="first_two_words"):
|
|
if method == "first_two_words":
|
|
asset_list["address1_extracted"] = asset_list[full_address_col].str.split(" ").str[:2].str.join(" ")
|
|
return asset_list
|
|
|
|
if method == "first_word":
|
|
asset_list["address1_extracted"] = asset_list[full_address_col].str.split(" ").str[0]
|
|
return asset_list
|
|
|
|
if method == "house_number_extraction":
|
|
asset_list["address1_extracted"] = asset_list.apply(
|
|
lambda x: SearchEpc.get_house_number(address=x[full_address_col], postcode=x[postcode_col]),
|
|
axis=1
|
|
)
|
|
return asset_list
|
|
|
|
raise ValueError(f"Method {method} not recognized")
|
|
|
|
|
|
def app():
|
|
"""
|
|
This app is EPC pulling data for some properties owned by Livewest
|
|
|
|
Data request contents:
|
|
Date of last EPC
|
|
Reason for EPC
|
|
SAP score on register
|
|
Property Type
|
|
Property Area
|
|
Property Age
|
|
Any Dimensions (HLP,PW,RH)
|
|
Property Wall Construction
|
|
Heating Type
|
|
Secondary Heating
|
|
Loft Insulation Depth
|
|
|
|
Additional if possible:
|
|
Heat loss calculations
|
|
EPC recommendations
|
|
Property UPRN
|
|
"""
|
|
# Freebridge
|
|
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Freebridge"
|
|
data_filename = "Domna - FCH property data May 25 copy.xlsx"
|
|
sheet_name = "EPC Data"
|
|
postcode_column = 'Post Code'
|
|
address1_column = "Address 1"
|
|
address1_method = None
|
|
fulladdress_column = None
|
|
address_cols_to_concat = ["Address 1", "Address 4"]
|
|
missing_postcodes_method = None
|
|
landlord_year_built = "Build Date"
|
|
landlord_os_uprn = None
|
|
landlord_property_type = "Property Type"
|
|
landlord_built_form = None
|
|
landlord_wall_construction = "Walls Description"
|
|
landlord_heating_system = "Heating Type"
|
|
landlord_existing_pv = None
|
|
landlord_property_id = "Place Ref"
|
|
landlord_roof_construction = "Roof Description"
|
|
landlord_sap = "Current SAP"
|
|
outcomes_filename = []
|
|
outcomes_sheetname = []
|
|
outcomes_postcode = []
|
|
outcomes_houseno = []
|
|
outcomes_address = []
|
|
outcomes_id = []
|
|
master_filepaths = []
|
|
master_to_asset_list_filepath = None
|
|
asset_list_header = 0
|
|
landlord_block_reference = None
|
|
master_id_colnames = []
|
|
phase = True # Inspections not complete, produce a partial view
|
|
ecosurv_landlords = None
|
|
|
|
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Broadlands"
|
|
data_filename = "Broadlands Asset List.xlsx"
|
|
sheet_name = "Assets"
|
|
postcode_column = 'POSTCODE'
|
|
fulladdress_column = None
|
|
address1_column = "Address1"
|
|
address1_method = None
|
|
address_cols_to_concat = ["Address1"]
|
|
missing_postcodes_method = None
|
|
landlord_year_built = "DATEBUILT"
|
|
landlord_os_uprn = None
|
|
landlord_property_type = "PropertyType"
|
|
landlord_built_form = "PropertyType"
|
|
landlord_wall_construction = None
|
|
landlord_heating_system = "Heating Fuel"
|
|
landlord_existing_pv = None
|
|
landlord_property_id = "Row ID"
|
|
outcomes_filename = [os.path.join(data_folder, "outcomes.xlsx")]
|
|
outcomes_sheetname = ["Sheet1"]
|
|
outcomes_postcode = ["Postcode"]
|
|
outcomes_houseno = ["No."]
|
|
outcomes_address = ["Address"]
|
|
outcomes_id = [None]
|
|
master_filepaths = [
|
|
os.path.join(data_folder, "eco3 submissions.csv"),
|
|
os.path.join(data_folder, "eco4 submissions.csv"),
|
|
]
|
|
master_to_asset_list_filepath = None
|
|
asset_list_header = 0
|
|
landlord_block_reference = None
|
|
master_id_colnames = [None, None]
|
|
landlord_roof_construction = None
|
|
phase = False
|
|
landlord_sap = None
|
|
ecosurv_landlords = "broadland"
|
|
#
|
|
|
|
# Community:
|
|
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Community Housing/New Programme"
|
|
data_filename = "SUB EPC C to DOMNA - 24.07.25.xlsx"
|
|
sheet_name = "Sheet1"
|
|
postcode_column = 'POSTCODE'
|
|
fulladdress_column = "ADDRESS"
|
|
address1_column = None
|
|
address1_method = "house_number_extraction"
|
|
address_cols_to_concat = []
|
|
missing_postcodes_method = None
|
|
landlord_year_built = "BUILD DATE"
|
|
landlord_os_uprn = None
|
|
landlord_property_type = "PROPERTY TYPE"
|
|
landlord_built_form = "Archetype" # Using the inspections archetype
|
|
landlord_wall_construction = "CONSTRUCTION TYPE"
|
|
landlord_roof_construction = None
|
|
landlord_heating_system = None
|
|
landlord_existing_pv = None
|
|
landlord_property_id = "UPRN"
|
|
landlord_sap = None
|
|
outcomes_filename = []
|
|
outcomes_sheetname = []
|
|
outcomes_postcode = []
|
|
outcomes_houseno = []
|
|
outcomes_id = []
|
|
outcomes_address = []
|
|
master_filepaths = []
|
|
master_to_asset_list_filepath = None
|
|
phase = False
|
|
ecosurv_landlords = None
|
|
asset_list_header = 1
|
|
landlord_block_reference = None
|
|
master_id_colnames = []
|
|
|
|
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Ealing/Programme Analysis"
|
|
data_filename = "EalingProjectRebuildJW210725.xlsx"
|
|
sheet_name = "Refine & Houses"
|
|
postcode_column = 'Postcode'
|
|
fulladdress_column = "Address"
|
|
address1_column = None
|
|
address1_method = "house_number_extraction"
|
|
address_cols_to_concat = []
|
|
missing_postcodes_method = None
|
|
landlord_year_built = None
|
|
landlord_os_uprn = None
|
|
landlord_property_type = None # Using the inspections property type
|
|
landlord_built_form = None
|
|
landlord_wall_construction = None
|
|
landlord_roof_construction = None
|
|
landlord_heating_system = None
|
|
landlord_existing_pv = None
|
|
landlord_property_id = "Property ref"
|
|
landlord_sap = None
|
|
outcomes_filename = []
|
|
outcomes_sheetname = []
|
|
outcomes_postcode = []
|
|
outcomes_houseno = []
|
|
outcomes_id = []
|
|
outcomes_address = []
|
|
master_filepaths = []
|
|
master_to_asset_list_filepath = None
|
|
phase = False
|
|
ecosurv_landlords = None
|
|
asset_list_header = 0
|
|
landlord_block_reference = "Block Reference"
|
|
master_id_colnames = []
|
|
|
|
# TODO: Delete me
|
|
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/NRLA/"
|
|
data_filename = "20250716 Asset List.xlsx"
|
|
sheet_name = "Sheet 1"
|
|
postcode_column = 'Postcode'
|
|
fulladdress_column = "Full Address"
|
|
address1_column = None
|
|
address1_method = "house_number_extraction"
|
|
address_cols_to_concat = []
|
|
missing_postcodes_method = None
|
|
landlord_year_built = None
|
|
landlord_os_uprn = None
|
|
landlord_property_type = None
|
|
landlord_built_form = None
|
|
landlord_wall_construction = None
|
|
landlord_heating_system = None
|
|
landlord_existing_pv = None
|
|
landlord_property_id = "Row ID"
|
|
outcomes_filename = []
|
|
outcomes_sheetname = []
|
|
outcomes_postcode = []
|
|
outcomes_houseno = []
|
|
outcomes_address = []
|
|
outcomes_id = []
|
|
master_filepaths = []
|
|
master_to_asset_list_filepath = None
|
|
asset_list_header = 0
|
|
landlord_block_reference = None
|
|
master_id_colnames = []
|
|
landlord_roof_construction = None
|
|
phase = False
|
|
landlord_sap = None
|
|
ecosurv_landlords = None
|
|
|
|
# Southend
|
|
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Southend/July 2025 Programme"
|
|
data_filename = "SOUTHEND - RYAN.xlsx"
|
|
sheet_name = "July 2025 Surveys"
|
|
postcode_column = 'Postcode'
|
|
fulladdress_column = "Full postal address"
|
|
address1_column = None
|
|
address1_method = "house_number_extraction"
|
|
address_cols_to_concat = []
|
|
missing_postcodes_method = None
|
|
landlord_year_built = "Property age"
|
|
landlord_os_uprn = None
|
|
landlord_property_type = "Property type"
|
|
landlord_built_form = "Property type"
|
|
landlord_wall_construction = None
|
|
landlord_heating_system = None
|
|
landlord_existing_pv = None
|
|
landlord_property_id = "ID"
|
|
outcomes_filename = []
|
|
outcomes_sheetname = []
|
|
outcomes_postcode = []
|
|
outcomes_houseno = []
|
|
outcomes_address = []
|
|
outcomes_id = []
|
|
master_filepaths = []
|
|
master_to_asset_list_filepath = None
|
|
asset_list_header = 0
|
|
landlord_block_reference = None
|
|
master_id_colnames = []
|
|
landlord_roof_construction = None
|
|
phase = False
|
|
landlord_sap = None
|
|
ecosurv_landlords = None
|
|
|
|
# For Rooftop
|
|
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Rooftop"
|
|
data_filename = "Rooftop Asset List - July 2025.xlsx"
|
|
sheet_name = "Sheet1"
|
|
postcode_column = 'post_code'
|
|
fulladdress_column = None
|
|
address1_column = "add_1"
|
|
address1_method = None
|
|
address_cols_to_concat = [
|
|
"add_1", "add_2", "add_3", "add_4"
|
|
]
|
|
missing_postcodes_method = None
|
|
landlord_year_built = "date_built"
|
|
landlord_os_uprn = None
|
|
landlord_property_type = "ConstructionStyle"
|
|
landlord_built_form = "ConstructionStyle"
|
|
landlord_wall_construction = None
|
|
landlord_heating_system = "Description"
|
|
landlord_existing_pv = None
|
|
landlord_property_id = "PropertyCode"
|
|
outcomes_filename = [os.path.join(data_folder, "Rooftop_Outcomes.xlsx")]
|
|
outcomes_sheetname = ["OUTCOMES"]
|
|
outcomes_postcode = ["POSTCODE"]
|
|
outcomes_houseno = ["NO"]
|
|
outcomes_address = ["ADDRESS"]
|
|
outcomes_id = [None]
|
|
master_filepaths = [os.path.join(data_folder, "Master.csv")]
|
|
master_to_asset_list_filepath = None
|
|
asset_list_header = 1
|
|
landlord_block_reference = "bl_rec_ref"
|
|
master_id_colnames = [None]
|
|
landlord_roof_construction = None
|
|
phase = False
|
|
landlord_sap = None
|
|
ecosurv_landlords = "rooftop"
|
|
|
|
# For Housing
|
|
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/For Housing/New Programme July 2025"
|
|
data_filename = "FOR HOUSING Asset List (Combined).xlsx"
|
|
sheet_name = "Asset List"
|
|
postcode_column = 'Postcode'
|
|
fulladdress_column = "Address"
|
|
address1_column = None
|
|
address1_method = "house_number_extraction"
|
|
address_cols_to_concat = []
|
|
missing_postcodes_method = None
|
|
landlord_year_built = None
|
|
landlord_os_uprn = None
|
|
landlord_property_type = "Type"
|
|
landlord_built_form = "Type"
|
|
landlord_wall_construction = None
|
|
landlord_heating_system = "Heating - full"
|
|
landlord_existing_pv = None
|
|
landlord_property_id = "UPRN"
|
|
outcomes_filename = [os.path.join(data_folder, "Khalim Combined - for analysis.xlsx")]
|
|
outcomes_sheetname = ["Sheet1"]
|
|
outcomes_postcode = ["POSTCODE"]
|
|
outcomes_houseno = ["NO"]
|
|
outcomes_address = ["ADDRESS"]
|
|
outcomes_id = [None]
|
|
master_filepaths = [os.path.join(data_folder, "submissions.csv")]
|
|
master_to_asset_list_filepath = None
|
|
asset_list_header = 0
|
|
landlord_block_reference = None
|
|
master_id_colnames = [None]
|
|
landlord_roof_construction = None
|
|
phase = False
|
|
landlord_sap = "SAP"
|
|
ecosurv_landlords = "for housing"
|
|
|
|
# CDS
|
|
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/CDS"
|
|
data_filename = "Founder Estates - Asset List.xlsx"
|
|
sheet_name = "Combined"
|
|
postcode_column = 'Postcode'
|
|
fulladdress_column = "Address"
|
|
address1_column = None
|
|
address1_method = "house_number_extraction"
|
|
address_cols_to_concat = []
|
|
missing_postcodes_method = None
|
|
landlord_year_built = None
|
|
landlord_os_uprn = None
|
|
landlord_property_type = None
|
|
landlord_built_form = None
|
|
landlord_wall_construction = None
|
|
landlord_heating_system = "Heating Type"
|
|
landlord_existing_pv = None
|
|
landlord_property_id = "Row ID"
|
|
outcomes_filename = []
|
|
outcomes_sheetname = []
|
|
outcomes_postcode = []
|
|
outcomes_houseno = []
|
|
outcomes_address = []
|
|
outcomes_id = []
|
|
master_filepaths = [os.path.join(data_folder, "submissions.csv")]
|
|
master_to_asset_list_filepath = None
|
|
asset_list_header = 0
|
|
landlord_block_reference = None
|
|
master_id_colnames = [None]
|
|
landlord_roof_construction = None
|
|
phase = False
|
|
landlord_sap = None
|
|
ecosurv_landlords = "cds"
|
|
|
|
# Plus Dane
|
|
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Plus Dane/New Programme July 2025/"
|
|
data_filename = "20250711 Plus Dane Asset List.xlsx"
|
|
sheet_name = "Sheet1"
|
|
postcode_column = 'Postcode'
|
|
fulladdress_column = "Address"
|
|
address1_column = None
|
|
address1_method = "house_number_extraction"
|
|
address_cols_to_concat = []
|
|
missing_postcodes_method = None
|
|
landlord_year_built = "Property Age"
|
|
landlord_os_uprn = None
|
|
landlord_property_type = "Property Type"
|
|
landlord_built_form = "Built Form"
|
|
landlord_wall_construction = "Wall Construction"
|
|
landlord_heating_system = "Full Heating System"
|
|
landlord_existing_pv = None
|
|
landlord_property_id = "UPRN"
|
|
outcomes_filename = [
|
|
os.path.join(data_folder, "Outcomes - Plus Dane_CWI_2024.xlsx"),
|
|
os.path.join(data_folder, "Outcomes - Plus Dane_CWI_2025.xlsx"),
|
|
os.path.join(data_folder, "Outcomes - Plus Dane_PV_2025.xlsx"),
|
|
]
|
|
outcomes_sheetname = [
|
|
"CWI & LI - 2024", "2025 - CWI", "PV - 2025",
|
|
]
|
|
outcomes_postcode = ["Postcode", "Postcode", "Postcode"]
|
|
outcomes_houseno = ["No.", "No", "No"]
|
|
outcomes_address = ["Address", "Address", "Address"]
|
|
outcomes_id = ["Asset Reference", "LL UPRN", "LL UPRN"]
|
|
master_filepaths = [
|
|
os.path.join(data_folder, "submissions/JJC-Table 1.csv"),
|
|
os.path.join(data_folder, "submissions/SCIS-Table 1.csv")
|
|
]
|
|
master_to_asset_list_filepath = None
|
|
asset_list_header = 1
|
|
landlord_block_reference = None
|
|
master_id_colnames = [None, None]
|
|
landlord_roof_construction = None
|
|
phase = False
|
|
landlord_sap = "SAP Rating"
|
|
ecosurv_landlords = "plus dane"
|
|
|
|
# data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Brentwood/July 2025 New Programme"
|
|
# data_filename = "20250710 Asset List Brentwood.xlsx"
|
|
# sheet_name = "Sheet1"
|
|
# postcode_column = 'Postcode'
|
|
# fulladdress_column = None
|
|
# address1_column = "House Number"
|
|
# address1_method = None
|
|
# address_cols_to_concat = ["House Number", "Address Line 1", "Address Line 2", "Address Line 3"]
|
|
# missing_postcodes_method = None
|
|
# landlord_year_built = "Year Built"
|
|
# landlord_os_uprn = None
|
|
# landlord_property_type = "Dwelling"
|
|
# landlord_built_form = None
|
|
# landlord_wall_construction = None
|
|
# landlord_heating_system = "Heating"
|
|
# landlord_existing_pv = None
|
|
# landlord_property_id = "UPRN"
|
|
# outcomes_filename = [os.path.join(data_folder, "Brentwood - outcomes for analysis.xlsx")]
|
|
# outcomes_sheetname = ["OUTCOMES"]
|
|
# outcomes_postcode = ["POSTCODE"]
|
|
# outcomes_houseno = [None]
|
|
# outcomes_address = ["ADDRESS"]
|
|
# outcomes_id = [None]
|
|
# master_filepaths = [os.path.join(data_folder, "Submissions.csv")]
|
|
# master_to_asset_list_filepath = None
|
|
# asset_list_header = 1
|
|
# landlord_block_reference = None
|
|
# master_id_colnames = [None]
|
|
# landlord_roof_construction = None
|
|
# phase = False
|
|
# landlord_sap = None
|
|
# ecosurv_landlords = "brentwood"
|
|
|
|
# Brentwood
|
|
# data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Brentwood/July 2025 New Programme"
|
|
# data_filename = "20250710 Asset List Brentwood.xlsx"
|
|
# sheet_name = "Sheet1"
|
|
# postcode_column = 'Postcode'
|
|
# fulladdress_column = None
|
|
# address1_column = "House Number"
|
|
# address1_method = None
|
|
# address_cols_to_concat = ["House Number", "Address Line 1", "Address Line 2", "Address Line 3"]
|
|
# missing_postcodes_method = None
|
|
# landlord_year_built = "Year Built"
|
|
# landlord_os_uprn = None
|
|
# landlord_property_type = "Dwelling"
|
|
# landlord_built_form = None
|
|
# landlord_wall_construction = None
|
|
# landlord_heating_system = "Heating"
|
|
# landlord_existing_pv = None
|
|
# landlord_property_id = "UPRN"
|
|
# outcomes_filename = [os.path.join(data_folder, "Brentwood - outcomes for analysis.xlsx")]
|
|
# outcomes_sheetname = ["OUTCOMES"]
|
|
# outcomes_postcode = ["POSTCODE"]
|
|
# outcomes_houseno = [None]
|
|
# outcomes_address = ["ADDRESS"]
|
|
# outcomes_id = [None]
|
|
# master_filepaths = [os.path.join(data_folder, "Submissions.csv")]
|
|
# master_to_asset_list_filepath = None
|
|
# asset_list_header = 1
|
|
# landlord_block_reference = None
|
|
# master_id_colnames = [None]
|
|
# landlord_roof_construction = None
|
|
# phase = False
|
|
# landlord_sap = None
|
|
# ecosurv_landlords = "brentwood"
|
|
#
|
|
# # Eastlight
|
|
# data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Eastlight/New Programme"
|
|
# data_filename = "INSPECTIONS MASTER Non Tech.xlsx"
|
|
# sheet_name = "EASTLIGHT CW"
|
|
# postcode_column = 'Postcode'
|
|
# fulladdress_column = None
|
|
# address1_column = "HouseName"
|
|
# address1_method = None
|
|
# address_cols_to_concat = ["HouseName", "Block", "Address1", "Address2", "Address3"]
|
|
# missing_postcodes_method = None
|
|
# landlord_year_built = "Built In Year"
|
|
# landlord_os_uprn = None
|
|
# landlord_property_type = "AssetType"
|
|
# landlord_built_form = "Archetype" # Using inspections archetype
|
|
# landlord_wall_construction = None
|
|
# landlord_roof_construction = None
|
|
# landlord_heating_system = "Main Heating Source"
|
|
# landlord_existing_pv = None
|
|
# landlord_property_id = "UPRN"
|
|
# landlord_sap = "SAP Score"
|
|
# outcomes_filename = [
|
|
# os.path.join(data_folder, "Eastlight_CWI_JJC_2025.xlsx"),
|
|
# os.path.join(data_folder, "Eastlight_CWI_SCIS_2025.xlsx"),
|
|
# ]
|
|
# outcomes_sheetname = ["Outcomes", "Feedback"]
|
|
# outcomes_postcode = ["Postcode", "Postcode"]
|
|
# outcomes_houseno = ["No", "No."]
|
|
# outcomes_id = [None, None]
|
|
# outcomes_address = ["Address", "Address"]
|
|
# master_filepaths = [
|
|
# os.path.join(data_folder, "ECO 3-Table 1.csv"),
|
|
# os.path.join(data_folder, "ECO 4-Table 1.csv"),
|
|
# ]
|
|
# master_to_asset_list_filepath = None
|
|
# phase = False
|
|
# ecosurv_landlords = "eastlight"
|
|
# asset_list_header = 0
|
|
# landlord_block_reference = None
|
|
# master_id_colnames = [None, None]
|
|
# landlord_sap = None
|
|
|
|
# Pickering and Ferens
|
|
# data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Pickering & Ferens"
|
|
# data_filename = "SAP 9 vs SAP 10 Sava Intelligent Energy - Property List (190625).xlsx"
|
|
# sheet_name = "Sava Intelligent Energy - Prope"
|
|
# postcode_column = 'Postcode'
|
|
# fulladdress_column = 'Address'
|
|
# address1_column = None
|
|
# address1_method = "house_number_extraction"
|
|
# address_cols_to_concat = []
|
|
# missing_postcodes_method = None
|
|
# landlord_year_built = None
|
|
# landlord_os_uprn = None
|
|
# landlord_property_type = "Property Type" # Using the inspections property type
|
|
# landlord_built_form = "Archetype 2"
|
|
# landlord_wall_construction = None
|
|
# landlord_roof_construction = None
|
|
# landlord_heating_system = None
|
|
# landlord_existing_pv = None
|
|
# landlord_property_id = "UPRN"
|
|
# landlord_sap = "SAP Rating (RdSAP 10)"
|
|
# outcomes_filename = []
|
|
# outcomes_sheetname = []
|
|
# outcomes_postcode = []
|
|
# outcomes_houseno = []
|
|
# outcomes_id = []
|
|
# outcomes_address = []
|
|
# master_filepaths = [
|
|
# os.path.join(data_folder, "PICKERING & FERENS ROLLING MASTER SHEET HEDGEFUND - 26.7.24 - K.csv"),
|
|
# os.path.join(data_folder, "PICKERING & FERENS NEW MASTER GBIS UPDATED 21.8.24 - M - For Analysis.csv"),
|
|
# ]
|
|
# master_to_asset_list_filepath = None
|
|
# phase = False
|
|
# ecosurv_landlords = "pickering"
|
|
# asset_list_header = 0
|
|
# landlord_block_reference = None
|
|
# master_id_colnames = [None, None]
|
|
|
|
# Colchester
|
|
# data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Colchester"
|
|
# data_filename = "Warmfront data- Colchester Borough Homes (Complete).xlsx"
|
|
# sheet_name = "Sheet1"
|
|
# postcode_column = 'Full Address.1'
|
|
# fulladdress_column = "Full Address"
|
|
# address1_column = None
|
|
# address1_method = "first_word"
|
|
# address_cols_to_concat = []
|
|
# missing_postcodes_method = None
|
|
# landlord_year_built = "Build Date"
|
|
# landlord_os_uprn = None
|
|
# landlord_property_type = "Property Type"
|
|
# landlord_wall_construction = "Wallinsul"
|
|
# landlord_heating_system = "HeatSorc"
|
|
# landlord_existing_pv = None
|
|
# landlord_property_id = "Property Reference"
|
|
# outcomes_filename = []
|
|
# outcomes_sheetname = []
|
|
# outcomes_postcode = []
|
|
# outcomes_houseno = []
|
|
# outcomes_id = []
|
|
# outcomes_address = []
|
|
# master_filepaths = []
|
|
# master_to_asset_list_filepath = None
|
|
# asset_list_header = 0
|
|
# landlord_built_form = None
|
|
# landlord_roof_construction = None
|
|
# landlord_sap = None
|
|
# landlord_block_reference = None
|
|
# phase = False
|
|
# ecosurv_landlords = None
|
|
# master_id_colnames = []
|
|
|
|
# data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Ealing/Hubspot"
|
|
# data_filename = "EalingFlats.xlsx"
|
|
# sheet_name = "Sheet1"
|
|
# postcode_column = 'Postcode'
|
|
# fulladdress_column = "Address"
|
|
# address1_column = None
|
|
# address1_method = "house_number_extraction"
|
|
# address_cols_to_concat = []
|
|
# missing_postcodes_method = None
|
|
# landlord_year_built = None
|
|
# landlord_os_uprn = None
|
|
# landlord_property_type = None # Using the inspections property type
|
|
# landlord_built_form = None
|
|
# landlord_wall_construction = None
|
|
# landlord_roof_construction = None
|
|
# landlord_heating_system = None
|
|
# landlord_existing_pv = None
|
|
# landlord_property_id = "Property ref"
|
|
# landlord_sap = None
|
|
# outcomes_filename = []
|
|
# outcomes_sheetname = []
|
|
# outcomes_postcode = []
|
|
# outcomes_houseno = []
|
|
# outcomes_id = []
|
|
# outcomes_address = []
|
|
# master_filepaths = []
|
|
# master_to_asset_list_filepath = None
|
|
# phase = False
|
|
# ecosurv_landlords = None
|
|
# asset_list_header = 0
|
|
# landlord_block_reference = "Block Ref"
|
|
# master_id_colnames = []
|
|
|
|
# Southern - Jan list
|
|
# data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Southern/New Programme/Jan 2025 List"
|
|
# data_filename = "SOUTHERN ASSETS January 2025 Additions Query 21.03.2025.xlsx"
|
|
# sheet_name = "Jan 2025 additions"
|
|
# postcode_column = 'Post Code'
|
|
# fulladdress_column = None
|
|
# address1_column = "NO."
|
|
# address1_method = None
|
|
# address_cols_to_concat = ["NO.", "Street / Block Name", "Town/Area"]
|
|
# missing_postcodes_method = None
|
|
# landlord_year_built = None
|
|
# landlord_os_uprn = None
|
|
# landlord_property_type = None # Using the inspections property type
|
|
# landlord_built_form = None
|
|
# landlord_wall_construction = None
|
|
# landlord_roof_construction = None
|
|
# landlord_heating_system = None
|
|
# landlord_existing_pv = None
|
|
# landlord_property_id = "SH Property Reference"
|
|
# landlord_sap = None
|
|
# outcomes_filename = [
|
|
# os.path.join(data_folder, "RT - Southern Housing Group - JJC.xlsx"),
|
|
# os.path.join(data_folder, "RT - SOUTHERN OUTCOMES - SCIS Merged.xlsx"),
|
|
# ]
|
|
# outcomes_sheetname = ["Feedback", "Collated"]
|
|
# outcomes_postcode = ["Poscode", "Postcode"]
|
|
# outcomes_houseno = ["No.", "No"]
|
|
# outcomes_id = ["UPRNs", None]
|
|
# outcomes_address = ["Address", "Address"]
|
|
# master_filepaths = [
|
|
# os.path.join(data_folder, "southern_submissions/CAVITY'S - DECEMBER 2018-Table 1.csv"),
|
|
# os.path.join(data_folder, "southern_submissions/CAVITY'S 2019-Table 1.csv"),
|
|
# os.path.join(data_folder, "southern_submissions/CAVITY'S ECO4-Table 1.csv"),
|
|
# os.path.join(data_folder, "southern_submissions/LOFT'S-Table 1.csv"),
|
|
# ]
|
|
# master_to_asset_list_filepath = None
|
|
# phase = False
|
|
# ecosurv_landlords = "southern"
|
|
# asset_list_header = 0
|
|
# landlord_block_reference = None
|
|
# master_id_colnames = [None, None, None, None]
|
|
|
|
# NCHA
|
|
# data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/NCHA"
|
|
# data_filename = "Energy Information MASTER June 2025.xlsx"
|
|
# sheet_name = "Data"
|
|
# postcode_column = 'Postcode'
|
|
# fulladdress_column = "Address"
|
|
# address1_column = None
|
|
# address1_method = "house_number_extraction"
|
|
# address_cols_to_concat = []
|
|
# missing_postcodes_method = None
|
|
# landlord_year_built = "Build Date (HAR10)"
|
|
# landlord_os_uprn = None
|
|
# landlord_property_type = "Property Type (HAR10)"
|
|
# landlord_built_form = "Build Form (EPC)"
|
|
# landlord_wall_construction = "Wall Description"
|
|
# landlord_roof_construction = None
|
|
# landlord_heating_system = "HEAT Code"
|
|
# landlord_existing_pv = None
|
|
# landlord_property_id = "Place ref"
|
|
# landlord_sap = "EPC SAP"
|
|
# outcomes_filename = None
|
|
# outcomes_sheetname = None
|
|
# outcomes_postcode = None
|
|
# outcomes_houseno = None
|
|
# outcomes_id = None
|
|
# outcomes_address = None
|
|
# master_filepaths = []
|
|
# master_to_asset_list_filepath = None
|
|
# phase = False
|
|
# ecosurv_landlords = None
|
|
# asset_list_header = 0
|
|
# landlord_block_reference = None
|
|
# master_id_colnames = []
|
|
|
|
# data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Calico"
|
|
# data_filename = "07.04 CALICO - Final List.xlsx"
|
|
# asset_list_header = 2
|
|
# sheet_name = "Final List"
|
|
# postcode_column = 'Postcode'
|
|
# fulladdress_column = None
|
|
# address1_column = "Property Number / Name"
|
|
# address1_method = None
|
|
# address_cols_to_concat = [
|
|
# "Property Number / Name",
|
|
# "Street",
|
|
# "Town"
|
|
# ]
|
|
# missing_postcodes_method = None
|
|
# landlord_year_built = "NROSH Estimated Build Date"
|
|
# landlord_os_uprn = None
|
|
# landlord_property_type = "Asset Type"
|
|
# landlord_built_form = None
|
|
# landlord_wall_construction = "Wall Type"
|
|
# landlord_heating_system = "Boiler Type"
|
|
# landlord_existing_pv = None
|
|
# landlord_property_id = "Asset Reference"
|
|
# outcomes_filename = []
|
|
# outcomes_sheetname = []
|
|
# outcomes_postcode = []
|
|
# outcomes_houseno = []
|
|
# outcomes_id = []
|
|
# outcomes_address = []
|
|
# master_filepaths = []
|
|
# master_id_colnames = []
|
|
# master_to_asset_list_filepath = None
|
|
# landlord_roof_construction = None
|
|
# landlord_block_reference = None
|
|
# landlord_sap = "Current Efficiency Rating - Score"
|
|
# phase = None
|
|
# ecosurv_landlords = None
|
|
|
|
# data_folder = (
|
|
# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Livewest/Programme Update - March 2025/2018 Asset
|
|
# List"
|
|
# )
|
|
# data_filename = "LIVEWEST STOCK - 23rd October 2018.xlsx"
|
|
# sheet_name = "Assets"
|
|
# postcode_column = 'Postcode'
|
|
# fulladdress_column = "Address"
|
|
# address1_column = None
|
|
# address1_method = "house_number_extraction"
|
|
# address_cols_to_concat = []
|
|
# missing_postcodes_method = None
|
|
# landlord_year_built = "Build Year"
|
|
# landlord_os_uprn = None
|
|
# landlord_property_type = "Property Archetype"
|
|
# landlord_built_form = None
|
|
# landlord_wall_construction = None
|
|
# landlord_heating_system = "Heating Fuel Type"
|
|
# landlord_existing_pv = None
|
|
# landlord_property_id = "Uprn - DO NOT DELETE"
|
|
# outcomes_filename = [
|
|
# os.path.join(data_folder, "RT - LiveWest.xlsx")
|
|
# ]
|
|
# outcomes_sheetname = ["Feedback"]
|
|
# outcomes_postcode = ["Poscode"]
|
|
# outcomes_houseno = ["No."]
|
|
# outcomes_id = ["UPRN"]
|
|
# outcomes_address = ["Address"]
|
|
# master_filepaths = [
|
|
# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Livewest/Programme Update - March 2025/Rolling
|
|
# Master "
|
|
# "- redacted for analysis/CAVITY-Table 1.csv"
|
|
# ]
|
|
# master_id_colnames = [None]
|
|
# master_to_asset_list_filepath = None
|
|
# landlord_roof_construction = None
|
|
# landlord_block_reference = None
|
|
# landlord_sap = None
|
|
# phase = None
|
|
# ecosurv_landlords = "livewest|live west"
|
|
|
|
# data_folder = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Livewest/Programme Update - March "
|
|
# "2025/Livewest Asset List (Original) - csv")
|
|
# data_filename = "Report-Table 1.csv"
|
|
# sheet_name = None
|
|
# postcode_column = 'Postcode'
|
|
# fulladdress_column = "T1_Address"
|
|
# address1_column = None
|
|
# address1_method = "house_number_extraction"
|
|
# address_cols_to_concat = []
|
|
# missing_postcodes_method = None
|
|
# landlord_year_built = "Build Yr"
|
|
# landlord_os_uprn = None
|
|
# landlord_property_type = "T1_AssetType"
|
|
# landlord_built_form = "T1_AssetType"
|
|
# landlord_wall_construction = "Wall Type Cavity"
|
|
# landlord_heating_system = "Heating Fuel"
|
|
# landlord_existing_pv = None
|
|
# landlord_property_id = "T1_UPRN"
|
|
# outcomes_filename = [
|
|
# os.path.join(data_folder, "RT - LiveWest.xlsx")
|
|
# ]
|
|
# outcomes_address = ["Address"]
|
|
# outcomes_sheetname = ["Feedback"]
|
|
# outcomes_postcode = ["Poscode"]
|
|
# outcomes_houseno = ["No."]
|
|
# outcomes_id = ["UPRN"]
|
|
# master_filepaths = [
|
|
# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Livewest/Programme Update - March 2025/Rolling
|
|
# Master "
|
|
# "- redacted for analysis/CAVITY-Table 1.csv"
|
|
# ]
|
|
# master_id_colnames = [None]
|
|
# master_to_asset_list_filepath = None
|
|
# landlord_roof_construction = None
|
|
# landlord_block_reference = None
|
|
# landlord_sap = None
|
|
# phase = None
|
|
# ecosurv_landlords = "livewest|live west"
|
|
|
|
# Stori
|
|
# data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Storicymru"
|
|
# data_filename = "Asset list - for analysis.xlsx"
|
|
# sheet_name = "SAP and Costs Calculations"
|
|
# postcode_column = 'Postcode'
|
|
# fulladdress_column = "Address1"
|
|
# address1_column = None
|
|
# address1_method = "house_number_extraction"
|
|
# address_cols_to_concat = []
|
|
# missing_postcodes_method = None
|
|
# landlord_year_built = "Age"
|
|
# landlord_os_uprn = None
|
|
# landlord_property_type = "TYPE"
|
|
# landlord_built_form = "AGE / DETACHMENT"
|
|
# landlord_wall_construction = "WALL"
|
|
# landlord_roof_construction = "LOFT INSULATION"
|
|
# landlord_heating_system = "BOILER"
|
|
# landlord_existing_pv = "SOLAR PV"
|
|
# landlord_property_id = "UPRN"
|
|
# landlord_sap = "Current SAP Rating"
|
|
# landlord_block_reference = None
|
|
# outcomes_filename = []
|
|
# outcomes_sheetname = []
|
|
# outcomes_postcode = []
|
|
# outcomes_houseno = []
|
|
# outcomes_id = []
|
|
# outcomes_address = []
|
|
# master_filepaths = []
|
|
# master_to_asset_list_filepath = None
|
|
# master_id_colnames = []
|
|
# phase = False
|
|
# ecosurv_landlords = None
|
|
|
|
# Thrive - reconciliation
|
|
# data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation"
|
|
# data_filename = "Thrive Asset List - Complete - Updated May 2025.xlsx"
|
|
# sheet_name = "Sheet1"
|
|
# postcode_column = 'postcode'
|
|
# fulladdress_column = "full_address"
|
|
# address1_column = "address_line_1"
|
|
# address1_method = None
|
|
# address_cols_to_concat = []
|
|
# missing_postcodes_method = None
|
|
# landlord_year_built = "age_band_calculated"
|
|
# landlord_os_uprn = None
|
|
# landlord_property_type = "property_type"
|
|
# landlord_built_form = "build_form"
|
|
# landlord_wall_construction = None
|
|
# landlord_roof_construction = "assumed_loft_insulation_thickness_updated"
|
|
# landlord_heating_system = "heating_type_updated"
|
|
# landlord_existing_pv = None
|
|
# landlord_property_id = "thrive_property_id"
|
|
# landlord_sap = "sap_rating_updated"
|
|
# landlord_block_reference = "block_reference"
|
|
# outcomes_filename = [
|
|
# os.path.join(data_folder, "Thrive - Outcomes - April 24-March25 - Corrected.xlsx")
|
|
# ]
|
|
# outcomes_sheetname = ["Sheet1"]
|
|
# outcomes_postcode = ["postcode"]
|
|
# outcomes_houseno = ["No."]
|
|
# outcomes_id = ["thrive_property_id"]
|
|
# outcomes_address = ["address"]
|
|
# master_filepaths = [
|
|
# os.path.join(data_folder, "Thrive Submissions ECO3 - with IDS.csv"),
|
|
# os.path.join(data_folder, "Thrive Submissions ECO4 - with IDS.csv"),
|
|
# ]
|
|
# master_to_asset_list_filepath = None
|
|
# master_id_colnames = ["thrive_property_id", "thrive_property_id"]
|
|
# phase = False
|
|
# ecosurv_landlords = "thrive"
|
|
|
|
# Southern Midlands
|
|
# data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Southern/Midlands Properties - Apr 2025"
|
|
# data_filename = "Southern Housing Midlands Property List - combined.xlsx"
|
|
# sheet_name = "Sheet 1"
|
|
# postcode_column = 'Post Code'
|
|
# fulladdress_column = "Address"
|
|
# address1_column = None
|
|
# address1_method = "house_number_extraction"
|
|
# address_cols_to_concat = []
|
|
# missing_postcodes_method = None
|
|
# landlord_year_built = "Age_1"
|
|
# landlord_os_uprn = None
|
|
# landlord_property_type = "Prop_Type"
|
|
# landlord_built_form = "Prop_Type"
|
|
# landlord_wall_construction = "Walls_P"
|
|
# landlord_heating_system = "Heating System"
|
|
# landlord_existing_pv = None
|
|
# landlord_property_id = "AssetID"
|
|
# outcomes_filename = None
|
|
# outcomes_sheetname = None
|
|
# outcomes_postcode = None
|
|
# outcomes_houseno = None
|
|
# outcomes_id = None
|
|
# outcomes_address = None
|
|
# master_filepaths = []
|
|
# master_to_asset_list_filepath = None
|
|
|
|
# Maps addresses to uprn in problematic cases
|
|
manual_uprn_map = {}
|
|
|
|
asset_list = AssetList(
|
|
local_filepath=os.path.join(data_folder, data_filename),
|
|
header=asset_list_header,
|
|
sheet_name=sheet_name,
|
|
address1_colname=address1_column,
|
|
postcode_colname=postcode_column,
|
|
landlord_property_id=landlord_property_id,
|
|
full_address_colname=fulladdress_column,
|
|
full_address_cols_to_concat=address_cols_to_concat,
|
|
missing_postcodes_method=missing_postcodes_method,
|
|
address1_extraction_method=address1_method,
|
|
landlord_year_built=landlord_year_built,
|
|
landlord_uprn=landlord_os_uprn,
|
|
landlord_property_type=landlord_property_type,
|
|
landlord_built_form=landlord_built_form,
|
|
landlord_wall_construction=landlord_wall_construction,
|
|
landlord_roof_construction=landlord_roof_construction,
|
|
landlord_heating_system=landlord_heating_system,
|
|
landlord_existing_pv=landlord_existing_pv,
|
|
landlord_sap=landlord_sap,
|
|
landlord_block_reference=landlord_block_reference,
|
|
phase=phase
|
|
)
|
|
asset_list.init_standardise()
|
|
|
|
# We produce the new maps, which can be saved for future useage
|
|
new_property_type_map = {
|
|
k: v for k, v in (
|
|
asset_list.variable_mappings[asset_list.landlord_property_type] if
|
|
asset_list.landlord_property_type else {}
|
|
).items()
|
|
if k not in PROPERTY_MAPPING
|
|
}
|
|
new_built_form_map = {
|
|
k: v for k, v in (
|
|
asset_list.variable_mappings[asset_list.landlord_built_form] if
|
|
asset_list.landlord_built_form else {}
|
|
).items()
|
|
if k not in BUILT_FORM_MAPPINGS
|
|
}
|
|
new_wall_map = {
|
|
k: v for k, v in (
|
|
asset_list.variable_mappings[asset_list.landlord_wall_construction] if
|
|
asset_list.landlord_wall_construction else {}
|
|
).items()
|
|
if k not in WALL_CONSTRUCTION_MAPPINGS
|
|
}
|
|
new_heating_map = {
|
|
k: v for k, v in (
|
|
asset_list.variable_mappings[asset_list.landlord_heating_system] if
|
|
asset_list.landlord_heating_system else {}
|
|
).items()
|
|
if k not in HEATING_MAPPINGS
|
|
}
|
|
new_existing_pv_map = {
|
|
k: v for k, v in (
|
|
asset_list.variable_mappings[asset_list.landlord_existing_pv] if asset_list.landlord_existing_pv else {}
|
|
).items()
|
|
if k not in EXISTING_PV_MAPPINGS
|
|
}
|
|
new_roof_construction_map = {
|
|
k: v for k, v in (
|
|
asset_list.variable_mappings[asset_list.landlord_roof_construction] if
|
|
asset_list.landlord_roof_construction else {}
|
|
).items()
|
|
if k not in ROOF_CONSTRUCTION_MAPPINGS
|
|
}
|
|
|
|
asset_list.apply_standardiation()
|
|
|
|
# We now flag properties that have been treated under existing programmes
|
|
asset_list.flag_outcomes(
|
|
outcomes_filepaths=outcomes_filename,
|
|
outcomes_sheetname=outcomes_sheetname,
|
|
outcomes_address=outcomes_address,
|
|
outcomes_postcode=outcomes_postcode,
|
|
outcomes_houseno=outcomes_houseno,
|
|
outcomes_id=outcomes_id
|
|
)
|
|
|
|
asset_list.flag_survey_master(
|
|
master_filepaths=master_filepaths,
|
|
master_to_asset_list_filepath=master_to_asset_list_filepath,
|
|
master_id_colnames=master_id_colnames,
|
|
)
|
|
|
|
asset_list.flag_ecosurv(ecosurv_landlords)
|
|
|
|
### We retrieve the EPC data
|
|
|
|
# We chunk up this data into 5000 rows at a time
|
|
# Create the chunks directory
|
|
epc_api_only = False
|
|
force_retrieve_data = False
|
|
skip = None # Used to skip already completed chunks
|
|
chunk_size = 2000
|
|
filename = "Chunk {i}.csv"
|
|
download_folder = os.path.join(data_folder, "Chunks")
|
|
if not os.path.exists(download_folder):
|
|
os.makedirs(download_folder)
|
|
|
|
chunk_indexes = list(range(0, len(asset_list.standardised_asset_list), chunk_size))
|
|
downloaded_files = {filename.format(i=i) for i in chunk_indexes}
|
|
|
|
# We check if we have files associated to these files already and if we do, and we do not want to force the
|
|
# fetching of the data, we skip
|
|
folder_contents = os.listdir(download_folder)
|
|
if all(x in folder_contents for x in downloaded_files):
|
|
skip = max(chunk_indexes)
|
|
|
|
if any(x in folder_contents for x in downloaded_files):
|
|
skip = max([i for i in chunk_indexes if filename.format(i=i) in folder_contents])
|
|
|
|
for i in range(0, len(asset_list.standardised_asset_list), chunk_size):
|
|
print(f"Processing chunk {i} to {i + chunk_size}")
|
|
if skip is not None and not force_retrieve_data:
|
|
if i <= skip:
|
|
continue
|
|
chunk = asset_list.standardised_asset_list[i:i + chunk_size]
|
|
epc_data_chunk, errors_chunk, no_epc_chunk = get_data(
|
|
df=chunk,
|
|
row_id_name=asset_list.DOMNA_PROPERTY_ID,
|
|
uprn_column=AssetList.STANDARD_UPRN,
|
|
fulladdress_column=AssetList.STANDARD_FULL_ADDRESS,
|
|
address1_column=AssetList.STANDARD_ADDRESS_1,
|
|
postcode_column=AssetList.STANDARD_POSTCODE,
|
|
property_type_column=AssetList.STANDARD_PROPERTY_TYPE,
|
|
built_form_column=AssetList.STANDARD_BUILT_FORM,
|
|
manual_uprn_map=manual_uprn_map,
|
|
epc_api_only=epc_api_only,
|
|
epc_auth_token=EPC_AUTH_TOKEN
|
|
)
|
|
|
|
# We now retrieve any failed properties
|
|
chunk_failed = chunk[chunk[asset_list.DOMNA_PROPERTY_ID].isin(errors_chunk)]
|
|
epc_data_failed, _, _ = get_data(
|
|
df=chunk_failed,
|
|
row_id_name=asset_list.DOMNA_PROPERTY_ID,
|
|
uprn_column=AssetList.STANDARD_UPRN,
|
|
fulladdress_column=AssetList.STANDARD_FULL_ADDRESS,
|
|
address1_column=AssetList.STANDARD_ADDRESS_1,
|
|
postcode_column=AssetList.STANDARD_POSTCODE,
|
|
property_type_column=AssetList.STANDARD_PROPERTY_TYPE,
|
|
built_form_column=AssetList.STANDARD_BUILT_FORM,
|
|
manual_uprn_map=manual_uprn_map,
|
|
epc_api_only=epc_api_only,
|
|
epc_auth_token=EPC_AUTH_TOKEN
|
|
)
|
|
|
|
epc_data_chunk.extend(epc_data_failed)
|
|
|
|
# Append the failed data to the main data
|
|
# Store the chunk locally as a csv
|
|
pd.DataFrame(epc_data_chunk).to_csv(os.path.join(data_folder, f"Chunks/Chunk {i}.csv"), index=False)
|
|
# Store the errors and no-data locally
|
|
with open(os.path.join(data_folder, f"Chunks/Chunk {i} errors.json"), "w") as f:
|
|
json.dump(errors_chunk, f)
|
|
|
|
with open(os.path.join(data_folder, f"Chunks/Chunk {i} nodata.csv"), "w") as f:
|
|
json.dump(no_epc_chunk, f)
|
|
|
|
# We read in and concatenate the created created chunks
|
|
# List the contents
|
|
epc_data = []
|
|
for file in downloaded_files:
|
|
csv_data = pd.read_csv(os.path.join(download_folder, file))
|
|
# We need to convert the recommendations back to a list
|
|
csv_data["recommendations"] = csv_data["recommendations"].apply(eval)
|
|
# We don't have this if we didn't run the pulling from find my epc
|
|
if "find_my_epc_data" in csv_data.columns:
|
|
csv_data["find_my_epc_data"] = csv_data["find_my_epc_data"].apply(eval)
|
|
epc_data.append(csv_data)
|
|
|
|
epc_df = pd.concat(epc_data)
|
|
if "estimated" not in epc_df.columns:
|
|
epc_df["estimated"] = False
|
|
|
|
epc_df["estimated"] = epc_df["estimated"].fillna(False)
|
|
|
|
# We expand out the recommendations
|
|
recommendations_df = epc_df[[asset_list.DOMNA_PROPERTY_ID, "recommendations"]]
|
|
|
|
unique_recommendations = set()
|
|
for _, row in recommendations_df.iterrows():
|
|
unique_recommendations.update([rec["improvement-summary-text"] for rec in row["recommendations"]])
|
|
|
|
columns = [asset_list.DOMNA_PROPERTY_ID] + list(unique_recommendations)
|
|
transformed_data = []
|
|
for _, row in recommendations_df.iterrows():
|
|
# Initialize a dictionary for this row with False for all recommendations
|
|
row_data = {col: False for col in columns}
|
|
row_data[asset_list.DOMNA_PROPERTY_ID] = row[asset_list.DOMNA_PROPERTY_ID]
|
|
|
|
# Set True for each recommendation present in this row
|
|
for rec in row["recommendations"]:
|
|
recommendation_text = rec["improvement-summary-text"]
|
|
row_data[recommendation_text] = True
|
|
|
|
# Append the row data to transformed_data
|
|
transformed_data.append(row_data)
|
|
|
|
transformed_df = pd.DataFrame(transformed_data)
|
|
for col in [
|
|
"Floor insulation (solid floor)",
|
|
"Floor insulation", "Floor insulation (suspended floor)"
|
|
]:
|
|
if col not in transformed_df.columns:
|
|
transformed_df[col] = False
|
|
transformed_df = transformed_df[
|
|
[
|
|
asset_list.DOMNA_PROPERTY_ID, "Floor insulation (solid floor)",
|
|
"Floor insulation", "Floor insulation (suspended floor)"
|
|
]
|
|
]
|
|
|
|
transformed_df["epc_has_floor_recommendation"] = (
|
|
transformed_df["Floor insulation (solid floor)"] | transformed_df["Floor insulation"] |
|
|
transformed_df["Floor insulation (suspended floor)"]
|
|
)
|
|
|
|
# Get the find my epc data
|
|
if "find_my_epc_data" not in epc_df.columns:
|
|
epc_df["find_my_epc_data"] = None
|
|
|
|
find_my_epc_data = []
|
|
for _, x in epc_df.iterrows():
|
|
if x["find_my_epc_data"]:
|
|
find_my_epc_data.append(
|
|
{
|
|
asset_list.DOMNA_PROPERTY_ID: x[asset_list.DOMNA_PROPERTY_ID],
|
|
**x["find_my_epc_data"]
|
|
}
|
|
)
|
|
else:
|
|
find_my_epc_data.append(
|
|
{
|
|
asset_list.DOMNA_PROPERTY_ID: x[asset_list.DOMNA_PROPERTY_ID]
|
|
}
|
|
)
|
|
|
|
find_my_epc_data = pd.DataFrame(find_my_epc_data)
|
|
|
|
find_my_epc_data = find_my_epc_data.merge(
|
|
transformed_df[[asset_list.DOMNA_PROPERTY_ID, "epc_has_floor_recommendation"]],
|
|
how="left", on=asset_list.DOMNA_PROPERTY_ID
|
|
)
|
|
|
|
# We check if we get the solar pv column:
|
|
if "Solar photovoltaics" not in find_my_epc_data.columns:
|
|
find_my_epc_data["Solar photovoltaics"] = False
|
|
|
|
# Retrieve just the data we need
|
|
epc_df = epc_df[
|
|
[asset_list.DOMNA_PROPERTY_ID] + list(asset_list.EPC_API_DATA_NAMES.keys())
|
|
].rename(
|
|
columns=asset_list.EPC_API_DATA_NAMES
|
|
)
|
|
|
|
# Look for columns not in the find my EPC data, which will have happened if we didn't
|
|
# retrieve it in the first place
|
|
missed_find_epc_cols = [c for c in list(asset_list.FIND_EPC_DATA_NAMES.keys()) if c not in find_my_epc_data.columns]
|
|
if missed_find_epc_cols:
|
|
for c in missed_find_epc_cols:
|
|
find_my_epc_data[c] = None
|
|
|
|
epc_df = epc_df.merge(
|
|
find_my_epc_data[
|
|
[asset_list.DOMNA_PROPERTY_ID, "epc_has_floor_recommendation"] + list(asset_list.FIND_EPC_DATA_NAMES.keys())
|
|
]
|
|
.rename(columns=asset_list.FIND_EPC_DATA_NAMES),
|
|
how="left",
|
|
on=asset_list.DOMNA_PROPERTY_ID
|
|
)
|
|
|
|
asset_list.merge_data(epc_df)
|
|
asset_list.extract_attributes()
|
|
asset_list.identify_worktypes()
|
|
|
|
# We now flag the status of the property
|
|
asset_list.label_property_status()
|
|
asset_list.analyse_geographies()
|
|
asset_list.get_work_figures()
|
|
|
|
# Store as an excel
|
|
filename = os.path.join(data_folder, ".".join(data_filename.split(".")[:-1])) + " - Standardised.xlsx"
|
|
# Store the data in two tabs. One for the asset list with the EPC data and the second with the flat data
|
|
|
|
with pd.ExcelWriter(filename) as writer:
|
|
asset_list.standardised_asset_list.to_excel(writer, sheet_name="Standardised Asset List", index=False)
|
|
if asset_list.block_analysis_df is not None:
|
|
asset_list.block_analysis_df.to_excel(writer, sheet_name="Block Analysis", index=False)
|
|
# If we have outcomes, we add a tab with the outcomes
|
|
if not asset_list.outcomes_for_output.empty:
|
|
asset_list.outcomes_for_output.to_excel(writer, sheet_name="Outcomes", index=False)
|
|
|
|
if not asset_list.unmatched_submissions.empty:
|
|
asset_list.unmatched_submissions.to_excel(writer, sheet_name="Unmatched Submissions", index=False)
|
|
|
|
if not asset_list.outcomes_no_match.empty:
|
|
asset_list.outcomes_no_match.to_excel(writer, sheet_name="Unmatched Outcomes", index=False)
|
|
|
|
if not asset_list.ecosurv_no_match.empty:
|
|
asset_list.ecosurv_no_match.to_excel(writer, sheet_name="Unmatched Ecosurv", index=False)
|
|
|
|
if not asset_list.geographical_areas.empty:
|
|
asset_list.geographical_areas.to_excel(writer, sheet_name="Geographical Areas", index=False)
|