diff --git a/asset_list/app.py b/asset_list/app.py index ec47b07d..cbb2cd93 100644 --- a/asset_list/app.py +++ b/asset_list/app.py @@ -59,1270 +59,6 @@ def app(): Property UPRN """ - # PFP - data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/Nov 2025 Inspections" - data_filename = "Inspections List - PFP (1).xlsx" - sheet_name = "Sheet1" - postcode_column = 'Postcode' - address1_column = "Address 1" - address1_method = None - fulladdress_column = None - address_cols_to_concat = ["Address 1", "Address 2", "Address 3"] - missing_postcodes_method = None - landlord_year_built = None - landlord_os_uprn = None - landlord_property_type = "Archetype" # Using inspections - landlord_built_form = "Archetype 2" # Using inspections - landlord_wall_construction = None - landlord_roof_construction = None - landlord_heating_system = None - landlord_existing_pv = None - landlord_property_id = "UPRN" - landlord_sap = None - outcomes_filename = None - outcomes_sheetname = None - outcomes_postcode = None - outcomes_houseno = None - outcomes_id = None - outcomes_address = None - master_filepaths = [] - master_id_colnames = [] - master_to_asset_list_filepath = None - phase = False - ecosurv_landlords = None - asset_list_header = 0 - landlord_block_reference = None - - # Stonewater Solar - data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/October 2025 Solar" - data_filename = "Copy of AP Stonewater Ammended address list - PV AM Amended - Khalim initial review.xlsx" - sheet_name = "Proposed Sheet" - postcode_column = 'Postcode' - address1_column = None - address1_method = "house_number_extraction" - fulladdress_column = "Address" - address_cols_to_concat = [] - missing_postcodes_method = None - landlord_year_built = None - landlord_os_uprn = None - landlord_property_type = "Property Type" - landlord_built_form = "Property Type" - landlord_wall_construction = "Walls" - landlord_roof_construction = "Roofs" - 
landlord_heating_system = "Heating" - landlord_existing_pv = None - landlord_property_id = "Asset Id" - landlord_sap = "SAP" - outcomes_filename = None - outcomes_sheetname = None - outcomes_postcode = None - outcomes_houseno = None - outcomes_id = None - outcomes_address = None - master_filepaths = [] - master_id_colnames = [] - master_to_asset_list_filepath = None - phase = False - ecosurv_landlords = None - asset_list_header = 0 - landlord_block_reference = None - - # - data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Cambridge/" - data_filename = "22.10_Cambridge_west addresses.xlsx" - sheet_name = "Asset List" - postcode_column = 'Postcode' - address1_column = None - address1_method = "house_number_extraction" - fulladdress_column = "Full Address" - address_cols_to_concat = [] - missing_postcodes_method = None - landlord_year_built = None - landlord_os_uprn = None - landlord_property_type = None - landlord_built_form = None - landlord_wall_construction = None - landlord_roof_construction = None - landlord_heating_system = None - landlord_existing_pv = None - landlord_property_id = "id" - landlord_sap = None - outcomes_filename = None - outcomes_sheetname = None - outcomes_postcode = None - outcomes_houseno = None - outcomes_id = None - outcomes_address = None - master_filepaths = [] - master_id_colnames = [] - master_to_asset_list_filepath = None - phase = False - ecosurv_landlords = None - asset_list_header = 0 - landlord_block_reference = None - - # Property Box - data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/NRLA/Property Box" - data_filename = "Property Box Finance Portfolio.xlsx" - sheet_name = "Sheet1" - postcode_column = 'Postcode' - address1_column = None - address1_method = "house_number_extraction" - fulladdress_column = "Address 1" - address_cols_to_concat = [] - missing_postcodes_method = None - landlord_year_built = None - landlord_os_uprn = None - landlord_property_type = None - landlord_built_form = 
None - landlord_wall_construction = None - landlord_roof_construction = None - landlord_heating_system = None - landlord_existing_pv = None - landlord_property_id = "row_id" - landlord_sap = None - outcomes_filename = None - outcomes_sheetname = None - outcomes_postcode = None - outcomes_houseno = None - outcomes_id = None - outcomes_address = None - master_filepaths = [] - master_id_colnames = [] - master_to_asset_list_filepath = None - phase = False - ecosurv_landlords = None - asset_list_header = 0 - landlord_block_reference = "block_id" - - # CDS - able-to-pay - data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/CDS/Able to pay" - data_filename = "CDS_ASSET LIST_(2314).xlsx" - sheet_name = "Sheet1" - postcode_column = 'Property Address - Postcode' - address1_column = "Property Address - Line 1" - address1_method = None - fulladdress_column = "Property Address - Line 1" - address_cols_to_concat = [] - missing_postcodes_method = None - landlord_year_built = None - landlord_os_uprn = None - landlord_property_type = None - landlord_built_form = None - landlord_wall_construction = None - landlord_roof_construction = None - landlord_heating_system = None - landlord_existing_pv = None - landlord_property_id = "row_id" - landlord_sap = None - outcomes_filename = None - outcomes_sheetname = None - outcomes_postcode = None - outcomes_houseno = None - outcomes_id = None - outcomes_address = None - master_filepaths = [] - master_id_colnames = [] - master_to_asset_list_filepath = None - phase = False - ecosurv_landlords = None - asset_list_header = 0 - landlord_block_reference = None - - # Hyde - solar - data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Hyde/Solar" - data_filename = "Domna Property Analysis HYDE (Chichester Removed)V2-Completed.xlsx" - sheet_name = "Electric Property Inspections" - postcode_column = 'Postcode' - address1_column = None # Is only patchily populated so we create it - address1_method = 
'house_number_extraction' - fulladdress_column = "Address" - address_cols_to_concat = [] - missing_postcodes_method = None - landlord_year_built = None - landlord_os_uprn = None - landlord_property_type = "Property Type" - landlord_built_form = "Property Type" - landlord_wall_construction = "Walls " - landlord_roof_construction = "Roofs" - landlord_heating_system = "Heating" - landlord_existing_pv = None - landlord_property_id = "Address ID" - landlord_sap = "SAP" - outcomes_filename = None - outcomes_sheetname = None - outcomes_postcode = None - outcomes_houseno = None - outcomes_id = None - outcomes_address = None - master_filepaths = [] - master_id_colnames = [] - master_to_asset_list_filepath = None - phase = False - ecosurv_landlords = None - asset_list_header = 0 - landlord_block_reference = None - - # Hyde cavity - data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Hyde/Cavity" - data_filename = "Domna Property Analysis HYDE (Chichester Removed)V2-Completed.xlsx" - sheet_name = "Cavity Inspections" - postcode_column = 'Postcode' - address1_column = None # Is only patchily populated so we create it - address1_method = 'house_number_extraction' - fulladdress_column = "Address" - address_cols_to_concat = [] - missing_postcodes_method = None - landlord_year_built = None - landlord_os_uprn = None - landlord_property_type = "Property Type" - landlord_built_form = "Property Type" - landlord_wall_construction = "Walls " - landlord_roof_construction = "Roofs" - landlord_heating_system = "Heating" - landlord_existing_pv = None - landlord_property_id = "Address ID" - landlord_sap = "SAP" - outcomes_filename = None - outcomes_sheetname = None - outcomes_postcode = None - outcomes_houseno = None - outcomes_id = None - outcomes_address = None - master_filepaths = [] - master_id_colnames = [] - master_to_asset_list_filepath = None - phase = False - ecosurv_landlords = None - asset_list_header = 0 - landlord_block_reference = None - - # CDS - Sept 2025 - 
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/CDS/September 2025 Programme" - data_filename = "Founder Estates CDS.xlsx" - sheet_name = "Combined List" - postcode_column = 'Postcode' - address1_column = None # Is only patchily populated so we create it - address1_method = 'house_number_extraction' - fulladdress_column = "Address" - address_cols_to_concat = [] - missing_postcodes_method = None - landlord_year_built = None - landlord_os_uprn = None - landlord_property_type = "Property Type" - landlord_built_form = None - landlord_wall_construction = None - landlord_roof_construction = None - landlord_heating_system = "Heating Type" - landlord_existing_pv = None - landlord_property_id = "(Do Not Modify) Property" - landlord_sap = None - outcomes_filename = None - outcomes_sheetname = None - outcomes_postcode = None - outcomes_houseno = None - outcomes_id = None - outcomes_address = None - master_filepaths = [] - master_id_colnames = [] - master_to_asset_list_filepath = None - phase = False - ecosurv_landlords = None - asset_list_header = 0 - landlord_block_reference = None - - # Project from Nick - data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/sfr/October 2025 AL portfolio" - data_filename = "22.10 AL Portfolio.xlsx" - sheet_name = "22.10 AL Portfolio" - postcode_column = 'Postcode' - address1_column = None - address1_method = 'house_number_extraction' - fulladdress_column = "Address" - address_cols_to_concat = [] - missing_postcodes_method = None - landlord_year_built = None - landlord_os_uprn = None - landlord_property_type = None - landlord_built_form = None - landlord_wall_construction = None - landlord_roof_construction = None - landlord_heating_system = None - landlord_existing_pv = None - landlord_property_id = "Row ID" - landlord_sap = None - outcomes_filename = None - outcomes_sheetname = None - outcomes_postcode = None - outcomes_houseno = None - outcomes_id = None - outcomes_address = None - master_filepaths = 
[] - master_id_colnames = [] - master_to_asset_list_filepath = None - phase = False - ecosurv_landlords = None - asset_list_header = 0 - landlord_block_reference = None - - # Lambeth - data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Lambeth" - data_filename = "LAMBETH Asset List ( Incomplete).xlsx" - sheet_name = "Green properties" - postcode_column = 'SX3 Postcode' - address1_column = "SX3 Short Address" - address1_method = None - fulladdress_column = None - address_cols_to_concat = ["SX3 Short Address"] - missing_postcodes_method = None - landlord_year_built = None - landlord_os_uprn = None - landlord_property_type = "Property Type" - landlord_built_form = None - landlord_wall_construction = None - landlord_roof_construction = None - landlord_heating_system = None - landlord_existing_pv = None - landlord_property_id = "row_id" - landlord_sap = None - outcomes_filename = None - outcomes_sheetname = None - outcomes_postcode = None - outcomes_houseno = None - outcomes_id = None - outcomes_address = None - master_filepaths = [] - master_id_colnames = [] - master_to_asset_list_filepath = None - phase = False - ecosurv_landlords = None - asset_list_header = 0 - landlord_block_reference = None - - # # Colchester - # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Colchester/Aug2025 202 inspections" - # data_filename = "Colchester Borough Homes - Inspections - Additional 202 Addresses JW 280725 copy.xlsx" - # sheet_name = "Extra 202 Colchester Addresses" - # postcode_column = 'domna_postcode' - # address1_column = "domna_address_1" - # address1_method = None - # fulladdress_column = "domna_full_address" - # address_cols_to_concat = [] - # missing_postcodes_method = None - # landlord_year_built = None - # landlord_os_uprn = None - # landlord_property_type = "landlord_property_type" - # landlord_built_form = "landlord_built_form" - # landlord_wall_construction = None - # landlord_roof_construction = None - # 
landlord_heating_system = None - # landlord_existing_pv = None - # landlord_property_id = "landlord_property_id" - # landlord_sap = None - # outcomes_filename = None - # outcomes_sheetname = None - # outcomes_postcode = None - # outcomes_houseno = None - # outcomes_id = None - # outcomes_address = None - # master_filepaths = [] - # master_id_colnames = [] - # master_to_asset_list_filepath = None - # phase = False - # ecosurv_landlords = None - # asset_list_header = 0 - # landlord_block_reference = "landlord_block_reference" - - # # Abri - # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Abri/Post Inspections" - # data_filename = "Desktop ABRI data - Standardised After Programmes (2).xlsx" - # sheet_name = "Reviewed List" - # postcode_column = 'domna_postcode' - # address1_column = "domna_address_1" - # address1_method = None - # fulladdress_column = "domna_full_address" - # address_cols_to_concat = [] - # missing_postcodes_method = None - # landlord_year_built = "landlord_year_built" - # landlord_os_uprn = None - # landlord_property_type = "PropertyType_original_from_landlord" - # landlord_built_form = "BuildForm_original_from_landlord" - # landlord_wall_construction = "Wall Construction_original_from_landlord" - # landlord_roof_construction = None - # landlord_heating_system = "HeatingType_original_from_landlord" - # landlord_existing_pv = None - # landlord_property_id = "landlord_property_id" - # landlord_sap = None - # outcomes_filename = None - # outcomes_sheetname = None - # outcomes_postcode = None - # outcomes_houseno = None - # outcomes_id = None - # outcomes_address = None - # master_filepaths = [] - # master_id_colnames = [] - # master_to_asset_list_filepath = None - # phase = False - # ecosurv_landlords = None - # asset_list_header = 0 - # landlord_block_reference = None - - # Freebridge - data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Freebridge/Aug2025 programme" - data_filename = "Domna - FCH property data 
May 25 copy.xlsx" - sheet_name = "EPC Data" - postcode_column = 'Post Code' - address1_column = "Address 1" - address1_method = None - fulladdress_column = None - address_cols_to_concat = ["Address 1", "Address 4"] - missing_postcodes_method = None - landlord_year_built = "Build Date" - landlord_os_uprn = None - landlord_property_type = "Property Type" - landlord_built_form = None - landlord_wall_construction = "Walls Description" - landlord_heating_system = "Heating Type" - landlord_existing_pv = None - landlord_property_id = "Place Ref" - landlord_roof_construction = "Roof Description" - landlord_sap = "Current SAP" - outcomes_filename = [] - outcomes_sheetname = [] - outcomes_postcode = [] - outcomes_houseno = [] - outcomes_address = [] - outcomes_id = [] - master_filepaths = [] - master_to_asset_list_filepath = None - asset_list_header = 0 - landlord_block_reference = None - master_id_colnames = [] - phase = False # Inspections not complete, produce a partial view - ecosurv_landlords = None - - # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Broadlands" - # data_filename = "Broadlands Asset List.xlsx" - # sheet_name = "Assets" - # postcode_column = 'POSTCODE' - # fulladdress_column = None - # address1_column = "Address1" - # address1_method = None - # address_cols_to_concat = ["Address1"] - # missing_postcodes_method = None - # landlord_year_built = "DATEBUILT" - # landlord_os_uprn = None - # landlord_property_type = "PropertyType" - # landlord_built_form = "PropertyType" - # landlord_wall_construction = None - # landlord_heating_system = "Heating Fuel" - # landlord_existing_pv = None - # landlord_property_id = "Row ID" - # outcomes_filename = [os.path.join(data_folder, "outcomes.xlsx")] - # outcomes_sheetname = ["Sheet1"] - # outcomes_postcode = ["Postcode"] - # outcomes_houseno = ["No."] - # outcomes_address = ["Address"] - # outcomes_id = [None] - # master_filepaths = [ - # os.path.join(data_folder, "eco3 submissions.csv"), - # 
os.path.join(data_folder, "eco4 submissions.csv"), - # ] - # master_to_asset_list_filepath = None - # asset_list_header = 0 - # landlord_block_reference = None - # master_id_colnames = [None, None] - # landlord_roof_construction = None - # phase = False - # landlord_sap = None - # ecosurv_landlords = "broadland" - # # - # - # # Community: - # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Community Housing/New Programme" - # data_filename = "SUB EPC C to DOMNA - 24.07.25.xlsx" - # sheet_name = "Sheet1" - # postcode_column = 'POSTCODE' - # fulladdress_column = "ADDRESS" - # address1_column = None - # address1_method = "house_number_extraction" - # address_cols_to_concat = [] - # missing_postcodes_method = None - # landlord_year_built = "BUILD DATE" - # landlord_os_uprn = None - # landlord_property_type = "PROPERTY TYPE" - # landlord_built_form = "Archetype" # Using the inspections archetype - # landlord_wall_construction = "CONSTRUCTION TYPE" - # landlord_roof_construction = None - # landlord_heating_system = None - # landlord_existing_pv = None - # landlord_property_id = "UPRN" - # landlord_sap = None - # outcomes_filename = [] - # outcomes_sheetname = [] - # outcomes_postcode = [] - # outcomes_houseno = [] - # outcomes_id = [] - # outcomes_address = [] - # master_filepaths = [] - # master_to_asset_list_filepath = None - # phase = False - # ecosurv_landlords = None - # asset_list_header = 1 - # landlord_block_reference = None - # master_id_colnames = [] - # - # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Ealing/Programme Analysis" - # data_filename = "EalingProjectRebuildJW210725.xlsx" - # sheet_name = "Refine & Houses" - # postcode_column = 'Postcode' - # fulladdress_column = "Address" - # address1_column = None - # address1_method = "house_number_extraction" - # address_cols_to_concat = [] - # missing_postcodes_method = None - # landlord_year_built = None - # landlord_os_uprn = None - # landlord_property_type = 
None # Using the inspections property type - # landlord_built_form = None - # landlord_wall_construction = None - # landlord_roof_construction = None - # landlord_heating_system = None - # landlord_existing_pv = None - # landlord_property_id = "Property ref" - # landlord_sap = None - # outcomes_filename = [] - # outcomes_sheetname = [] - # outcomes_postcode = [] - # outcomes_houseno = [] - # outcomes_id = [] - # outcomes_address = [] - # master_filepaths = [] - # master_to_asset_list_filepath = None - # phase = False - # ecosurv_landlords = None - # asset_list_header = 0 - # landlord_block_reference = "Block Reference" - # master_id_colnames = [] - # - # # TODO: Delete me - # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/NRLA/" - # data_filename = "20250716 Asset List.xlsx" - # sheet_name = "Sheet 1" - # postcode_column = 'Postcode' - # fulladdress_column = "Full Address" - # address1_column = None - # address1_method = "house_number_extraction" - # address_cols_to_concat = [] - # missing_postcodes_method = None - # landlord_year_built = None - # landlord_os_uprn = None - # landlord_property_type = None - # landlord_built_form = None - # landlord_wall_construction = None - # landlord_heating_system = None - # landlord_existing_pv = None - # landlord_property_id = "Row ID" - # outcomes_filename = [] - # outcomes_sheetname = [] - # outcomes_postcode = [] - # outcomes_houseno = [] - # outcomes_address = [] - # outcomes_id = [] - # master_filepaths = [] - # master_to_asset_list_filepath = None - # asset_list_header = 0 - # landlord_block_reference = None - # master_id_colnames = [] - # landlord_roof_construction = None - # phase = False - # landlord_sap = None - # ecosurv_landlords = None - # - # # Southend - # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Southend/July 2025 Programme" - # data_filename = "SOUTHEND - RYAN.xlsx" - # sheet_name = "July 2025 Surveys" - # postcode_column = 'Postcode' - # fulladdress_column = 
"Full postal address" - # address1_column = None - # address1_method = "house_number_extraction" - # address_cols_to_concat = [] - # missing_postcodes_method = None - # landlord_year_built = "Property age" - # landlord_os_uprn = None - # landlord_property_type = "Property type" - # landlord_built_form = "Property type" - # landlord_wall_construction = None - # landlord_heating_system = None - # landlord_existing_pv = None - # landlord_property_id = "ID" - # outcomes_filename = [] - # outcomes_sheetname = [] - # outcomes_postcode = [] - # outcomes_houseno = [] - # outcomes_address = [] - # outcomes_id = [] - # master_filepaths = [] - # master_to_asset_list_filepath = None - # asset_list_header = 0 - # landlord_block_reference = None - # master_id_colnames = [] - # landlord_roof_construction = None - # phase = False - # landlord_sap = None - # ecosurv_landlords = None - # - # # For Rooftop - # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Rooftop" - # data_filename = "Rooftop Asset List - July 2025.xlsx" - # sheet_name = "Sheet1" - # postcode_column = 'post_code' - # fulladdress_column = None - # address1_column = "add_1" - # address1_method = None - # address_cols_to_concat = [ - # "add_1", "add_2", "add_3", "add_4" - # ] - # missing_postcodes_method = None - # landlord_year_built = "date_built" - # landlord_os_uprn = None - # landlord_property_type = "ConstructionStyle" - # landlord_built_form = "ConstructionStyle" - # landlord_wall_construction = None - # landlord_heating_system = "Description" - # landlord_existing_pv = None - # landlord_property_id = "PropertyCode" - # outcomes_filename = [os.path.join(data_folder, "Rooftop_Outcomes.xlsx")] - # outcomes_sheetname = ["OUTCOMES"] - # outcomes_postcode = ["POSTCODE"] - # outcomes_houseno = ["NO"] - # outcomes_address = ["ADDRESS"] - # outcomes_id = [None] - # master_filepaths = [os.path.join(data_folder, "Master.csv")] - # master_to_asset_list_filepath = None - # asset_list_header = 1 - # 
landlord_block_reference = "bl_rec_ref" - # master_id_colnames = [None] - # landlord_roof_construction = None - # phase = False - # landlord_sap = None - # ecosurv_landlords = "rooftop" - # - # # For Housing - # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/For Housing/New Programme July 2025" - # data_filename = "FOR HOUSING Asset List (Combined).xlsx" - # sheet_name = "Asset List" - # postcode_column = 'Postcode' - # fulladdress_column = "Address" - # address1_column = None - # address1_method = "house_number_extraction" - # address_cols_to_concat = [] - # missing_postcodes_method = None - # landlord_year_built = None - # landlord_os_uprn = None - # landlord_property_type = "Type" - # landlord_built_form = "Type" - # landlord_wall_construction = None - # landlord_heating_system = "Heating - full" - # landlord_existing_pv = None - # landlord_property_id = "UPRN" - # outcomes_filename = [os.path.join(data_folder, "Khalim Combined - for analysis.xlsx")] - # outcomes_sheetname = ["Sheet1"] - # outcomes_postcode = ["POSTCODE"] - # outcomes_houseno = ["NO"] - # outcomes_address = ["ADDRESS"] - # outcomes_id = [None] - # master_filepaths = [os.path.join(data_folder, "submissions.csv")] - # master_to_asset_list_filepath = None - # asset_list_header = 0 - # landlord_block_reference = None - # master_id_colnames = [None] - # landlord_roof_construction = None - # phase = False - # landlord_sap = "SAP" - # ecosurv_landlords = "for housing" - # - # # CDS - # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/CDS" - # data_filename = "Founder Estates - Asset List.xlsx" - # sheet_name = "Combined" - # postcode_column = 'Postcode' - # fulladdress_column = "Address" - # address1_column = None - # address1_method = "house_number_extraction" - # address_cols_to_concat = [] - # missing_postcodes_method = None - # landlord_year_built = None - # landlord_os_uprn = None - # landlord_property_type = None - # landlord_built_form = None - # 
landlord_wall_construction = None - # landlord_heating_system = "Heating Type" - # landlord_existing_pv = None - # landlord_property_id = "Row ID" - # outcomes_filename = [] - # outcomes_sheetname = [] - # outcomes_postcode = [] - # outcomes_houseno = [] - # outcomes_address = [] - # outcomes_id = [] - # master_filepaths = [os.path.join(data_folder, "submissions.csv")] - # master_to_asset_list_filepath = None - # asset_list_header = 0 - # landlord_block_reference = None - # master_id_colnames = [None] - # landlord_roof_construction = None - # phase = False - # landlord_sap = None - # ecosurv_landlords = "cds" - # - # # Plus Dane - # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Plus Dane/New Programme July 2025/" - # data_filename = "20250711 Plus Dane Asset List.xlsx" - # sheet_name = "Sheet1" - # postcode_column = 'Postcode' - # fulladdress_column = "Address" - # address1_column = None - # address1_method = "house_number_extraction" - # address_cols_to_concat = [] - # missing_postcodes_method = None - # landlord_year_built = "Property Age" - # landlord_os_uprn = None - # landlord_property_type = "Property Type" - # landlord_built_form = "Built Form" - # landlord_wall_construction = "Wall Construction" - # landlord_heating_system = "Full Heating System" - # landlord_existing_pv = None - # landlord_property_id = "UPRN" - # outcomes_filename = [ - # os.path.join(data_folder, "Outcomes - Plus Dane_CWI_2024.xlsx"), - # os.path.join(data_folder, "Outcomes - Plus Dane_CWI_2025.xlsx"), - # os.path.join(data_folder, "Outcomes - Plus Dane_PV_2025.xlsx"), - # ] - # outcomes_sheetname = [ - # "CWI & LI - 2024", "2025 - CWI", "PV - 2025", - # ] - # outcomes_postcode = ["Postcode", "Postcode", "Postcode"] - # outcomes_houseno = ["No.", "No", "No"] - # outcomes_address = ["Address", "Address", "Address"] - # outcomes_id = ["Asset Reference", "LL UPRN", "LL UPRN"] - # master_filepaths = [ - # os.path.join(data_folder, "submissions/JJC-Table 1.csv"), - # 
os.path.join(data_folder, "submissions/SCIS-Table 1.csv") - # ] - # master_to_asset_list_filepath = None - # asset_list_header = 1 - # landlord_block_reference = None - # master_id_colnames = [None, None] - # landlord_roof_construction = None - # phase = False - # landlord_sap = "SAP Rating" - # ecosurv_landlords = "plus dane" - - # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Brentwood/July 2025 New Programme" - # data_filename = "20250710 Asset List Brentwood.xlsx" - # sheet_name = "Sheet1" - # postcode_column = 'Postcode' - # fulladdress_column = None - # address1_column = "House Number" - # address1_method = None - # address_cols_to_concat = ["House Number", "Address Line 1", "Address Line 2", "Address Line 3"] - # missing_postcodes_method = None - # landlord_year_built = "Year Built" - # landlord_os_uprn = None - # landlord_property_type = "Dwelling" - # landlord_built_form = None - # landlord_wall_construction = None - # landlord_heating_system = "Heating" - # landlord_existing_pv = None - # landlord_property_id = "UPRN" - # outcomes_filename = [os.path.join(data_folder, "Brentwood - outcomes for analysis.xlsx")] - # outcomes_sheetname = ["OUTCOMES"] - # outcomes_postcode = ["POSTCODE"] - # outcomes_houseno = [None] - # outcomes_address = ["ADDRESS"] - # outcomes_id = [None] - # master_filepaths = [os.path.join(data_folder, "Submissions.csv")] - # master_to_asset_list_filepath = None - # asset_list_header = 1 - # landlord_block_reference = None - # master_id_colnames = [None] - # landlord_roof_construction = None - # phase = False - # landlord_sap = None - # ecosurv_landlords = "brentwood" - - # Brentwood - # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Brentwood/July 2025 New Programme" - # data_filename = "20250710 Asset List Brentwood.xlsx" - # sheet_name = "Sheet1" - # postcode_column = 'Postcode' - # fulladdress_column = None - # address1_column = "House Number" - # address1_method = None - # 
address_cols_to_concat = ["House Number", "Address Line 1", "Address Line 2", "Address Line 3"] - # missing_postcodes_method = None - # landlord_year_built = "Year Built" - # landlord_os_uprn = None - # landlord_property_type = "Dwelling" - # landlord_built_form = None - # landlord_wall_construction = None - # landlord_heating_system = "Heating" - # landlord_existing_pv = None - # landlord_property_id = "UPRN" - # outcomes_filename = [os.path.join(data_folder, "Brentwood - outcomes for analysis.xlsx")] - # outcomes_sheetname = ["OUTCOMES"] - # outcomes_postcode = ["POSTCODE"] - # outcomes_houseno = [None] - # outcomes_address = ["ADDRESS"] - # outcomes_id = [None] - # master_filepaths = [os.path.join(data_folder, "Submissions.csv")] - # master_to_asset_list_filepath = None - # asset_list_header = 1 - # landlord_block_reference = None - # master_id_colnames = [None] - # landlord_roof_construction = None - # phase = False - # landlord_sap = None - # ecosurv_landlords = "brentwood" - # - # # Eastlight - # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Eastlight/New Programme" - # data_filename = "INSPECTIONS MASTER Non Tech.xlsx" - # sheet_name = "EASTLIGHT CW" - # postcode_column = 'Postcode' - # fulladdress_column = None - # address1_column = "HouseName" - # address1_method = None - # address_cols_to_concat = ["HouseName", "Block", "Address1", "Address2", "Address3"] - # missing_postcodes_method = None - # landlord_year_built = "Built In Year" - # landlord_os_uprn = None - # landlord_property_type = "AssetType" - # landlord_built_form = "Archetype" # Using inspections archetype - # landlord_wall_construction = None - # landlord_roof_construction = None - # landlord_heating_system = "Main Heating Source" - # landlord_existing_pv = None - # landlord_property_id = "UPRN" - # landlord_sap = "SAP Score" - # outcomes_filename = [ - # os.path.join(data_folder, "Eastlight_CWI_JJC_2025.xlsx"), - # os.path.join(data_folder, 
"Eastlight_CWI_SCIS_2025.xlsx"), - # ] - # outcomes_sheetname = ["Outcomes", "Feedback"] - # outcomes_postcode = ["Postcode", "Postcode"] - # outcomes_houseno = ["No", "No."] - # outcomes_id = [None, None] - # outcomes_address = ["Address", "Address"] - # master_filepaths = [ - # os.path.join(data_folder, "ECO 3-Table 1.csv"), - # os.path.join(data_folder, "ECO 4-Table 1.csv"), - # ] - # master_to_asset_list_filepath = None - # phase = False - # ecosurv_landlords = "eastlight" - # asset_list_header = 0 - # landlord_block_reference = None - # master_id_colnames = [None, None] - # landlord_sap = None - - # Pickering and Ferens - # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Pickering & Ferens" - # data_filename = "SAP 9 vs SAP 10 Sava Intelligent Energy - Property List (190625).xlsx" - # sheet_name = "Sava Intelligent Energy - Prope" - # postcode_column = 'Postcode' - # fulladdress_column = 'Address' - # address1_column = None - # address1_method = "house_number_extraction" - # address_cols_to_concat = [] - # missing_postcodes_method = None - # landlord_year_built = None - # landlord_os_uprn = None - # landlord_property_type = "Property Type" # Using the inspections property type - # landlord_built_form = "Archetype 2" - # landlord_wall_construction = None - # landlord_roof_construction = None - # landlord_heating_system = None - # landlord_existing_pv = None - # landlord_property_id = "UPRN" - # landlord_sap = "SAP Rating (RdSAP 10)" - # outcomes_filename = [] - # outcomes_sheetname = [] - # outcomes_postcode = [] - # outcomes_houseno = [] - # outcomes_id = [] - # outcomes_address = [] - # master_filepaths = [ - # os.path.join(data_folder, "PICKERING & FERENS ROLLING MASTER SHEET HEDGEFUND - 26.7.24 - K.csv"), - # os.path.join(data_folder, "PICKERING & FERENS NEW MASTER GBIS UPDATED 21.8.24 - M - For Analysis.csv"), - # ] - # master_to_asset_list_filepath = None - # phase = False - # ecosurv_landlords = "pickering" - # asset_list_header = 0 
- # landlord_block_reference = None - # master_id_colnames = [None, None] - - # Colchester - # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Colchester" - # data_filename = "Warmfront data- Colchester Borough Homes (Complete).xlsx" - # sheet_name = "Sheet1" - # postcode_column = 'Full Address.1' - # fulladdress_column = "Full Address" - # address1_column = None - # address1_method = "first_word" - # address_cols_to_concat = [] - # missing_postcodes_method = None - # landlord_year_built = "Build Date" - # landlord_os_uprn = None - # landlord_property_type = "Property Type" - # landlord_wall_construction = "Wallinsul" - # landlord_heating_system = "HeatSorc" - # landlord_existing_pv = None - # landlord_property_id = "Property Reference" - # outcomes_filename = [] - # outcomes_sheetname = [] - # outcomes_postcode = [] - # outcomes_houseno = [] - # outcomes_id = [] - # outcomes_address = [] - # master_filepaths = [] - # master_to_asset_list_filepath = None - # asset_list_header = 0 - # landlord_built_form = None - # landlord_roof_construction = None - # landlord_sap = None - # landlord_block_reference = None - # phase = False - # ecosurv_landlords = None - # master_id_colnames = [] - - # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Ealing/Hubspot" - # data_filename = "EalingFlats.xlsx" - # sheet_name = "Sheet1" - # postcode_column = 'Postcode' - # fulladdress_column = "Address" - # address1_column = None - # address1_method = "house_number_extraction" - # address_cols_to_concat = [] - # missing_postcodes_method = None - # landlord_year_built = None - # landlord_os_uprn = None - # landlord_property_type = None # Using the inspections property type - # landlord_built_form = None - # landlord_wall_construction = None - # landlord_roof_construction = None - # landlord_heating_system = None - # landlord_existing_pv = None - # landlord_property_id = "Property ref" - # landlord_sap = None - # outcomes_filename = [] - # 
outcomes_sheetname = [] - # outcomes_postcode = [] - # outcomes_houseno = [] - # outcomes_id = [] - # outcomes_address = [] - # master_filepaths = [] - # master_to_asset_list_filepath = None - # phase = False - # ecosurv_landlords = None - # asset_list_header = 0 - # landlord_block_reference = "Block Ref" - # master_id_colnames = [] - - # Southern - Jan list - # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Southern/New Programme/Jan 2025 List" - # data_filename = "SOUTHERN ASSETS January 2025 Additions Query 21.03.2025.xlsx" - # sheet_name = "Jan 2025 additions" - # postcode_column = 'Post Code' - # fulladdress_column = None - # address1_column = "NO." - # address1_method = None - # address_cols_to_concat = ["NO.", "Street / Block Name", "Town/Area"] - # missing_postcodes_method = None - # landlord_year_built = None - # landlord_os_uprn = None - # landlord_property_type = None # Using the inspections property type - # landlord_built_form = None - # landlord_wall_construction = None - # landlord_roof_construction = None - # landlord_heating_system = None - # landlord_existing_pv = None - # landlord_property_id = "SH Property Reference" - # landlord_sap = None - # outcomes_filename = [ - # os.path.join(data_folder, "RT - Southern Housing Group - JJC.xlsx"), - # os.path.join(data_folder, "RT - SOUTHERN OUTCOMES - SCIS Merged.xlsx"), - # ] - # outcomes_sheetname = ["Feedback", "Collated"] - # outcomes_postcode = ["Poscode", "Postcode"] - # outcomes_houseno = ["No.", "No"] - # outcomes_id = ["UPRNs", None] - # outcomes_address = ["Address", "Address"] - # master_filepaths = [ - # os.path.join(data_folder, "southern_submissions/CAVITY'S - DECEMBER 2018-Table 1.csv"), - # os.path.join(data_folder, "southern_submissions/CAVITY'S 2019-Table 1.csv"), - # os.path.join(data_folder, "southern_submissions/CAVITY'S ECO4-Table 1.csv"), - # os.path.join(data_folder, "southern_submissions/LOFT'S-Table 1.csv"), - # ] - # master_to_asset_list_filepath = None - 
# phase = False - # ecosurv_landlords = "southern" - # asset_list_header = 0 - # landlord_block_reference = None - # master_id_colnames = [None, None, None, None] - - # NCHA - # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/NCHA" - # data_filename = "Energy Information MASTER June 2025.xlsx" - # sheet_name = "Data" - # postcode_column = 'Postcode' - # fulladdress_column = "Address" - # address1_column = None - # address1_method = "house_number_extraction" - # address_cols_to_concat = [] - # missing_postcodes_method = None - # landlord_year_built = "Build Date (HAR10)" - # landlord_os_uprn = None - # landlord_property_type = "Property Type (HAR10)" - # landlord_built_form = "Build Form (EPC)" - # landlord_wall_construction = "Wall Description" - # landlord_roof_construction = None - # landlord_heating_system = "HEAT Code" - # landlord_existing_pv = None - # landlord_property_id = "Place ref" - # landlord_sap = "EPC SAP" - # outcomes_filename = None - # outcomes_sheetname = None - # outcomes_postcode = None - # outcomes_houseno = None - # outcomes_id = None - # outcomes_address = None - # master_filepaths = [] - # master_to_asset_list_filepath = None - # phase = False - # ecosurv_landlords = None - # asset_list_header = 0 - # landlord_block_reference = None - # master_id_colnames = [] - - # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Calico" - # data_filename = "07.04 CALICO - Final List.xlsx" - # asset_list_header = 2 - # sheet_name = "Final List" - # postcode_column = 'Postcode' - # fulladdress_column = None - # address1_column = "Property Number / Name" - # address1_method = None - # address_cols_to_concat = [ - # "Property Number / Name", - # "Street", - # "Town" - # ] - # missing_postcodes_method = None - # landlord_year_built = "NROSH Estimated Build Date" - # landlord_os_uprn = None - # landlord_property_type = "Asset Type" - # landlord_built_form = None - # landlord_wall_construction = "Wall Type" - # 
landlord_heating_system = "Boiler Type" - # landlord_existing_pv = None - # landlord_property_id = "Asset Reference" - # outcomes_filename = [] - # outcomes_sheetname = [] - # outcomes_postcode = [] - # outcomes_houseno = [] - # outcomes_id = [] - # outcomes_address = [] - # master_filepaths = [] - # master_id_colnames = [] - # master_to_asset_list_filepath = None - # landlord_roof_construction = None - # landlord_block_reference = None - # landlord_sap = "Current Efficiency Rating - Score" - # phase = None - # ecosurv_landlords = None - - # data_folder = ( - # "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Livewest/Programme Update - March 2025/2018 Asset - # List" - # ) - # data_filename = "LIVEWEST STOCK - 23rd October 2018.xlsx" - # sheet_name = "Assets" - # postcode_column = 'Postcode' - # fulladdress_column = "Address" - # address1_column = None - # address1_method = "house_number_extraction" - # address_cols_to_concat = [] - # missing_postcodes_method = None - # landlord_year_built = "Build Year" - # landlord_os_uprn = None - # landlord_property_type = "Property Archetype" - # landlord_built_form = None - # landlord_wall_construction = None - # landlord_heating_system = "Heating Fuel Type" - # landlord_existing_pv = None - # landlord_property_id = "Uprn - DO NOT DELETE" - # outcomes_filename = [ - # os.path.join(data_folder, "RT - LiveWest.xlsx") - # ] - # outcomes_sheetname = ["Feedback"] - # outcomes_postcode = ["Poscode"] - # outcomes_houseno = ["No."] - # outcomes_id = ["UPRN"] - # outcomes_address = ["Address"] - # master_filepaths = [ - # "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Livewest/Programme Update - March 2025/Rolling - # Master " - # "- redacted for analysis/CAVITY-Table 1.csv" - # ] - # master_id_colnames = [None] - # master_to_asset_list_filepath = None - # landlord_roof_construction = None - # landlord_block_reference = None - # landlord_sap = None - # phase = None - # ecosurv_landlords = "livewest|live west" - - # 
data_folder = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Livewest/Programme Update - March " - # "2025/Livewest Asset List (Original) - csv") - # data_filename = "Report-Table 1.csv" - # sheet_name = None - # postcode_column = 'Postcode' - # fulladdress_column = "T1_Address" - # address1_column = None - # address1_method = "house_number_extraction" - # address_cols_to_concat = [] - # missing_postcodes_method = None - # landlord_year_built = "Build Yr" - # landlord_os_uprn = None - # landlord_property_type = "T1_AssetType" - # landlord_built_form = "T1_AssetType" - # landlord_wall_construction = "Wall Type Cavity" - # landlord_heating_system = "Heating Fuel" - # landlord_existing_pv = None - # landlord_property_id = "T1_UPRN" - # outcomes_filename = [ - # os.path.join(data_folder, "RT - LiveWest.xlsx") - # ] - # outcomes_address = ["Address"] - # outcomes_sheetname = ["Feedback"] - # outcomes_postcode = ["Poscode"] - # outcomes_houseno = ["No."] - # outcomes_id = ["UPRN"] - # master_filepaths = [ - # "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Livewest/Programme Update - March 2025/Rolling - # Master " - # "- redacted for analysis/CAVITY-Table 1.csv" - # ] - # master_id_colnames = [None] - # master_to_asset_list_filepath = None - # landlord_roof_construction = None - # landlord_block_reference = None - # landlord_sap = None - # phase = None - # ecosurv_landlords = "livewest|live west" - - # Stori - # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Storicymru" - # data_filename = "Asset list - for analysis.xlsx" - # sheet_name = "SAP and Costs Calculations" - # postcode_column = 'Postcode' - # fulladdress_column = "Address1" - # address1_column = None - # address1_method = "house_number_extraction" - # address_cols_to_concat = [] - # missing_postcodes_method = None - # landlord_year_built = "Age" - # landlord_os_uprn = None - # landlord_property_type = "TYPE" - # landlord_built_form = "AGE / DETACHMENT" - # 
landlord_wall_construction = "WALL" - # landlord_roof_construction = "LOFT INSULATION" - # landlord_heating_system = "BOILER" - # landlord_existing_pv = "SOLAR PV" - # landlord_property_id = "UPRN" - # landlord_sap = "Current SAP Rating" - # landlord_block_reference = None - # outcomes_filename = [] - # outcomes_sheetname = [] - # outcomes_postcode = [] - # outcomes_houseno = [] - # outcomes_id = [] - # outcomes_address = [] - # master_filepaths = [] - # master_to_asset_list_filepath = None - # master_id_colnames = [] - # phase = False - # ecosurv_landlords = None - - # Thrive - reconciliation - # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation" - # data_filename = "Thrive Asset List - Complete - Updated May 2025.xlsx" - # sheet_name = "Sheet1" - # postcode_column = 'postcode' - # fulladdress_column = "full_address" - # address1_column = "address_line_1" - # address1_method = None - # address_cols_to_concat = [] - # missing_postcodes_method = None - # landlord_year_built = "age_band_calculated" - # landlord_os_uprn = None - # landlord_property_type = "property_type" - # landlord_built_form = "build_form" - # landlord_wall_construction = None - # landlord_roof_construction = "assumed_loft_insulation_thickness_updated" - # landlord_heating_system = "heating_type_updated" - # landlord_existing_pv = None - # landlord_property_id = "thrive_property_id" - # landlord_sap = "sap_rating_updated" - # landlord_block_reference = "block_reference" - # outcomes_filename = [ - # os.path.join(data_folder, "Thrive - Outcomes - April 24-March25 - Corrected.xlsx") - # ] - # outcomes_sheetname = ["Sheet1"] - # outcomes_postcode = ["postcode"] - # outcomes_houseno = ["No."] - # outcomes_id = ["thrive_property_id"] - # outcomes_address = ["address"] - # master_filepaths = [ - # os.path.join(data_folder, "Thrive Submissions ECO3 - with IDS.csv"), - # os.path.join(data_folder, "Thrive Submissions ECO4 - with IDS.csv"), - # ] - # 
master_to_asset_list_filepath = None - # master_id_colnames = ["thrive_property_id", "thrive_property_id"] - # phase = False - # ecosurv_landlords = "thrive" - - # Southern Midlands - # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Southern/Midlands Properties - Apr 2025" - # data_filename = "Southern Housing Midlands Property List - combined.xlsx" - # sheet_name = "Sheet 1" - # postcode_column = 'Post Code' - # fulladdress_column = "Address" - # address1_column = None - # address1_method = "house_number_extraction" - # address_cols_to_concat = [] - # missing_postcodes_method = None - # landlord_year_built = "Age_1" - # landlord_os_uprn = None - # landlord_property_type = "Prop_Type" - # landlord_built_form = "Prop_Type" - # landlord_wall_construction = "Walls_P" - # landlord_heating_system = "Heating System" - # landlord_existing_pv = None - # landlord_property_id = "AssetID" - # outcomes_filename = None - # outcomes_sheetname = None - # outcomes_postcode = None - # outcomes_houseno = None - # outcomes_id = None - # outcomes_address = None - # master_filepaths = [] - # master_to_asset_list_filepath = None - # Maps addresses to uprn in problematic cases manual_uprn_map = {} diff --git a/backend/app/db/functions/tasks/Tasks.py b/backend/app/db/functions/tasks/Tasks.py index 06e1c6fe..7508ab2e 100644 --- a/backend/app/db/functions/tasks/Tasks.py +++ b/backend/app/db/functions/tasks/Tasks.py @@ -1,5 +1,3 @@ -from __future__ import annotations - # ---- Standard Library ---- from typing import Optional, Dict, Any from datetime import datetime, timezone @@ -28,7 +26,6 @@ class SubTaskInterface: # CREATE SUBTASK # -------------------------------------------------------- def create_subtask(self, task_id: UUID, inputs: Optional[Dict[str, Any]] = None): - now = datetime.now(timezone.utc) with get_db_session() as session: task = session.get(Task, task_id) @@ -36,11 +33,11 @@ class SubTaskInterface: raise ValueError(f"Task {task_id} not found") subtask 
= SubTask( - taskId=task_id, + task_id=task_id, inputs=json.dumps(inputs) if inputs else None, status="waiting", - jobStarted=None, - jobCompleted=None, + job_started=None, + job_completed=None, ) session.add(subtask) @@ -49,7 +46,7 @@ class SubTaskInterface: # Recalculate parent task progress self._update_task_progress(session, task_id) - return subtask + return subtask.id # -------------------------------------------------------- # UPDATE STATUS (in progress, complete, failed) @@ -65,21 +62,21 @@ class SubTaskInterface: normalized = status.lower() # When job really starts - if normalized == "in progress" and subtask.jobStarted is None: - subtask.jobStarted = now + if normalized == "in progress" and subtask.job_started is None: + subtask.job_started = now # Completed or failed if normalized in ("complete", "failed"): - subtask.jobCompleted = now + subtask.job_completed = now subtask.status = normalized - subtask.updatedAt = now + subtask.updated_at = now session.add(subtask) session.commit() # Recalculate task status - self._update_task_progress(session, subtask.taskId) + self._update_task_progress(session, subtask.task_id) session.refresh(subtask) return subtask @@ -87,7 +84,8 @@ class SubTaskInterface: # -------------------------------------------------------- # UPDATE OUTPUTS # -------------------------------------------------------- - def update_subtask_output(self, subtask_id: UUID, outputs: Dict[str, Any]): + @staticmethod + def update_subtask_output(subtask_id: UUID, outputs: Dict[str, Any]): now = datetime.now(timezone.utc) with get_db_session() as session: @@ -96,7 +94,7 @@ class SubTaskInterface: raise ValueError(f"SubTask {subtask_id} not found") subtask.outputs = json.dumps(outputs) - subtask.updatedAt = now + subtask.updated_at = now session.add(subtask) session.commit() @@ -106,7 +104,8 @@ class SubTaskInterface: # -------------------------------------------------------- # UPDATE CLOUD LOGS URL # 
-------------------------------------------------------- - def update_subtask_logs(self, subtask_id: UUID, cloud_logs_url: str): + @staticmethod + def update_subtask_logs(subtask_id: UUID, cloud_logs_url: str): now = datetime.now(timezone.utc) with get_db_session() as session: @@ -114,8 +113,8 @@ class SubTaskInterface: if not subtask: raise ValueError(f"SubTask {subtask_id} not found") - subtask.cloudLogsURL = cloud_logs_url - subtask.updatedAt = now + subtask.cloud_logs_url = cloud_logs_url + subtask.updated_at = now session.add(subtask) session.commit() @@ -125,8 +124,8 @@ class SubTaskInterface: # -------------------------------------------------------- # SET BOTH OUTPUT + LOGS # -------------------------------------------------------- + @staticmethod def set_subtask_result( - self, subtask_id: UUID, outputs: Optional[Dict[str, Any]] = None, cloud_logs_url: Optional[str] = None, @@ -142,9 +141,9 @@ class SubTaskInterface: subtask.outputs = json.dumps(outputs) if cloud_logs_url is not None: - subtask.cloudLogsURL = cloud_logs_url + subtask.cloud_logs_url = cloud_logs_url - subtask.updatedAt = now + subtask.updated_at = now session.add(subtask) session.commit() session.refresh(subtask) @@ -153,13 +152,14 @@ class SubTaskInterface: # -------------------------------------------------------- # TASK PROGRESS CALCULATION # -------------------------------------------------------- - def _update_task_progress(self, session: Session, task_id: UUID): + @staticmethod + def _update_task_progress(session: Session, task_id: UUID): task = session.get(Task, task_id) if not task: return subtasks = session.exec( - select(SubTask).where(SubTask.taskId == task_id) + select(SubTask).where(SubTask.task_id == task_id) ).all() statuses = [s.status.lower() for s in subtasks] @@ -167,24 +167,24 @@ class SubTaskInterface: if "failed" in statuses: task.status = "failed" - task.jobCompleted = now + task.job_completed = now elif all(s == "complete" for s in statuses): task.status = "complete" 
- task.jobCompleted = now + task.job_completed = now elif "in progress" in statuses: task.status = "in progress" - if task.jobStarted is None: - task.jobStarted = now + if task.job_started is None: + task.job_started = now else: # All waiting task.status = "waiting" - task.jobStarted = None - task.jobCompleted = None + task.job_started = None + task.job_completed = None - task.updatedAt = now + task.updated_at = now session.add(task) session.commit() @@ -212,18 +212,18 @@ class SubTaskInterface: # Set logs if cloud_logs_url is not None: - subtask.cloudLogsURL = cloud_logs_url + subtask.cloud_logs_url = cloud_logs_url # Status + timestamps subtask.status = normalized - subtask.jobCompleted = now - subtask.updatedAt = now + subtask.job_completed = now + subtask.updated_at = now session.add(subtask) session.commit() # Update parent task (complete/failed) - self._update_task_progress(session, subtask.taskId) + self._update_task_progress(session, subtask.task_id) session.refresh(subtask) return subtask @@ -237,38 +237,49 @@ class TasksInterface: High-level operations for Task records. """ + @staticmethod def create_task( - self, - *, task_source: str, service: Optional[str] = None, inputs: Optional[Dict[str, Any]] = None, + task_only: bool = False, ): - now = datetime.now(timezone.utc) - + """ + Create a new Task record, and an initial SubTask in waiting state. Can also be used to create just + a task, without a subtask + :param task_source: Text indicating source of task creation (e.g. 
file path + function name) + :param service: Optional service name + :param inputs: Inputs of the job being run + :param task_only: If True, only create the Task record, without a SubTask + :return: + """ with get_db_session() as session: task = Task( - taskSource=task_source, + task_source=task_source, service=service, status="waiting", - jobStarted=None, - jobCompleted=None, + job_started=None, + job_completed=None, ) session.add(task) session.commit() session.refresh(task) + if task_only: + return task.id, None + # Create first subtask in waiting state subtask_interface = SubTaskInterface() - subtask = subtask_interface.create_subtask( + subtask_id = subtask_interface.create_subtask( task_id=task.id, inputs=inputs, ) - return task.id, subtask.id + return task.id, subtask_id - def update_task_status(self, task_id: UUID, status: str): + @staticmethod + def update_task_status(task_id: UUID, status: str): now = datetime.now(timezone.utc) with get_db_session() as session: @@ -278,14 +289,14 @@ class TasksInterface: normalized = status.lower() - if normalized == "in progress" and task.jobStarted is None: - task.jobStarted = now + if normalized == "in progress" and task.job_started is None: + task.job_started = now if normalized == "complete": - task.jobCompleted = now + task.job_completed = now task.status = normalized - task.updatedAt = now + task.updated_at = now session.add(task) session.commit() diff --git a/backend/app/db/models/tasks.py b/backend/app/db/models/tasks.py index d8007dcd..cfe18d83 100644 --- a/backend/app/db/models/tasks.py +++ b/backend/app/db/models/tasks.py @@ -1,6 +1,4 @@ -from __future__ import annotations - -from typing import Optional, List +from typing import Optional from datetime import datetime from uuid import UUID, uuid4 @@ -10,64 +8,29 @@ from sqlmodel import SQLModel, Field, Relationship class Task(SQLModel, table=True): __tablename__ = "tasks" - id: UUID = Field( - default_factory=uuid4, - primary_key=True, - index=True, - ) - - 
taskSource: str = Field(alias="task_source") - - jobStarted: Optional[datetime] = Field( - default=None, alias="job_started" - ) - jobCompleted: Optional[datetime] = Field( - default=None, alias="job_completed" - ) - + id: UUID = Field(default_factory=uuid4, primary_key=True, index=True, ) + task_source: str + job_started: Optional[datetime] = None + job_completed: Optional[datetime] = None status: str = Field(default="In Progress") service: Optional[str] = None + updated_at: datetime = Field(default_factory=datetime.utcnow) - updatedAt: datetime = Field( - default_factory=datetime.utcnow, - alias="updated_at", - ) - - # Relationship - subTasks: List["SubTask"] = Relationship(back_populates="task") + sub_tasks: list["SubTask"] = Relationship(back_populates="task") class SubTask(SQLModel, table=True): __tablename__ = "sub_task" - id: UUID = Field( - default_factory=uuid4, - primary_key=True, - index=True, - ) - - taskId: UUID = Field( - foreign_key="tasks.id", - alias="task_id", - ) - - jobStarted: Optional[datetime] = Field( - default=None, alias="job_started" - ) - jobCompleted: Optional[datetime] = Field( - default=None, alias="job_completed" - ) + id: UUID = Field(default_factory=uuid4, primary_key=True, index=True, ) + task_id: UUID = Field(foreign_key="tasks.id") + job_started: Optional[datetime] = None + job_completed: Optional[datetime] = None status: str = Field(default="In Progress") - inputs: Optional[str] = None outputs: Optional[str] = None - cloudLogsURL: Optional[str] = Field(alias="cloud_logs_url") + cloud_logs_url: Optional[str] = None + updated_at: datetime = Field(default_factory=datetime.utcnow) - updatedAt: datetime = Field( - default_factory=datetime.utcnow, - alias="updated_at", - ) - - # Relationship - task: Optional[Task] = Relationship(back_populates="subTasks") + task: Optional["Task"] = Relationship(back_populates="sub_tasks") diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index af57e35a..2b2306ee 100644 --- 
a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -81,14 +81,38 @@ async def trigger_plan_entrypoint(body: PlanTriggerRequest): # Insert the scenario ID into the data payload data["scenario_id"] = scenario_id + # Create a task, and associated sub-tasks + from backend.app.db.functions.tasks.Tasks import TasksInterface, SubTaskInterface + # Create a main task - create_task returns (task_id, subtask_id); subtask_id is None with task_only=True + task_id, _ = TasksInterface.create_task( + task_source="backend/plan/router.py:trigger_plan_entrypoint", + service="plan_engine", + inputs=data, + task_only=True + ) + + subtask_interface = SubTaskInterface() + for i in range(total_chunks): # Create an entry in the request logs table index_start = i * chunk_size index_end = min((i + 1) * chunk_size, total_rows) - message_payload = {**data, "index_start": index_start, "index_end": index_end} + message_payload = { + **data, "index_start": index_start, "index_end": index_end, + } - message_body = json.dumps(message_payload) + # Create a subtask for this chunk + subtask_id = subtask_interface.create_subtask( + task_id=task_id, + inputs=message_payload + ) + + # Add task and subtask to message, then serialise so the IDs are included in the SQS body + message_payload["task_id"] = str(task_id) + message_payload["subtask_id"] = str(subtask_id) + message_body = json.dumps(message_payload) + response = sqs_client.send_message( QueueUrl=settings.ENGINE_SQS_URL, MessageBody=message_body diff --git a/backend/app/plan/schemas.py b/backend/app/plan/schemas.py index 858a0a35..6f6db328 100644 --- a/backend/app/plan/schemas.py +++ b/backend/app/plan/schemas.py @@ -129,6 +129,10 @@ class PlanTriggerRequest(BaseModel): index_start: Optional[int] = None index_end: Optional[int] = None + # Task and subtask IDs + task_id: Optional[str] = None + subtask_id: Optional[str] = None + @model_validator(mode="after") def check_indexes(self): if (self.index_start is None) != (self.index_end is None): diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/data_prep.py b/etl/customers/peabody/Nov 2025 Consulting Project/data_prep.py index 6dd71b98..77736aff 100644 ---
a/etl/customers/peabody/Nov 2025 Consulting Project/data_prep.py +++ b/etl/customers/peabody/Nov 2025 Consulting Project/data_prep.py @@ -10,6 +10,7 @@ import json import time import os import pandas as pd +import numpy as np from tqdm import tqdm from dotenv import load_dotenv from asset_list.utils import get_data_for_property @@ -52,8 +53,6 @@ n_postcodes = property_list["Post Code"].nunique() postcode_summary = property_list.groupby("Post Code")["UPRN"].count().reset_index() postcode_summary["UPRN"].mean() -test_match = property_list.merge(sustainability_data, left_on="UPRN", right_on="Org Ref") - def classify_floor_area(x): if x <= 72: @@ -70,20 +69,187 @@ sustainability_data["Floor Area Band"] = sustainability_data["Total Floor Area ( lambda x: classify_floor_area(x) ) -archetypes = sustainability_data[ - ["Type", "Attachment", "Construction Years", "Wall Construction", "Wall Insulation", - "Roof Construction", "Roof Insulation", "Floor Construction", "Floor Insulation", - "Glazing", "Heating", "Boiler Efficiency", "Main Fuel", "Controls Adequacy", - "Floor Area Band"] -].drop_duplicates() +# Archetype reductions -# Potential reductions: +# Roof insulation category # 1) Split roof insulation into > 100mm loft and <= 100mm loft +sustainability_data["Roof Insulation Category"] = sustainability_data["Roof Insulation"].copy() +sustainability_data["Roof Insulation Category"] = np.where( + sustainability_data["Roof Insulation Category"].isin( + ['mm200', 'mm300', 'mm250', 'mm150', 'mm270', 'mm400', 'mm350'], + ), + "LI > 100mm", + sustainability_data["Roof Insulation Category"], +) + +sustainability_data["Roof Insulation Category"] = np.where( + sustainability_data["Roof Insulation Category"].isin( + ['mm100', 'mm50', 'mm75', 'mm25'], + ), + "LI <= 100mm", + sustainability_data["Roof Insulation Category"], +) + # 2) Group all of the glazed together (e.g. double glazed, secondary glazed, triple glazed) -# 3) Group up boiler efficiency A-C, D - F, G? 
or someting like this +sustainability_data["Glazing Type"] = sustainability_data["Glazing"].copy() +sustainability_data["Glazing Type"] = np.where( + sustainability_data["Glazing Type"].isin( + ['Double 2002 or later', 'Double before 2002', 'Double but age unknown', 'DoubleKnownData'] + ), + "Double Glazed", + sustainability_data["Glazing Type"], +) +sustainability_data["Glazing Type"] = np.where( + sustainability_data["Glazing Type"].isin(['Triple', 'TripleKnownData']), + "Triple Glazed", + sustainability_data["Glazing Type"], +) + +# 3) Group up boiler efficiency A, B-D, E - G? or someting like this +sustainability_data["Boiler Efficiency Group"] = sustainability_data["Boiler Efficiency"].copy() +sustainability_data["Boiler Efficiency Group"] = np.where( + sustainability_data["Boiler Efficiency Group"].isin(['B', 'C', 'D']), + "B-D", + sustainability_data["Boiler Efficiency Group"], +) +sustainability_data["Boiler Efficiency Group"] = np.where( + sustainability_data["Boiler Efficiency Group"].isin(['E', 'F', 'G']), + "E-G", + sustainability_data["Boiler Efficiency Group"], +) + # 4) Group up main fuel into gas, electric, oil, other? 
+sustainability_data["Main Fuel Group"] = sustainability_data["Main Fuel"].copy() +sustainability_data["Main Fuel Group"] = np.where( + sustainability_data["Main Fuel Group"].isin( + ["SmokelessCoal", "BiomassCommunity", "B30DCommunity"] + ), + "Other Fuel", + sustainability_data["Main Fuel Group"], +) + # 5) Wall Construction - group up Sandstone and Granite into one category +sustainability_data["Wall Construction"] = np.where( + sustainability_data["Wall Construction"].isin(["Sandstone", "Granite"]), + "Sandstone/Granite", + sustainability_data["Wall Construction"] +) + +sustainability_data["Wall Construction"] = np.where( + sustainability_data["Wall Construction"].isin(["Timber Frame", "System", "Solid Brick"]), + "Solid", + sustainability_data["Wall Construction"] +) + # 6) Reduce or remove floor construction +sustainability_data["Floor Construction"] = np.where( + sustainability_data["Floor Construction"].isin(["SuspendedTimber", "SuspendedNotTimber"]), + "Suspended Floor", + sustainability_data["Floor Construction"] +) + +# 7) Reduce wall insulation +sustainability_data["Wall Insulation"] = np.where( + sustainability_data["Wall Insulation"].isin( + ["FilledCavityPlusInternal", "FilledCavityPlusExternal", "FilledCavity", "External", "Internal"] + ), + "Insulated", + sustainability_data["Wall Insulation"] +) + +# 8) Fill floor insulation +sustainability_data["Floor Insulation"] = sustainability_data["Floor Insulation"].fillna("Unknown") + +# 9) Reduce Age bands +sustainability_data["Construction Years"] = np.where( + sustainability_data["Construction Years"].isin(["2003-2006", "2007-2011", "2012 onwards"]), + "2003 onwards", + sustainability_data["Construction Years"], +) + +sustainability_data["Construction Years"] = np.where( + sustainability_data["Construction Years"].isin(["Before 1900", "1900-1929"]), + "Before 1929", + sustainability_data["Construction Years"], +) + +sustainability_data["Construction Years"] = np.where( + 
sustainability_data["Construction Years"].isin(["1983-1990", "1991-1995"]), + "1983-1995", + sustainability_data["Construction Years"], +) + +sustainability_data["Construction Years"] = np.where( + sustainability_data["Construction Years"].isin(["1950-1966", "1967-1975", "1976-1982"]), + "1950-1982", + sustainability_data["Construction Years"], +) + +# Roof +sustainability_data["Roof Construction"] = np.where( + sustainability_data["Roof Construction"].isin( + ["PitchedNormalLoftAccess", "PitchedThatched", "PitchedNormalNoLoftAccess", "PitchedWithSlopingCeiling"] + ), + "Pitched Roof", + sustainability_data["Roof Construction"] +) + +archetype_variables = [ + "Type", "Attachment", "Construction Years", "Wall Construction", "Wall Insulation", + "Roof Construction", "Roof Insulation Category", "Floor Construction", "Floor Insulation", + "Glazing Type", "Heating", "Boiler Efficiency Group", "Main Fuel Group", "Controls Adequacy", + "Floor Area Band" +] + +archetypes = sustainability_data[archetype_variables + ["UPRN"]].dropna().groupby(archetype_variables)[ + "UPRN"].nunique().reset_index().rename(columns={"UPRN": "Count"}).sort_values(by="Count", + ascending=False).reset_index( + drop=True) + +# Rank archetypes by size and keep those within the top 80% of properties (cumulative proportion <= 0.80) +archetypes["Cumulative Count"] = archetypes["Count"].cumsum() +archetypes["Cumulative Proportion"] = archetypes["Cumulative Count"] / archetypes["Count"].sum() + +archetypes_85 = archetypes[archetypes["Cumulative Proportion"] <= 0.80].copy() +archetypes_85["Archetypes_85_reference"] = archetypes_85.index + 1 +archetypes_85["Archetypes_85_reference"] = "Archetype_Sample_" + archetypes_85["Archetypes_85_reference"].astype(str) + +# Keep only the properties in the retained archetypes - NOTE(review): names say 85 but the cut-off above is 0.80, confirm intended coverage +sustainability_data = sustainability_data.merge( + archetypes_85, + on=archetype_variables, + how="inner" +) +# We take 1 random property, by archetype 85 reference +modelling_sample =
sustainability_data.groupby("Archetypes_85_reference").apply( + lambda x: x.sample(1, random_state=42) +).reset_index(drop=True) + + +# Checking distributions +def compare_distributions(full_df, sample_df, column): + full_dist = full_df[column].value_counts(normalize=True) + sample_dist = sample_df[column].value_counts(normalize=True) + comparison = pd.concat([full_dist, sample_dist], axis=1, keys=['Full', 'Sample']).fillna(0) + return comparison + + +for col in archetype_variables: + print(f"--- {col} ---") + print(compare_distributions(sustainability_data, modelling_sample, col)) + +# Save this CSV as input +modelling_sample.to_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/modelling_sample.xlsx", +) +# Save the archetype definitions +archetypes_85.to_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/archetypes_85.xlsx", +) +# Save the full archetypes +archetypes.to_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/full_archetypes.xlsx", +) # Maps the property types to the format recognised by the EPC api property_type_map = {} diff --git a/etl/find_my_epc/RetrieveFindMyEpc.py b/etl/find_my_epc/RetrieveFindMyEpc.py index ae9e5ff7..519c3e52 100644 --- a/etl/find_my_epc/RetrieveFindMyEpc.py +++ b/etl/find_my_epc/RetrieveFindMyEpc.py @@ -21,14 +21,16 @@ class RetrieveFindMyEpc: 'Chrome/111.0.0.0 Safari/537.36' } - def __init__(self, address: str, postcode: str): + def __init__(self, address: str, postcode: str, rrn: str = None): """ This class is tasked with retrieving the latest EPC data from the find my epc website :param address: The address of the property :param postcode: The postcode of the property + :param rrn: The RRN of the EPC (if known) """ self.address = address self.postcode = postcode + self.rrn = rrn self.address_cleaned = self.address.replace(",", "").replace(" ", "").lower() self.walls = [] @@ 
-286,54 +288,12 @@ class RetrieveFindMyEpc: :return: """ - postcode_input = self.postcode.replace(" ", "+") - postcode_search = self.SEARCH_POSTCODE_URL.format(postcode_input=postcode_input) - postcode_response = requests.get(postcode_search, headers=self.HEADERS) - - postcode_res = BeautifulSoup(postcode_response.text, features="html.parser") - rows = postcode_res.find_all('tr', class_='govuk-table__row') - - extracted_table = [] - for row in rows: - # Extract the address and URL - address_tag = row.find('a', class_='govuk-link') - if address_tag is None: - continue - extracted_address = None - extracted_address_url = None - if address_tag: - extracted_address = address_tag.text.strip() - extracted_address_url = address_tag['href'] - - extracted_address_cleaned = extracted_address.replace(",", "").replace(" ", "").lower() - if not extracted_address_cleaned.startswith(self.address_cleaned): - continue - - # If the address is a match, we can extract the data - - # Extract the expiry date - expiry_date_tag = row.find('td', class_='govuk-table__cell date') - expiry_date = None - if expiry_date_tag is not None: - expiry_date = expiry_date_tag.parent.find('span').text.strip() - - extracted_table.append( - { - "extracted_address": extracted_address, - "extracted_address_url": extracted_address_url, - "expiry_date": datetime.strptime(expiry_date, '%d %B %Y'), - } - ) - - if not extracted_table: - raise ValueError("No EPC found") - - if len(extracted_table) > 1: - # We take the one with the most recent expiry date - extracted_table = sorted(extracted_table, key=lambda x: x['expiry_date'], reverse=True) - - chosen_epc = self.BASE_ENERGY_URL + extracted_table[0]['extracted_address_url'] - epc_certificate = chosen_epc.split('/')[-1] + if self.rrn: + # We build the URL directly + epc_certificate = self.rrn + chosen_epc = f"{self.BASE_ENERGY_URL}/energy-certificate/{epc_certificate}" + else: + chosen_epc, epc_certificate = self._find_epc_page() address_response = 
requests.get(chosen_epc, headers=self.HEADERS) address_res = BeautifulSoup(address_response.text, features="html.parser") @@ -438,11 +398,17 @@ class RetrieveFindMyEpc: For a post code and address, we pull out all the required data from the find my epc website """ - if epc_page_source is None: + if epc_page_source is None and rrn is None: chosen_epc, rrn = self._find_epc_page() address_response = requests.get(chosen_epc, headers=self.HEADERS) epc_page_source = address_response.text address_res = BeautifulSoup(address_response.text, features="html.parser") + elif self.rrn: + epc_certificate = self.rrn + chosen_epc = f"{self.BASE_ENERGY_URL}/energy-certificate/{epc_certificate}" + address_response = requests.get(chosen_epc, headers=self.HEADERS) + epc_page_source = address_response.text + address_res = BeautifulSoup(address_response.text, features="html.parser") else: if rrn is None: raise ValueError("rrn must be provided if epc_page_source is provided") @@ -581,6 +547,19 @@ class RetrieveFindMyEpc: # 5) Pull out the EPC data epc_data = self.extract_epc_data(address_res) + # Pull out the address information which can be found in the box with the class "epc-address" + # We split it up on break tags + addr = address_res.find("p", class_="epc-address").get_text(separator="\n").strip() + lines = addr.split("\n") + if len(lines) > 2: + address1 = lines[0] + address2 = lines[1] + postcode = lines[-1] + else: + address1 = lines[0] + address2 = "" + postcode = lines[-1] + resulting_data = { 'epc_certificate': rrn, 'current_epc_rating': current_rating.split(' ')[-6], @@ -594,6 +573,10 @@ class RetrieveFindMyEpc: **assessment_data, **low_carbon_energy_sources, "page_source": epc_page_source, + # Add in address a postcode from the page - covers use cases where we are given RRN + "address1": address1, + "address2": address2, + "postcode": postcode, } if return_page: