diff --git a/asset_list/app.py b/asset_list/app.py index ec47b07d..cbb2cd93 100644 --- a/asset_list/app.py +++ b/asset_list/app.py @@ -59,1270 +59,6 @@ def app(): Property UPRN """ - # PFP - data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/Nov 2025 Inspections" - data_filename = "Inspections List - PFP (1).xlsx" - sheet_name = "Sheet1" - postcode_column = 'Postcode' - address1_column = "Address 1" - address1_method = None - fulladdress_column = None - address_cols_to_concat = ["Address 1", "Address 2", "Address 3"] - missing_postcodes_method = None - landlord_year_built = None - landlord_os_uprn = None - landlord_property_type = "Archetype" # Using inspections - landlord_built_form = "Archetype 2" # Using inspections - landlord_wall_construction = None - landlord_roof_construction = None - landlord_heating_system = None - landlord_existing_pv = None - landlord_property_id = "UPRN" - landlord_sap = None - outcomes_filename = None - outcomes_sheetname = None - outcomes_postcode = None - outcomes_houseno = None - outcomes_id = None - outcomes_address = None - master_filepaths = [] - master_id_colnames = [] - master_to_asset_list_filepath = None - phase = False - ecosurv_landlords = None - asset_list_header = 0 - landlord_block_reference = None - - # Stonewater Solar - data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/October 2025 Solar" - data_filename = "Copy of AP Stonewater Ammended address list - PV AM Amended - Khalim initial review.xlsx" - sheet_name = "Proposed Sheet" - postcode_column = 'Postcode' - address1_column = None - address1_method = "house_number_extraction" - fulladdress_column = "Address" - address_cols_to_concat = [] - missing_postcodes_method = None - landlord_year_built = None - landlord_os_uprn = None - landlord_property_type = "Property Type" - landlord_built_form = "Property Type" - landlord_wall_construction = "Walls" - landlord_roof_construction = "Roofs" - 
landlord_heating_system = "Heating" - landlord_existing_pv = None - landlord_property_id = "Asset Id" - landlord_sap = "SAP" - outcomes_filename = None - outcomes_sheetname = None - outcomes_postcode = None - outcomes_houseno = None - outcomes_id = None - outcomes_address = None - master_filepaths = [] - master_id_colnames = [] - master_to_asset_list_filepath = None - phase = False - ecosurv_landlords = None - asset_list_header = 0 - landlord_block_reference = None - - # - data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Cambridge/" - data_filename = "22.10_Cambridge_west addresses.xlsx" - sheet_name = "Asset List" - postcode_column = 'Postcode' - address1_column = None - address1_method = "house_number_extraction" - fulladdress_column = "Full Address" - address_cols_to_concat = [] - missing_postcodes_method = None - landlord_year_built = None - landlord_os_uprn = None - landlord_property_type = None - landlord_built_form = None - landlord_wall_construction = None - landlord_roof_construction = None - landlord_heating_system = None - landlord_existing_pv = None - landlord_property_id = "id" - landlord_sap = None - outcomes_filename = None - outcomes_sheetname = None - outcomes_postcode = None - outcomes_houseno = None - outcomes_id = None - outcomes_address = None - master_filepaths = [] - master_id_colnames = [] - master_to_asset_list_filepath = None - phase = False - ecosurv_landlords = None - asset_list_header = 0 - landlord_block_reference = None - - # Property Box - data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/NRLA/Property Box" - data_filename = "Property Box Finance Portfolio.xlsx" - sheet_name = "Sheet1" - postcode_column = 'Postcode' - address1_column = None - address1_method = "house_number_extraction" - fulladdress_column = "Address 1" - address_cols_to_concat = [] - missing_postcodes_method = None - landlord_year_built = None - landlord_os_uprn = None - landlord_property_type = None - landlord_built_form = 
None - landlord_wall_construction = None - landlord_roof_construction = None - landlord_heating_system = None - landlord_existing_pv = None - landlord_property_id = "row_id" - landlord_sap = None - outcomes_filename = None - outcomes_sheetname = None - outcomes_postcode = None - outcomes_houseno = None - outcomes_id = None - outcomes_address = None - master_filepaths = [] - master_id_colnames = [] - master_to_asset_list_filepath = None - phase = False - ecosurv_landlords = None - asset_list_header = 0 - landlord_block_reference = "block_id" - - # CDS - able-to-pay - data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/CDS/Able to pay" - data_filename = "CDS_ASSET LIST_(2314).xlsx" - sheet_name = "Sheet1" - postcode_column = 'Property Address - Postcode' - address1_column = "Property Address - Line 1" - address1_method = None - fulladdress_column = "Property Address - Line 1" - address_cols_to_concat = [] - missing_postcodes_method = None - landlord_year_built = None - landlord_os_uprn = None - landlord_property_type = None - landlord_built_form = None - landlord_wall_construction = None - landlord_roof_construction = None - landlord_heating_system = None - landlord_existing_pv = None - landlord_property_id = "row_id" - landlord_sap = None - outcomes_filename = None - outcomes_sheetname = None - outcomes_postcode = None - outcomes_houseno = None - outcomes_id = None - outcomes_address = None - master_filepaths = [] - master_id_colnames = [] - master_to_asset_list_filepath = None - phase = False - ecosurv_landlords = None - asset_list_header = 0 - landlord_block_reference = None - - # Hyde - solar - data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Hyde/Solar" - data_filename = "Domna Property Analysis HYDE (Chichester Removed)V2-Completed.xlsx" - sheet_name = "Electric Property Inspections" - postcode_column = 'Postcode' - address1_column = None # Is only patchily populated so we create it - address1_method = 
'house_number_extraction' - fulladdress_column = "Address" - address_cols_to_concat = [] - missing_postcodes_method = None - landlord_year_built = None - landlord_os_uprn = None - landlord_property_type = "Property Type" - landlord_built_form = "Property Type" - landlord_wall_construction = "Walls " - landlord_roof_construction = "Roofs" - landlord_heating_system = "Heating" - landlord_existing_pv = None - landlord_property_id = "Address ID" - landlord_sap = "SAP" - outcomes_filename = None - outcomes_sheetname = None - outcomes_postcode = None - outcomes_houseno = None - outcomes_id = None - outcomes_address = None - master_filepaths = [] - master_id_colnames = [] - master_to_asset_list_filepath = None - phase = False - ecosurv_landlords = None - asset_list_header = 0 - landlord_block_reference = None - - # Hyde cavity - data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Hyde/Cavity" - data_filename = "Domna Property Analysis HYDE (Chichester Removed)V2-Completed.xlsx" - sheet_name = "Cavity Inspections" - postcode_column = 'Postcode' - address1_column = None # Is only patchily populated so we create it - address1_method = 'house_number_extraction' - fulladdress_column = "Address" - address_cols_to_concat = [] - missing_postcodes_method = None - landlord_year_built = None - landlord_os_uprn = None - landlord_property_type = "Property Type" - landlord_built_form = "Property Type" - landlord_wall_construction = "Walls " - landlord_roof_construction = "Roofs" - landlord_heating_system = "Heating" - landlord_existing_pv = None - landlord_property_id = "Address ID" - landlord_sap = "SAP" - outcomes_filename = None - outcomes_sheetname = None - outcomes_postcode = None - outcomes_houseno = None - outcomes_id = None - outcomes_address = None - master_filepaths = [] - master_id_colnames = [] - master_to_asset_list_filepath = None - phase = False - ecosurv_landlords = None - asset_list_header = 0 - landlord_block_reference = None - - # CDS - Sept 2025 - 
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/CDS/September 2025 Programme" - data_filename = "Founder Estates CDS.xlsx" - sheet_name = "Combined List" - postcode_column = 'Postcode' - address1_column = None # Is only patchily populated so we create it - address1_method = 'house_number_extraction' - fulladdress_column = "Address" - address_cols_to_concat = [] - missing_postcodes_method = None - landlord_year_built = None - landlord_os_uprn = None - landlord_property_type = "Property Type" - landlord_built_form = None - landlord_wall_construction = None - landlord_roof_construction = None - landlord_heating_system = "Heating Type" - landlord_existing_pv = None - landlord_property_id = "(Do Not Modify) Property" - landlord_sap = None - outcomes_filename = None - outcomes_sheetname = None - outcomes_postcode = None - outcomes_houseno = None - outcomes_id = None - outcomes_address = None - master_filepaths = [] - master_id_colnames = [] - master_to_asset_list_filepath = None - phase = False - ecosurv_landlords = None - asset_list_header = 0 - landlord_block_reference = None - - # Project from Nick - data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/sfr/October 2025 AL portfolio" - data_filename = "22.10 AL Portfolio.xlsx" - sheet_name = "22.10 AL Portfolio" - postcode_column = 'Postcode' - address1_column = None - address1_method = 'house_number_extraction' - fulladdress_column = "Address" - address_cols_to_concat = [] - missing_postcodes_method = None - landlord_year_built = None - landlord_os_uprn = None - landlord_property_type = None - landlord_built_form = None - landlord_wall_construction = None - landlord_roof_construction = None - landlord_heating_system = None - landlord_existing_pv = None - landlord_property_id = "Row ID" - landlord_sap = None - outcomes_filename = None - outcomes_sheetname = None - outcomes_postcode = None - outcomes_houseno = None - outcomes_id = None - outcomes_address = None - master_filepaths = 
[] - master_id_colnames = [] - master_to_asset_list_filepath = None - phase = False - ecosurv_landlords = None - asset_list_header = 0 - landlord_block_reference = None - - # Lambeth - data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Lambeth" - data_filename = "LAMBETH Asset List ( Incomplete).xlsx" - sheet_name = "Green properties" - postcode_column = 'SX3 Postcode' - address1_column = "SX3 Short Address" - address1_method = None - fulladdress_column = None - address_cols_to_concat = ["SX3 Short Address"] - missing_postcodes_method = None - landlord_year_built = None - landlord_os_uprn = None - landlord_property_type = "Property Type" - landlord_built_form = None - landlord_wall_construction = None - landlord_roof_construction = None - landlord_heating_system = None - landlord_existing_pv = None - landlord_property_id = "row_id" - landlord_sap = None - outcomes_filename = None - outcomes_sheetname = None - outcomes_postcode = None - outcomes_houseno = None - outcomes_id = None - outcomes_address = None - master_filepaths = [] - master_id_colnames = [] - master_to_asset_list_filepath = None - phase = False - ecosurv_landlords = None - asset_list_header = 0 - landlord_block_reference = None - - # # Colchester - # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Colchester/Aug2025 202 inspections" - # data_filename = "Colchester Borough Homes - Inspections - Additional 202 Addresses JW 280725 copy.xlsx" - # sheet_name = "Extra 202 Colchester Addresses" - # postcode_column = 'domna_postcode' - # address1_column = "domna_address_1" - # address1_method = None - # fulladdress_column = "domna_full_address" - # address_cols_to_concat = [] - # missing_postcodes_method = None - # landlord_year_built = None - # landlord_os_uprn = None - # landlord_property_type = "landlord_property_type" - # landlord_built_form = "landlord_built_form" - # landlord_wall_construction = None - # landlord_roof_construction = None - # 
landlord_heating_system = None - # landlord_existing_pv = None - # landlord_property_id = "landlord_property_id" - # landlord_sap = None - # outcomes_filename = None - # outcomes_sheetname = None - # outcomes_postcode = None - # outcomes_houseno = None - # outcomes_id = None - # outcomes_address = None - # master_filepaths = [] - # master_id_colnames = [] - # master_to_asset_list_filepath = None - # phase = False - # ecosurv_landlords = None - # asset_list_header = 0 - # landlord_block_reference = "landlord_block_reference" - - # # Abri - # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Abri/Post Inspections" - # data_filename = "Desktop ABRI data - Standardised After Programmes (2).xlsx" - # sheet_name = "Reviewed List" - # postcode_column = 'domna_postcode' - # address1_column = "domna_address_1" - # address1_method = None - # fulladdress_column = "domna_full_address" - # address_cols_to_concat = [] - # missing_postcodes_method = None - # landlord_year_built = "landlord_year_built" - # landlord_os_uprn = None - # landlord_property_type = "PropertyType_original_from_landlord" - # landlord_built_form = "BuildForm_original_from_landlord" - # landlord_wall_construction = "Wall Construction_original_from_landlord" - # landlord_roof_construction = None - # landlord_heating_system = "HeatingType_original_from_landlord" - # landlord_existing_pv = None - # landlord_property_id = "landlord_property_id" - # landlord_sap = None - # outcomes_filename = None - # outcomes_sheetname = None - # outcomes_postcode = None - # outcomes_houseno = None - # outcomes_id = None - # outcomes_address = None - # master_filepaths = [] - # master_id_colnames = [] - # master_to_asset_list_filepath = None - # phase = False - # ecosurv_landlords = None - # asset_list_header = 0 - # landlord_block_reference = None - - # Freebridge - data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Freebridge/Aug2025 programme" - data_filename = "Domna - FCH property data 
May 25 copy.xlsx" - sheet_name = "EPC Data" - postcode_column = 'Post Code' - address1_column = "Address 1" - address1_method = None - fulladdress_column = None - address_cols_to_concat = ["Address 1", "Address 4"] - missing_postcodes_method = None - landlord_year_built = "Build Date" - landlord_os_uprn = None - landlord_property_type = "Property Type" - landlord_built_form = None - landlord_wall_construction = "Walls Description" - landlord_heating_system = "Heating Type" - landlord_existing_pv = None - landlord_property_id = "Place Ref" - landlord_roof_construction = "Roof Description" - landlord_sap = "Current SAP" - outcomes_filename = [] - outcomes_sheetname = [] - outcomes_postcode = [] - outcomes_houseno = [] - outcomes_address = [] - outcomes_id = [] - master_filepaths = [] - master_to_asset_list_filepath = None - asset_list_header = 0 - landlord_block_reference = None - master_id_colnames = [] - phase = False # Inspections not complete, produce a partial view - ecosurv_landlords = None - - # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Broadlands" - # data_filename = "Broadlands Asset List.xlsx" - # sheet_name = "Assets" - # postcode_column = 'POSTCODE' - # fulladdress_column = None - # address1_column = "Address1" - # address1_method = None - # address_cols_to_concat = ["Address1"] - # missing_postcodes_method = None - # landlord_year_built = "DATEBUILT" - # landlord_os_uprn = None - # landlord_property_type = "PropertyType" - # landlord_built_form = "PropertyType" - # landlord_wall_construction = None - # landlord_heating_system = "Heating Fuel" - # landlord_existing_pv = None - # landlord_property_id = "Row ID" - # outcomes_filename = [os.path.join(data_folder, "outcomes.xlsx")] - # outcomes_sheetname = ["Sheet1"] - # outcomes_postcode = ["Postcode"] - # outcomes_houseno = ["No."] - # outcomes_address = ["Address"] - # outcomes_id = [None] - # master_filepaths = [ - # os.path.join(data_folder, "eco3 submissions.csv"), - # 
os.path.join(data_folder, "eco4 submissions.csv"), - # ] - # master_to_asset_list_filepath = None - # asset_list_header = 0 - # landlord_block_reference = None - # master_id_colnames = [None, None] - # landlord_roof_construction = None - # phase = False - # landlord_sap = None - # ecosurv_landlords = "broadland" - # # - # - # # Community: - # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Community Housing/New Programme" - # data_filename = "SUB EPC C to DOMNA - 24.07.25.xlsx" - # sheet_name = "Sheet1" - # postcode_column = 'POSTCODE' - # fulladdress_column = "ADDRESS" - # address1_column = None - # address1_method = "house_number_extraction" - # address_cols_to_concat = [] - # missing_postcodes_method = None - # landlord_year_built = "BUILD DATE" - # landlord_os_uprn = None - # landlord_property_type = "PROPERTY TYPE" - # landlord_built_form = "Archetype" # Using the inspections archetype - # landlord_wall_construction = "CONSTRUCTION TYPE" - # landlord_roof_construction = None - # landlord_heating_system = None - # landlord_existing_pv = None - # landlord_property_id = "UPRN" - # landlord_sap = None - # outcomes_filename = [] - # outcomes_sheetname = [] - # outcomes_postcode = [] - # outcomes_houseno = [] - # outcomes_id = [] - # outcomes_address = [] - # master_filepaths = [] - # master_to_asset_list_filepath = None - # phase = False - # ecosurv_landlords = None - # asset_list_header = 1 - # landlord_block_reference = None - # master_id_colnames = [] - # - # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Ealing/Programme Analysis" - # data_filename = "EalingProjectRebuildJW210725.xlsx" - # sheet_name = "Refine & Houses" - # postcode_column = 'Postcode' - # fulladdress_column = "Address" - # address1_column = None - # address1_method = "house_number_extraction" - # address_cols_to_concat = [] - # missing_postcodes_method = None - # landlord_year_built = None - # landlord_os_uprn = None - # landlord_property_type = 
None # Using the inspections property type - # landlord_built_form = None - # landlord_wall_construction = None - # landlord_roof_construction = None - # landlord_heating_system = None - # landlord_existing_pv = None - # landlord_property_id = "Property ref" - # landlord_sap = None - # outcomes_filename = [] - # outcomes_sheetname = [] - # outcomes_postcode = [] - # outcomes_houseno = [] - # outcomes_id = [] - # outcomes_address = [] - # master_filepaths = [] - # master_to_asset_list_filepath = None - # phase = False - # ecosurv_landlords = None - # asset_list_header = 0 - # landlord_block_reference = "Block Reference" - # master_id_colnames = [] - # - # # TODO: Delete me - # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/NRLA/" - # data_filename = "20250716 Asset List.xlsx" - # sheet_name = "Sheet 1" - # postcode_column = 'Postcode' - # fulladdress_column = "Full Address" - # address1_column = None - # address1_method = "house_number_extraction" - # address_cols_to_concat = [] - # missing_postcodes_method = None - # landlord_year_built = None - # landlord_os_uprn = None - # landlord_property_type = None - # landlord_built_form = None - # landlord_wall_construction = None - # landlord_heating_system = None - # landlord_existing_pv = None - # landlord_property_id = "Row ID" - # outcomes_filename = [] - # outcomes_sheetname = [] - # outcomes_postcode = [] - # outcomes_houseno = [] - # outcomes_address = [] - # outcomes_id = [] - # master_filepaths = [] - # master_to_asset_list_filepath = None - # asset_list_header = 0 - # landlord_block_reference = None - # master_id_colnames = [] - # landlord_roof_construction = None - # phase = False - # landlord_sap = None - # ecosurv_landlords = None - # - # # Southend - # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Southend/July 2025 Programme" - # data_filename = "SOUTHEND - RYAN.xlsx" - # sheet_name = "July 2025 Surveys" - # postcode_column = 'Postcode' - # fulladdress_column = 
"Full postal address" - # address1_column = None - # address1_method = "house_number_extraction" - # address_cols_to_concat = [] - # missing_postcodes_method = None - # landlord_year_built = "Property age" - # landlord_os_uprn = None - # landlord_property_type = "Property type" - # landlord_built_form = "Property type" - # landlord_wall_construction = None - # landlord_heating_system = None - # landlord_existing_pv = None - # landlord_property_id = "ID" - # outcomes_filename = [] - # outcomes_sheetname = [] - # outcomes_postcode = [] - # outcomes_houseno = [] - # outcomes_address = [] - # outcomes_id = [] - # master_filepaths = [] - # master_to_asset_list_filepath = None - # asset_list_header = 0 - # landlord_block_reference = None - # master_id_colnames = [] - # landlord_roof_construction = None - # phase = False - # landlord_sap = None - # ecosurv_landlords = None - # - # # For Rooftop - # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Rooftop" - # data_filename = "Rooftop Asset List - July 2025.xlsx" - # sheet_name = "Sheet1" - # postcode_column = 'post_code' - # fulladdress_column = None - # address1_column = "add_1" - # address1_method = None - # address_cols_to_concat = [ - # "add_1", "add_2", "add_3", "add_4" - # ] - # missing_postcodes_method = None - # landlord_year_built = "date_built" - # landlord_os_uprn = None - # landlord_property_type = "ConstructionStyle" - # landlord_built_form = "ConstructionStyle" - # landlord_wall_construction = None - # landlord_heating_system = "Description" - # landlord_existing_pv = None - # landlord_property_id = "PropertyCode" - # outcomes_filename = [os.path.join(data_folder, "Rooftop_Outcomes.xlsx")] - # outcomes_sheetname = ["OUTCOMES"] - # outcomes_postcode = ["POSTCODE"] - # outcomes_houseno = ["NO"] - # outcomes_address = ["ADDRESS"] - # outcomes_id = [None] - # master_filepaths = [os.path.join(data_folder, "Master.csv")] - # master_to_asset_list_filepath = None - # asset_list_header = 1 - # 
landlord_block_reference = "bl_rec_ref" - # master_id_colnames = [None] - # landlord_roof_construction = None - # phase = False - # landlord_sap = None - # ecosurv_landlords = "rooftop" - # - # # For Housing - # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/For Housing/New Programme July 2025" - # data_filename = "FOR HOUSING Asset List (Combined).xlsx" - # sheet_name = "Asset List" - # postcode_column = 'Postcode' - # fulladdress_column = "Address" - # address1_column = None - # address1_method = "house_number_extraction" - # address_cols_to_concat = [] - # missing_postcodes_method = None - # landlord_year_built = None - # landlord_os_uprn = None - # landlord_property_type = "Type" - # landlord_built_form = "Type" - # landlord_wall_construction = None - # landlord_heating_system = "Heating - full" - # landlord_existing_pv = None - # landlord_property_id = "UPRN" - # outcomes_filename = [os.path.join(data_folder, "Khalim Combined - for analysis.xlsx")] - # outcomes_sheetname = ["Sheet1"] - # outcomes_postcode = ["POSTCODE"] - # outcomes_houseno = ["NO"] - # outcomes_address = ["ADDRESS"] - # outcomes_id = [None] - # master_filepaths = [os.path.join(data_folder, "submissions.csv")] - # master_to_asset_list_filepath = None - # asset_list_header = 0 - # landlord_block_reference = None - # master_id_colnames = [None] - # landlord_roof_construction = None - # phase = False - # landlord_sap = "SAP" - # ecosurv_landlords = "for housing" - # - # # CDS - # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/CDS" - # data_filename = "Founder Estates - Asset List.xlsx" - # sheet_name = "Combined" - # postcode_column = 'Postcode' - # fulladdress_column = "Address" - # address1_column = None - # address1_method = "house_number_extraction" - # address_cols_to_concat = [] - # missing_postcodes_method = None - # landlord_year_built = None - # landlord_os_uprn = None - # landlord_property_type = None - # landlord_built_form = None - # 
landlord_wall_construction = None - # landlord_heating_system = "Heating Type" - # landlord_existing_pv = None - # landlord_property_id = "Row ID" - # outcomes_filename = [] - # outcomes_sheetname = [] - # outcomes_postcode = [] - # outcomes_houseno = [] - # outcomes_address = [] - # outcomes_id = [] - # master_filepaths = [os.path.join(data_folder, "submissions.csv")] - # master_to_asset_list_filepath = None - # asset_list_header = 0 - # landlord_block_reference = None - # master_id_colnames = [None] - # landlord_roof_construction = None - # phase = False - # landlord_sap = None - # ecosurv_landlords = "cds" - # - # # Plus Dane - # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Plus Dane/New Programme July 2025/" - # data_filename = "20250711 Plus Dane Asset List.xlsx" - # sheet_name = "Sheet1" - # postcode_column = 'Postcode' - # fulladdress_column = "Address" - # address1_column = None - # address1_method = "house_number_extraction" - # address_cols_to_concat = [] - # missing_postcodes_method = None - # landlord_year_built = "Property Age" - # landlord_os_uprn = None - # landlord_property_type = "Property Type" - # landlord_built_form = "Built Form" - # landlord_wall_construction = "Wall Construction" - # landlord_heating_system = "Full Heating System" - # landlord_existing_pv = None - # landlord_property_id = "UPRN" - # outcomes_filename = [ - # os.path.join(data_folder, "Outcomes - Plus Dane_CWI_2024.xlsx"), - # os.path.join(data_folder, "Outcomes - Plus Dane_CWI_2025.xlsx"), - # os.path.join(data_folder, "Outcomes - Plus Dane_PV_2025.xlsx"), - # ] - # outcomes_sheetname = [ - # "CWI & LI - 2024", "2025 - CWI", "PV - 2025", - # ] - # outcomes_postcode = ["Postcode", "Postcode", "Postcode"] - # outcomes_houseno = ["No.", "No", "No"] - # outcomes_address = ["Address", "Address", "Address"] - # outcomes_id = ["Asset Reference", "LL UPRN", "LL UPRN"] - # master_filepaths = [ - # os.path.join(data_folder, "submissions/JJC-Table 1.csv"), - # 
os.path.join(data_folder, "submissions/SCIS-Table 1.csv") - # ] - # master_to_asset_list_filepath = None - # asset_list_header = 1 - # landlord_block_reference = None - # master_id_colnames = [None, None] - # landlord_roof_construction = None - # phase = False - # landlord_sap = "SAP Rating" - # ecosurv_landlords = "plus dane" - - # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Brentwood/July 2025 New Programme" - # data_filename = "20250710 Asset List Brentwood.xlsx" - # sheet_name = "Sheet1" - # postcode_column = 'Postcode' - # fulladdress_column = None - # address1_column = "House Number" - # address1_method = None - # address_cols_to_concat = ["House Number", "Address Line 1", "Address Line 2", "Address Line 3"] - # missing_postcodes_method = None - # landlord_year_built = "Year Built" - # landlord_os_uprn = None - # landlord_property_type = "Dwelling" - # landlord_built_form = None - # landlord_wall_construction = None - # landlord_heating_system = "Heating" - # landlord_existing_pv = None - # landlord_property_id = "UPRN" - # outcomes_filename = [os.path.join(data_folder, "Brentwood - outcomes for analysis.xlsx")] - # outcomes_sheetname = ["OUTCOMES"] - # outcomes_postcode = ["POSTCODE"] - # outcomes_houseno = [None] - # outcomes_address = ["ADDRESS"] - # outcomes_id = [None] - # master_filepaths = [os.path.join(data_folder, "Submissions.csv")] - # master_to_asset_list_filepath = None - # asset_list_header = 1 - # landlord_block_reference = None - # master_id_colnames = [None] - # landlord_roof_construction = None - # phase = False - # landlord_sap = None - # ecosurv_landlords = "brentwood" - - # Brentwood - # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Brentwood/July 2025 New Programme" - # data_filename = "20250710 Asset List Brentwood.xlsx" - # sheet_name = "Sheet1" - # postcode_column = 'Postcode' - # fulladdress_column = None - # address1_column = "House Number" - # address1_method = None - # 
address_cols_to_concat = ["House Number", "Address Line 1", "Address Line 2", "Address Line 3"] - # missing_postcodes_method = None - # landlord_year_built = "Year Built" - # landlord_os_uprn = None - # landlord_property_type = "Dwelling" - # landlord_built_form = None - # landlord_wall_construction = None - # landlord_heating_system = "Heating" - # landlord_existing_pv = None - # landlord_property_id = "UPRN" - # outcomes_filename = [os.path.join(data_folder, "Brentwood - outcomes for analysis.xlsx")] - # outcomes_sheetname = ["OUTCOMES"] - # outcomes_postcode = ["POSTCODE"] - # outcomes_houseno = [None] - # outcomes_address = ["ADDRESS"] - # outcomes_id = [None] - # master_filepaths = [os.path.join(data_folder, "Submissions.csv")] - # master_to_asset_list_filepath = None - # asset_list_header = 1 - # landlord_block_reference = None - # master_id_colnames = [None] - # landlord_roof_construction = None - # phase = False - # landlord_sap = None - # ecosurv_landlords = "brentwood" - # - # # Eastlight - # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Eastlight/New Programme" - # data_filename = "INSPECTIONS MASTER Non Tech.xlsx" - # sheet_name = "EASTLIGHT CW" - # postcode_column = 'Postcode' - # fulladdress_column = None - # address1_column = "HouseName" - # address1_method = None - # address_cols_to_concat = ["HouseName", "Block", "Address1", "Address2", "Address3"] - # missing_postcodes_method = None - # landlord_year_built = "Built In Year" - # landlord_os_uprn = None - # landlord_property_type = "AssetType" - # landlord_built_form = "Archetype" # Using inspections archetype - # landlord_wall_construction = None - # landlord_roof_construction = None - # landlord_heating_system = "Main Heating Source" - # landlord_existing_pv = None - # landlord_property_id = "UPRN" - # landlord_sap = "SAP Score" - # outcomes_filename = [ - # os.path.join(data_folder, "Eastlight_CWI_JJC_2025.xlsx"), - # os.path.join(data_folder, 
"Eastlight_CWI_SCIS_2025.xlsx"), - # ] - # outcomes_sheetname = ["Outcomes", "Feedback"] - # outcomes_postcode = ["Postcode", "Postcode"] - # outcomes_houseno = ["No", "No."] - # outcomes_id = [None, None] - # outcomes_address = ["Address", "Address"] - # master_filepaths = [ - # os.path.join(data_folder, "ECO 3-Table 1.csv"), - # os.path.join(data_folder, "ECO 4-Table 1.csv"), - # ] - # master_to_asset_list_filepath = None - # phase = False - # ecosurv_landlords = "eastlight" - # asset_list_header = 0 - # landlord_block_reference = None - # master_id_colnames = [None, None] - # landlord_sap = None - - # Pickering and Ferens - # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Pickering & Ferens" - # data_filename = "SAP 9 vs SAP 10 Sava Intelligent Energy - Property List (190625).xlsx" - # sheet_name = "Sava Intelligent Energy - Prope" - # postcode_column = 'Postcode' - # fulladdress_column = 'Address' - # address1_column = None - # address1_method = "house_number_extraction" - # address_cols_to_concat = [] - # missing_postcodes_method = None - # landlord_year_built = None - # landlord_os_uprn = None - # landlord_property_type = "Property Type" # Using the inspections property type - # landlord_built_form = "Archetype 2" - # landlord_wall_construction = None - # landlord_roof_construction = None - # landlord_heating_system = None - # landlord_existing_pv = None - # landlord_property_id = "UPRN" - # landlord_sap = "SAP Rating (RdSAP 10)" - # outcomes_filename = [] - # outcomes_sheetname = [] - # outcomes_postcode = [] - # outcomes_houseno = [] - # outcomes_id = [] - # outcomes_address = [] - # master_filepaths = [ - # os.path.join(data_folder, "PICKERING & FERENS ROLLING MASTER SHEET HEDGEFUND - 26.7.24 - K.csv"), - # os.path.join(data_folder, "PICKERING & FERENS NEW MASTER GBIS UPDATED 21.8.24 - M - For Analysis.csv"), - # ] - # master_to_asset_list_filepath = None - # phase = False - # ecosurv_landlords = "pickering" - # asset_list_header = 0 
- # landlord_block_reference = None - # master_id_colnames = [None, None] - - # Colchester - # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Colchester" - # data_filename = "Warmfront data- Colchester Borough Homes (Complete).xlsx" - # sheet_name = "Sheet1" - # postcode_column = 'Full Address.1' - # fulladdress_column = "Full Address" - # address1_column = None - # address1_method = "first_word" - # address_cols_to_concat = [] - # missing_postcodes_method = None - # landlord_year_built = "Build Date" - # landlord_os_uprn = None - # landlord_property_type = "Property Type" - # landlord_wall_construction = "Wallinsul" - # landlord_heating_system = "HeatSorc" - # landlord_existing_pv = None - # landlord_property_id = "Property Reference" - # outcomes_filename = [] - # outcomes_sheetname = [] - # outcomes_postcode = [] - # outcomes_houseno = [] - # outcomes_id = [] - # outcomes_address = [] - # master_filepaths = [] - # master_to_asset_list_filepath = None - # asset_list_header = 0 - # landlord_built_form = None - # landlord_roof_construction = None - # landlord_sap = None - # landlord_block_reference = None - # phase = False - # ecosurv_landlords = None - # master_id_colnames = [] - - # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Ealing/Hubspot" - # data_filename = "EalingFlats.xlsx" - # sheet_name = "Sheet1" - # postcode_column = 'Postcode' - # fulladdress_column = "Address" - # address1_column = None - # address1_method = "house_number_extraction" - # address_cols_to_concat = [] - # missing_postcodes_method = None - # landlord_year_built = None - # landlord_os_uprn = None - # landlord_property_type = None # Using the inspections property type - # landlord_built_form = None - # landlord_wall_construction = None - # landlord_roof_construction = None - # landlord_heating_system = None - # landlord_existing_pv = None - # landlord_property_id = "Property ref" - # landlord_sap = None - # outcomes_filename = [] - # 
outcomes_sheetname = [] - # outcomes_postcode = [] - # outcomes_houseno = [] - # outcomes_id = [] - # outcomes_address = [] - # master_filepaths = [] - # master_to_asset_list_filepath = None - # phase = False - # ecosurv_landlords = None - # asset_list_header = 0 - # landlord_block_reference = "Block Ref" - # master_id_colnames = [] - - # Southern - Jan list - # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Southern/New Programme/Jan 2025 List" - # data_filename = "SOUTHERN ASSETS January 2025 Additions Query 21.03.2025.xlsx" - # sheet_name = "Jan 2025 additions" - # postcode_column = 'Post Code' - # fulladdress_column = None - # address1_column = "NO." - # address1_method = None - # address_cols_to_concat = ["NO.", "Street / Block Name", "Town/Area"] - # missing_postcodes_method = None - # landlord_year_built = None - # landlord_os_uprn = None - # landlord_property_type = None # Using the inspections property type - # landlord_built_form = None - # landlord_wall_construction = None - # landlord_roof_construction = None - # landlord_heating_system = None - # landlord_existing_pv = None - # landlord_property_id = "SH Property Reference" - # landlord_sap = None - # outcomes_filename = [ - # os.path.join(data_folder, "RT - Southern Housing Group - JJC.xlsx"), - # os.path.join(data_folder, "RT - SOUTHERN OUTCOMES - SCIS Merged.xlsx"), - # ] - # outcomes_sheetname = ["Feedback", "Collated"] - # outcomes_postcode = ["Poscode", "Postcode"] - # outcomes_houseno = ["No.", "No"] - # outcomes_id = ["UPRNs", None] - # outcomes_address = ["Address", "Address"] - # master_filepaths = [ - # os.path.join(data_folder, "southern_submissions/CAVITY'S - DECEMBER 2018-Table 1.csv"), - # os.path.join(data_folder, "southern_submissions/CAVITY'S 2019-Table 1.csv"), - # os.path.join(data_folder, "southern_submissions/CAVITY'S ECO4-Table 1.csv"), - # os.path.join(data_folder, "southern_submissions/LOFT'S-Table 1.csv"), - # ] - # master_to_asset_list_filepath = None - 
# phase = False - # ecosurv_landlords = "southern" - # asset_list_header = 0 - # landlord_block_reference = None - # master_id_colnames = [None, None, None, None] - - # NCHA - # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/NCHA" - # data_filename = "Energy Information MASTER June 2025.xlsx" - # sheet_name = "Data" - # postcode_column = 'Postcode' - # fulladdress_column = "Address" - # address1_column = None - # address1_method = "house_number_extraction" - # address_cols_to_concat = [] - # missing_postcodes_method = None - # landlord_year_built = "Build Date (HAR10)" - # landlord_os_uprn = None - # landlord_property_type = "Property Type (HAR10)" - # landlord_built_form = "Build Form (EPC)" - # landlord_wall_construction = "Wall Description" - # landlord_roof_construction = None - # landlord_heating_system = "HEAT Code" - # landlord_existing_pv = None - # landlord_property_id = "Place ref" - # landlord_sap = "EPC SAP" - # outcomes_filename = None - # outcomes_sheetname = None - # outcomes_postcode = None - # outcomes_houseno = None - # outcomes_id = None - # outcomes_address = None - # master_filepaths = [] - # master_to_asset_list_filepath = None - # phase = False - # ecosurv_landlords = None - # asset_list_header = 0 - # landlord_block_reference = None - # master_id_colnames = [] - - # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Calico" - # data_filename = "07.04 CALICO - Final List.xlsx" - # asset_list_header = 2 - # sheet_name = "Final List" - # postcode_column = 'Postcode' - # fulladdress_column = None - # address1_column = "Property Number / Name" - # address1_method = None - # address_cols_to_concat = [ - # "Property Number / Name", - # "Street", - # "Town" - # ] - # missing_postcodes_method = None - # landlord_year_built = "NROSH Estimated Build Date" - # landlord_os_uprn = None - # landlord_property_type = "Asset Type" - # landlord_built_form = None - # landlord_wall_construction = "Wall Type" - # 
landlord_heating_system = "Boiler Type" - # landlord_existing_pv = None - # landlord_property_id = "Asset Reference" - # outcomes_filename = [] - # outcomes_sheetname = [] - # outcomes_postcode = [] - # outcomes_houseno = [] - # outcomes_id = [] - # outcomes_address = [] - # master_filepaths = [] - # master_id_colnames = [] - # master_to_asset_list_filepath = None - # landlord_roof_construction = None - # landlord_block_reference = None - # landlord_sap = "Current Efficiency Rating - Score" - # phase = None - # ecosurv_landlords = None - - # data_folder = ( - # "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Livewest/Programme Update - March 2025/2018 Asset - # List" - # ) - # data_filename = "LIVEWEST STOCK - 23rd October 2018.xlsx" - # sheet_name = "Assets" - # postcode_column = 'Postcode' - # fulladdress_column = "Address" - # address1_column = None - # address1_method = "house_number_extraction" - # address_cols_to_concat = [] - # missing_postcodes_method = None - # landlord_year_built = "Build Year" - # landlord_os_uprn = None - # landlord_property_type = "Property Archetype" - # landlord_built_form = None - # landlord_wall_construction = None - # landlord_heating_system = "Heating Fuel Type" - # landlord_existing_pv = None - # landlord_property_id = "Uprn - DO NOT DELETE" - # outcomes_filename = [ - # os.path.join(data_folder, "RT - LiveWest.xlsx") - # ] - # outcomes_sheetname = ["Feedback"] - # outcomes_postcode = ["Poscode"] - # outcomes_houseno = ["No."] - # outcomes_id = ["UPRN"] - # outcomes_address = ["Address"] - # master_filepaths = [ - # "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Livewest/Programme Update - March 2025/Rolling - # Master " - # "- redacted for analysis/CAVITY-Table 1.csv" - # ] - # master_id_colnames = [None] - # master_to_asset_list_filepath = None - # landlord_roof_construction = None - # landlord_block_reference = None - # landlord_sap = None - # phase = None - # ecosurv_landlords = "livewest|live west" - - # 
data_folder = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Livewest/Programme Update - March " - # "2025/Livewest Asset List (Original) - csv") - # data_filename = "Report-Table 1.csv" - # sheet_name = None - # postcode_column = 'Postcode' - # fulladdress_column = "T1_Address" - # address1_column = None - # address1_method = "house_number_extraction" - # address_cols_to_concat = [] - # missing_postcodes_method = None - # landlord_year_built = "Build Yr" - # landlord_os_uprn = None - # landlord_property_type = "T1_AssetType" - # landlord_built_form = "T1_AssetType" - # landlord_wall_construction = "Wall Type Cavity" - # landlord_heating_system = "Heating Fuel" - # landlord_existing_pv = None - # landlord_property_id = "T1_UPRN" - # outcomes_filename = [ - # os.path.join(data_folder, "RT - LiveWest.xlsx") - # ] - # outcomes_address = ["Address"] - # outcomes_sheetname = ["Feedback"] - # outcomes_postcode = ["Poscode"] - # outcomes_houseno = ["No."] - # outcomes_id = ["UPRN"] - # master_filepaths = [ - # "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Livewest/Programme Update - March 2025/Rolling - # Master " - # "- redacted for analysis/CAVITY-Table 1.csv" - # ] - # master_id_colnames = [None] - # master_to_asset_list_filepath = None - # landlord_roof_construction = None - # landlord_block_reference = None - # landlord_sap = None - # phase = None - # ecosurv_landlords = "livewest|live west" - - # Stori - # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Storicymru" - # data_filename = "Asset list - for analysis.xlsx" - # sheet_name = "SAP and Costs Calculations" - # postcode_column = 'Postcode' - # fulladdress_column = "Address1" - # address1_column = None - # address1_method = "house_number_extraction" - # address_cols_to_concat = [] - # missing_postcodes_method = None - # landlord_year_built = "Age" - # landlord_os_uprn = None - # landlord_property_type = "TYPE" - # landlord_built_form = "AGE / DETACHMENT" - # 
landlord_wall_construction = "WALL" - # landlord_roof_construction = "LOFT INSULATION" - # landlord_heating_system = "BOILER" - # landlord_existing_pv = "SOLAR PV" - # landlord_property_id = "UPRN" - # landlord_sap = "Current SAP Rating" - # landlord_block_reference = None - # outcomes_filename = [] - # outcomes_sheetname = [] - # outcomes_postcode = [] - # outcomes_houseno = [] - # outcomes_id = [] - # outcomes_address = [] - # master_filepaths = [] - # master_to_asset_list_filepath = None - # master_id_colnames = [] - # phase = False - # ecosurv_landlords = None - - # Thrive - reconciliation - # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation" - # data_filename = "Thrive Asset List - Complete - Updated May 2025.xlsx" - # sheet_name = "Sheet1" - # postcode_column = 'postcode' - # fulladdress_column = "full_address" - # address1_column = "address_line_1" - # address1_method = None - # address_cols_to_concat = [] - # missing_postcodes_method = None - # landlord_year_built = "age_band_calculated" - # landlord_os_uprn = None - # landlord_property_type = "property_type" - # landlord_built_form = "build_form" - # landlord_wall_construction = None - # landlord_roof_construction = "assumed_loft_insulation_thickness_updated" - # landlord_heating_system = "heating_type_updated" - # landlord_existing_pv = None - # landlord_property_id = "thrive_property_id" - # landlord_sap = "sap_rating_updated" - # landlord_block_reference = "block_reference" - # outcomes_filename = [ - # os.path.join(data_folder, "Thrive - Outcomes - April 24-March25 - Corrected.xlsx") - # ] - # outcomes_sheetname = ["Sheet1"] - # outcomes_postcode = ["postcode"] - # outcomes_houseno = ["No."] - # outcomes_id = ["thrive_property_id"] - # outcomes_address = ["address"] - # master_filepaths = [ - # os.path.join(data_folder, "Thrive Submissions ECO3 - with IDS.csv"), - # os.path.join(data_folder, "Thrive Submissions ECO4 - with IDS.csv"), - # ] - # 
master_to_asset_list_filepath = None - # master_id_colnames = ["thrive_property_id", "thrive_property_id"] - # phase = False - # ecosurv_landlords = "thrive" - - # Southern Midlands - # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Southern/Midlands Properties - Apr 2025" - # data_filename = "Southern Housing Midlands Property List - combined.xlsx" - # sheet_name = "Sheet 1" - # postcode_column = 'Post Code' - # fulladdress_column = "Address" - # address1_column = None - # address1_method = "house_number_extraction" - # address_cols_to_concat = [] - # missing_postcodes_method = None - # landlord_year_built = "Age_1" - # landlord_os_uprn = None - # landlord_property_type = "Prop_Type" - # landlord_built_form = "Prop_Type" - # landlord_wall_construction = "Walls_P" - # landlord_heating_system = "Heating System" - # landlord_existing_pv = None - # landlord_property_id = "AssetID" - # outcomes_filename = None - # outcomes_sheetname = None - # outcomes_postcode = None - # outcomes_houseno = None - # outcomes_id = None - # outcomes_address = None - # master_filepaths = [] - # master_to_asset_list_filepath = None - # Maps addresses to uprn in problematic cases manual_uprn_map = {} diff --git a/asset_list/utils.py b/asset_list/utils.py index c7d0cc0a..8746c03a 100644 --- a/asset_list/utils.py +++ b/asset_list/utils.py @@ -2,7 +2,6 @@ import time import random import pandas as pd -from adhoc.investigation import newest_epc from backend.SearchEpc import SearchEpc from etl.find_my_epc.RetrieveFindMyEpc import RetrieveFindMyEpc from tqdm import tqdm diff --git a/backend/SearchEpc.py b/backend/SearchEpc.py index eb2b0b23..c47e82c4 100644 --- a/backend/SearchEpc.py +++ b/backend/SearchEpc.py @@ -917,7 +917,7 @@ class SearchEpc: return agg[key].values[0] - def find_property(self, skip_os=False): + def find_property(self, skip_os=False, api_data=None): """ This method will attempt to identify a property. 
It will, at first, use the EPC api to try and find the EPC for the property and the associated UPRN. If this fails, it will use the Ordnance Survey API to @@ -928,10 +928,17 @@ class SearchEpc: as a final check to see if there is any EPC data. If there is no EPC data, the epc data will be estimated based on the surrounding properties + + :param skip_os: If True, the ordnance survey api will be skipped and only the EPC api will be used + :param api_data: If provided, this data will be used instead of querying the EPC api """ # Step 1: use the epc api to find the property and uprn - response = self.get_epc() + if api_data: + self.data = api_data + response = {"status": 200} + else: + response = self.get_epc() if response["status"] == 200: ( diff --git a/backend/app/config.py b/backend/app/config.py index 98e1c447..dd3f5db1 100644 --- a/backend/app/config.py +++ b/backend/app/config.py @@ -3,7 +3,6 @@ from pydantic_settings import BaseSettings from typing import Optional - class Settings(BaseSettings): API_KEY: str API_KEY_NAME: str = "X-API-KEY" @@ -43,7 +42,8 @@ class Settings(BaseSettings): AWS_DEFAULT_REGION: Optional[str] = None class Config: - env_file = "backend.env" + env_file = "backend/.env" + @lru_cache() def get_settings(): diff --git a/backend/app/db/connection.py b/backend/app/db/connection.py index fbec9102..2ac9bd02 100644 --- a/backend/app/db/connection.py +++ b/backend/app/db/connection.py @@ -14,6 +14,7 @@ db_string = connection_string.format( db_engine = create_engine(db_string, pool_size=5, max_overflow=5) + def get_db_session(): if db_engine is None: raise RuntimeError("Database is not configured. 
Set DATABASE_URL in environment variables.") diff --git a/backend/app/db/functions/__init__.py b/backend/app/db/functions/__init__.py new file mode 100644 index 00000000..0f239d6e --- /dev/null +++ b/backend/app/db/functions/__init__.py @@ -0,0 +1,12 @@ +from .epc_functions import * +from .address_functions import * +from .portfolio_functions import * +from .energy_assessment_functions import * +from .property_functions import * +from .recommendations_functions import * +from .solar_functions import * +from .funding_functions import * +from .materials_functions import * +from .inspections_functions import * +from .non_intrusive_surveys import * +from .whlg_functions import * diff --git a/backend/app/db/functions/epc_functions.py b/backend/app/db/functions/epc_functions.py new file mode 100644 index 00000000..4b675f1f --- /dev/null +++ b/backend/app/db/functions/epc_functions.py @@ -0,0 +1,125 @@ +from datetime import datetime, timedelta, timezone +from sqlalchemy.orm import Session +from sqlalchemy.exc import SQLAlchemyError +from backend.app.db.models.epc import EpcStore + + +class EpcStoreService: + """ + Service layer for EPC data lookup and persistence. 
+ """ + + FRESHNESS_DAYS = 30 + + # status labels + FRESH = "fresh" + EXPIRED = "expired" + MISSING = "missing" + + @classmethod + def get_epc_for_uprn(cls, session: Session, uprn: int): + """ + Query EPC data for a given UPRN and return a dict describing: + - epc_api: only if within last 30 days + - epc_page: only if epc_api exists + - status: 'fresh', 'expired', or 'missing' + """ + + record = session.query(EpcStore).filter(EpcStore.uprn == uprn).first() + + if not record: + return {"status": cls.MISSING, "epc_api": None, "epc_page": None} + + if not record.epc_api_created_at: + # API data missing → treat as missing even if page data exists + return {"status": cls.MISSING, "epc_api": None, "epc_page": None} + + # check freshness + cutoff = datetime.now(timezone.utc) - timedelta(days=EpcStoreService.FRESHNESS_DAYS) + + if record.epc_api_created_at.date() < cutoff.date(): + return {"status": cls.EXPIRED, "epc_api": None, "epc_page": None} + + # Fresh API → include page only if present + return { + "status": cls.FRESH, + "epc_api": record.epc_api, + "epc_page": record.epc_page if record.epc_page else None, + "epc_page_rrn": record.epc_page_rrn, + "epc_api_created_at": record.epc_api_created_at, + "epc_page_created_at": record.epc_page_created_at, + } + + @classmethod + def check_insert_needed(cls, epc_cache, epc_estimated, uprn): + """ + Check if an insert is needed based on existing data. + :return: + """ + no_existing_epc_cache = epc_cache.get("epc_api") is None + existing_cache_expired = ( + epc_cache.get("status") == cls.EXPIRED + ) + + needs_insert = bool((no_existing_epc_cache or existing_cache_expired) and not epc_estimated and uprn) + + return needs_insert + + @staticmethod + def upsert_epc_data( + session: Session, + uprn: int, + epc_api: dict | None, + epc_page: str | None, + epc_page_rrn: str | None, + epc_api_created_at: datetime | None = None, + epc_page_created_at: datetime | None = None, + ): + """ + Insert or update EPC data for a UPRN. 
+ + Rules: + - If record exists → update it + - If record does not exist → create new + """ + + try: + record = session.query(EpcStore).filter(EpcStore.uprn == uprn).first() + + if record: + # update path + if epc_api is not None: + record.epc_api = epc_api + if epc_api_created_at is None: + epc_api_created_at = datetime.now(timezone.utc) + record.epc_api_created_at = epc_api_created_at + + # update page data only if BOTH: + # 1) the caller passed page data + # 2) epc_api is not None (page only allowed when API exists) + if epc_page is not None and epc_api is not None: + record.epc_page = epc_page + record.epc_page_rrn = epc_page_rrn + if epc_page_created_at is None: + epc_page_created_at = datetime.now(timezone.utc) + record.epc_page_created_at = epc_page_created_at + else: + # insert path + record = EpcStore( + uprn=uprn, + epc_api=epc_api, + epc_api_created_at=epc_api_created_at, + epc_page=epc_page if epc_api is not None else None, + epc_page_rrn=epc_page_rrn if epc_api is not None else None, + epc_page_created_at=epc_page_created_at if epc_api is not None else None, + ) + session.add(record) + + session.flush() + session.commit() + + return record + + except SQLAlchemyError as e: + session.rollback() + raise e diff --git a/backend/app/db/functions/tasks/Tasks.py b/backend/app/db/functions/tasks/Tasks.py index 06e1c6fe..12a2e51b 100644 --- a/backend/app/db/functions/tasks/Tasks.py +++ b/backend/app/db/functions/tasks/Tasks.py @@ -1,5 +1,3 @@ -from __future__ import annotations - # ---- Standard Library ---- from typing import Optional, Dict, Any from datetime import datetime, timezone @@ -27,20 +25,22 @@ class SubTaskInterface: # -------------------------------------------------------- # CREATE SUBTASK # -------------------------------------------------------- - def create_subtask(self, task_id: UUID, inputs: Optional[Dict[str, Any]] = None): - now = datetime.now(timezone.utc) + def create_subtask(self, task_id: UUID, inputs: Optional[Dict[str, Any]] = None, 
status=None): with get_db_session() as session: task = session.get(Task, task_id) if not task: raise ValueError(f"Task {task_id} not found") + # We treat waiting as the default status + status = "waiting" if status is None else status + subtask = SubTask( - taskId=task_id, + task_id=task_id, inputs=json.dumps(inputs) if inputs else None, - status="waiting", - jobStarted=None, - jobCompleted=None, + status=status, + job_started=None, + job_completed=None, ) session.add(subtask) @@ -49,12 +49,21 @@ class SubTaskInterface: # Recalculate parent task progress self._update_task_progress(session, task_id) - return subtask + return subtask.id # -------------------------------------------------------- # UPDATE STATUS (in progress, complete, failed) # -------------------------------------------------------- - def update_subtask_status(self, subtask_id: UUID, status: str): + def update_subtask_status( + self, subtask_id: UUID, status: str, outputs=None + ): + """ + Update the status of a subtask, and recalculate the parent task progress. 
+ :param subtask_id: UUID of the subtask to update + :param status: New status (in progress, complete, failed) + :param outputs: Optional outputs to set + :return: + """ now = datetime.now(timezone.utc) with get_db_session() as session: @@ -65,21 +74,23 @@ class SubTaskInterface: normalized = status.lower() # When job really starts - if normalized == "in progress" and subtask.jobStarted is None: - subtask.jobStarted = now + if normalized == "in progress" and subtask.job_started is None: + subtask.job_started = now # Completed or failed if normalized in ("complete", "failed"): - subtask.jobCompleted = now + subtask.job_completed = now subtask.status = normalized - subtask.updatedAt = now + subtask.updated_at = now + if outputs is not None: + subtask.outputs = json.dumps(outputs) session.add(subtask) session.commit() # Recalculate task status - self._update_task_progress(session, subtask.taskId) + self._update_task_progress(session, subtask.task_id) session.refresh(subtask) return subtask @@ -87,7 +98,8 @@ class SubTaskInterface: # -------------------------------------------------------- # UPDATE OUTPUTS # -------------------------------------------------------- - def update_subtask_output(self, subtask_id: UUID, outputs: Dict[str, Any]): + @staticmethod + def update_subtask_output(subtask_id: UUID, outputs: Dict[str, Any]): now = datetime.now(timezone.utc) with get_db_session() as session: @@ -96,7 +108,7 @@ class SubTaskInterface: raise ValueError(f"SubTask {subtask_id} not found") subtask.outputs = json.dumps(outputs) - subtask.updatedAt = now + subtask.updated_at = now session.add(subtask) session.commit() @@ -106,7 +118,8 @@ class SubTaskInterface: # -------------------------------------------------------- # UPDATE CLOUD LOGS URL # -------------------------------------------------------- - def update_subtask_logs(self, subtask_id: UUID, cloud_logs_url: str): + @staticmethod + def update_subtask_logs(subtask_id: UUID, cloud_logs_url: str): now = 
datetime.now(timezone.utc) with get_db_session() as session: @@ -114,8 +127,8 @@ class SubTaskInterface: if not subtask: raise ValueError(f"SubTask {subtask_id} not found") - subtask.cloudLogsURL = cloud_logs_url - subtask.updatedAt = now + subtask.cloud_logs_url = cloud_logs_url + subtask.updated_at = now session.add(subtask) session.commit() @@ -125,8 +138,8 @@ class SubTaskInterface: # -------------------------------------------------------- # SET BOTH OUTPUT + LOGS # -------------------------------------------------------- + @staticmethod def set_subtask_result( - self, subtask_id: UUID, outputs: Optional[Dict[str, Any]] = None, cloud_logs_url: Optional[str] = None, @@ -142,9 +155,9 @@ class SubTaskInterface: subtask.outputs = json.dumps(outputs) if cloud_logs_url is not None: - subtask.cloudLogsURL = cloud_logs_url + subtask.cloud_logs_url = cloud_logs_url - subtask.updatedAt = now + subtask.updated_at = now session.add(subtask) session.commit() session.refresh(subtask) @@ -153,13 +166,14 @@ class SubTaskInterface: # -------------------------------------------------------- # TASK PROGRESS CALCULATION # -------------------------------------------------------- - def _update_task_progress(self, session: Session, task_id: UUID): + @staticmethod + def _update_task_progress(session: Session, task_id: UUID): task = session.get(Task, task_id) if not task: return subtasks = session.exec( - select(SubTask).where(SubTask.taskId == task_id) + select(SubTask).where(SubTask.task_id == task_id) ).all() statuses = [s.status.lower() for s in subtasks] @@ -167,24 +181,24 @@ class SubTaskInterface: if "failed" in statuses: task.status = "failed" - task.jobCompleted = now + task.job_completed = now elif all(s == "complete" for s in statuses): task.status = "complete" - task.jobCompleted = now + task.job_completed = now elif "in progress" in statuses: task.status = "in progress" - if task.jobStarted is None: - task.jobStarted = now + if task.job_started is None: + task.job_started 
= now else: # All waiting task.status = "waiting" - task.jobStarted = None - task.jobCompleted = None + task.job_started = None + task.job_completed = None - task.updatedAt = now + task.updated_at = now session.add(task) session.commit() @@ -212,18 +226,18 @@ class SubTaskInterface: # Set logs if cloud_logs_url is not None: - subtask.cloudLogsURL = cloud_logs_url + subtask.cloud_logs_url = cloud_logs_url # Status + timestamps subtask.status = normalized - subtask.jobCompleted = now - subtask.updatedAt = now + subtask.job_completed = now + subtask.updated_at = now session.add(subtask) session.commit() # Update parent task (complete/failed) - self._update_task_progress(session, subtask.taskId) + self._update_task_progress(session, subtask.task_id) session.refresh(subtask) return subtask @@ -237,38 +251,49 @@ class TasksInterface: High-level operations for Task records. """ + @staticmethod def create_task( - self, - *, task_source: str, service: Optional[str] = None, inputs: Optional[Dict[str, Any]] = None, + task_only: bool = False, ): - now = datetime.now(timezone.utc) - + """ + Create a new Task record, and an initial SubTask in waiting state. Can also be used to create just + a task, without a subtask + :param task_source: Text indicating source of task creation (e.g. 
file path + function name) + :param service: Optional service name + :param inputs: Inputs of the job being run + :param task_only: If True, only create the Task record, without a SubTask + :return: + """ with get_db_session() as session: task = Task( - taskSource=task_source, + task_source=task_source, service=service, status="waiting", - jobStarted=None, - jobCompleted=None, + job_started=None, + job_completed=None, ) session.add(task) session.commit() session.refresh(task) + if task_only: + return task.id, None + # Create first subtask in waiting state subtask_interface = SubTaskInterface() - subtask = subtask_interface.create_subtask( + subtask_id = subtask_interface.create_subtask( task_id=task.id, inputs=inputs, ) - return task.id, subtask.id + return task.id, subtask_id - def update_task_status(self, task_id: UUID, status: str): + @staticmethod + def update_task_status(task_id: UUID, status: str): now = datetime.now(timezone.utc) with get_db_session() as session: @@ -278,14 +303,14 @@ class TasksInterface: normalized = status.lower() - if normalized == "in progress" and task.jobStarted is None: - task.jobStarted = now + if normalized == "in progress" and task.job_started is None: + task.job_started = now if normalized == "complete": - task.jobCompleted = now + task.job_completed = now task.status = normalized - task.updatedAt = now + task.updated_at = now session.add(task) session.commit() diff --git a/backend/app/db/models/epc.py b/backend/app/db/models/epc.py new file mode 100644 index 00000000..5a216040 --- /dev/null +++ b/backend/app/db/models/epc.py @@ -0,0 +1,29 @@ +from sqlalchemy import ( + Column, + Integer, + String, + JSON, + TIMESTAMP, + UniqueConstraint, +) +from sqlalchemy.orm import declarative_base + +Base = declarative_base() + + +class EpcStore(Base): + """ + Stores EPC data retrieved from the EPC API and EPC web pages. 
+ """ + __tablename__ = "epc_store" + + id = Column(Integer, primary_key=True, autoincrement=True) + uprn = Column(Integer) + epc_api_created_at = Column(TIMESTAMP(timezone=False)) + epc_api = Column(JSON, nullable=False) + epc_page_created_at = Column(TIMESTAMP(timezone=False)) + epc_page = Column(String) + epc_page_rrn = Column(String) + + def __repr__(self): + return f"" diff --git a/backend/app/db/models/materials.py b/backend/app/db/models/materials.py index 9b38addd..99759438 100644 --- a/backend/app/db/models/materials.py +++ b/backend/app/db/models/materials.py @@ -20,6 +20,7 @@ class MaterialType(enum.Enum): room_roof_insulation = "room_roof_insulation" windows_glazing = "windows_glazing" secondary_glazing = "secondary_glazing" + double_glazing = "double_glazing" cavity_wall_extraction = "cavity_wall_extraction" iwi_wall_demolition = "iwi_wall_demolition" diff --git a/backend/app/db/models/portfolio.py b/backend/app/db/models/portfolio.py index 953e7b3d..fbe9661b 100644 --- a/backend/app/db/models/portfolio.py +++ b/backend/app/db/models/portfolio.py @@ -4,6 +4,7 @@ import datetime from sqlalchemy import Column, Integer, Text, Boolean, Float, DateTime, Enum, ForeignKey, CheckConstraint from sqlalchemy.ext.declarative import declarative_base from backend.app.db.models.users import UserModel # noqa +from backend.app.db.models.materials import MaterialType Base = declarative_base() @@ -225,3 +226,18 @@ class PortfolioUsers(Base): role = Column(Text, nullable=False) created_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc)) updated_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc)) + + +class PropertyInstalledMeasures(Base): + """ + This model keeps a record of the installed measures for each property, at the UPRN level + """ + __tablename__ = 'property_installed_measures' + id = Column(Integer, primary_key=True, autoincrement=True) + uprn = Column(Integer, nullable=False) + measure_type = Column( + 
Enum(MaterialType, values_callable=lambda x: [e.value for e in x], create_constraint=False), + nullable=False + ) + created_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc)) + installed_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc)) diff --git a/backend/app/db/models/tasks.py b/backend/app/db/models/tasks.py index d8007dcd..cfe18d83 100644 --- a/backend/app/db/models/tasks.py +++ b/backend/app/db/models/tasks.py @@ -1,6 +1,4 @@ -from __future__ import annotations - -from typing import Optional, List +from typing import Optional from datetime import datetime from uuid import UUID, uuid4 @@ -10,64 +8,29 @@ from sqlmodel import SQLModel, Field, Relationship class Task(SQLModel, table=True): __tablename__ = "tasks" - id: UUID = Field( - default_factory=uuid4, - primary_key=True, - index=True, - ) - - taskSource: str = Field(alias="task_source") - - jobStarted: Optional[datetime] = Field( - default=None, alias="job_started" - ) - jobCompleted: Optional[datetime] = Field( - default=None, alias="job_completed" - ) - + id: UUID = Field(default_factory=uuid4, primary_key=True, index=True, ) + task_source: str + job_started: Optional[datetime] = None + job_completed: Optional[datetime] = None status: str = Field(default="In Progress") service: Optional[str] = None + updated_at: datetime = Field(default_factory=datetime.utcnow) - updatedAt: datetime = Field( - default_factory=datetime.utcnow, - alias="updated_at", - ) - - # Relationship - subTasks: List["SubTask"] = Relationship(back_populates="task") + sub_tasks: list["SubTask"] = Relationship(back_populates="task") class SubTask(SQLModel, table=True): __tablename__ = "sub_task" - id: UUID = Field( - default_factory=uuid4, - primary_key=True, - index=True, - ) - - taskId: UUID = Field( - foreign_key="tasks.id", - alias="task_id", - ) - - jobStarted: Optional[datetime] = Field( - default=None, alias="job_started" - ) - jobCompleted: Optional[datetime] = Field( - 
default=None, alias="job_completed" - ) + id: UUID = Field(default_factory=uuid4, primary_key=True, index=True, ) + task_id: UUID = Field(foreign_key="tasks.id") + job_started: Optional[datetime] = None + job_completed: Optional[datetime] = None status: str = Field(default="In Progress") - inputs: Optional[str] = None outputs: Optional[str] = None - cloudLogsURL: Optional[str] = Field(alias="cloud_logs_url") + cloud_logs_url: Optional[str] = None + updated_at: datetime = Field(default_factory=datetime.utcnow) - updatedAt: datetime = Field( - default_factory=datetime.utcnow, - alias="updated_at", - ) - - # Relationship - task: Optional[Task] = Relationship(back_populates="subTasks") + task: Optional["Task"] = Relationship(back_populates="sub_tasks") diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index af57e35a..d143dc95 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -15,6 +15,7 @@ from utils.logger import setup_logger from backend.app.db.connection import db_engine from backend.app.db.functions.recommendations_functions import create_scenario +from backend.app.db.functions.tasks.Tasks import TasksInterface, SubTaskInterface logger = setup_logger() @@ -81,12 +82,34 @@ async def trigger_plan_entrypoint(body: PlanTriggerRequest): # Insert the scenario ID into the data payload data["scenario_id"] = scenario_id + # Create a main task + task_id, _ = TasksInterface.create_task( + task_source="backend/plan/router.py:trigger_plan_entrypoint", + service="plan_engine", + inputs=data, + task_only=True + ) + + subtask_interface = SubTaskInterface() for i in range(total_chunks): # Create an entry in the request logs table index_start = i * chunk_size index_end = min((i + 1) * chunk_size, total_rows) - message_payload = {**data, "index_start": index_start, "index_end": index_end} + message_payload = { + **data, "index_start": index_start, "index_end": index_end, + } + + # Create a subtask for this chunk + subtask_id = 
subtask_interface.create_subtask( + task_id=task_id, + inputs=message_payload + ) + + # Add task and subtask to message + message_payload["task_id"] = str(task_id) + message_payload["subtask_id"] = str(subtask_id) + message_body = json.dumps(message_payload) response = sqs_client.send_message( diff --git a/backend/app/plan/schemas.py b/backend/app/plan/schemas.py index 858a0a35..6f6db328 100644 --- a/backend/app/plan/schemas.py +++ b/backend/app/plan/schemas.py @@ -129,6 +129,10 @@ class PlanTriggerRequest(BaseModel): index_start: Optional[int] = None index_end: Optional[int] = None + # Task and subtask IDs + task_id: Optional[str] = None + subtask_id: Optional[str] = None + @model_validator(mode="after") def check_indexes(self): if (self.index_start is None) != (self.index_end is None): diff --git a/backend/app/plan/utils.py b/backend/app/plan/utils.py index 67b7bce1..3f2e0e1f 100644 --- a/backend/app/plan/utils.py +++ b/backend/app/plan/utils.py @@ -1,8 +1,10 @@ import msgpack +from uuid import UUID +from typing import Any from utils.s3 import read_from_s3 from backend.app.config import get_settings from backend.app.plan.data_classes import PropertyRequestData -from typing import Any +from backend.app.db.functions.tasks.Tasks import SubTaskInterface from starlette.responses import Response from utils.logger import setup_logger @@ -211,8 +213,13 @@ def parse_eco_packages(config: dict[str, Any], prepared_epc) -> tuple[list[str], return measures, mapped["target_sap"], mapped["plan_type"], already_installed -def handle_error(session, msg, status=500): +def handle_error(session, msg, e, subtask_id, status=500): # When the pipeline fails, handles error process + SubTaskInterface().update_subtask_status( + subtask_id=UUID(subtask_id), + status="failed", + outputs=str(e) + ) logger.error(msg, exc_info=True) session.rollback() return Response(status_code=status, content=msg) diff --git a/backend/engine/engine.py b/backend/engine/engine.py index 1cd379b9..be770d8e 100644 
--- a/backend/engine/engine.py +++ b/backend/engine/engine.py @@ -5,26 +5,20 @@ from datetime import datetime from tqdm import tqdm import pandas as pd import numpy as np -from etl.epc.Record import EPCRecord +from uuid import UUID + +from backend.Funding import Funding from backend.SearchEpc import SearchEpc + +from etl.epc.Record import EPCRecord from sqlalchemy.exc import IntegrityError, OperationalError from sqlalchemy.orm import sessionmaker from starlette.responses import Response from backend.app.config import get_settings, get_prediction_buckets from backend.app.db.connection import db_engine -from backend.app.db.functions.materials_functions import get_materials -from backend.app.db.functions.portfolio_functions import aggregate_portfolio_recommendations -from backend.app.db.functions.property_functions import ( - create_property_details_epc, create_property_targets, update_property_data, - update_or_create_property_spatial_details, ensure_property_exists -) -from backend.app.db.functions.recommendations_functions import ( - create_plan, upload_recommendations, create_scenario -) -from backend.app.db.functions.funding_functions import upload_funding -from backend.app.db.functions.energy_assessment_functions import get_latest_assessment_by_uprn -from backend.app.db.functions.address_functions import get_associated_uprns +import backend.app.db.functions as db_funcs +from backend.app.db.functions.tasks.Tasks import SubTaskInterface from backend.app.db.models.portfolio import rating_lookup from backend.app.plan.schemas import PlanTriggerRequest, WALL_INSULATION_MEASURES, ROOF_INSULATION_MEASURES @@ -33,9 +27,6 @@ from backend.app.plan.utils import ( ) from backend.app.utils import sap_to_epc import backend.app.assumptions as assumptions -from backend.app.db.functions.inspections_functions import ( - extract_inspection_data, bulk_upsert_inspections_pg -) from backend.ml_models.api import ModelApi from backend.Property import Property @@ -45,18 +36,18 @@ from 
recommendations.optimiser.CostOptimiser import CostOptimiser from recommendations.optimiser.GainOptimiser import GainOptimiser import recommendations.optimiser.optimiser_functions as optimiser_functions from recommendations.Recommendations import Recommendations -from utils.logger import setup_logger -from utils.s3 import read_dataframe_from_s3_parquet, read_csv_from_s3, read_excel_from_s3 from backend.ml_models.Valuation import PropertyValuation from etl.bill_savings.KwhData import KwhData from etl.spatial.OpenUprnClient import OpenUprnClient from etl.find_my_epc.RetrieveFindMyEpc import RetrieveFindMyEpc -from backend.Funding import Funding from recommendations.optimiser.funding_optimiser import optimise_with_funding_paths from recommendations.recommendation_utils import convert_thickness_to_numeric, get_wall_u_value +from utils.logger import setup_logger +from utils.s3 import read_dataframe_from_s3_parquet, read_csv_from_s3, read_excel_from_s3 + logger = setup_logger() BATCH_SIZE = 5 @@ -392,6 +383,26 @@ def parse_heating_system(config): return None +def check_duplicate_uprns(plan_input): + """ + Simple function to check if the input data contains duplicated UPRNS. 
+ If there are duplicates, an exception will be raised + :return: + """ + # Check for duplicate UPRNS + input_uprns = [x.get("uprn") for x in plan_input if "uprn" in x and x.get("uprn")] + + if input_uprns: + # Check for dupes + if len(input_uprns) != len(set(input_uprns)): + # Find the duplicate UPRNs + duplicates = set([x for x in input_uprns if input_uprns.count(x) > 1]) + # de-dupe input_uprns + raise ValueError(f"Duplicate UPRNs in the input data: {duplicates}") + + return True + + async def model_engine(body: PlanTriggerRequest): logger.info("Model Engine triggered with body: %s", json.loads(body.model_dump_json())) @@ -421,6 +432,9 @@ async def model_engine(body: PlanTriggerRequest): ) # Where the EPC has been estimated, that is because a UPRN wasn't avaialble and so we remove UPRN # This will be reflexted + if "estimated" not in plan_input.columns: + plan_input["estimated"] = False + plan_input["uprn"] = np.where( plan_input["estimated"].isin([1, True]) & ( (plan_input["uprn"] < 0) | pd.isnull(plan_input["uprn"]) @@ -480,16 +494,8 @@ async def model_engine(body: PlanTriggerRequest): if body.index_start is not None and body.index_end is not None: plan_input = plan_input[body.index_start:body.index_end] - # Check for duplicate UPRNS - input_uprns = [x.get("uprn") for x in plan_input if "uprn" in x and x.get("uprn")] - - if input_uprns: - # Check for dupes - if len(input_uprns) != len(set(input_uprns)): - # Find the duplicate UPRNs - duplicates = set([x for x in input_uprns if input_uprns.count(x) > 1]) - # de-dupe input_uprns - raise ValueError(f"Duplicate UPRNs in the input data: {duplicates}") + # Confirm no duplicate UPRNS + check_duplicate_uprns(plan_input) # If we have patches or overrides, we should read them in here patches, already_installed, non_invasive_recommendations, valuation_data = get_request_property_data(body) @@ -515,10 +521,18 @@ async def model_engine(body: PlanTriggerRequest): if uprn: uprn = int(float(uprn)) + epc_api_data, epc_page, 
rrn, epc_cache = None, None, None, {} + if uprn: + # if we have a UPRN, we check if we already have EPC data associated with this UPRN + epc_cache = db_funcs.epc_functions.EpcStoreService.get_epc_for_uprn(session, uprn) + + if epc_cache["status"] == db_funcs.epc_functions.EpcStoreService.FRESH: + epc_api_data, epc_page, rrn = epc_cache["epc_api"], epc_cache["epc_page"], epc_cache["epc_page_rrn"] + address1 = config.get("address", None) # Handle domna address list format if pd.isnull(address1) and body.file_format == "domna_asset_list": - address1 = config.get("domna_full_address", None) + address1 = config.get("domna_address_1", None) address1 = str(int(address1)) if isinstance(address1, float) else str(address1) full_address = config["domna_full_address"] if body.file_format == "domna_asset_list" else None @@ -528,7 +542,7 @@ async def model_engine(body: PlanTriggerRequest): if (body.event_type == "remote_assessment") and config.get("property_type") == "Flat": # We're running a remote assessment for a flat - we go and grab the associated # UPRNS for other units in the same building - associated_uprns = get_associated_uprns( + associated_uprns = db_funcs.address_functions.get_associated_uprns( session, postcode=config["postcode"], uprn=uprn ) @@ -545,16 +559,20 @@ async def model_engine(body: PlanTriggerRequest): epc_searcher.ordnance_survey_client.built_form = config.get("built_form", None) epc_searcher.ordnance_survey_client.property_type = config.get("property_type", None) # For the moment, our OS API access is unavailable, so we skip and interpolate - epc_searcher.find_property(skip_os=True) + + epc_searcher.find_property(skip_os=True, api_data=epc_api_data) + if epc_searcher.newest_epc.get("estimated") and body.file_format == "domna_asset_list" and ( epc_searcher.newest_epc["uprn"] < 0 ): epc_searcher.newest_epc["uprn-source"] = epc_searcher.UPRN_SOURCE_SIMULATED # We check for an energy assessment we have performed on this property: - energy_assessment = 
get_latest_assessment_by_uprn(session, uprn if uprn is not None else epc_searcher.uprn) + energy_assessment = db_funcs.energy_assessment_functions.get_latest_assessment_by_uprn( + session, uprn if uprn is not None else epc_searcher.uprn + ) - property_id, is_new = ensure_property_exists( + property_id, is_new = db_funcs.property_functions.ensure_property_exists( session, body, epc_searcher, energy_assessment, landlord_property_id=config.get("landlord_property_id") ) if not property_id: @@ -570,7 +588,7 @@ async def model_engine(body: PlanTriggerRequest): ) if is_new: - create_property_targets( + db_funcs.property_functions.create_property_targets( session, property_id=property_id, portfolio_id=body.portfolio_id, @@ -599,18 +617,19 @@ async def model_engine(body: PlanTriggerRequest): patch = req_data.patch # if we have a remote assment data type, we pull the additional data and include it + epc_page_source = {} if (body.event_type == "remote_assessment") and not (epc_searcher.newest_epc.get("estimated")): logger.info("Retrieving find my epc data") try: - property_non_invasive_recommendations, patch = RetrieveFindMyEpc.get_from_epc( - epc_searcher.newest_epc + property_non_invasive_recommendations, patch, epc_page_source = RetrieveFindMyEpc.get_from_epc( + epc_searcher.newest_epc, epc_page, rrn=rrn ) except Exception as e: logger.error(f"Failed to retrieve without cleaning address {e}") for k in ["address", "address1"]: epc_searcher.newest_epc[k] = epc_searcher.address_clean - property_non_invasive_recommendations, patch = RetrieveFindMyEpc.get_from_epc( - epc_searcher.newest_epc + property_non_invasive_recommendations, patch, epc_page_source = RetrieveFindMyEpc.get_from_epc( + epc_searcher.newest_epc, epc_page, rrn=rrn ) # If we have a property type, this means when we pull the epc data, we might need to make a patch @@ -627,7 +646,7 @@ async def model_engine(body: PlanTriggerRequest): eco_packages[property_id] = parse_eco_packages(config, prepared_epc) # Final step 
- extract inspections data, if we have it - we inject into property for usage - property_inspections = extract_inspection_data(config) + property_inspections = db_funcs.inspections_functions.extract_inspection_data(config) if property_inspections: inspections_map[property_id] = property_inspections @@ -647,6 +666,24 @@ async def model_engine(body: PlanTriggerRequest): ) ) + # If we have: + # 1) No EPC API data + # 2) A real EPC + # 3) A UPRN (meaning that a UPRN could be fetched against that property) + # We store this data + + if db_funcs.epc_functions.EpcStoreService.check_insert_needed( + epc_cache, epc_searcher.newest_epc.get("estimated"), epc_searcher.uprn + ): + # We store the EPC data we have found for this property + db_funcs.epc_functions.EpcStoreService.upsert_epc_data( + session=session, + uprn=epc_searcher.uprn, + epc_api=epc_searcher.data, + epc_page=epc_page_source.get("page_source"), + epc_page_rrn=epc_page_source.get("rrn"), + ) + if not input_properties: return Response(status_code=204) @@ -654,7 +691,7 @@ async def model_engine(body: PlanTriggerRequest): # aginst each property if if inspections_map: logger.info("Inserting inspections data") - bulk_upsert_inspections_pg(session, inspections_map) + db_funcs.inspections_functions.bulk_upsert_inspections_pg(session, inspections_map) # Set up model api and warm up the lambdas model_api = ModelApi( @@ -671,7 +708,7 @@ async def model_engine(body: PlanTriggerRequest): # consistent requests to the backend for # the same data logger.info("Reading in materials and cleaned datasets") - materials = get_materials(session) + materials = db_funcs.materials_functions.get_materials(session) cleaned = get_cleaned() project_scores_matrix, partial_project_scores_matrix, whlg_eligible_postcodes = get_funding_data() @@ -1096,7 +1133,7 @@ async def model_engine(body: PlanTriggerRequest): # We don't need to create a new scenario, we just use the existing one scenario_id = body.scenario_id else: - engine_scenario = 
create_scenario( + engine_scenario = db_funcs.recommendations_functions.create_scenario( session=session, scenario={ "name": body.scenario_name, @@ -1140,24 +1177,26 @@ async def model_engine(body: PlanTriggerRequest): ) property_value_increase_ranges[p.id] = valuations + # TODO - this is not right, especially if the existing run failed if p.is_new: property_details_epc = p.get_property_details_epc( portfolio_id=body.portfolio_id, rating_lookup=rating_lookup, ) - create_property_details_epc(session, property_details_epc) + db_funcs.property_functions.create_property_details_epc(session, property_details_epc) - update_or_create_property_spatial_details(session, p.uprn, p.spatial) + db_funcs.property_functions.update_or_create_property_spatial_details( + session, p.uprn, p.spatial + ) property_data = p.get_full_property_data(current_valuation=valuations["current_value"]) - update_property_data( + db_funcs.property_functions.update_property_data( session, property_id=p.id, portfolio_id=body.portfolio_id, property_data=property_data ) if not recommendations_to_upload: continue - - new_plan_id = create_plan(session, { + new_plan_id = db_funcs.recommendations_functions.create_plan(session, { "portfolio_id": body.portfolio_id, "property_id": p.id, "scenario_id": scenario_id, @@ -1175,11 +1214,10 @@ async def model_engine(body: PlanTriggerRequest): "plan_type": eco_packages.get(p.id, (None, None, None))[2] }) - upload_recommendations( + db_funcs.recommendations_functions.upload_recommendations( session, recommendations_to_upload, p.id, new_plan_id ) - - upload_funding(session, p, new_plan_id, recommendations_to_upload) + db_funcs.funding_functions.upload_funding(session, p, new_plan_id, recommendations_to_upload) if valuations["current_value"] > 0: property_valuation_increases.append( @@ -1218,7 +1256,7 @@ async def model_engine(body: PlanTriggerRequest): property_value_increase_ranges=property_value_increase_ranges ) - aggregate_portfolio_recommendations( + 
db_funcs.portfolio_functions.aggregate_portfolio_recommendations( session, portfolio_id=body.portfolio_id, scenario_id=scenario_id, @@ -1230,17 +1268,20 @@ async def model_engine(body: PlanTriggerRequest): # Commit final changes session.commit() - except IntegrityError: - return handle_error(session, "Database integrity error.", 500) - except OperationalError: - return handle_error(session, "Database operational error.", 500) - except ValueError: - return handle_error(session, "Bad request: malformed data.", 400) + except IntegrityError as e: + return handle_error(session, "Database integrity error.", e, body.subtask_id, 500) + except OperationalError as e: + return handle_error(session, "Database operational error.", e, body.subtask_id, 500) + except ValueError as e: + return handle_error(session, "Bad request: malformed data.", e, body.subtask_id, 400) except Exception as e: # General exception handling - return handle_error(session, "An unexpected error occurred.", 500) + return handle_error(session, "An unexpected error occurred.", e, body.subtask_id, 500) finally: session.close() + # Mark the subtask as successful + SubTaskInterface().update_subtask_status(subtask_id=UUID(body.subtask_id), status="failed") + logger.info("Model Engine completed successfully") return Response(status_code=200) diff --git a/backend/tests/test_integration.py b/backend/tests/test_integration.py index ac61a09c..45dd109a 100644 --- a/backend/tests/test_integration.py +++ b/backend/tests/test_integration.py @@ -93,7 +93,7 @@ costs_by_floor_area = costs_by_floor_area.groupby("current-energy-efficiency")[ epc_data = epc_data[~pd.isnull(epc_data["UPRN"])] sample_epc_data = epc_data[pd.to_datetime(epc_data["LODGEMENT_DATE"]) >= "2015-01-01"].drop_duplicates("UPRN").sample( - 5000).reset_index(drop=True) + 10000).reset_index(drop=True) # TODO: In Property find_energy_sources, sort out biomass community heating - what fuel type # TODO: We might be able to remove find_energy_sources entirely 
and remove estimate_electrical_consumption. It's used diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/data_prep.py b/etl/customers/peabody/Nov 2025 Consulting Project/data_prep.py index c68a0b58..77736aff 100644 --- a/etl/customers/peabody/Nov 2025 Consulting Project/data_prep.py +++ b/etl/customers/peabody/Nov 2025 Consulting Project/data_prep.py @@ -10,6 +10,7 @@ import json import time import os import pandas as pd +import numpy as np from tqdm import tqdm from dotenv import load_dotenv from asset_list.utils import get_data_for_property @@ -52,8 +53,6 @@ n_postcodes = property_list["Post Code"].nunique() postcode_summary = property_list.groupby("Post Code")["UPRN"].count().reset_index() postcode_summary["UPRN"].mean() -test_match = property_list.merge(sustainability_data, left_on="UPRN", right_on="Org Ref") - def classify_floor_area(x): if x <= 72: @@ -70,12 +69,187 @@ sustainability_data["Floor Area Band"] = sustainability_data["Total Floor Area ( lambda x: classify_floor_area(x) ) -archetypes = sustainability_data[ - ["Type", "Attachment", "Construction Years", "Wall Construction", "Wall Insulation", - "Roof Construction", "Roof Insulation", "Floor Construction", "Floor Insulation", - "Glazing", "Heating", "Boiler Efficiency", "Main Fuel", "Controls Adequacy", - "Floor Area Band"] -].drop_duplicates() +# Archetype reductions + +# Roof insulation category +# 1) Split roof insulation into > 100mm loft and <= 100mm loft +sustainability_data["Roof Insulation Category"] = sustainability_data["Roof Insulation"].copy() +sustainability_data["Roof Insulation Category"] = np.where( + sustainability_data["Roof Insulation Category"].isin( + ['mm200', 'mm300', 'mm250', 'mm150', 'mm270', 'mm400', 'mm350'], + ), + "LI > 100mm", + sustainability_data["Roof Insulation Category"], +) + +sustainability_data["Roof Insulation Category"] = np.where( + sustainability_data["Roof Insulation Category"].isin( + ['mm100', 'mm50', 'mm75', 'mm25'], + ), + "LI <= 100mm", + 
sustainability_data["Roof Insulation Category"], +) + +# 2) Group all of the glazed together (e.g. double glazed, secondary glazed, triple glazed) +sustainability_data["Glazing Type"] = sustainability_data["Glazing"].copy() +sustainability_data["Glazing Type"] = np.where( + sustainability_data["Glazing Type"].isin( + ['Double 2002 or later', 'Double before 2002', 'Double but age unknown', 'DoubleKnownData'] + ), + "Double Glazed", + sustainability_data["Glazing Type"], +) +sustainability_data["Glazing Type"] = np.where( + sustainability_data["Glazing Type"].isin(['Triple', 'TripleKnownData']), + "Triple Glazed", + sustainability_data["Glazing Type"], +) + +# 3) Group up boiler efficiency into A, B-D and E-G (or something like this) +sustainability_data["Boiler Efficiency Group"] = sustainability_data["Boiler Efficiency"].copy() +sustainability_data["Boiler Efficiency Group"] = np.where( + sustainability_data["Boiler Efficiency Group"].isin(['B', 'C', 'D']), + "B-D", + sustainability_data["Boiler Efficiency Group"], +) +sustainability_data["Boiler Efficiency Group"] = np.where( + sustainability_data["Boiler Efficiency Group"].isin(['E', 'F', 'G']), + "E-G", + sustainability_data["Boiler Efficiency Group"], +) + +# 4) Group up main fuel into gas, electric, oil, other? 
+sustainability_data["Main Fuel Group"] = sustainability_data["Main Fuel"].copy() +sustainability_data["Main Fuel Group"] = np.where( + sustainability_data["Main Fuel Group"].isin( + ["SmokelessCoal", "BiomassCommunity", "B30DCommunity"] + ), + "Other Fuel", + sustainability_data["Main Fuel Group"], +) + +# 5) Wall Construction - group up Sandstone and Granite into one category +sustainability_data["Wall Construction"] = np.where( + sustainability_data["Wall Construction"].isin(["Sandstone", "Granite"]), + "Sandstone/Granite", + sustainability_data["Wall Construction"] +) + +sustainability_data["Wall Construction"] = np.where( + sustainability_data["Wall Construction"].isin(["Timber Frame", "System", "Solid Brick"]), + "Solid", + sustainability_data["Wall Construction"] +) + +# 6) Reduce or remove floor construction +sustainability_data["Floor Construction"] = np.where( + sustainability_data["Floor Construction"].isin(["SuspendedTimber", "SuspendedNotTimber"]), + "Suspended Floor", + sustainability_data["Floor Construction"] +) + +# 7) Reduce wall insulation +sustainability_data["Wall Insulation"] = np.where( + sustainability_data["Wall Insulation"].isin( + ["FilledCavityPlusInternal", "FilledCavityPlusExternal", "FilledCavity", "External", "Internal"] + ), + "Insulated", + sustainability_data["Wall Insulation"] +) + +# 8) Fill floor insulation +sustainability_data["Floor Insulation"] = sustainability_data["Floor Insulation"].fillna("Unknown") + +# 9) Reduce Age bands +sustainability_data["Construction Years"] = np.where( + sustainability_data["Construction Years"].isin(["2003-2006", "2007-2011", "2012 onwards"]), + "2003 onwards", + sustainability_data["Construction Years"], +) + +sustainability_data["Construction Years"] = np.where( + sustainability_data["Construction Years"].isin(["Before 1900", "1900-1929"]), + "Before 1929", + sustainability_data["Construction Years"], +) + +sustainability_data["Construction Years"] = np.where( + 
sustainability_data["Construction Years"].isin(["1983-1990", "1991-1995"]), + "1983-1995", + sustainability_data["Construction Years"], +) + +sustainability_data["Construction Years"] = np.where( + sustainability_data["Construction Years"].isin(["1950-1966", "1967-1975", "1976-1982"]), + "1950-1982", + sustainability_data["Construction Years"], +) + +# Roof +sustainability_data["Roof Construction"] = np.where( + sustainability_data["Roof Construction"].isin( + ["PitchedNormalLoftAccess", "PitchedThatched", "PitchedNormalNoLoftAccess", "PitchedWithSlopingCeiling"] + ), + "Pitched Roof", + sustainability_data["Roof Construction"] +) + +archetype_variables = [ + "Type", "Attachment", "Construction Years", "Wall Construction", "Wall Insulation", + "Roof Construction", "Roof Insulation Category", "Floor Construction", "Floor Insulation", + "Glazing Type", "Heating", "Boiler Efficiency Group", "Main Fuel Group", "Controls Adequacy", + "Floor Area Band" +] + +archetypes = sustainability_data[archetype_variables + ["UPRN"]].dropna().groupby(archetype_variables)[ + "UPRN"].nunique().reset_index().rename(columns={"UPRN": "Count"}).sort_values(by="Count", + ascending=False).reset_index( + drop=True) + +# We keep the most common archetypes, up to a cumulative proportion of 0.80 of the properties (the "85" in the names below does not match this threshold) +archetypes["Cumulative Count"] = archetypes["Count"].cumsum() +archetypes["Cumulative Proportion"] = archetypes["Cumulative Count"] / archetypes["Count"].sum() + +archetypes_85 = archetypes[archetypes["Cumulative Proportion"] <= 0.80] +archetypes_85["Archetypes_85_reference"] = archetypes_85.index + 1 +archetypes_85["Archetypes_85_reference"] = "Archetype_Sample_" + archetypes_85["Archetypes_85_reference"].astype(str) + +# We now keep only the properties that belong to one of the retained archetypes +sustainability_data = sustainability_data.merge( + archetypes_85, + on=archetype_variables, + how="inner" +) +# We take 1 random property, by archetype 85 reference +modelling_sample = 
sustainability_data.groupby("Archetypes_85_reference").apply( + lambda x: x.sample(1, random_state=42) +).reset_index(drop=True) + + +# Checking distributions +def compare_distributions(full_df, sample_df, column): + full_dist = full_df[column].value_counts(normalize=True) + sample_dist = sample_df[column].value_counts(normalize=True) + comparison = pd.concat([full_dist, sample_dist], axis=1, keys=['Full', 'Sample']).fillna(0) + return comparison + + +for col in archetype_variables: + print(f"--- {col} ---") + print(compare_distributions(sustainability_data, modelling_sample, col)) + +# Save this CSV as input +modelling_sample.to_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/modelling_sample.xlsx", +) +# Save the archetype definitions +archetypes_85.to_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/archetypes_85.xlsx", +) +# Save the full archetypes +archetypes.to_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/full_archetypes.xlsx", +) # Maps the property types to the format recognised by the EPC api property_type_map = {} diff --git a/etl/epc/settings.py b/etl/epc/settings.py index 47a75def..16619fa2 100644 --- a/etl/epc/settings.py +++ b/etl/epc/settings.py @@ -20,6 +20,7 @@ DATA_ANOMALY_MATCHES = { # certificate retrieval process is successfully completed. Mandatory data items cannot be applied # retrospectively to energy certificates lodged before the date of the change. "Not recorded", + "Not Recorded", # The data also contains DECs with an operational rating of ‘9999’ (a ‘default’ DEC). The production of a # ‘default’ DEC value was allowed to enable building occupiers, with poor quality or no energy data, # the opportunity to comply with the regulations. 
From April 2011 the ability to lodge a ‘default’ DEC was no diff --git a/etl/find_my_epc/RetrieveFindMyEpc.py b/etl/find_my_epc/RetrieveFindMyEpc.py index c9cca011..519c3e52 100644 --- a/etl/find_my_epc/RetrieveFindMyEpc.py +++ b/etl/find_my_epc/RetrieveFindMyEpc.py @@ -21,14 +21,16 @@ class RetrieveFindMyEpc: 'Chrome/111.0.0.0 Safari/537.36' } - def __init__(self, address: str, postcode: str): + def __init__(self, address: str, postcode: str, rrn: str = None): """ This class is tasked with retrieving the latest EPC data from the find my epc website :param address: The address of the property :param postcode: The postcode of the property + :param rrn: The RRN of the EPC (if known) """ self.address = address self.postcode = postcode + self.rrn = rrn self.address_cleaned = self.address.replace(",", "").replace(" ", "").lower() self.walls = [] @@ -286,54 +288,12 @@ class RetrieveFindMyEpc: :return: """ - postcode_input = self.postcode.replace(" ", "+") - postcode_search = self.SEARCH_POSTCODE_URL.format(postcode_input=postcode_input) - postcode_response = requests.get(postcode_search, headers=self.HEADERS) - - postcode_res = BeautifulSoup(postcode_response.text, features="html.parser") - rows = postcode_res.find_all('tr', class_='govuk-table__row') - - extracted_table = [] - for row in rows: - # Extract the address and URL - address_tag = row.find('a', class_='govuk-link') - if address_tag is None: - continue - extracted_address = None - extracted_address_url = None - if address_tag: - extracted_address = address_tag.text.strip() - extracted_address_url = address_tag['href'] - - extracted_address_cleaned = extracted_address.replace(",", "").replace(" ", "").lower() - if not extracted_address_cleaned.startswith(self.address_cleaned): - continue - - # If the address is a match, we can extract the data - - # Extract the expiry date - expiry_date_tag = row.find('td', class_='govuk-table__cell date') - expiry_date = None - if expiry_date_tag is not None: - expiry_date = 
expiry_date_tag.parent.find('span').text.strip() - - extracted_table.append( - { - "extracted_address": extracted_address, - "extracted_address_url": extracted_address_url, - "expiry_date": datetime.strptime(expiry_date, '%d %B %Y'), - } - ) - - if not extracted_table: - raise ValueError("No EPC found") - - if len(extracted_table) > 1: - # We take the one with the most recent expiry date - extracted_table = sorted(extracted_table, key=lambda x: x['expiry_date'], reverse=True) - - chosen_epc = self.BASE_ENERGY_URL + extracted_table[0]['extracted_address_url'] - epc_certificate = chosen_epc.split('/')[-1] + if self.rrn: + # We build the URL directly + epc_certificate = self.rrn + chosen_epc = f"{self.BASE_ENERGY_URL}/energy-certificate/{epc_certificate}" + else: + chosen_epc, epc_certificate = self._find_epc_page() address_response = requests.get(chosen_epc, headers=self.HEADERS) address_res = BeautifulSoup(address_response.text, features="html.parser") @@ -371,9 +331,12 @@ class RetrieveFindMyEpc: return all_find_my_epc_data - def retrieve_newest_find_my_epc_data(self, sap_2012_date=None, return_page=False): + def _find_epc_page(self): """ - For a post code and address, we pull out all the required data from the find my epc website + This function is used to find the EPC page source for a given address and postcode. + It is done by fetching the page, associating to the postcode and then matching the + addresses on the page to the address we have been given. 
+ :return: """ postcode_input = self.postcode.replace(" ", "+") @@ -398,6 +361,7 @@ class RetrieveFindMyEpc: extracted_address_cleaned = ( extracted_address.replace(",", "").replace(" ", "").lower() ) + if not extracted_address_cleaned.startswith(self.address_cleaned): continue @@ -427,8 +391,28 @@ class RetrieveFindMyEpc: chosen_epc = self.BASE_ENERGY_URL + extracted_table[0]['extracted_address_url'] epc_certificate = chosen_epc.split('/')[-1] - address_response = requests.get(chosen_epc, headers=self.HEADERS) - address_res = BeautifulSoup(address_response.text, features="html.parser") + return chosen_epc, epc_certificate + + def retrieve_newest_find_my_epc_data(self, sap_2012_date=None, return_page=False, epc_page_source=None, rrn=None): + """ + For a post code and address, we pull out all the required data from the find my epc website + """ + + if epc_page_source is None and rrn is None: + chosen_epc, rrn = self._find_epc_page() + address_response = requests.get(chosen_epc, headers=self.HEADERS) + epc_page_source = address_response.text + address_res = BeautifulSoup(address_response.text, features="html.parser") + elif self.rrn: + epc_certificate = self.rrn + chosen_epc = f"{self.BASE_ENERGY_URL}/energy-certificate/{epc_certificate}" + address_response = requests.get(chosen_epc, headers=self.HEADERS) + epc_page_source = address_response.text + address_res = BeautifulSoup(address_response.text, features="html.parser") + else: + if rrn is None: + raise ValueError("rrn must be provided if epc_page_source is provided") + address_res = BeautifulSoup(epc_page_source, features="html.parser") # Key data we want to retrieve: # 1) Rating @@ -563,8 +547,21 @@ class RetrieveFindMyEpc: # 5) Pull out the EPC data epc_data = self.extract_epc_data(address_res) + # Pull out the address information which can be found in the box with the class "epc-address" + # We split it up on break tags + addr = address_res.find("p", class_="epc-address").get_text(separator="\n").strip() + 
lines = addr.split("\n") + if len(lines) > 2: + address1 = lines[0] + address2 = lines[1] + postcode = lines[-1] + else: + address1 = lines[0] + address2 = "" + postcode = lines[-1] + resulting_data = { - 'epc_certificate': epc_certificate, + 'epc_certificate': rrn, 'current_epc_rating': current_rating.split(' ')[-6], 'current_epc_efficiency': current_sap, 'potential_epc_rating': potential_rating.split(' ')[-6], @@ -575,11 +572,16 @@ class RetrieveFindMyEpc: "epc_data": epc_data, **assessment_data, **low_carbon_energy_sources, + "page_source": epc_page_source, + # Add in address a postcode from the page - covers use cases where we are given RRN + "address1": address1, + "address2": address2, + "postcode": postcode, } if return_page: # We return the page text as well, which can be parsed again later - return resulting_data, postcode_response.text + return resulting_data, epc_page_source return resulting_data @@ -721,11 +723,15 @@ class RetrieveFindMyEpc: return formatted_recommendations @classmethod - def get_from_epc(cls, epc): + def get_from_epc(cls, epc, epc_page_source=None, rrn=None): + + if epc_page_source is not None and rrn is None: + raise ValueError("rrn must be provided if epc_page_source is provided") + # Attempt both methods: try: searcher = cls(address=epc["address"], postcode=epc["postcode"]) - find_epc_data = searcher.retrieve_newest_find_my_epc_data() + find_epc_data = searcher.retrieve_newest_find_my_epc_data(epc_page_source=epc_page_source, rrn=rrn) except Exception as e: logger.error(f"Error retrieving find my epc data: {e}") @@ -733,7 +739,7 @@ class RetrieveFindMyEpc: address1 = ",".join(epc["address"].split(",")[:-1]) try: searcher = cls(address=address1, postcode=epc["postcode"]) - find_epc_data = searcher.retrieve_newest_find_my_epc_data() + find_epc_data = searcher.retrieve_newest_find_my_epc_data(epc_page_source=epc_page_source, rrn=rrn) logger.info("Successfully retrieved find my epc data using trimmed address") except Exception as e2: 
logger.error(f"Error retrieving find my epc data using trimmed address: {e2}") @@ -746,7 +752,7 @@ class RetrieveFindMyEpc: address1 = epc["address1"] # We attempt with the backup add searcher = cls(address=address1, postcode=epc["postcode"]) - find_epc_data = searcher.retrieve_newest_find_my_epc_data() + find_epc_data = searcher.retrieve_newest_find_my_epc_data(epc_page_source=epc_page_source, rrn=rrn) logger.info("Successfully retrieved find my epc data using backup address") non_invasive_recommendations = { @@ -765,4 +771,9 @@ class RetrieveFindMyEpc: **find_epc_data["epc_data"], } - return non_invasive_recommendations, patch + page_source = { + "rrn": find_epc_data["epc_certificate"], + "page_source": find_epc_data["page_source"] + } + + return non_invasive_recommendations, patch, page_source