From 30847ded90ef5c7b7442da62a5b60bd5d74833f7 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 13 May 2025 11:53:38 +0100 Subject: [PATCH] debugging incorrect fetching of flat data --- .idea/Model.iml | 2 +- .idea/misc.xml | 2 +- asset_list/AssetList.py | 12 +- asset_list/app.py | 541 ++++--------------------- asset_list/mappings/built_form.py | 69 +++- asset_list/mappings/heating_systems.py | 5 +- asset_list/mappings/property_type.py | 28 +- backend/SearchEpc.py | 33 +- etl/customers/mhs/flag_pilot.py | 134 ++++++ 9 files changed, 355 insertions(+), 471 deletions(-) create mode 100644 etl/customers/mhs/flag_pilot.py diff --git a/.idea/Model.iml b/.idea/Model.iml index 96ad7a95..df6c4faa 100644 --- a/.idea/Model.iml +++ b/.idea/Model.iml @@ -7,7 +7,7 @@ - + diff --git a/.idea/misc.xml b/.idea/misc.xml index fb10c6b0..50cad4ca 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -3,7 +3,7 @@ - + diff --git a/asset_list/AssetList.py b/asset_list/AssetList.py index 94c3c235..b7dd8d70 100644 --- a/asset_list/AssetList.py +++ b/asset_list/AssetList.py @@ -739,6 +739,11 @@ class AssetList: self.standardised_asset_list[self.landlord_year_built].replace(self.DATETIME_REMAP) ) + no_data_codes = {"No Data": None} + self.standardised_asset_list[self.landlord_year_built] = ( + self.standardised_asset_list[self.landlord_year_built].replace(no_data_codes) + ) + self.standardised_asset_list[self.landlord_year_built] = pd.to_datetime( self.standardised_asset_list[self.landlord_year_built] ) @@ -759,7 +764,8 @@ class AssetList: "This cell has an external reference that can't be shown or edited. 
Editing this cell will " "remove the external reference.", "ND", - 'PIMSS EMPTY' + 'PIMSS EMPTY', + "UNKNOWN" ] if pd.isnull(date_str) or date_str in known_errors or (date_str == 0): @@ -1229,11 +1235,11 @@ class AssetList: elif self.old_format_non_intrusives_present: non_intrusives_wall_filter = ( self.standardised_asset_list['non-intrusives: WFT Findings'].str.lower().str.strip().isin( - ["empty cavity", "partial fill"] + ["empty cavity", "partial fill", "empty", "EMPTY CAVITY 70MM", "partial"] ) | ( ( self.standardised_asset_list['non-intrusives: WFT Findings'] - .str.lower().str.strip().str.contains("empty cavity|partial fill") & + .str.lower().str.strip().str.contains("empty cavity|partial fill|empty|partial") & ~self.standardised_asset_list['non-intrusives: WFT Findings'] .astype(str).str.lower().str.strip().str.contains("major access issues") ) diff --git a/asset_list/app.py b/asset_list/app.py index be2ef031..d5ce7226 100644 --- a/asset_list/app.py +++ b/asset_list/app.py @@ -62,22 +62,83 @@ def app(): Property UPRN """ - # Community Housing new list - data_folder = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Community Housing/Programme " - "Reconciliation") - data_filename = "SUB EPC C to Domna.xlsx" - sheet_name = "Sheet1" - postcode_column = 'POSTCODE' - fulladdress_column = None - address1_column = "ADDRESS" - address1_method = None - address_cols_to_concat = ["ADDRESS", "ESTATE", "TOWN"] + # Thurrock + data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thurrock" + data_filename = "THURROCK COUNCIL.xlsx" + sheet_name = "Assets" + postcode_column = 'Postcode' + fulladdress_column = "Full Address" + address1_column = None + address1_method = "house_number_extraction" + address_cols_to_concat = [] missing_postcodes_method = None - landlord_year_built = "BUILD DATE" + landlord_year_built = "Construction Date" landlord_os_uprn = None - landlord_property_type = "PROPERTY TYPE" - landlord_built_form = "PROPERTY TYPE" - 
landlord_wall_construction = "CONSTRUCTION TYPE" + landlord_property_type = "Property Type" + landlord_built_form = "Property Subtype" + landlord_wall_construction = None + landlord_roof_construction = None + landlord_heating_system = "Main Heating Type" + landlord_existing_pv = None + landlord_property_id = "Property Reference" + landlord_sap = None + outcomes_filename = [] + outcomes_sheetname = [] + outcomes_postcode = [] + outcomes_houseno = [] + outcomes_id = [] + outcomes_address = [] + master_filepaths = [] + master_to_asset_list_filepath = None + phase = False + ecosurv_landlords = None + + # Medway + data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Medway" + data_filename = "MEDWAY Asset List.xlsx" + sheet_name = "Asset list" + postcode_column = 'Postcode' + fulladdress_column = None + address1_column = "House Number" + address1_method = None + address_cols_to_concat = ["House Number", "Street 1"] + missing_postcodes_method = None + landlord_year_built = "Year Built" + landlord_os_uprn = None + landlord_property_type = "Property Type - Academy" + landlord_built_form = "Property Type - Academy" + landlord_wall_construction = None + landlord_roof_construction = None + landlord_heating_system = None + landlord_existing_pv = None + landlord_property_id = "Row ID" + landlord_sap = None + outcomes_filename = [] + outcomes_sheetname = [] + outcomes_postcode = [] + outcomes_houseno = [] + outcomes_id = [] + outcomes_address = [] + master_filepaths = [] + master_to_asset_list_filepath = None + phase = False + ecosurv_landlords = None + + # MHS + data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MHS" + data_filename = "MHS HOMES (Full Asset List) - for programme build.xlsx" + sheet_name = "Sheet1" + postcode_column = 'Postcode' + fulladdress_column = "FullAddress" + address1_column = None + address1_method = "house_number_extraction" + address_cols_to_concat = [] + missing_postcodes_method = None + landlord_year_built = 
"BuiltInYear" + landlord_os_uprn = None + landlord_property_type = "AssetType" + landlord_built_form = "PropertyType" + landlord_wall_construction = None landlord_roof_construction = None landlord_heating_system = None landlord_existing_pv = None @@ -94,459 +155,33 @@ def app(): phase = False ecosurv_landlords = None - # Unitas - data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Unitas" - data_filename = "unitas_asset_list_for_analysis.xlsx" - sheet_name = "Sheet1" - postcode_column = 'Post Code' - fulladdress_column = "Address Line 1" - address1_column = "Address Line 1" - address1_method = None - address_cols_to_concat = [] - missing_postcodes_method = None - landlord_year_built = "built year" - landlord_os_uprn = None - landlord_property_type = "Property Type" - landlord_built_form = "Expanded Property Type" - landlord_wall_construction = None - landlord_roof_construction = "loft insulation" - landlord_heating_system = "Bolier Make" - landlord_existing_pv = None - landlord_property_id = "Property Reference" - landlord_sap = "Sap Rating" - outcomes_filename = [ - "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Unitas/Unitas - All outcomes - 24.04.2025.xlsx", - ] - outcomes_sheetname = ["Feedback"] - outcomes_postcode = ["Postcode"] - outcomes_houseno = ["No."] - outcomes_id = [None] - outcomes_address = ["Address"] - master_filepaths = [ - os.path.join(data_folder, "Submissions ECO 3.csv"), - os.path.join(data_folder, "Submissions ECO 4 - PHASE 1.csv"), - os.path.join(data_folder, "Submissions ECO 4 - PHASE 2.csv") - ] - master_to_asset_list_filepath = None - phase = False - ecosurv_landlords = "unitas|everill|baskeyfield" - - # LHP: - data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/LHP" - data_filename = "LHP.xlsx" - sheet_name = "Decent Homes Stock" - postcode_column = 'Postcode' - fulladdress_column = "Address" - address1_column = None - address1_method = "house_number_extraction" - address_cols_to_concat = [] 
- missing_postcodes_method = None - landlord_year_built = "Build Date" - landlord_os_uprn = None - landlord_property_type = "Property Type" - landlord_built_form = None - landlord_wall_construction = None - landlord_roof_construction = None - landlord_heating_system = "Heating Type" - landlord_existing_pv = None - landlord_property_id = "Property ID" - landlord_sap = None - outcomes_filename = [ - "/Users/khalimconn-kowlessar/Documents/hestia/Customers/LHP/LHP Outcomes.xlsx", - "/Users/khalimconn-kowlessar/Documents/hestia/Customers/LHP/Lincolnshire Housing Partnership - Outcomes 20th " - "Feb 2024.xlsx", - ] - outcomes_sheetname = ["Sheet1", "LHP"] - outcomes_postcode = ["Postcode", "Postcode"] - outcomes_houseno = ["No.", "No."] - outcomes_id = [None, None] - outcomes_address = ["Address", "Address"] - master_filepaths = [os.path.join(data_folder, "LHP Rolling Master for analysis.csv")] - master_to_asset_list_filepath = None - phase = False - ecosurv_landlords = "lhp" - - # Soverign - data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Sovereign" - data_filename = "Warmfront - Quote for CWI.xlsx" - sheet_name = "Sheet2" - postcode_column = 'Postcode' - fulladdress_column = None - address1_column = "Address Line 1" - address1_method = None - address_cols_to_concat = ["Address Line 1", "Address Line 2", "Address Line 3"] - missing_postcodes_method = None - landlord_year_built = None - landlord_os_uprn = None - landlord_property_type = None - landlord_built_form = None - landlord_wall_construction = None - landlord_roof_construction = None - landlord_heating_system = None - landlord_existing_pv = None - landlord_property_id = "ID" - landlord_sap = None - outcomes_filename = None - outcomes_sheetname = None - outcomes_postcode = None - outcomes_houseno = None - outcomes_id = None - outcomes_address = None - master_filepaths = [] - master_to_asset_list_filepath = None - phase = False - ecosurv_landlords = None - - # NCHA - data_folder = 
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/NCHA" - data_filename = "Energy Info Copy.xlsx" - sheet_name = "Data" - postcode_column = 'Postcode' - fulladdress_column = "Address" - address1_column = None - address1_method = "house_number_extraction" - address_cols_to_concat = [] - missing_postcodes_method = None - landlord_year_built = "Build Date (HAR10)" - landlord_os_uprn = None - landlord_property_type = "Property Type (HAR10)" - landlord_built_form = "Build Form (EPC)" - landlord_wall_construction = "Wall Description" - landlord_roof_construction = None - landlord_heating_system = "Heating System" - landlord_existing_pv = None - landlord_property_id = "Place ref" - landlord_sap = "EPC SAP" - outcomes_filename = None - outcomes_sheetname = None - outcomes_postcode = None - outcomes_houseno = None - outcomes_id = None - outcomes_address = None - master_filepaths = [] - master_to_asset_list_filepath = None - phase = False - ecosurv_landlords = None - - # Torus - data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Torus/Phase 1" - data_filename = "Torus Property Asset List - Phase 1.xlsx" - sheet_name = "TORUS" - postcode_column = 'Postcode' - fulladdress_column = None - address1_column = "AddressLine1" - address1_method = None - address_cols_to_concat = ["AddressLine1", "AddressLine2", "AddressLine3"] - missing_postcodes_method = None - landlord_year_built = "Property Age" - landlord_os_uprn = "NatUPRN" - landlord_property_type = "Property Type" - landlord_built_form = "Built Form" - landlord_wall_construction = "Wall Construction" - landlord_roof_construction = "Roof Construction" - landlord_heating_system = "Space Heating Source" - landlord_existing_pv = "Low Carbon Technology (Solar PV)" - landlord_property_id = "UPRN" - landlord_sap = "SAP Score" - outcomes_filename = None - outcomes_sheetname = None - outcomes_postcode = None - outcomes_houseno = None - outcomes_id = None - outcomes_address = None - master_filepaths = [] - 
master_to_asset_list_filepath = None - phase = True - # Southern Midlands - data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Southern/Midlands Properties - Apr 2025" - data_filename = "Southern Housing Midlands Property List - combined.xlsx" - sheet_name = "Sheet 1" - postcode_column = 'Post Code' - fulladdress_column = "Address" - address1_column = None - address1_method = "house_number_extraction" - address_cols_to_concat = [] - missing_postcodes_method = None - landlord_year_built = "Age_1" - landlord_os_uprn = None - landlord_property_type = "Prop_Type" - landlord_built_form = "Prop_Type" - landlord_wall_construction = "Walls_P" - landlord_heating_system = "Heating System" - landlord_existing_pv = None - landlord_property_id = "AssetID" - outcomes_filename = None - outcomes_sheetname = None - outcomes_postcode = None - outcomes_houseno = None - outcomes_id = None - outcomes_address = None - master_filepaths = [] - master_to_asset_list_filepath = None - - # PFP London - data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/London" - data_filename = "PFP AREAS SURROUNDING LONDON - JAY, RUTH & LANE.xlsx" - sheet_name = "PFP SURROUNDING LONDON" - postcode_column = 'Postcode' - fulladdress_column = None - address1_column = "AddressLine1" - address1_method = None - address_cols_to_concat = ["AddressLine1", "AddressLine2", "AddressLine3"] - missing_postcodes_method = None - landlord_year_built = None - landlord_os_uprn = None - landlord_property_type = "Archetype (PFP)" - landlord_built_form = "Archetype (PFP)" - landlord_wall_construction = None - landlord_heating_system = None - landlord_existing_pv = None - landlord_property_id = "Uprn" - outcomes_filename = None - outcomes_sheetname = None - outcomes_postcode = None - outcomes_houseno = None - outcomes_id = None - master_filepaths = [] - master_to_asset_list_filepath = None - - # PFP North-West - data_folder = 
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/North-West" - data_filename = "Places for People NORTH WEST - INSPECTIONS MASTER - UPDATE.xlsx" - sheet_name = "CHECKED" - postcode_column = 'Postcode' - fulladdress_column = None - address1_column = "AddressLine1" - address1_method = None - address_cols_to_concat = ["AddressLine1", "AddressLine2", "AddressLine3"] - missing_postcodes_method = None - landlord_year_built = None - landlord_os_uprn = None - landlord_property_type = "Archetype (PFP)" - landlord_built_form = "Archetype (PFP)" - landlord_wall_construction = None - landlord_heating_system = None - landlord_existing_pv = None - landlord_property_id = "Uprn" - outcomes_filename = None - outcomes_sheetname = None - outcomes_postcode = None - outcomes_houseno = None - outcomes_id = None - master_filepaths = [] - master_to_asset_list_filepath = None - - # PFP North-East - data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/North-East" - data_filename = "Places for People NORTH EAST - INSPECTIONS MASTER.xlsx" - sheet_name = "CHECKED" - postcode_column = 'Postcode' - fulladdress_column = None - address1_column = "AddressLine1" - address1_method = None - address_cols_to_concat = ["AddressLine1", "AddressLine2", "AddressLine3"] - missing_postcodes_method = None - landlord_year_built = None - landlord_os_uprn = None - landlord_property_type = "Archetype (PFP)" - landlord_built_form = "Archetype (PFP)" - landlord_wall_construction = None - landlord_heating_system = None - landlord_existing_pv = None - landlord_property_id = "Uprn" - outcomes_filename = None - outcomes_sheetname = None - outcomes_postcode = None - outcomes_houseno = None - outcomes_id = None - master_filepaths = [] - master_to_asset_list_filepath = None - - # PFP East - data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/East" - data_filename = "PFP EAST - Master - DN LN NG NR PE POSTCODES.xlsx" - sheet_name 
= "PFP EAST" - postcode_column = 'Postcode' - fulladdress_column = None - address1_column = "AddressLine1" - address1_method = None - address_cols_to_concat = ["AddressLine1", "AddressLine2", "AddressLine3"] - missing_postcodes_method = None - landlord_year_built = None - landlord_os_uprn = None - landlord_property_type = "Archetype (PFP)" - landlord_built_form = "Archetype (PFP)" - landlord_wall_construction = None - landlord_heating_system = None - landlord_existing_pv = None - landlord_property_id = "Uprn" - outcomes_filename = None - outcomes_sheetname = None - outcomes_postcode = None - outcomes_houseno = None - outcomes_id = None - master_filepaths = [] - master_to_asset_list_filepath = None - - # Wates - data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Wates - " - data_filename = "ECO 4 Wates.xlsx" - sheet_name = "Roadmap Homes" - postcode_column = 'Postcode' - fulladdress_column = None - address1_column = "Address Line 1" - address1_method = None - address_cols_to_concat = ["Address Line 1", "Address Line 2", "Address Line 3"] - missing_postcodes_method = None - landlord_year_built = "Build Year" - landlord_os_uprn = None - landlord_property_type = "Archetype" - landlord_built_form = "Archetype" - landlord_wall_construction = "Wall" - landlord_heating_system = "Heating Type" - landlord_existing_pv = None - landlord_property_id = "UPRN" - outcomes_filename = None - outcomes_sheetname = None - outcomes_postcode = None - outcomes_houseno = None - master_filepaths = [] - master_to_asset_list_filepath = None - - # Ealing - # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Ealing/Programme data - 04032025" - # data_filename = "Ealing BC - Property Plus Tenure 25.02.2025.xlsx" - # sheet_name = "IGNORE - FULL MAIN" - # postcode_column = 'Postcode' + # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Southern/Midlands Properties - Apr 2025" + # data_filename = "Southern Housing Midlands Property List - 
combined.xlsx" + # sheet_name = "Sheet 1" + # postcode_column = 'Post Code' # fulladdress_column = "Address" # address1_column = None - # address1_method = "first_word" + # address1_method = "house_number_extraction" # address_cols_to_concat = [] # missing_postcodes_method = None - # landlord_year_built = "Year Built" + # landlord_year_built = "Age_1" # landlord_os_uprn = None - # landlord_property_type = "Property Type Code" - # landlord_wall_construction = None - # landlord_heating_system = None + # landlord_property_type = "Prop_Type" + # landlord_built_form = "Prop_Type" + # landlord_wall_construction = "Walls_P" + # landlord_heating_system = "Heating System" # landlord_existing_pv = None - # landlord_property_id = "Property ref" - - # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Colchester" - # data_filename = "Warmfront data- Colchester Borough Homes (Complete).xlsx" - # sheet_name = "Sheet1" - # postcode_column = 'Full Address.1' - # fulladdress_column = "Full Address" - # address1_column = None - # address1_method = "first_word" - # address_cols_to_concat = [] - # missing_postcodes_method = None - # landlord_year_built = "Build Date" - # landlord_os_uprn = None - # landlord_property_type = "Property Type" - # landlord_wall_construction = "Wallinsul" - # landlord_heating_system = "HeatSorc" - # landlord_existing_pv = None - # landlord_property_id = "Property Reference" + # landlord_property_id = "AssetID" # outcomes_filename = None # outcomes_sheetname = None # outcomes_postcode = None # outcomes_houseno = None + # outcomes_id = None + # outcomes_address = None # master_filepaths = [] # master_to_asset_list_filepath = None - # For Westward - data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Westward" - data_filename = "WESTWARD - completed list - 20.03.2025.xlsx" - sheet_name = "Sheet1" - postcode_column = "WFT EDIT Postcode" - fulladdress_column = "Address" - address1_column = None - address1_method = 
"house_number_extraction" - address_cols_to_concat = [] - missing_postcodes_method = None - landlord_year_built = "Build date" - landlord_os_uprn = "UPRN" - landlord_property_type = "Location type" - landlord_built_form = None - landlord_wall_construction = "Wall Construction (EPC)" - landlord_heating_system = "Heat Source" - landlord_existing_pv = "PV (Y/N)" - landlord_property_id = "Place ref" - landlord_roof_construction = None - landlord_sap = None - outcomes_filename = None - outcomes_sheetname = None - outcomes_postcode = None - outcomes_houseno = None - master_filepaths = [] - master_to_asset_list_filepath = None - outcomes_id = None - outcomes_address = None - phase = False - ecosurv_landlords = None - - # For ACIS - programme re-build - # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/ACIS/ACIS Full Programme Review March 2025" - # data_filename = "ACIS asset list.xlsx" - # sheet_name = "Assets" - # address1_column = "House No" - # postcode_column = "Postcode" - # landlord_property_id = "UPRN" - # fulladdress_column = None - # address_cols_to_concat = ["House No", "Street", "Town"] - # missing_postcodes_method = None - # address1_method = None - # landlord_year_built = "YEAR BUILT" - # landlord_os_uprn = None - # landlord_property_type = "Property type" - # landlord_built_form = None - # landlord_wall_construction = "Wall Constuction" - # landlord_roof_construction = None - # landlord_sap = None - # landlord_heating_system = "Heating" - # landlord_existing_pv = None - # outcomes_filename = "ACIS Group - 25.11.2024 - outcomes.xlsx" - # outcomes_sheetname = "Feedback" - # outcomes_postcode = "Postcode" - # outcomes_address = "Address" - # outcomes_houseno = "No" - # outcomes_id = None - # master_filepaths = [ - # os.path.join(data_folder, "ECO 3 -Table 1.csv"), - # os.path.join(data_folder, "ECO 4 -Table 1.csv"), - # ] - # master_to_asset_list_filepath = None - # phase = False - # ecosurv_landlords = None - - # For plus dane - 
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Plus Dane" - data_filename = "PLUS DANE Asset List - for analysis.xlsx" - sheet_name = "Asset List" - address1_column = " Address" - postcode_column = " Postcode" - landlord_property_id = "UPRN" - fulladdress_column = " Address" - address_cols_to_concat = [] - missing_postcodes_method = None - address1_method = None - landlord_year_built = "Property Age" - landlord_os_uprn = None - landlord_property_type = "Property Type" - landlord_wall_construction = "Landlord Wall Full" - landlord_heating_system = "Landlord Heating" - landlord_existing_pv = None - outcomes_filename = "plus dane outcomes.xlsx" - outcomes_sheetname = "EVERYTHING" - outcomes_postcode = "Post Code" - outcomes_houseno = "Numb." - master_filepaths = [ - os.path.join(data_folder, "JJC Rolling Master.csv"), - os.path.join(data_folder, "SCIS Rolling Master.csv"), - ] - master_to_asset_list_filepath = os.path.join(data_folder, "surveys_to_assets.csv") - # Maps addresses to uprn in problematic cases manual_uprn_map = {} diff --git a/asset_list/mappings/built_form.py b/asset_list/mappings/built_form.py index ffd698b3..116c3203 100644 --- a/asset_list/mappings/built_form.py +++ b/asset_list/mappings/built_form.py @@ -5,7 +5,7 @@ STANDARD_BUILT_FORMS = { # Houses "end-terrace", "semi-detached", "detached", "mid-terrace", # Flats - "ground floor", "mid-floor", "top-floor", "basement" + "ground floor", "mid-floor", "top-floor", "basement", "low rise", "high rise", } BUILT_FORM_MAPPINGS = { @@ -265,5 +265,70 @@ BUILT_FORM_MAPPINGS = { 'FIRST FLOOR FLAT': 'ground floor', 'GROUND FL MAISONETTE': 'ground floor', 'HOUSE 2 LIVING ROOMS': 'unknown', - 'FLAT OVER SHOP': 'unknown' + 'FLAT OVER SHOP': 'unknown', + + '4 Bed Detached House': 'detached', + '2 Bed Detached House': 'detached', + '3 Bed Detached Bungalow': 'detached', + '1 Bed Semi-Detached House': 'semi-detached', + '2 Bed Semi-Detached House': 'semi-detached', + '2 Bed Detached Bungalow': 
'detached', + '1 Bed Mid Terrace Bungalow': 'mid-terrace', + '4 Bed Semi-Detached Bungalow': 'semi-detached', + '3 Bed Mid Terrace Bungalow': 'mid-terrace', + '3 Bed Semi-Detached Bungalow': 'semi-detached', + '3 Bed Mid Terrace House': 'mid-terrace', + '2 Bed Mid Terrace House': 'mid-terrace', + '3 Bed Detached House': 'detached', + '2 Bed Semi-Detached Bungalow': 'semi-detached', + '5 Bed Mid Terrace House': 'mid-terrace', + '2 Bed Mid Terrace Bungalow': 'mid-terrace', + '3 Bed Semi-Detached House': 'semi-detached', + '1 Bed Semi-Detached Bungalow': 'semi-detached', + '4 Bed Mid Terrace House': 'mid-terrace', + '1 Bed Detached Bungalow': 'detached', + '5 Bed Semi-Detached House': 'semi-detached', + '6 Bed Detached House': 'detached', + '1 Bed Mid Terrace House': 'mid-terrace', + '4 Bed Semi-Detached House': 'semi-detached', + 'TBA': 'unknown', + '1 Bed EOT House': 'end-terrace', + '3 Bed Flat': 'unknown', + '5 Bed EOT House': 'end-terrace', + '1 Bed EOT Bungalow': 'end-terrace', + '2 Bed EOT House': 'end-terrace', + '1 Bed Studio Flat': 'unknown', + '3 Bed Maison': 'unknown', + 'Commercial Letting': 'unknown', + '4 Bed Maison': 'unknown', + '2 Bed Flat': 'unknown', + '3 Bed EOT House': 'end-terrace', + '2 Bed Maison': 'unknown', + '4 Bed EOT House': 'end-terrace', + '1 Bed Flat': 'unknown', + '3 Bed EOT Bungalow': 'end-terrace', + '1 Bed Maison': 'unknown', + '2 Bed EOT Bungalow': 'end-terrace', + + 'Bungalow detached': 'detached', + 'Bungalow semi detached': 'semi-detached', + 'Sheltered bungalow semi detached': 'semi-detached', + 'Bedsit bungalow semi detached': 'semi-detached', + 'Semi detached house': 'semi-detached', + 'Bedsit bungalow terraced': 'mid-terrace', 'Terraced house': 'mid-terrace', + 'Sheltered flat': 'unknown', + 'APD Bungalow': 'unknown', + 'Flat with partition': 'unknown', + 'APD flat': 'unknown', + 'Sheltered warden flat': 'unknown', + 'Sheltered bedsit': 'unknown', + 'Sheltered bungalow terraced': 'mid-terrace', + 'Block': 'unknown', + 
'Bungalow terraced': 'mid-terrace', + 'Maisonette flat': 'unknown', + 'Sheltered bedsit disabled': 'unknown', + 'Bedsit Flat': 'unknown', + 'Low Rise': 'low rise', + 'Upper Floor': 'top-floor', + 'High Rise': 'high rise', } diff --git a/asset_list/mappings/heating_systems.py b/asset_list/mappings/heating_systems.py index 463e2cef..92f59f2c 100644 --- a/asset_list/mappings/heating_systems.py +++ b/asset_list/mappings/heating_systems.py @@ -288,5 +288,8 @@ HEATING_MAPPINGS = { 'No Gas Boiler': 'no heating', 'Back Boiler': 'solid fuel', "This cell has an external reference that can't be shown or edited. Editing this cell will remove the external " - "reference.": 'unknown' + "reference.": 'unknown', + 'Communal Heating': 'communal heating', + 'No Data': 'unknown', + 'Boiler System': 'gas condensing boiler', } diff --git a/asset_list/mappings/property_type.py b/asset_list/mappings/property_type.py index d455d312..b705d6ef 100644 --- a/asset_list/mappings/property_type.py +++ b/asset_list/mappings/property_type.py @@ -227,6 +227,30 @@ PROPERTY_MAPPING = { 'FIRST FLOOR FLAT': 'flat', 'GROUND FL MAISONETTE': 'maisonette', 'HOUSE 2 LIVING ROOMS': 'house', - 'FLAT OVER SHOP': 'flat' - + 'FLAT OVER SHOP': 'flat', + 'House With Integral Garage': 'house', + 'Flat Over Parking/Accessway': 'flat', + 'Flat Over Binstore': 'flat', + 'Flat Over Garage': 'flat', + 'House With Independent Garage': 'house', + 'Studio': 'flat', + 'Bedsit bungalow terraced': 'bedsit', + 'Terraced house': 'house', + 'Sheltered flat': 'flat', + 'APD Bungalow': 'bungalow', + 'Flat with partition': 'flat', + 'Bungalow detached': 'bungalow', + 'APD flat': 'flat', + 'Sheltered warden flat': 'flat', + 'Bungalow semi detached': 'bungalow', + 'Sheltered bedsit': 'bedsit', + 'Sheltered bungalow terraced': 'bungalow', + 'Sheltered bungalow semi detached': 'bungalow', + 'Bungalow terraced': 'bungalow', + 'Maisonette flat': 'maisonette', + 'Sheltered bedsit disabled': 'bedsit', + 'Bedsit bungalow semi detached': 
'bedsit', + 'Bedsit Flat': 'bedsit', + 'Semi detached house': 'house', + 'Unit': 'unknown' } diff --git a/backend/SearchEpc.py b/backend/SearchEpc.py index 96b7c5de..e19a776d 100644 --- a/backend/SearchEpc.py +++ b/backend/SearchEpc.py @@ -172,7 +172,7 @@ class SearchEpc: self.address1 = address1 self.postcode = postcode - self.full_address = full_address + self.full_address = full_address if full_address is not None else self.address1 self.uprn = uprn self.house_number = self.get_house_number(self.address1) self.numeric_house_number = self.extract_numeric_housenumber_part(self.house_number) @@ -265,9 +265,7 @@ class SearchEpc: for retry in range(self.max_retries): try: - response = self.client.domestic.call(method="get", url=url, params=params) - if response: self.data = response return { @@ -368,8 +366,11 @@ class SearchEpc: unique_property_types = {r["property-type"] for r in rows} # We allow for variation in property type across flats/maisonettes - if (len(uprns) == 1) and ((len(unique_property_types) == 1) or unique_property_types == {"Flat", "Maisonette"}): - return rows + # If we know that we have a flat/maisonette, we allow for both property types + if property_type in ["Flat", "Maisonette"]: + if ((len(uprns) == 1) and ((len(unique_property_types) == 1) + ) or unique_property_types == {"Flat", "Maisonette"}): + return rows if property_type is not None: # We can do a filter on the property type @@ -388,11 +389,27 @@ class SearchEpc: # We check if post town is included in the address if any([r["posttown"].lower() in address.lower() for r in rows]): - best_match = process.extractOne( + best_match1 = process.extractOne( address, [", ".join([r["address"], r["posttown"]]) for r in rows], score_cutoff=0 ) - # Get all of the scores - rows_filtered = [r for r in rows if ", ".join([r["address"], r["posttown"]]) == best_match[0]] + best_match2 = process.extractOne( + address, [", ".join([r["address"]]) for r in rows], score_cutoff=0 + ) + # Pick the largest score + 
if best_match1[1] >= best_match2[1]: + # Get all of the scores + rows_filtered = [r for r in rows if ", ".join([r["address"], r["posttown"]]) == best_match1[0]] + else: + # Get all of the scores + rows_filtered = [r for r in rows if r["address"] == best_match2[0]] + + # If we have multiple, we filter on newest lodgment date + if len(rows_filtered) > 1: + rows_filtered = [ + r for r in rows_filtered + if r["lodgement-datetime"] == max([x["lodgement-datetime"] for x in rows_filtered]) + ] + else: best_match = process.extractOne(address, [r["address"] for r in rows], score_cutoff=0) # Get the UPRN for the best match
diff --git a/etl/customers/mhs/flag_pilot.py b/etl/customers/mhs/flag_pilot.py
new file mode 100644
index 00000000..f96f965d
--- /dev/null
+++ b/etl/customers/mhs/flag_pilot.py
@@ -0,0 +1,134 @@
+"""
+On the standardised asset list, this script will flag the pilot assets.
+
+It also flags the phase 1 assets, attaches the CIGA guarantee check results and saves the asset list together
+with the flat data to a new workbook.
+"""
+import pandas as pd
+import os
+import numpy as np
+from tqdm import tqdm
+
+PILOT_PROJECT_CODE = "MHS-000-PILOT"
+MHS_PHASE_1_PROJECT_CODE = "MHS-001"
+
+# Folder holding the MHS workbooks. Set MHS_DATA_DIR to run this on another machine
+DATA_DIR = os.environ.get("MHS_DATA_DIR", "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MHS")
+
+# Both tabs live in the same workbook, so it is opened once
+sheets = pd.read_excel(
+    os.path.join(DATA_DIR, "MHS HOMES (Full Asset List) - for programme build - Standardised.xlsx"),
+    sheet_name=["Standardised Asset List", "Flat Data"],
+)
+asset_list = sheets["Standardised Asset List"]
+flat_data = sheets["Flat Data"]
+
+pilot = pd.read_excel(
+    os.path.join(DATA_DIR, "MHS 334 x Pilot reviewed - KB notes end column.xlsx")
+)
+ciga_checks = pd.read_excel(
+    os.path.join(DATA_DIR, "MHS CIGA Check 03042025_201.xlsx")
+)
+ciga_checks["row_id"] = ciga_checks.index
+
+is_pilot = asset_list["landlord_property_id"].isin(pilot["Place Reference"])
+has_reason = asset_list["cavity_reason"].notna() | asset_list["solar_reason"].notna()
+
+asset_list["project_code"] = None
+asset_list["project_code"] = np.where(is_pilot, PILOT_PROJECT_CODE, asset_list["project_code"])
+# We now flag the next phase of the programme: a cavity or solar reason is present, and not already in the pilot
+asset_list["project_code"] = np.where(
+    has_reason & asset_list["project_code"].isnull(),
+    MHS_PHASE_1_PROJECT_CODE,
+    asset_list["project_code"],
+)
+
+# We now flag the CIGA checks
+# Maps a CIGA "Matched Address" straight to a landlord property id, bypassing the address matching
+manual_fixes = {
+    "123 Columbine Close, Rochester": "2213861230"
+}
+
+# Normalised lookup columns, built once instead of once per CIGA row
+landlord_id = asset_list["landlord_property_id"].astype(str)
+address_1 = asset_list["domna_address_1"].astype(str)
+full_address = asset_list["domna_full_address"].str.lower().str.replace(",", "", regex=False)
+
+
+def _address_contains(text):
+    """Return a mask of the assets whose full address contains text, ignoring case and commas."""
+    if not isinstance(text, str) or not text.strip():
+        # A missing value cannot identify a property, and an empty needle would match every row
+        return pd.Series(False, index=asset_list.index)
+    # regex=False: addresses are literal text, so "(", "." or "+" must not be read as a pattern
+    return full_address.str.contains(text.lower().replace(",", ""), regex=False, na=False)
+
+
+def _match_property_id(row):
+    """
+    Return the domna_property_id of the asset that one row of ciga_checks refers to.
+
+    Matching is tried in order: a manual fix, then postcode + address line 1 using "Postcode", then using
+    "Matched Postcode". When several assets remain, the full address text decides ("Matched Address", then
+    "Address2").
+
+    :param row: A row of ciga_checks
+    :return: The domna_property_id of the single matching asset
+    :raises ValueError: If the row does not resolve to exactly one asset
+    """
+    fixed_id = manual_fixes.get(row["Matched Address"])
+    if fixed_id:
+        # Manual fixes are taken as given: the first asset carrying that landlord id is used
+        df = asset_list[landlord_id == fixed_id].head(1)
+    else:
+        same_address_1 = address_1 == str(row["Address1"])
+        df = asset_list[(asset_list["domna_postcode"] == row["Postcode"]) & same_address_1]
+        if df.empty:
+            df = asset_list[(asset_list["domna_postcode"] == row["Matched Postcode"]) & same_address_1]
+        if df.shape[0] > 1:
+            # Postcode and address line 1 are ambiguous, so we fall back to the address text
+            in_address = _address_contains(row["Matched Address"])
+            if not in_address.any():
+                in_address = _address_contains(row["Address2"])
+            df = asset_list[in_address & same_address_1]
+
+    if df.shape[0] != 1:
+        raise ValueError(
+            f"Expected exactly one match for {row['Address1']}, {row['Postcode']} in the asset list, "
+            f"found {df.shape[0]}"
+        )
+    return df["domna_property_id"].values[0]
+
+
+ciga_lookup = pd.DataFrame(
+    [
+        {"domna_property_id": _match_property_id(row), "row_id": row["row_id"]}
+        for _, row in tqdm(ciga_checks.iterrows(), total=len(ciga_checks))
+    ]
+)
+
+ciga_lookup = ciga_lookup.merge(
+    ciga_checks[["row_id", "Guarantee"]].rename(
+        columns={"Guarantee": "ciga_guarantee"}
+    ), how="left", on="row_id"
+)
+
+# NOTE: two CIGA rows that resolve to the same property would duplicate that asset in this merge
+asset_list = asset_list.merge(
+    ciga_lookup[["domna_property_id", "ciga_guarantee"]],
+    how="left",
+    on="domna_property_id"
+)
+# Assets that had a CIGA check matched to them
+asset_list["ciga_check_complete"] = asset_list["domna_property_id"].isin(ciga_lookup["domna_property_id"])
+
+# To check that we matched addresses correctly, merge ciga_lookup with ciga_checks on row_id and with
+# asset_list on domna_property_id, then compare "Matched Address" against domna_full_address
+
+# Save
+filename = os.path.join(DATA_DIR, "12052025 MHS Standardised Asset List - programme.xlsx")
+# Store the data in two tabs. One for the asset list with the EPC data and the second with the flat data
+
+with pd.ExcelWriter(filename) as writer:
+    asset_list.to_excel(writer, sheet_name="Standardised Asset List", index=False)
+    flat_data.to_excel(writer, sheet_name="Flat Data", index=False)