From c4eb72fb92986efab0459bf3f91b7131978044e7 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sat, 8 Mar 2025 17:30:54 +0000 Subject: [PATCH] working on plusdane matching --- asset_list/AssetList.py | 53 ++++++++++++++++- asset_list/app.py | 78 +++++++++++++++++--------- asset_list/mappings/heating_systems.py | 28 ++++++++- asset_list/mappings/property_type.py | 3 +- asset_list/mappings/walls.py | 19 ++++++- 5 files changed, 147 insertions(+), 34 deletions(-) diff --git a/asset_list/AssetList.py b/asset_list/AssetList.py index 3007269b..21b2111f 100644 --- a/asset_list/AssetList.py +++ b/asset_list/AssetList.py @@ -397,6 +397,13 @@ class AssetList: # Update the reference to landlord UPRn self.landlord_uprn = self.STANDARD_UPRN + # Handle the case when full address and address 1 are the same + if self.full_address_colname == self.address1_colname: + self.full_address_colname = self.STANDARD_FULL_ADDRESS + self.standardised_asset_list[self.full_address_colname] = ( + self.standardised_asset_list[self.address1_colname].copy() + ) + def _extract_address1(self, asset_list, full_address_col, postcode_col, method="first_two_words"): if method not in self.ADDRESS_1_CLEANING_METHODS: @@ -632,7 +639,8 @@ class AssetList: known_errors = [ "#MULTIVALUE", "This cell has an external reference that can't be shown or edited. Editing this cell will " - "remove the external reference." + "remove the external reference.", + "ND" ] if pd.isnull(date_str) or date_str in known_errors: @@ -642,6 +650,9 @@ class AssetList: match = re.match(r"\d{1,2}-[A-Za-z]{3}-(\d{4})", date_str) if match: return int(match.group(1)) # Extract the year and convert to integer + if "-" in date_str: + # We probably have a range + return int(date_str.split("-")[1].strip()) if isinstance(date_str, datetime): return date_str.year @@ -1853,7 +1864,7 @@ class AssetList: self.outcomes = pd.read_excel(outcomes_filepath, sheet_name=outcomes_sheetname) self.outcomes["row_id"] = self.outcomes.index - logger.info("Matching outcomes to ") + logger.info("Matching outcomes to asset list") # Merge the outcomes onto the asset list - we check we're able to match sufficiently well lookup = [] nomatch = [] @@ -1866,7 +1877,7 @@ class AssetList: ].str.lower().str.replace(",", "").str.replace(" ", " ") == address_clean) ] - if not matched.empty and matched.shape[0] == 1: + if matched.shape[0] == 1: lookup.append( { "row_id": x["row_id"], @@ -1875,6 +1886,42 @@ class AssetList: ) continue + if "UPRN" in x: + matched = self.standardised_asset_list[ + self.standardised_asset_list[self.STANDARD_LANDLORD_PROPERTY_ID] == x["UPRN"] + ] + + if matched.shape[0] == 1: + lookup.append( + { + "row_id": x["row_id"], + self.DOMNA_PROPERTY_ID: matched[self.DOMNA_PROPERTY_ID].values[0] + } + ) + continue + + matched = self.standardised_asset_list[ + (self.standardised_asset_list[self.STANDARD_POSTCODE] == x["Post Code"]) + ].copy() + if not matched.empty: + matched["houseno"] = matched.apply( + lambda x: SearchEpc.get_house_number(x[self.STANDARD_ADDRESS_1], x[self.STANDARD_POSTCODE]), + axis=1 + ) + matched = matched[ + matched["houseno"].astype(str) == str(x["Numb."]) + ] + if matched.shape[0] == 1: + lookup.append( + { + "row_id": x["row_id"], + self.DOMNA_PROPERTY_ID: matched[self.DOMNA_PROPERTY_ID].values[0] + } + ) + continue + elif not matched.empty: + raise NotImplementedError("Implement me - multiple matches on house number") + nomatch.append(x["row_id"]) self.outcomes_no_match = self.outcomes[self.outcomes["row_id"].isin(nomatch)] diff --git a/asset_list/app.py b/asset_list/app.py index 8e2df56d..fb71a70e 100644 --- a/asset_list/app.py +++ b/asset_list/app.py @@ -125,21 +125,22 @@ def get_data( no_epc.append(home[row_id_name]) continue - if epc_api_only: - epc = { - row_id_name: home[row_id_name], - **searcher.newest_epc.copy() - } - - epc_data.append(epc) - continue - # Look for EPC recommendatons try: property_recommendations = searcher.client.domestic.recommendations(searcher.newest_epc["lmk-key"]) except: property_recommendations = {"rows": []} + if epc_api_only: + epc = { + row_id_name: home[row_id_name], + **searcher.newest_epc.copy(), + "recommendations": property_recommendations["rows"] + } + + epc_data.append(epc) + continue + # Retrieve data from FindMyEPC try: find_epc_searcher = RetrieveFindMyEpc( @@ -283,25 +284,46 @@ def app(): # landlord_property_id = "Place ref" # For ACIS - programme re-build - data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/ACIS/ACIS Full Programme Review March 2025" - data_filename = "ACIS asset list.xlsx" - sheet_name = "Assets" - address1_column = "House No" - postcode_column = "Postcode" + # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/ACIS/ACIS Full Programme Review March 2025" + # data_filename = "ACIS asset list.xlsx" + # sheet_name = "Assets" + # address1_column = "House No" + # postcode_column = "Postcode" + # landlord_property_id = "UPRN" + # fulladdress_column = None + # address_cols_to_concat = ["House No", "Street", "Town"] + # missing_postcodes_method = None + # address1_method = None + # landlord_year_built = "YEAR BUILT" + # landlord_os_uprn = None + # landlord_property_type = "Property type" + # landlord_wall_construction = "Wall Constuction" + # landlord_heating_system = "Heating" + # landlord_existing_pv = None + # outcomes_filename = "ACIS Group - 25.11.2024 - outcomes.xlsx" + # master_filename_eco3 = "ECO 3 -Table 1.csv" + # master_filename_eco4 = "ECO 4 -Table 1.csv" + + # For plus dane + data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Plus Dane" + data_filename = "PLUS DANE Asset List - for analysis.xlsx" + sheet_name = "Asset List" + address1_column = " Address" + postcode_column = " Postcode" landlord_property_id = "UPRN" - fulladdress_column = None - address_cols_to_concat = ["House No", "Street", "Town"] + fulladdress_column = " Address" + address_cols_to_concat = [] missing_postcodes_method = None address1_method = None - landlord_year_built = "YEAR BUILT" + landlord_year_built = "Property Age" landlord_os_uprn = None - landlord_property_type = "Property type" - landlord_wall_construction = "Wall Constuction" - landlord_heating_system = "Heating" + landlord_property_type = "Property Type" + landlord_wall_construction = "Landlord Wall Full" + landlord_heating_system = "Landlord Heating" landlord_existing_pv = None - outcomes_filename = "ACIS Group - 25.11.2024 - outcomes.xlsx" - master_filename_eco3 = "ECO 3 -Table 1.csv" - master_filename_eco4 = "ECO 4 -Table 1.csv" + outcomes_filename = "plus dane outcomes.xlsx" + outcomes_sheetname = "EVERYTHING" + master_filepaths = ["JJC Rolling Master.csv", "SCIS Rolling Master.csv"] # Maps addresses to uprn in problematic cases manual_uprn_map = {} @@ -360,19 +382,18 @@ def app(): # We now flag properties that have been treated under existing programmes asset_list.flag_outcomes( outcomes_filepath=os.path.join(data_folder, outcomes_filename), - outcomes_sheetname="Feedback" + outcomes_sheetname=outcomes_sheetname ) asset_list.flag_survey_master( - master_filepaths=[ - os.path.join(data_folder, f) for f in [master_filename_eco3, master_filename_eco4] if f is not None - ], + master_filepaths=master_filepaths ) ### We retrieve the EPC data # We chunk up this data into 5000 rows at a time # Create the chunks directory + epc_api_only = False force_retrieve_data = False skip = None # Used to skip already completed chunks chunk_size = 5000 @@ -400,6 +421,7 @@ def app(): df=chunk, row_id_name=asset_list.DOMNA_PROPERTY_ID, manual_uprn_map=manual_uprn_map, + epc_api_only=epc_api_only ) # We now retrieve any failed properties @@ -408,7 +430,7 @@ def app(): df=chunk_failed, row_id_name=asset_list.DOMNA_PROPERTY_ID, manual_uprn_map=manual_uprn_map, - epc_api_only=False + epc_api_only=epc_api_only ) epc_data_chunk.extend(epc_data_failed) diff --git a/asset_list/mappings/heating_systems.py b/asset_list/mappings/heating_systems.py index 33d3701a..f397391c 100644 --- a/asset_list/mappings/heating_systems.py +++ b/asset_list/mappings/heating_systems.py @@ -16,6 +16,7 @@ STANDARD_HEATING_SYSTEMS = { "unknown", "communal gas boiler", "high heat retention storage heaters", + "room heaters" } HEATING_MAPPINGS = { @@ -69,5 +70,30 @@ HEATING_MAPPINGS = { 'Electric': 'electric storage heaters', 'Solid fuel': 'other', 'No Heat': 'unknown', - 'GSHP': 'ground source heat pump' + 'GSHP': 'ground source heat pump', + + 'Boiler Oil': 'oil boiler', + 'Boiler Electricity': 'electric boiler', + 'Boiler ND': 'unknown', + 'ND Mains gas': 'unknown', + 'Room heaters Mains gas': "room heaters", + 'Heat pump (air) Electricity': 'air source heat pump', + 'Room heaters Electricity': 'electric radiators', + 'Room heaters Oil': 'room heaters', + 'No heating system ND': 'unknown', + 'Heat pump (wet) Electricity': 'ground source heat pump', + 'Room heaters Biomass': 'room heaters', + 'ND Solid fuel': 'unknown', + 'Boiler Mains gas': 'gas combi boiler', + 'Boiler LPG': 'boiler - other fuel', + 'Room heaters Solid fuel': 'room heaters', + 'ND ND': 'unknown', + 'Storage heating Electricity': 'electric storage heaters', + 'ND Electricity': 'unknown', + 'Community heating Community (non-gas)': 'district heating', + 'No heating system N/A': 'unknown', + 'Boiler Solid fuel': 'boiler - other fuel', + 'Community heating Community (mains gas)': 'communal gas boiler', + 'Boiler Biomass': 'boiler - other fuel', + 'No heating system Mains gas': 'unknown' } diff --git a/asset_list/mappings/property_type.py b/asset_list/mappings/property_type.py index 1fe1daac..ccee5d3e 100644 --- a/asset_list/mappings/property_type.py +++ b/asset_list/mappings/property_type.py @@ -62,5 +62,6 @@ PROPERTY_MAPPING = { '3 Bed First Floor Maisonette': 'maisonette', '2 Bed 1st Floor Sheltered Flat': 'flat', '1 Bed First Floor Flat': 'flat', - '3 Bed First Floor Flat': 'flat' + '3 Bed First Floor Flat': 'flat', + 'ND': 'unknown' } diff --git a/asset_list/mappings/walls.py b/asset_list/mappings/walls.py index 959701ca..2313f063 100644 --- a/asset_list/mappings/walls.py +++ b/asset_list/mappings/walls.py @@ -100,5 +100,22 @@ WALL_CONSTRUCTION_MAPPINGS = { 'BRICK/BLOCK CAVITY': 'cavity unknown insulation', 'STONE SOLID': 'sandstone or limestone', 'EXT CLADDING SYSTEM': 'system built', - 'BRICK/BLOCK SOLID': 'solid brick unknown insulation' + 'BRICK/BLOCK SOLID': 'solid brick unknown insulation', + + 'Cavity Filled cavity (with internal/external)': 'filled cavity', + 'ND (inferred) Filled cavity': 'filled cavity', + 'Cavity Filled cavity': 'filled cavity', + 'Cavity Unknown insulation': 'cavity unknown insulation', + 'Timber frame As-built': 'timber frame', + 'System build Unknown insulation': 'system built', + 'Cavity As-built': 'unknown', + 'System build External': 'system built', + 'ND (inferred) ND (inferred)': 'unknown', + 'Solid brick External': 'insulated solid brick', + 'Cavity External': 'filled cavity', + 'System build As-built': 'system built', + 'Solid brick Internal': 'insulated solid brick', + 'Cavity Internal': 'filled cavity', + 'System build Internal': 'system built', + 'Solid brick As-built': 'solid brick unknown insulation' }