From c3049732f0d680a38aa9acacd3f15ff9e16d80f0 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 24 Feb 2025 18:44:06 +0000 Subject: [PATCH] handling block of flats --- asset_list/AssetList.py | 7 +++++++ asset_list/app.py | 25 ++++++++++++++++--------- asset_list/mappings/heating_systems.py | 2 +- 3 files changed, 24 insertions(+), 10 deletions(-) diff --git a/asset_list/AssetList.py b/asset_list/AssetList.py index 06ec5907..72086c60 100644 --- a/asset_list/AssetList.py +++ b/asset_list/AssetList.py @@ -344,6 +344,7 @@ class AssetList: # Will be used to store aggregated figures against the various work types self.work_type_figures = {} self.flat_data = None + self.duplicated_addresses = None # We detect the presence of the non-intrusive columns self.non_intrusives_present = True if "CIGA Check Required" in self.raw_asset_list.columns else False @@ -691,6 +692,12 @@ class AssetList: f"There are {self.standardised_asset_list[self.DOMNA_PROPERTY_ID].duplicated().sum()} duplicated " f"addresses - dropping" ) + + # Keep a record of duplicates + self.duplicated_addresses = self.standardised_asset_list[ + self.standardised_asset_list[self.DOMNA_PROPERTY_ID].duplicated() + ][[self.DOMNA_PROPERTY_ID, self.address1_colname, self.postcode_colname]].copy() + self.standardised_asset_list = self.standardised_asset_list[ ~self.standardised_asset_list[self.DOMNA_PROPERTY_ID].duplicated() ] diff --git a/asset_list/app.py b/asset_list/app.py index 1cb7808e..a24c4043 100644 --- a/asset_list/app.py +++ b/asset_list/app.py @@ -45,6 +45,12 @@ def get_data( no_epc = [] for _, home in tqdm(df.iterrows(), total=len(df)): try: + + # If we have a block of flats, we cannot retrieve this data + if home[AssetList.STANDARD_PROPERTY_TYPE] == "block of flats": + no_epc.append(home[row_id_name]) + continue + postcode = home[postcode_column] house_number = str(home[address1_column]).strip() full_address = home[fulladdress_column].strip() @@ -283,16 +289,17 @@ def app(): # We produce the new maps, which can be saved for future useage new_property_type_map = PROPERTY_MAPPING.copy().update( - asset_list.variable_mappings[asset_list.landlord_property_type] + asset_list.variable_mappings[asset_list.landlord_property_type] if asset_list.landlord_property_type else {} ) new_wall_map = WALL_CONSTRUCTION_MAPPINGS.copy().update( - asset_list.variable_mappings[asset_list.landlord_wall_construction] + asset_list.variable_mappings[asset_list.landlord_wall_construction] if + asset_list.landlord_wall_construction else {} ) new_heating_map = HEATING_MAPPINGS.copy().update( - asset_list.variable_mappings[asset_list.landlord_heating_system] + asset_list.variable_mappings[asset_list.landlord_heating_system] if asset_list.landlord_heating_system else {} ) new_existing_pv_map = EXISTING_PV_MAPPINGS.copy().update( - asset_list.variable_mappings[asset_list.landlord_existing_pv] + asset_list.variable_mappings[asset_list.landlord_existing_pv] if asset_list.landlord_existing_pv else {} ) asset_list.apply_standardiation() @@ -305,7 +312,7 @@ def app(): skip = None # Used to skip already completed chunks chunk_size = 5000 filename = "Chunk {i}.csv" - download_folder = os.path.join(DATA_FOLDER, "Chunks") + download_folder = os.path.join(data_folder, "Chunks") if not os.path.exists(download_folder): os.makedirs(download_folder) @@ -343,12 +350,12 @@ def app(): # Append the failed data to the main data # Store the chunk locally as a csv - pd.DataFrame(epc_data_chunk).to_csv(os.path.join(DATA_FOLDER, f"Chunks/Chunk {i}.csv"), index=False) + pd.DataFrame(epc_data_chunk).to_csv(os.path.join(data_folder, f"Chunks/Chunk {i}.csv"), index=False) # Store the errors and no-data locally - with open(os.path.join(DATA_FOLDER, f"Chunks/Chunk {i} errors.json"), "w") as f: + with open(os.path.join(data_folder, f"Chunks/Chunk {i} errors.json"), "w") as f: json.dump(errors_chunk, f) - with open(os.path.join(DATA_FOLDER, f"Chunks/Chunk {i} nodata.csv"), "w") as f: + with open(os.path.join(data_folder, f"Chunks/Chunk {i} nodata.csv"), "w") as f: json.dump(no_epc_chunk, f) # We read in and concatenate the created created chunks @@ -446,7 +453,7 @@ def app(): asset_list.flat_analysis() # Store as an excel - filename = os.path.join(DATA_FOLDER, ".".join(DATA_FILENAME.split(".")[:-1])) + " - Standardised.xlsx" + filename = os.path.join(data_folder, ".".join(data_filename.split(".")[:-1])) + " - Standardised.xlsx" # Store the data in two tabs. One for the asset list with the EPC data and the second with the flat data with pd.ExcelWriter(filename) as writer: diff --git a/asset_list/mappings/heating_systems.py b/asset_list/mappings/heating_systems.py index b58f13f2..4879efcc 100644 --- a/asset_list/mappings/heating_systems.py +++ b/asset_list/mappings/heating_systems.py @@ -62,6 +62,6 @@ HEATING_MAPPINGS = { 'HDU': 'district heating', 'OILBLR': 'oil boiler', 'SOLIDFUEL': 'boiler - other fuel', - 'STORHTR': 'high heat retention storage heaters', + 'STORHTR': 'electric storage heaters', np.nan: 'unknown', }