From c3049732f0d680a38aa9acacd3f15ff9e16d80f0 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 24 Feb 2025 18:44:06 +0000
Subject: [PATCH] handling block of flats

---
 asset_list/AssetList.py                |  7 +++++++
 asset_list/app.py                      | 25 ++++++++++++++++---------
 asset_list/mappings/heating_systems.py |  2 +-
 3 files changed, 24 insertions(+), 10 deletions(-)

diff --git a/asset_list/AssetList.py b/asset_list/AssetList.py
index 06ec5907..72086c60 100644
--- a/asset_list/AssetList.py
+++ b/asset_list/AssetList.py
@@ -344,6 +344,7 @@ class AssetList:
         # Will be used to store aggregated figures against the various work types
         self.work_type_figures = {}
         self.flat_data = None
+        self.duplicated_addresses = None
 
         # We detect the presence of the non-intrusive columns
         self.non_intrusives_present = True if "CIGA Check Required" in self.raw_asset_list.columns else False
@@ -691,6 +692,12 @@ class AssetList:
                 f"There are {self.standardised_asset_list[self.DOMNA_PROPERTY_ID].duplicated().sum()} duplicated "
                 f"addresses - dropping"
             )
+
+            # Keep a record of the duplicated rows (ID, address, postcode) before they are dropped
+            self.duplicated_addresses = self.standardised_asset_list[
+                self.standardised_asset_list[self.DOMNA_PROPERTY_ID].duplicated()
+            ][[self.DOMNA_PROPERTY_ID, self.address1_colname, self.postcode_colname]].copy()
+
             self.standardised_asset_list = self.standardised_asset_list[
                 ~self.standardised_asset_list[self.DOMNA_PROPERTY_ID].duplicated()
             ]
diff --git a/asset_list/app.py b/asset_list/app.py
index 1cb7808e..a24c4043 100644
--- a/asset_list/app.py
+++ b/asset_list/app.py
@@ -45,6 +45,12 @@ def get_data(
     no_epc = []
     for _, home in tqdm(df.iterrows(), total=len(df)):
         try:
+
+            # EPC data cannot be retrieved for a block of flats, so record it as no-EPC and skip it
+            if home[AssetList.STANDARD_PROPERTY_TYPE] == "block of flats":
+                no_epc.append(home[row_id_name])
+                continue
+
             postcode = home[postcode_column]
             house_number = str(home[address1_column]).strip()
             full_address = home[fulladdress_column].strip()
@@ -283,16 +289,17 @@ def app():
     # We produce the new maps, which can be saved for future useage
 
     new_property_type_map = PROPERTY_MAPPING.copy().update(
-        asset_list.variable_mappings[asset_list.landlord_property_type]
+        asset_list.variable_mappings[asset_list.landlord_property_type] if asset_list.landlord_property_type else {}
     )
     new_wall_map = WALL_CONSTRUCTION_MAPPINGS.copy().update(
-        asset_list.variable_mappings[asset_list.landlord_wall_construction]
+        asset_list.variable_mappings[asset_list.landlord_wall_construction] if
+        asset_list.landlord_wall_construction else {}
     )
     new_heating_map = HEATING_MAPPINGS.copy().update(
-        asset_list.variable_mappings[asset_list.landlord_heating_system]
+        asset_list.variable_mappings[asset_list.landlord_heating_system] if asset_list.landlord_heating_system else {}
     )
     new_existing_pv_map = EXISTING_PV_MAPPINGS.copy().update(
-        asset_list.variable_mappings[asset_list.landlord_existing_pv]
+        asset_list.variable_mappings[asset_list.landlord_existing_pv] if asset_list.landlord_existing_pv else {}
     )
 
     asset_list.apply_standardiation()
@@ -305,7 +312,7 @@ def app():
     skip = None  # Used to skip already completed chunks
     chunk_size = 5000
     filename = "Chunk {i}.csv"
-    download_folder = os.path.join(DATA_FOLDER, "Chunks")
+    download_folder = os.path.join(data_folder, "Chunks")
     if not os.path.exists(download_folder):
         os.makedirs(download_folder)
 
@@ -343,12 +350,12 @@ def app():
 
         # Append the failed data to the main data
         # Store the chunk locally as a csv
-        pd.DataFrame(epc_data_chunk).to_csv(os.path.join(DATA_FOLDER, f"Chunks/Chunk {i}.csv"), index=False)
+        pd.DataFrame(epc_data_chunk).to_csv(os.path.join(data_folder, f"Chunks/Chunk {i}.csv"), index=False)
         # Store the errors and no-data locally
-        with open(os.path.join(DATA_FOLDER, f"Chunks/Chunk {i} errors.json"), "w") as f:
+        with open(os.path.join(data_folder, f"Chunks/Chunk {i} errors.json"), "w") as f:
             json.dump(errors_chunk, f)
 
-        with open(os.path.join(DATA_FOLDER, f"Chunks/Chunk {i} nodata.csv"), "w") as f:
+        with open(os.path.join(data_folder, f"Chunks/Chunk {i} nodata.csv"), "w") as f:
             json.dump(no_epc_chunk, f)
 
     # We read in and concatenate the created created chunks
@@ -446,7 +453,7 @@ def app():
     asset_list.flat_analysis()
 
     # Store as an excel
-    filename = os.path.join(DATA_FOLDER, ".".join(DATA_FILENAME.split(".")[:-1])) + " - Standardised.xlsx"
+    filename = os.path.join(data_folder, ".".join(data_filename.split(".")[:-1])) + " - Standardised.xlsx"
     # Store the data in two tabs. One for the asset list with the EPC data and the second with the flat data
 
     with pd.ExcelWriter(filename) as writer:
diff --git a/asset_list/mappings/heating_systems.py b/asset_list/mappings/heating_systems.py
index b58f13f2..4879efcc 100644
--- a/asset_list/mappings/heating_systems.py
+++ b/asset_list/mappings/heating_systems.py
@@ -62,6 +62,6 @@ HEATING_MAPPINGS = {
     'HDU': 'district heating',
     'OILBLR': 'oil boiler',
     'SOLIDFUEL': 'boiler - other fuel',
-    'STORHTR': 'high heat retention storage heaters',
+    'STORHTR': 'electric storage heaters',
     np.nan: 'unknown',
 }