handling block of flats

This commit is contained in:
Khalim Conn-Kowlessar 2025-02-24 18:44:06 +00:00
parent 8fa8307e33
commit c3049732f0
3 changed files with 24 additions and 10 deletions

View file

@ -344,6 +344,7 @@ class AssetList:
# Will be used to store aggregated figures against the various work types
self.work_type_figures = {}
self.flat_data = None
self.duplicated_addresses = None
# We detect the presence of the non-intrusive columns
self.non_intrusives_present = True if "CIGA Check Required" in self.raw_asset_list.columns else False
@ -691,6 +692,12 @@ class AssetList:
f"There are {self.standardised_asset_list[self.DOMNA_PROPERTY_ID].duplicated().sum()} duplicated "
f"addresses - dropping"
)
# Keep a record of duplicates
self.duplicated_addresses = self.standardised_asset_list[
self.standardised_asset_list[self.DOMNA_PROPERTY_ID].duplicated()
][[self.DOMNA_PROPERTY_ID, self.address1_colname, self.postcode_colname]].copy()
self.standardised_asset_list = self.standardised_asset_list[
~self.standardised_asset_list[self.DOMNA_PROPERTY_ID].duplicated()
]

View file

@ -45,6 +45,12 @@ def get_data(
no_epc = []
for _, home in tqdm(df.iterrows(), total=len(df)):
try:
# If we have a block of flats, we cannot retrieve this data
if home[AssetList.STANDARD_PROPERTY_TYPE] == "block of flats":
no_epc.append(home[row_id_name])
continue
postcode = home[postcode_column]
house_number = str(home[address1_column]).strip()
full_address = home[fulladdress_column].strip()
@ -283,16 +289,17 @@ def app():
# We produce the new maps, which can be saved for future usage
new_property_type_map = PROPERTY_MAPPING.copy().update(
asset_list.variable_mappings[asset_list.landlord_property_type]
asset_list.variable_mappings[asset_list.landlord_property_type] if asset_list.landlord_property_type else {}
)
new_wall_map = WALL_CONSTRUCTION_MAPPINGS.copy().update(
asset_list.variable_mappings[asset_list.landlord_wall_construction]
asset_list.variable_mappings[asset_list.landlord_wall_construction] if
asset_list.landlord_wall_construction else {}
)
new_heating_map = HEATING_MAPPINGS.copy().update(
asset_list.variable_mappings[asset_list.landlord_heating_system]
asset_list.variable_mappings[asset_list.landlord_heating_system] if asset_list.landlord_heating_system else {}
)
new_existing_pv_map = EXISTING_PV_MAPPINGS.copy().update(
asset_list.variable_mappings[asset_list.landlord_existing_pv]
asset_list.variable_mappings[asset_list.landlord_existing_pv] if asset_list.landlord_existing_pv else {}
)
asset_list.apply_standardiation()
@ -305,7 +312,7 @@ def app():
skip = None # Used to skip already completed chunks
chunk_size = 5000
filename = "Chunk {i}.csv"
download_folder = os.path.join(DATA_FOLDER, "Chunks")
download_folder = os.path.join(data_folder, "Chunks")
if not os.path.exists(download_folder):
os.makedirs(download_folder)
@ -343,12 +350,12 @@ def app():
# Append the failed data to the main data
# Store the chunk locally as a csv
pd.DataFrame(epc_data_chunk).to_csv(os.path.join(DATA_FOLDER, f"Chunks/Chunk {i}.csv"), index=False)
pd.DataFrame(epc_data_chunk).to_csv(os.path.join(data_folder, f"Chunks/Chunk {i}.csv"), index=False)
# Store the errors and no-data locally
with open(os.path.join(DATA_FOLDER, f"Chunks/Chunk {i} errors.json"), "w") as f:
with open(os.path.join(data_folder, f"Chunks/Chunk {i} errors.json"), "w") as f:
json.dump(errors_chunk, f)
with open(os.path.join(DATA_FOLDER, f"Chunks/Chunk {i} nodata.csv"), "w") as f:
with open(os.path.join(data_folder, f"Chunks/Chunk {i} nodata.csv"), "w") as f:
json.dump(no_epc_chunk, f)
# We read in and concatenate the created chunks
@ -446,7 +453,7 @@ def app():
asset_list.flat_analysis()
# Store as an excel
filename = os.path.join(DATA_FOLDER, ".".join(DATA_FILENAME.split(".")[:-1])) + " - Standardised.xlsx"
filename = os.path.join(data_folder, ".".join(data_filename.split(".")[:-1])) + " - Standardised.xlsx"
# Store the data in two tabs. One for the asset list with the EPC data and the second with the flat data
with pd.ExcelWriter(filename) as writer:

View file

@ -62,6 +62,6 @@ HEATING_MAPPINGS = {
'HDU': 'district heating',
'OILBLR': 'oil boiler',
'SOLIDFUEL': 'boiler - other fuel',
'STORHTR': 'high heat retention storage heaters',
'STORHTR': 'electric storage heaters',
np.nan: 'unknown',
}