diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index e40bb98b..009064c6 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -336,6 +336,16 @@ PROPERTY_TYPE_LOOKUP = { 'Cluster': None, 'Scheme Room': None }, + "HA45": { + 'Large block of flats': 'Flat', + 'Small block of flats/dwelling converted in to flats': 'Flat', + 'Semi-detached house': 'House', + 'Mid-terraced house': 'House', + 'End-terraced house': 'House', + 'Block of flats': 'Flat', + 'Detached house': 'House', + 'Flat in mixed use building': 'Flat', + }, "HA48": { "House": "House", "Flat": "Flat", @@ -364,6 +374,30 @@ PROPERTY_TYPE_LOOKUP = { 'Flat?': 'Flat', 'Bungalow ': 'Bungalow' }, + "HA51": { + 'FLAT': 'Flat', + 'HOUSE': 'House', + 'MAISONETTE': 'Maisonette', + 'BEDSIT': None, # Considering as a non-specific residential category here + 'BUNGALOW': 'Bungalow', + }, + "HA52": { + 'House - Mid Terrace': 'House', + 'Flat - First Floor': 'Flat', + 'Flat - Ground Floor': 'Flat', + 'House - Semi-Detached': 'House', + 'House - End Terrace': 'House', + 'Flat - Second Floor': 'Flat', + 'Bedsit': None, # Considering as a non-specific residential category here + 'Bungalow - Semi-Detached': 'Bungalow', + 'Bungalow - Mid Terrace': 'Bungalow', + 'Bungalow - End Terrace': 'Bungalow', + 'House - Detached': 'House', + 'Flat - Third Floor': 'Flat', + 'House attached to flats': 'House', + 'Flat - Fourth Floor': 'Flat', + 'Bungalow - Detached': 'Bungalow' + }, "HA56": { 'House Non Specific': 'House', 'HOUSE TERRACED': 'House', @@ -463,6 +497,10 @@ class DataLoader: "address": "Address", "postcode": "Address - Postcode" }, + "HA5": { + "address": "Address", + "postcode": "matching_postcode" + }, "HA6": { "address": "propertyaddress", "postcode": "address" # The 'address' column actually contains postcode @@ -553,7 +591,9 @@ class DataLoader: def create_asset_list_matching_address(self, ha_name, asset_list): - if ha_name in ["HA1", "HA6", "HA12", "HA16", "HA24", "HA30", "HA31", "HA45", "HA48", "HA49", "HA52", "HA54"]: + if ha_name in [ + "HA1", "HA5", "HA6", "HA12", "HA16", "HA24", "HA30", "HA31", "HA45", "HA48", "HA49", "HA52", "HA54" + ]: asset_list["matching_address"] = asset_list[ self.COLUMN_CONFIG[ha_name]["address"] ].astype(str).str.lower().str.strip() @@ -750,6 +790,10 @@ class DataLoader: asset_list["matching_address"] = asset_list["Address1"].astype(str).str.lower().str.strip() + ", " + \ asset_list["POSTCODE"].astype(str).str.lower().str.strip() asset_list["matching_postcode"] = asset_list["POSTCODE"].astype(str).str.lower().str.strip() + elif ha_name == "HA70": + asset_list["matching_address"] = asset_list["Address1"].astype(str).str.lower().str.strip() + ", " + \ + asset_list["POSTCODE"].astype(str).str.lower().str.strip() + asset_list["matching_postcode"] = asset_list["POSTCODE"].astype(str).str.lower().str.strip() elif ha_name == "HA107": # Create matching_address by concatenating House No, Street, Town, District, Postcode asset_list["matching_address"] = asset_list["House No"].astype(str).str.lower().str.strip() + ", " + \ @@ -962,9 +1006,100 @@ class DataLoader: return asset_list + def prepare_ha17(self, workbook): + blocks_sheet = workbook["Blocks List - Cavity Wall only"] + blocks_data = [] + blocks_colnames = [cell.value for cell in blocks_sheet[2]] + for row in blocks_sheet.iter_rows(min_row=4, values_only=False): + row_data = [cell.value for cell in row] # This will get you the cell values + blocks_data.append(row_data) + + blocks_df = pd.DataFrame(blocks_data, columns=blocks_colnames) + + blocks_df["matching_address"] = ( + blocks_df["Block Name\n[as per Naming Convention procedure]"].astype(str).str.lower().str.strip() + ", " + + blocks_df["Block Street Name"].astype(str).str.lower().str.strip() + ", " + + blocks_df["Postcode"].astype(str).str.lower().str.strip() + ) + blocks_df["matching_postcode"] = blocks_df["Postcode"].astype(str).str.lower().str.strip() + blocks_df["property_type"] = "Flat" + + street_properties_sheet = workbook["Street Properties - Cavity Wall"] + street_properties_data = [] + street_properties_colnames = [cell.value for cell in street_properties_sheet[2]] + for row in street_properties_sheet.iter_rows(min_row=3, values_only=False): + row_data = [cell.value for cell in row] # This will get you the cell values + street_properties_data.append(row_data) + + street_properties_df = pd.DataFrame(street_properties_data, columns=street_properties_colnames) + + street_properties_df["matching_address"] = ( + street_properties_df["Block Name\n[as per Naming Convention procedure]"].astype( + str).str.lower().str.strip() + ", " + + street_properties_df["Postcode"].astype(str).str.lower().str.strip() + ) + street_properties_df["matching_postcode"] = street_properties_df["Postcode"].astype(str).str.lower().str.strip() + street_properties_df["property_type"] = street_properties_df[ + "Block typology based on dwelling type\n[defined list]" + ] + + asset_list_compressed = pd.concat( + [ + blocks_df[["matching_address", "matching_postcode", "property_type", "ECO Eligibility"]], + street_properties_df[["matching_address", "matching_postcode", "property_type", "ECO Eligibility"]] + ], + axis=0 + ) + # We expand + range_pattern = r"(\d+)\s+to\s+(\d+)\s+(.*)" + asset_list = [] + for _, row in tqdm(asset_list_compressed.iterrows(), total=len(asset_list_compressed)): + if row["ECO Eligibility"] == "Not Eligible": + asset_list.append(row.to_dict()) + continue + + # Detect a house number range + match = re.search(range_pattern, row["matching_address"]) + + if not match: + asset_list.append(row.to_dict()) + continue + + # Extracting the start and end of the range + start_number = int(match.group(1)) + end_number = int(match.group(2)) + rest_of_address = match.group(3) + + # Generating the list of house numbers + house_numbers = list(range(start_number, end_number + 1)) + data_to_extend = [] + for house_number in house_numbers: + new_adress = f"{house_number} {rest_of_address}" + + entry = row.to_dict().copy() + entry.update({"matching_address": new_adress}) + + data_to_extend.append(entry) + + asset_list.extend(data_to_extend) + + asset_list = pd.DataFrame(asset_list) + + # Add in asset_list_row_id + asset_list["asset_list_row_id"] = ["HA17" + str(i) for i in range(0, len(asset_list))] + + # Add on house number + asset_list = self.create_asset_list_house_no(ha_name="HA17", asset_list=asset_list) + + return asset_list + def load_asset_list(self, filepath, ha_name): workbook = openpyxl.load_workbook(filepath) - asset_sheetname = self.get_asset_sheetname(workbook) + if ha_name == "HA17": + asset_list = self.prepare_ha17(workbook) + return asset_list, pd.DataFrame(), pd.DataFrame(), pd.DataFrame() + else: + asset_sheetname = self.get_asset_sheetname(workbook) asset_sheet = workbook[asset_sheetname] asset_sheet_colnames = [cell.value for cell in asset_sheet[1]] @@ -977,6 +1112,9 @@ class DataLoader: if ha_name == "HA54": asset_sheet_colnames[10] = "matching_postcode" + if ha_name == "HA5": + asset_sheet_colnames[2] = "matching_postcode" + rows_data = [] for row in asset_sheet.iter_rows(min_row=2, values_only=False): @@ -2555,6 +2693,10 @@ class DataLoader: return survey_list + @staticmethod + def correct_ha5_survey_list(survey_list): + return survey_list + @staticmethod def levenstein_match(matching_string, df): match_to = df["matching_address"].tolist() @@ -3431,6 +3573,9 @@ class DataLoader: def get_property_type_and_built_form(property_meta, ha_name): + if ha_name in ["HA44"]: + return None, None + if ha_name == "HA1": property_type = property_meta["Asset Type"] # We correct a small error @@ -3499,6 +3644,8 @@ def get_property_type_and_built_form(property_meta, ha_name): config = PROPERTY_TYPE_LOOKUP[ha_name][property_meta["Type"]] property_type = config.get("property-type") built_form = config.get("built-form") + elif ha_name == "HA17": + return property_meta["property_type"], None elif ha_name == "HA18": property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Asset Type"].strip()) built_form = None @@ -3580,6 +3727,9 @@ def get_property_type_and_built_form(property_meta, ha_name): elif ha_name == "HA42": property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Dwelling use/type"].strip()) built_form = None + elif ha_name == "HA45": + property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Property type"].strip()) + built_form = None elif ha_name == "HA48": property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Property Type"].strip()) built_form = None @@ -3589,6 +3739,14 @@ def get_property_type_and_built_form(property_meta, ha_name): elif ha_name == "HA50": property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Property Type"].strip()) built_form = None + elif ha_name == "HA51": + property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Asset Type"].strip()) + built_form = None + elif ha_name == "HA52": + if property_meta["Property Type"] is None: + return None, None + property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Property Type"].strip()) + built_form = None elif ha_name == "HA54": property_type = property_meta["Property Type"] built_form = None @@ -5806,9 +5964,9 @@ def fml_data_pull(loader): "HA7", "HA14", "HA25", "HA39", "HA16", "HA28", "HA13", "HA50", "HA24", "HA15", "HA32", "HA28", "HA6", "HA1", "HA107", "HA41", "HA48", "HA2", "HA63", "HA12", "HA117", "HA35", "HA34", "HA56", "HA19", "HA18", "HA9", "HA27", "HA30", "HA31", "HA54", "HA49", - 'HA8', 'HA11', 'HA21', 'HA37', 'HA42', + 'HA8', 'HA11', 'HA21', 'HA37', 'HA42', 'HA44', 'HA45', 'HA51', 'HA52', # NEW - add property type - 'HA44', 'HA45', 'HA51', 'HA52' + "HA17" ] # Can't pull from EPC database because it's based in Scotland @@ -5905,7 +6063,7 @@ def fml_analysis(loader): "HA7", "HA14", "HA25", "HA39", "HA16", "HA28", "HA13", "HA50", "HA24", "HA15", "HA32", "HA28", "HA6", "HA1", "HA107", "HA41", "HA48", "HA2", "HA63", "HA12", "HA117", "HA35", "HA34", "HA56", "HA19", "HA18", "HA9", "HA27", "HA30", "HA31", "HA54", "HA49", - 'HA8', 'HA11', 'HA21', 'HA37', 'HA42', + 'HA8', 'HA11', 'HA21', 'HA37', 'HA42', 'HA44', 'HA45', 'HA51', 'HA52' ] no_ciga_cavity_descriptions = [ @@ -6320,11 +6478,11 @@ def app(): # Add in: priority_has = [ - "HA1", "HA2", "HA6", "HA7", "HA9", "HA12", "HA13", "HA14", "HA15", "HA16", "HA18", "HA19", "HA24", "HA25", - "HA27", "HA28", "HA30", "HA31", "HA32", "HA34", "HA35", "HA39", "HA41", "HA48", "HA49", "HA50", "HA54", "HA56", - "HA63", "HA107", "HA117", "HA8", "HA11", "HA21", "HA37", "HA42", + "HA1", "HA2", "HA5", "HA6", "HA7", "HA9", "HA12", "HA13", "HA14", "HA15", "HA16", "HA18", "HA19", "HA24", + "HA25", "HA27", "HA28", "HA30", "HA31", "HA32", "HA34", "HA35", "HA39", "HA41", "HA48", "HA49", "HA50", "HA54", + "HA56", "HA63", "HA107", "HA117", "HA8", "HA11", "HA21", "HA37", "HA42", # Added as of March 18th - "HA44", "HA45", "HA51", "HA52", + "HA44", "HA45", "HA51", "HA52", "HA17", # New HAS "HAXX", "HAXXX", ] @@ -6332,7 +6490,10 @@ def app(): # back on this], 28 [DONE], 41 [DONE], 50 [DONE], 48 [DONE], 2 [DONE], 63 [DONE], 12 [DONE], 117 [DONE], 13 [DONE], # 35 [DONE], 56 [DONE], 19 [DONE], 18 [DONE], 9 [DONE], 27 [DONE], 34 [DONE], 30 [DONE], 31 [DONE], 54 [DONE] # - # Consider for ECO4: HA 70 - have to merge ECO3 list though, HA17 has LOTs of assets, but the asset list is a mess + # Consider for ECO4: + # HA 70 - have to merge ECO3 list though, + # HA17 has LOTs of assets, but the asset list is a mess + # HA53 but has EPCs done # Consider for GBIS: # Ignore for now: # 38 [problematic, but no ECO4], 10 problematic (no eligibility), 20 has barely any in