Adding ha5

This commit is contained in:
Khalim Conn-Kowlessar 2024-03-18 15:40:52 +00:00
parent 92193d773d
commit 443aa585d0

View file

@ -336,6 +336,16 @@ PROPERTY_TYPE_LOOKUP = {
'Cluster': None,
'Scheme Room': None
},
"HA45": {
'Large block of flats': 'Flat',
'Small block of flats/dwelling converted in to flats': 'Flat',
'Semi-detached house': 'House',
'Mid-terraced house': 'House',
'End-terraced house': 'House',
'Block of flats': 'Flat',
'Detached house': 'House',
'Flat in mixed use building': 'Flat',
},
"HA48": {
"House": "House",
"Flat": "Flat",
@ -364,6 +374,30 @@ PROPERTY_TYPE_LOOKUP = {
'Flat?': 'Flat',
'Bungalow ': 'Bungalow'
},
"HA51": {
'FLAT': 'Flat',
'HOUSE': 'House',
'MAISONETTE': 'Maisonette',
'BEDSIT': None, # Considering as a non-specific residential category here
'BUNGALOW': 'Bungalow',
},
"HA52": {
'House - Mid Terrace': 'House',
'Flat - First Floor': 'Flat',
'Flat - Ground Floor': 'Flat',
'House - Semi-Detached': 'House',
'House - End Terrace': 'House',
'Flat - Second Floor': 'Flat',
'Bedsit': None, # Considering as a non-specific residential category here
'Bungalow - Semi-Detached': 'Bungalow',
'Bungalow - Mid Terrace': 'Bungalow',
'Bungalow - End Terrace': 'Bungalow',
'House - Detached': 'House',
'Flat - Third Floor': 'Flat',
'House attached to flats': 'House',
'Flat - Fourth Floor': 'Flat',
'Bungalow - Detached': 'Bungalow'
},
"HA56": {
'House Non Specific': 'House',
'HOUSE TERRACED': 'House',
@ -463,6 +497,10 @@ class DataLoader:
"address": "Address",
"postcode": "Address - Postcode"
},
"HA5": {
"address": "Address",
"postcode": "matching_postcode"
},
"HA6": {
"address": "propertyaddress",
"postcode": "address" # The 'address' column actually contains postcode
@ -553,7 +591,9 @@ class DataLoader:
def create_asset_list_matching_address(self, ha_name, asset_list):
if ha_name in ["HA1", "HA6", "HA12", "HA16", "HA24", "HA30", "HA31", "HA45", "HA48", "HA49", "HA52", "HA54"]:
if ha_name in [
"HA1", "HA5", "HA6", "HA12", "HA16", "HA24", "HA30", "HA31", "HA45", "HA48", "HA49", "HA52", "HA54"
]:
asset_list["matching_address"] = asset_list[
self.COLUMN_CONFIG[ha_name]["address"]
].astype(str).str.lower().str.strip()
@ -750,6 +790,10 @@ class DataLoader:
asset_list["matching_address"] = asset_list["Address1"].astype(str).str.lower().str.strip() + ", " + \
asset_list["POSTCODE"].astype(str).str.lower().str.strip()
asset_list["matching_postcode"] = asset_list["POSTCODE"].astype(str).str.lower().str.strip()
elif ha_name == "HA70":
asset_list["matching_address"] = asset_list["Address1"].astype(str).str.lower().str.strip() + ", " + \
asset_list["POSTCODE"].astype(str).str.lower().str.strip()
asset_list["matching_postcode"] = asset_list["POSTCODE"].astype(str).str.lower().str.strip()
elif ha_name == "HA107":
# Create matching_address by concatenating House No, Street, Town, District, Postcode
asset_list["matching_address"] = asset_list["House No"].astype(str).str.lower().str.strip() + ", " + \
@ -962,9 +1006,100 @@ class DataLoader:
return asset_list
def prepare_ha17(self, workbook):
    """
    Build the HA17 asset list from its two cavity-wall worksheets.

    Reads the "Blocks List - Cavity Wall only" and "Street Properties - Cavity Wall"
    sheets, derives ``matching_address``, ``matching_postcode`` and ``property_type``
    for each row, stacks the two sheets and then expands any address that contains a
    house-number range ("<start> to <end> <rest>") into one row per house number.

    Rows flagged "Not Eligible" in "ECO Eligibility" are kept as-is and never expanded.
    Rows whose range is descending (end < start) are also kept as-is rather than being
    dropped, which is what an empty ``range`` would otherwise cause.

    :param workbook: openpyxl workbook containing both HA17 worksheets
    :return: DataFrame with matching_address, matching_postcode, property_type,
        ECO Eligibility and asset_list_row_id, passed through
        ``self.create_asset_list_house_no`` before being returned
    """
    block_name_col = "Block Name\n[as per Naming Convention procedure]"
    output_cols = ["matching_address", "matching_postcode", "property_type", "ECO Eligibility"]

    def sheet_to_frame(sheet, first_data_row):
        # Column names are taken from row 2 of the sheet; data is read from first_data_row on
        column_names = [cell.value for cell in sheet[2]]
        records = [
            [cell.value for cell in sheet_row]
            for sheet_row in sheet.iter_rows(min_row=first_data_row, values_only=False)
        ]
        return pd.DataFrame(records, columns=column_names)

    def normalise(series):
        # Lower-cased, stripped string form used for address/postcode matching
        return series.astype(str).str.lower().str.strip()

    # Blocks: every block is treated as a flat.
    # NOTE(review): block data is read from row 4 whereas street properties are read from
    # row 3 - presumably the blocks sheet has an extra sub-header row; confirm.
    blocks_df = sheet_to_frame(workbook["Blocks List - Cavity Wall only"], first_data_row=4)
    blocks_df["matching_address"] = (
        normalise(blocks_df[block_name_col]) + ", " +
        normalise(blocks_df["Block Street Name"]) + ", " +
        normalise(blocks_df["Postcode"])
    )
    blocks_df["matching_postcode"] = normalise(blocks_df["Postcode"])
    blocks_df["property_type"] = "Flat"

    # Street properties: the property type comes straight from the typology column
    street_properties_df = sheet_to_frame(workbook["Street Properties - Cavity Wall"], first_data_row=3)
    street_properties_df["matching_address"] = (
        normalise(street_properties_df[block_name_col]) + ", " +
        normalise(street_properties_df["Postcode"])
    )
    street_properties_df["matching_postcode"] = normalise(street_properties_df["Postcode"])
    street_properties_df["property_type"] = street_properties_df[
        "Block typology based on dwelling type\n[defined list]"
    ]

    asset_list_compressed = pd.concat(
        [blocks_df[output_cols], street_properties_df[output_cols]],
        axis=0
    )

    # Expand "<start> to <end> <rest of address>" into one row per house number
    range_pattern = re.compile(r"(\d+)\s+to\s+(\d+)\s+(.*)")
    expanded_rows = []
    for _, row in tqdm(asset_list_compressed.iterrows(), total=len(asset_list_compressed)):
        record = row.to_dict()

        match = None
        if record["ECO Eligibility"] != "Not Eligible":
            match = range_pattern.search(record["matching_address"])

        if match is None:
            expanded_rows.append(record)
            continue

        start_number = int(match.group(1))
        end_number = int(match.group(2))
        if end_number < start_number:
            # A descending range would expand to nothing and silently lose the asset,
            # so keep the row unexpanded instead
            expanded_rows.append(record)
            continue

        rest_of_address = match.group(3)
        expanded_rows.extend(
            {**record, "matching_address": f"{house_number} {rest_of_address}"}
            for house_number in range(start_number, end_number + 1)
        )

    asset_list = pd.DataFrame(expanded_rows)

    # Add in asset_list_row_id
    asset_list["asset_list_row_id"] = [f"HA17{i}" for i in range(len(asset_list))]

    # Add on house number
    asset_list = self.create_asset_list_house_no(ha_name="HA17", asset_list=asset_list)

    return asset_list
def load_asset_list(self, filepath, ha_name):
workbook = openpyxl.load_workbook(filepath)
asset_sheetname = self.get_asset_sheetname(workbook)
if ha_name == "HA17":
asset_list = self.prepare_ha17(workbook)
return asset_list, pd.DataFrame(), pd.DataFrame(), pd.DataFrame()
else:
asset_sheetname = self.get_asset_sheetname(workbook)
asset_sheet = workbook[asset_sheetname]
asset_sheet_colnames = [cell.value for cell in asset_sheet[1]]
@ -977,6 +1112,9 @@ class DataLoader:
if ha_name == "HA54":
asset_sheet_colnames[10] = "matching_postcode"
if ha_name == "HA5":
asset_sheet_colnames[2] = "matching_postcode"
rows_data = []
for row in asset_sheet.iter_rows(min_row=2, values_only=False):
@ -2555,6 +2693,10 @@ class DataLoader:
return survey_list
@staticmethod
def correct_ha5_survey_list(survey_list):
return survey_list
@staticmethod
def levenstein_match(matching_string, df):
match_to = df["matching_address"].tolist()
@ -3431,6 +3573,9 @@ class DataLoader:
def get_property_type_and_built_form(property_meta, ha_name):
if ha_name in ["HA44"]:
return None, None
if ha_name == "HA1":
property_type = property_meta["Asset Type"]
# We correct a small error
@ -3499,6 +3644,8 @@ def get_property_type_and_built_form(property_meta, ha_name):
config = PROPERTY_TYPE_LOOKUP[ha_name][property_meta["Type"]]
property_type = config.get("property-type")
built_form = config.get("built-form")
elif ha_name == "HA17":
return property_meta["property_type"], None
elif ha_name == "HA18":
property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Asset Type"].strip())
built_form = None
@ -3580,6 +3727,9 @@ def get_property_type_and_built_form(property_meta, ha_name):
elif ha_name == "HA42":
property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Dwelling use/type"].strip())
built_form = None
elif ha_name == "HA45":
property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Property type"].strip())
built_form = None
elif ha_name == "HA48":
property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Property Type"].strip())
built_form = None
@ -3589,6 +3739,14 @@ def get_property_type_and_built_form(property_meta, ha_name):
elif ha_name == "HA50":
property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Property Type"].strip())
built_form = None
elif ha_name == "HA51":
property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Asset Type"].strip())
built_form = None
elif ha_name == "HA52":
if property_meta["Property Type"] is None:
return None, None
property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Property Type"].strip())
built_form = None
elif ha_name == "HA54":
property_type = property_meta["Property Type"]
built_form = None
@ -5806,9 +5964,9 @@ def fml_data_pull(loader):
"HA7", "HA14", "HA25", "HA39", "HA16", "HA28", "HA13",
"HA50", "HA24", "HA15", "HA32", "HA28", "HA6", "HA1", "HA107", "HA41", "HA48", "HA2", "HA63", "HA12",
"HA117", "HA35", "HA34", "HA56", "HA19", "HA18", "HA9", "HA27", "HA30", "HA31", "HA54", "HA49",
'HA8', 'HA11', 'HA21', 'HA37', 'HA42',
'HA8', 'HA11', 'HA21', 'HA37', 'HA42', 'HA44', 'HA45', 'HA51', 'HA52',
# NEW - add property type
'HA44', 'HA45', 'HA51', 'HA52'
"HA17"
]
# Can't pull from EPC database because it's based in Scotland
@ -5905,7 +6063,7 @@ def fml_analysis(loader):
"HA7", "HA14", "HA25", "HA39", "HA16", "HA28", "HA13",
"HA50", "HA24", "HA15", "HA32", "HA28", "HA6", "HA1", "HA107", "HA41", "HA48", "HA2", "HA63", "HA12",
"HA117", "HA35", "HA34", "HA56", "HA19", "HA18", "HA9", "HA27", "HA30", "HA31", "HA54", "HA49",
'HA8', 'HA11', 'HA21', 'HA37', 'HA42',
'HA8', 'HA11', 'HA21', 'HA37', 'HA42', 'HA44', 'HA45', 'HA51', 'HA52'
]
no_ciga_cavity_descriptions = [
@ -6320,11 +6478,11 @@ def app():
# Add in:
priority_has = [
"HA1", "HA2", "HA6", "HA7", "HA9", "HA12", "HA13", "HA14", "HA15", "HA16", "HA18", "HA19", "HA24", "HA25",
"HA27", "HA28", "HA30", "HA31", "HA32", "HA34", "HA35", "HA39", "HA41", "HA48", "HA49", "HA50", "HA54", "HA56",
"HA63", "HA107", "HA117", "HA8", "HA11", "HA21", "HA37", "HA42",
"HA1", "HA2", "HA5", "HA6", "HA7", "HA9", "HA12", "HA13", "HA14", "HA15", "HA16", "HA18", "HA19", "HA24",
"HA25", "HA27", "HA28", "HA30", "HA31", "HA32", "HA34", "HA35", "HA39", "HA41", "HA48", "HA49", "HA50", "HA54",
"HA56", "HA63", "HA107", "HA117", "HA8", "HA11", "HA21", "HA37", "HA42",
# Added as of March 18th
"HA44", "HA45", "HA51", "HA52",
"HA44", "HA45", "HA51", "HA52", "HA17",
# New HAS
"HAXX", "HAXXX",
]
@ -6332,7 +6490,10 @@ def app():
# back on this], 28 [DONE], 41 [DONE], 50 [DONE], 48 [DONE], 2 [DONE], 63 [DONE], 12 [DONE], 117 [DONE], 13 [DONE],
# 35 [DONE], 56 [DONE], 19 [DONE], 18 [DONE], 9 [DONE], 27 [DONE], 34 [DONE], 30 [DONE], 31 [DONE], 54 [DONE]
#
# Consider for ECO4: HA 70 - have to merge ECO3 list though, HA17 has LOTs of assets, but the asset list is a mess
# Consider for ECO4:
# HA 70 - have to merge ECO3 list though,
# HA17 has LOTs of assets, but the asset list is a mess
# HA53 but has EPCs done
# Consider for GBIS:
# Ignore for now:
# 38 [problematic, but no ECO4], 10 problematic (no eligibility), 20 has barely any in