working on matching code for HA6 asset and survey lists

This commit is contained in:
Khalim Conn-Kowlessar 2024-01-22 11:41:52 +00:00
parent f1670498d1
commit cf9253d062

View file

@ -52,7 +52,7 @@ class DataLoader:
rows_data = []
rows_colors = []
for row in sheet.iter_rows(min_row=2, values_only=False): # Assuming the first row is headers
for row in tqdm(sheet.iter_rows(min_row=2, values_only=False)): # Assuming the first row is headers
row_data = [cell.value for cell in row] # This will get you the cell values
row_color = row[0].fill.start_color.index if row[0].fill.start_color.index != '00000000' else None
# row_color = COLOR_INDEX[row_color]
@ -137,7 +137,7 @@ class DataLoader:
)
# Add in survey_list_row_id
survey_list["survey_list_row_id"] = [ha_name + str(i) for i in range(0, len(survey_list))]
survey_list["survey_list_row_id"] = [ha_name + "_surveys_" + str(i) for i in range(0, len(survey_list))]
# We now do the matching between the asset list and the survey list.
# What we'll get from this is a lookup table from the asset list to the survey list
@ -150,14 +150,53 @@ class DataLoader:
return survey_list
def merge_ha_6(self, asset_list: pd.DataFrame, survey_list: pd.DataFrame) -> pd.DataFrame:
    """Match every HA6 survey-list row to exactly one asset-list row.

    Candidates are narrowed in stages and the narrowing stops as soon as
    exactly one asset row is left:

    1. the asset address contains the survey street / block name and the
       survey house number (plain substring match, case-insensitive);
    2. the first token of the asset address equals the house number;
    3. the asset postcode contains the survey postcode.

    Args:
        asset_list: Asset rows. Reads the ``propertyaddress``, ``Post Code``
            and ``asset_list_row_id`` columns. The frame is not modified;
            the normalised matching columns live on an internal copy.
        survey_list: Survey rows. Reads the ``NO.``, ``Street / Block Name``,
            ``Post Code`` and ``survey_list_row_id`` columns.

    Returns:
        A DataFrame with one row per survey row and the columns
        ``survey_list_row_id`` and ``asset_list_row_id``.

    Raises:
        ValueError: If a survey row cannot be narrowed down to exactly one
            asset row. The message carries the values that were searched for.
    """
    # ``Series.str.lower()`` returns a Series, so the whitespace trim has to go
    # through the ``.str`` accessor too (a bare ``.strip()`` is an AttributeError).
    matching_address = asset_list["propertyaddress"].str.lower().str.strip()
    matching_postcode = asset_list["Post Code"].str.lower().str.strip()

    # House number = first space-separated token of the first comma-separated
    # address part. Taking element 0 works for any number of commas / spaces,
    # unlike expanding into a fixed list of column names.
    first_address_line = matching_address.str.split(",").str[0]
    house_no = first_address_line.str.split(" ").str[0]

    # ``assign`` returns a new frame, leaving the caller's asset list untouched.
    assets = asset_list.assign(
        matching_address=matching_address,
        matching_postcode=matching_postcode,
        HouseNo=house_no,
    )

    matching_lookup = []
    for _, row in tqdm(survey_list.iterrows(), total=len(survey_list)):
        house_number = row["NO."]
        if isinstance(house_number, str):
            house_number = house_number.lower().strip()
        elif isinstance(house_number, float) and house_number.is_integer():
            # A numeric column holding blanks is read as float; 12.0 must
            # still be searched for as "12".
            house_number = int(house_number)
        house_number = str(house_number)
        street = row["Street / Block Name"].lower().strip()

        # Literal (non-regex) matching so characters such as "." or "(" in a
        # street name are not interpreted as a pattern; ``na=False`` keeps the
        # mask boolean when an asset address is missing.
        candidates = assets[
            assets["matching_address"].str.contains(street, regex=False, na=False)
        ]
        candidates = candidates[
            candidates["matching_address"].str.contains(house_number, regex=False, na=False)
        ]
        if len(candidates) != 1:
            candidates = candidates[candidates["HouseNo"] == house_number]
        if len(candidates) != 1:
            postcode = row["Post Code"].lower().strip()
            candidates = candidates[
                candidates["matching_postcode"].str.contains(postcode, regex=False, na=False)
            ]
        if len(candidates) != 1:
            raise ValueError(
                "Investigate: expected exactly one asset match but found "
                f"{len(candidates)} for survey row {row['survey_list_row_id']!r} "
                f"(street={row['Street / Block Name']!r}, "
                f"house number={house_number!r}, "
                f"postcode={row['Post Code']!r})"
            )

        matching_lookup.append(
            {
                "survey_list_row_id": row["survey_list_row_id"],
                "asset_list_row_id": candidates["asset_list_row_id"].values[0],
            }
        )

    # Hand the lookup back to the caller instead of discarding it.
    return pd.DataFrame(
        matching_lookup,
        columns=["survey_list_row_id", "asset_list_row_id"],
    )
def load(self):
data = {}
for ha_name, file_config in self.files.items():
# Load asset list
# logger.info("LOading asset list for {}".format(ha_name))
logger.info("Loading asset list for {}".format(ha_name))
asset_list = self.load_asset_list(
file_path=file_config["asset_list"]["filepath"],
ha_name=ha_name,
@ -165,6 +204,7 @@ class DataLoader:
)
if file_config.get("survey_list"):
logger.info("Loading survey list for {}".format(ha_name))
survey_list = self.load_survey_list(
file_path=file_config["survey_list"]["filepath"],
ha_name=ha_name,
@ -209,3 +249,4 @@ def app():
}
loader = DataLoader(files)
loader.load()