mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
29% through matching eco3 ha25
This commit is contained in:
parent
067a66c1b1
commit
5c3f6320dd
1 changed files with 117 additions and 19 deletions
|
|
@ -183,7 +183,7 @@ class DataLoader:
|
|||
|
||||
def create_asset_list_matching_address(self, ha_name, asset_list):
|
||||
|
||||
if ha_name in ["HA1", "HA6", "HA16", "HA24", "HA25"]:
|
||||
if ha_name in ["HA1", "HA6", "HA16", "HA24"]:
|
||||
asset_list["matching_address"] = asset_list[
|
||||
self.COLUMN_CONFIG[ha_name]["address"]
|
||||
].astype(str).str.lower().str.strip()
|
||||
|
|
@ -214,6 +214,14 @@ class DataLoader:
|
|||
asset_list["Postcode"].astype(str).str.lower().str.strip()
|
||||
)
|
||||
asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
|
||||
elif ha_name == "HA25":
|
||||
asset_list["matching_address"] = asset_list[
|
||||
self.COLUMN_CONFIG[ha_name]["address"]
|
||||
].astype(str).str.lower().str.strip()
|
||||
|
||||
asset_list["matching_postcode"] = asset_list['matching_address'].apply(
|
||||
lambda x: ' '.join(x.split()[-2:]) if pd.notnull(x) else x
|
||||
)
|
||||
elif ha_name == "HA28":
|
||||
asset_list["matching_address"] = (
|
||||
asset_list["House Number"].astype(str).str.lower().str.strip() + ", " +
|
||||
|
|
@ -352,6 +360,9 @@ class DataLoader:
|
|||
house_numbers = house_numbers.iloc[:, 0:1]
|
||||
house_numbers.columns = ['HouseNo']
|
||||
|
||||
# Remove trailing punctuation such as , or ;
|
||||
house_numbers["HouseNo"] = house_numbers["HouseNo"].str.rstrip(',;')
|
||||
|
||||
asset_list = pd.concat([asset_list, house_numbers[["HouseNo"]]], axis=1)
|
||||
|
||||
return asset_list
|
||||
|
|
@ -425,27 +436,16 @@ class DataLoader:
|
|||
workbook = openpyxl.load_workbook(filepath)
|
||||
asset_sheetname = self.get_asset_sheetname(workbook)
|
||||
|
||||
# TODO: TEMP
|
||||
sheetnames_lower = [x.lower() for x in workbook.sheetnames]
|
||||
if any("eco3" in x for x in sheetnames_lower):
|
||||
raise Exception("REMOVE ME")
|
||||
|
||||
asset_sheet = workbook[asset_sheetname]
|
||||
asset_sheet_colnames = [cell.value for cell in asset_sheet[1]]
|
||||
if ha_name == "HA25":
|
||||
asset_sheet_colnames[11] = "matching_postcode"
|
||||
|
||||
values_only = not ha_name != "HA25"
|
||||
|
||||
rows_data = []
|
||||
if not values_only:
|
||||
for row in asset_sheet.iter_rows(min_row=2, values_only=values_only):
|
||||
row_data = [cell.value for cell in row] # This will get you the cell values
|
||||
rows_data.append(row_data)
|
||||
else:
|
||||
for row in asset_sheet.iter_rows(min_row=2, values_only=values_only): # use values_only=True to get values
|
||||
row_data = list(row) # No need for comprehension, values_only=True returns a tuple of values
|
||||
rows_data.append(row_data)
|
||||
|
||||
for row in asset_sheet.iter_rows(min_row=2, values_only=False):
|
||||
row_data = [cell.value for cell in row] # This will get you the cell values
|
||||
rows_data.append(row_data)
|
||||
|
||||
asset_list = pd.DataFrame(rows_data, columns=asset_sheet_colnames)
|
||||
|
||||
|
|
@ -477,6 +477,29 @@ class DataLoader:
|
|||
if ha_name in ["HA1", "HA25"]:
|
||||
return asset_list, pd.DataFrame(), pd.DataFrame()
|
||||
|
||||
# If we have ECO3 surveys, we need to match them, because any properties treated under ECO3 won't be
|
||||
# suitable under ECO4, since their walls will be filled
|
||||
eco3_list = pd.DataFrame()
|
||||
sheetnames_lower = [x.lower() for x in workbook.sheetnames]
|
||||
eco3_sheetname_index = [i for i, x in enumerate(sheetnames_lower) if "eco3" in x.replace(" ", "")]
|
||||
if eco3_sheetname_index:
|
||||
eco3_sheetname = workbook.sheetnames[eco3_sheetname_index[0]]
|
||||
eco3_sheet = workbook[eco3_sheetname]
|
||||
eco3_rows = []
|
||||
for row in eco3_sheet.iter_rows(min_row=2, values_only=False): # Assuming the first row is headers
|
||||
row_data = [cell.value for cell in row] # This will get you the cell values
|
||||
eco3_rows.append(row_data)
|
||||
|
||||
eco3_list = pd.DataFrame(eco3_rows, columns=[cell.value for cell in eco3_sheet[1]])
|
||||
# Remove columns that are None
|
||||
eco3_list = eco3_list.loc[:, eco3_list.columns.notnull()]
|
||||
# Remove rows that are completely empty
|
||||
eco3_list = eco3_list.loc[eco3_list.loc[:, eco3_list.columns].notnull().any(axis=1)]
|
||||
eco3_list["eco3_list_row_id"] = [ha_name + "_Eco3_" + str(i) for i in range(0, len(eco3_list))]
|
||||
|
||||
# Perform the eco3 merge
|
||||
eco3_list = self.merge_eco3_to_assets(asset_list, eco3_list, ha_name)
|
||||
|
||||
# We check if there is a survey list
|
||||
survey_sheetname = self.get_survey_sheetname(workbook)
|
||||
survey_sheet = workbook[survey_sheetname]
|
||||
|
|
@ -518,7 +541,7 @@ class DataLoader:
|
|||
ciga_list = self.dedupe_ciga_list(ciga_list)
|
||||
ciga_list = self.merge_ciga_to_assets(asset_list, ciga_list, ha_name)
|
||||
|
||||
return asset_list, survey_list, ciga_list
|
||||
return asset_list, survey_list, ciga_list, eco3_list
|
||||
|
||||
@staticmethod
|
||||
def correct_ha6_asset_list(asset_list):
|
||||
|
|
@ -1433,6 +1456,79 @@ class DataLoader:
|
|||
|
||||
return survey_list
|
||||
|
||||
def merge_eco3_to_assets(self, asset_list, eco3_list, ha_name):
    """
    Match every ECO3 row to a single asset row using postcode and house number.

    Rows are first narrowed by postcode, then by house number appearing in the
    asset's matching address. If several assets remain, the asset "HouseNo"
    column is used as an exact tie-breaker.

    :param asset_list: asset DataFrame. The columns read here are
        "matching_postcode", "matching_address", "HouseNo" and
        "asset_list_row_id".
    :param eco3_list: ECO3 DataFrame. The columns read here are "Post Code",
        "NO ", "Street / Block Name" and "eco3_list_row_id". May be empty.
    :param ha_name: housing association identifier; "HA25" enables the
        up-front removal of ECO3 postcodes that are absent from the asset list.
    :return: the filtered ECO3 rows with an added "asset_list_row_id" column,
        which is null for rows that could not be matched. An empty
        ``eco3_list`` is returned unchanged.
    :raises ValueError: if an ECO3 row still matches zero or several assets
        after the "HouseNo" tie-breaker.
    """
    # When the workbook has no ECO3 sheet the caller hands us an empty frame
    # without any of the expected columns, so there is nothing to match.
    if eco3_list.empty:
        return eco3_list

    # Drop rows with a missing postcode
    eco3_list = eco3_list[eco3_list["Post Code"].notnull()]

    # Normalise the postcodes once so that every comparison below sees the
    # same text (lower-cased, surrounding whitespace removed).
    eco3_postcodes = eco3_list["Post Code"].astype(str).str.lower().str.strip()

    # NEADS DRIVE, postcode bs305dt, is not found in the asset list.
    # Blank postcodes are treated like missing ones: an empty search string
    # would otherwise "match" every asset.
    usable = (eco3_postcodes != "bs305dt") & (eco3_postcodes != "")
    eco3_list = eco3_list[usable]
    eco3_postcodes = eco3_postcodes[usable]

    if ha_name == "HA25":
        # Build the lookup once instead of rescanning the asset column for
        # every ECO3 row.
        asset_postcodes = set(asset_list["matching_postcode"])
        in_asset_list = eco3_postcodes.isin(asset_postcodes)
        eco3_list = eco3_list[in_asset_list]
        eco3_postcodes = eco3_postcodes[in_asset_list]

    matching_lookup = []
    eco3_rows = zip(eco3_postcodes, eco3_list.iterrows())
    for postcode, (_, row) in tqdm(eco3_rows, total=len(eco3_list)):

        # Literal (non-regex) search. For HAs other than HA25 no postcode
        # pre-check has happened, so this selection can be empty.
        candidates = asset_list[
            asset_list["matching_postcode"].str.contains(postcode, regex=False, na=False)
        ]

        house_number = row["NO "]
        if pd.isnull(house_number):
            # No house number to match on; the row stays unmatched.
            continue
        if isinstance(house_number, float) and house_number.is_integer():
            # A numeric column containing blanks is upcast to float by pandas;
            # render 12.0 as "12" so it can be found in the address text.
            house_number = int(house_number)
        house_number = str(house_number).lower().strip()

        addresses = candidates["matching_address"]
        if not addresses.str.contains(house_number, regex=False, na=False).any():
            if "flat" in house_number:
                house_number = house_number.split("flat")[1].strip()

                # We check if we had an instance of flat x, y
                if "," in house_number:
                    house_number = house_number.split(",")[0].strip()

                # We may also have a space for an instance of flat x y
                if " " in house_number:
                    house_number = house_number.split(" ")[0].strip()

        if not house_number:
            # Nothing left to search for (e.g. the cell held only "flat").
            continue

        # NOTE(review): this is a substring test, so "1" is also found inside
        # "11" or inside a postcode embedded in the address - confirm whether
        # a stricter token match is wanted.
        candidates = candidates[
            addresses.str.contains(house_number, regex=False, na=False)
        ]

        if candidates.empty:
            continue

        if candidates.shape[0] != 1:
            # Several assets share the postcode and contain the number:
            # fall back to an exact comparison against the parsed house number.
            candidates = candidates[
                candidates["HouseNo"].astype(str).str.lower() == house_number
            ]

        if candidates.shape[0] != 1:
            print(row["Street / Block Name"])
            print(house_number)
            print(row["Post Code"])
            raise ValueError("Investigate")

        matching_lookup.append(
            {
                "eco3_list_row_id": row["eco3_list_row_id"],
                "asset_list_row_id": candidates["asset_list_row_id"].values[0],
            }
        )

    # Explicit columns keep the merge valid even when nothing was matched.
    matching_lookup = pd.DataFrame(
        matching_lookup, columns=["eco3_list_row_id", "asset_list_row_id"]
    )

    # Left merge: unmatched ECO3 rows are kept with a null asset_list_row_id.
    return eco3_list.merge(matching_lookup, how="left", on="eco3_list_row_id")
|
||||
|
||||
@staticmethod
|
||||
def extract_streetname(address, house_number=None, postcode=None):
|
||||
"""
|
||||
|
|
@ -4008,11 +4104,13 @@ def app():
|
|||
# Add in: "HA25"
|
||||
# TODO: Remove ECO3 sales from HA25
|
||||
priority_has = [
|
||||
"HA1", "HA6", "HA7", "HA14", "HA15", "HA16", "HA24", "HA28", "HA32", "HA38", "HA39", "HA107",
|
||||
"HA1", "HA6", "HA7", "HA14", "HA15", "HA16", "HA20", "HA24", "HA25", "HA28", "HA32", "HA39", "HA107",
|
||||
]
|
||||
# Next HAs to do: 15[DONE], 32 [DONE], 33 [Input format is 4 parts and no eco4 jobs identified - come back on this],
|
||||
# Then: 28 [DONE],
|
||||
# 38, 41, 10, 14, 20, 48
|
||||
# 41, 10, 14 [DONE], 20, 48, 50
|
||||
# 38[problematic, but no ECO4]
|
||||
# TODO - do 50 and 25 next
|
||||
# Filter down the directories to only the priority HAs
|
||||
directories = [d for d in directories if d.split("/")[2] in priority_has]
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue