mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
formatting asset list
This commit is contained in:
parent
d51e1c913d
commit
0e5c343319
1 changed file with 34 additions and 24 deletions
|
|
@ -26,33 +26,31 @@ load_dotenv(ENV_FILE)
|
|||
|
||||
|
||||
def load_data():
|
||||
workbook = openpyxl.load_workbook('etl/eligibility/ha_15_32/HESTIA - HA 25 ASSET LIST.xlsx')
|
||||
workbook = openpyxl.load_workbook('etl/eligibility/ha_15_32/HESTIA - HA 25 ASSET LIST.xlsx', data_only=True)
|
||||
sheet = workbook.active
|
||||
# There are no colnames so we create them ourselves
|
||||
sheet_colnames = [
|
||||
"property_reference",
|
||||
"address",
|
||||
"tenure",
|
||||
"property_type",
|
||||
"unknown1",
|
||||
"year_built",
|
||||
"unknown2",
|
||||
"heating_type",
|
||||
"wall_type",
|
||||
"roof_type",
|
||||
"postcode"
|
||||
]
|
||||
|
||||
rows_data = []
|
||||
rows_colors = []
|
||||
for row in sheet.iter_rows(min_row=1, values_only=False): # Assuming the first row is headers
|
||||
row_data = [cell.value for cell in row] # This will get you the cell values
|
||||
row_color = row[0].fill.start_color.index if row[0].fill.start_color.index != '00000000' else None
|
||||
# row_color = COLOR_INDEX[row_color]
|
||||
for row in sheet.iter_rows(min_row=1, values_only=True): # use values_only=True to get values
|
||||
|
||||
row_data = list(row) # No need for comprehension, values_only=True returns a tuple of values
|
||||
rows_data.append(row_data)
|
||||
|
||||
# Headers are on the final row, so pop that row off rows_data and keep it as the header list
|
||||
headers = rows_data.pop()
|
||||
# The postcode header is None, so we replace it with "postcode"
|
||||
headers[-1] = "postcode"
|
||||
|
||||
# Handle colours separately
|
||||
for row in sheet.iter_rows(min_row=1, values_only=False):
|
||||
# Assume first cell color is indicative of entire row
|
||||
row_color = row[0].fill.start_color.index if row[0].fill.start_color.index != '00000000' else None
|
||||
rows_colors.append(row_color)
|
||||
|
||||
asset_list = pd.DataFrame(rows_data, columns=sheet_colnames)
|
||||
# Remove the final row of colours, which is the header
|
||||
rows_colors.pop()
|
||||
|
||||
asset_list = pd.DataFrame(rows_data, columns=headers)
|
||||
asset_list['row_color'] = rows_colors
|
||||
|
||||
asset_list["row_colour_name"] = np.where(
|
||||
|
|
@ -65,6 +63,19 @@ def load_data():
|
|||
np.where(asset_list["row_colour_name"] == "green", "identified potential eco", "maybe in the future")
|
||||
)
|
||||
|
||||
asset_list["address"] = asset_list["T1_Address"].copy().str.lower()
|
||||
asset_list["address"] = asset_list["address"].str.replace("flat", "")
|
||||
asset_list["address"] = asset_list["address"].str.strip()
|
||||
|
||||
split_addresses = asset_list['address'].str.split(' ', expand=True)
|
||||
split_addresses.columns = ['HouseNo', 'address2', 'address3', 'address4', 'address5', 'address6', 'address7',
|
||||
'address8',
|
||||
'address9', 'address10', 'address11', 'address12', 'address13']
|
||||
split_addresses["HouseNo"] = split_addresses["HouseNo"].str.replace(";", "")
|
||||
|
||||
# We could re-concatenate but we only care about HouseNo for the moment
|
||||
asset_list = pd.concat([asset_list, split_addresses[["HouseNo"]]], axis=1)
|
||||
|
||||
# We analyse the historical ECO3 survey list
|
||||
eco3_survey_workbook = openpyxl.load_workbook(f'etl/eligibility/ha_15_32/HESTIA - HA 25 ECO3 SURVEY LIST.xlsx')
|
||||
eco3_survey_sheet = eco3_survey_workbook["CAVITY"]
|
||||
|
|
@ -154,13 +165,12 @@ def load_data():
|
|||
house_number = house_number.lower()
|
||||
|
||||
# Filter on the first line of the address
|
||||
df = asset_list[asset_list["Address"].str.lower().str.contains(row["Street / Block Name"].lower())].copy()
|
||||
# df = df[df["Postcode"].str.lower().str.contains(row["Post Code"].lower())]
|
||||
df = df[df["Address"].str.lower().str.contains(str(house_number))]
|
||||
df = asset_list[asset_list["T1_Address"].str.lower().str.contains(row["ADDRESS 1"].lower())].copy()
|
||||
df = df[df["T1_Address"].str.lower().str.contains(str(house_number))]
|
||||
if df.shape[0] != 1:
|
||||
df = df[df["HouseNo"] == str(house_number)]
|
||||
if df.shape[0] != 1:
|
||||
df = df[df["Postcode"].str.lower().str.contains(row["Post Code"].lower())]
|
||||
df = df[df["postcode"].str.lower().str.contains(row["POSTCODE"].lower())]
|
||||
if df.shape[0] != 1:
|
||||
print(row["Street / Block Name"])
|
||||
print(house_number)
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue