From b39e9c989943f2a6752935d72b21e5a7dea8b361 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 15 Feb 2024 10:57:57 +0000 Subject: [PATCH] working on updating the HA code for new file formats --- .idea/Model.iml | 2 +- .idea/misc.xml | 2 +- backend/app/plan/router.py | 5 - .../ha_15_32/ha_analysis_batch_3.py | 171 +++++++++--------- 4 files changed, 84 insertions(+), 96 deletions(-) diff --git a/.idea/Model.iml b/.idea/Model.iml index 4413bb06..b0f9c00d 100644 --- a/.idea/Model.iml +++ b/.idea/Model.iml @@ -7,7 +7,7 @@ - + diff --git a/.idea/misc.xml b/.idea/misc.xml index 6f308057..1122b380 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -3,7 +3,7 @@ - + diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index 08d3f048..3799d43f 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -158,11 +158,6 @@ async def trigger_plan(body: PlanTriggerRequest): property_recommendations, property_representative_recommendations ) - p.recommendations_scoring_data[0]["id"] - p.recommendations_scoring_data[0]["walls_thermal_transmittance"] - p.recommendations_scoring_data[0]["walls_thermal_transmittance_ending"] - p.recommendations_scoring_data[0]["walls_thermal_transmittance_ending"] - recommendations_scoring_data.extend(p.recommendations_scoring_data) logger.info("Preparing data for scoring in sap change api") diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index e94babcd..5ed7d6f2 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -24,27 +24,6 @@ load_dotenv(ENV_FILE) class DataLoader: - COLOUR_CONFIG = { - "ha_1": { - "asset_list": {"red": "FFFF0000", "green": "FF00B050"}, - }, - "ha_6": { - "asset_list": {"red": "FFFF0000", "green": "FF00B050"}, - "survey_list": { - "green": "FF92D050", "purple": "FF7030A0", "red": "FFFF0000", "blue": "FF00B0F0" - } - }, - "ha_14": { - "asset_list": {"red": "FFFF0000", "green": "FF00B050"}, - }, - "ha_39": { - "asset_list": {"red": "FFFF0000", "green": "FF00B050"}, - }, - "ha_107": { - "asset_list": {"red": "FFFF0000", "green": "FF00B050"}, - } - } - MIN_ROWS = { "ha_1": 2, "ha_6": 2, @@ -53,12 +32,87 @@ class DataLoader: "ha_107": 2, } + COLUMN_CONFIG = { + "ha_1": { + "address": "Address", + "postcode": "Address - Postcode" + } + } + def __init__(self, files, use_cache): self.files = files self.use_cache = use_cache self.data = {} + def create_asset_list_matching_address(self, ha_name, asset_list): + + if ha_name in ["ha_1", "ha_6"]: + asset_list["matching_address"] = asset_list[ + self.COLUMN_CONFIG[ha_name]["address"] + ].str.lower().str.strip() + asset_list["matching_postcode"] = asset_list[ + self.COLUMN_CONFIG[ha_name]["postcode"] + ].str.lower().str.strip() + elif ha_name == "ha_14": + # Create matching_address by concatenating Address 1, Address 2, Address 3, Address 4, Postcode + asset_list["matching_address"] = asset_list["Address 1"].str.lower().str.strip() + ", " + \ + asset_list["Address 2"].str.lower().str.strip() + ", " + \ + asset_list["Address 3"].str.lower().str.strip() + ", " + \ + asset_list["Address 4"].str.lower().str.strip() + ", " + \ + asset_list["Postcode"].str.lower().str.strip() + asset_list["matching_postcode"] = asset_list["Postcode"].str.lower().str.strip() + elif ha_name == "ha_39": + # Create matching_address by concatenating add_1, add_2, add_3, add_4, add_5, post_code + asset_list["matching_address"] = asset_list["add_1"].astype(str).str.lower().str.strip() + ", " + \ + asset_list["add_2"].astype(str).str.lower().str.strip() + ", " + \ + asset_list["add_3"].astype(str).str.lower().str.strip() + ", " + \ + asset_list["add_4"].astype(str).str.lower().str.strip() + ", " + \ + asset_list["add_5"].astype(str).str.lower().str.strip() + ", " + \ + asset_list["post_code"].astype(str).str.lower().str.strip() + asset_list["matching_postcode"] = asset_list["post_code"].str.lower().str.strip() + elif ha_name == "ha_107": + # Create matching_address by concatenating House No, Street, Town, District, Postcode + asset_list["matching_address"] = asset_list["House No"].astype(str).str.lower().str.strip() + ", " + \ + asset_list["Street"].str.lower().str.strip() + ", " + \ + asset_list["Town"].str.lower().str.strip() + ", " + \ + asset_list["District"].str.lower().str.strip() + ", " + \ + asset_list["Postcode"].str.lower().str.strip() + asset_list["matching_postcode"] = asset_list["Postcode"].str.lower().str.strip() + else: + raise NotImplementedError("implement me") + + return asset_list + + def append_asset_list_built_form(self, ha_name, asset_list): + + # Finally, we process property_type or built form, where needed + if ha_name == "ha_6": + asset_list["built_form"] = asset_list["Property Type"].apply(self.identify_built_form_ha6) + + return asset_list + + @staticmethod + def create_asset_list_house_no(ha_name, asset_list): + """ + This function will append the House number onto the asset list + :return: + """ + + if ha_name in ["ha_107"]: + asset_list["HouseNo"] = asset_list["House No"].copy() + else: + split_addresses = asset_list['matching_address'].str.split(',', expand=True) + house_numbers = split_addresses[0].str.split(' ', expand=True) + # THe first column should be HouseNo - we aren't interested in the other columns, but we don't know how + # many columns there might be + house_numbers = house_numbers.iloc[:, 0:1] + house_numbers.columns = ['HouseNo'] + + asset_list = pd.concat([asset_list, house_numbers[["HouseNo"]]], axis=1) + + return asset_list + def load_asset_list(self, file_path, ha_name, sheet_name=None): workbook = openpyxl.load_workbook(file_path) if sheet_name is not None: @@ -87,74 +141,15 @@ class DataLoader: # Remove entirely empty roww - consider all rows apart from row_color asset_list = asset_list.loc[asset_list.loc[:, asset_list.columns != 'row_color'].notnull().any(axis=1)] - asset_list_colours = self.COLOUR_CONFIG[ha_name]["asset_list"] - - asset_list["row_colour_name"] = np.where( - asset_list["row_color"] == asset_list_colours["red"], "red", - np.where(asset_list["row_color"] == asset_list_colours["green"], "green", "yellow") - ) - - asset_list["row_meaning"] = np.where( - asset_list["row_colour_name"] == "red", "does not meet criteria", - np.where( - asset_list["row_colour_name"] == "green", "identified potential eco works (CWI)", "maybe in the future" - ) - ) - # Add in asset_list_row_id asset_list["asset_list_row_id"] = [ha_name + str(i) for i in range(0, len(asset_list))] - # Prepare the asset list - # Depending on the HA, we need to rename some columns - if ha_name == "ha_1": - asset_list["matching_address"] = asset_list["Address"].str.lower().str.strip() - asset_list["matching_postcode"] = asset_list["Address - Postcode"].str.lower().str.strip() - elif ha_name == "ha_6": - asset_list["matching_address"] = asset_list["propertyaddress"].str.lower().str.strip() - asset_list["matching_postcode"] = asset_list["Post Code"].str.lower().str.strip() - elif ha_name == "ha_14": - # Create matching_address by concatenating Address 1, Address 2, Address 3, Address 4, Postcode - asset_list["matching_address"] = asset_list["Address 1"].str.lower().str.strip() + ", " + \ - asset_list["Address 2"].str.lower().str.strip() + ", " + \ - asset_list["Address 3"].str.lower().str.strip() + ", " + \ - asset_list["Address 4"].str.lower().str.strip() + ", " + \ - asset_list["Postcode"].str.lower().str.strip() - asset_list["matching_postcode"] = asset_list["Postcode"].str.lower().str.strip() - elif ha_name == "ha_39": - # Create matching_address by concatenating add_1, add_2, add_3, add_4, add_5, post_code - asset_list["matching_address"] = asset_list["add_1"].astype(str).str.lower().str.strip() + ", " + \ - asset_list["add_2"].astype(str).str.lower().str.strip() + ", " + \ - asset_list["add_3"].astype(str).str.lower().str.strip() + ", " + \ - asset_list["add_4"].astype(str).str.lower().str.strip() + ", " + \ - asset_list["add_5"].astype(str).str.lower().str.strip() + ", " + \ - asset_list["post_code"].astype(str).str.lower().str.strip() - asset_list["matching_postcode"] = asset_list["post_code"].str.lower().str.strip() - elif ha_name == "ha_107": - # Create matching_address by concatenating House No, Street, Town, District, Postcode - asset_list["matching_address"] = asset_list["House No"].astype(str).str.lower().str.strip() + ", " + \ - asset_list["Street"].str.lower().str.strip() + ", " + \ - asset_list["Town"].str.lower().str.strip() + ", " + \ - asset_list["District"].str.lower().str.strip() + ", " + \ - asset_list["Postcode"].str.lower().str.strip() - asset_list["matching_postcode"] = asset_list["Postcode"].str.lower().str.strip() - else: - raise NotImplementedError("implement me") + # Create matching address and matching postcode + asset_list = self.create_asset_list_matching_address(ha_name=ha_name, asset_list=asset_list) - if ha_name in ["ha_107"]: - asset_list["HouseNo"] = asset_list["House No"].copy() - else: - split_addresses = asset_list['matching_address'].str.split(',', expand=True) - house_numbers = split_addresses[0].str.split(' ', expand=True) - # THe first column should be HouseNo - we aren't interested in the other columns, but we don't know how - # many columns there might be - house_numbers = house_numbers.iloc[:, 0:1] - house_numbers.columns = ['HouseNo'] + asset_list = self.create_asset_list_house_no(ha_name=ha_name, asset_list=asset_list) - asset_list = pd.concat([asset_list, house_numbers[["HouseNo"]]], axis=1) - - # Finally, we process property_type or built form, where needed - if ha_name == "ha_6": - asset_list["built_form"] = asset_list["Property Type"].apply(self.identify_built_form_ha6) + asset_list = self.append_asset_list_built_form(ha_name=ha_name, asset_list=asset_list) return asset_list @@ -177,9 +172,7 @@ class DataLoader: survey_list = pd.DataFrame(survey_rows, columns=[cell.value for cell in survey_sheet[1]]) # Remove columns that are None survey_list = survey_list.loc[:, survey_list.columns.notnull()] - survey_list["row_colour"] = survey_colors - survey_list_colours = self.COLOUR_CONFIG[ha_name]["survey_list"] # The survey list has 4 possible colours: # PURPLE - Installer advised install complete and a complimentary post works EPC has been completed. @@ -1252,13 +1245,13 @@ def app(): :return: """ - use_cache = True + use_cache = False files = { "ha_1": { "asset_list": { - "filepath": "etl/eligibility/ha_15_32/HA 1 - ASSET LIST.xlsx", - "sheetname": "HA 1" + "filepath": "local_data/ha_data/HA1/ACCENT GROUP.xlsx", + "sheetname": "Energy data" } }, "ha_6": {