diff --git a/.idea/Model.iml b/.idea/Model.iml
index 4413bb06..b0f9c00d 100644
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@@ -7,7 +7,7 @@
-
+
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 6f308057..1122b380 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,7 +3,7 @@
-
+
diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index b4b82d0b..de2c0e6a 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -6907,3 +6907,81 @@ def app():
december_figures["ECO4 remaining"]
)
december_figures["ECO4 remaining"].sum()
+
+ # Adhoc - for UNITAS, stripping out additional surveys that have been completed
+ unitas_data = loader.data["HA50"].copy()
+ unitas_asset_list = unitas_data["asset_list"].copy()
+ unitas_survey_sheet = unitas_data["survey_list"].copy()
+ # We remove the surveyed properties from the asset sheet
+ unitas_survey_sheet = unitas_survey_sheet[~pd.isnull(unitas_survey_sheet["asset_list_row_id"])]
+ unitas_asset_list = unitas_asset_list.merge(
+ unitas_survey_sheet[["asset_list_row_id", "installation_status"]],
+ how="left",
+ on="asset_list_row_id"
+ )
+ unitas_asset_list = unitas_asset_list[pd.isnull(unitas_asset_list["installation_status"])]
+ unitas_asset_list = unitas_asset_list.drop(columns=["installation_status"])
+
+ # We read in the data for the further completed surveys
+ unitas_phase_1_workbook = openpyxl.load_workbook(
+ "local_data/ha_data/UNITAS ( STOKE) MASTER ROLLING SHEET UPDATED 8.4.24 K - no password.xlsx"
+ )
+ phase_1_worksheet = unitas_phase_1_workbook["ECO 4 - PHASE 1"]
+ phase_2_worksheet = unitas_phase_1_workbook["ECO4 - PHASE 2"]
+ phase1_colnames = [cell.value for cell in phase_1_worksheet[1]]
+ phase_1_rows_data = []
+ for row in phase_1_worksheet.iter_rows(min_row=2, values_only=False):
+ row_data = [cell.value for cell in row] # This will get you the cell values
+ phase_1_rows_data.append(row_data)
+
+ phase_1_surveys = pd.DataFrame(phase_1_rows_data, columns=phase1_colnames)
+
+ # Correct phase 1 surveys in the same fashion as the previous approach
+ phase_1_surveys = DataLoader.correct_ha50_survey_list(phase_1_surveys.copy())
+
+ # We check all phase 1 surveys are contained in the data we had before
+ additional = []
+ for _, row in tqdm(phase_1_surveys.iterrows(), total=len(phase_1_surveys)):
+ # We look for the entry in the old survey sheet:
+ # matched_uprn = unitas_survey_sheet[unitas_survey_sheet["EPR UPRN NUMBER"] == row["UPRN"]]
+ # if matched_uprn.shape[0] == 1:
+ # continue
+
+ matched_1 = unitas_survey_sheet[
+ (unitas_survey_sheet["Post Code"] == row["Post Code"]) &
+ (unitas_survey_sheet["NO."] == row["NO."])
+ ]
+
+ if matched_1.shape[0] == 1:
+ continue
+
+ matched_2 = unitas_survey_sheet[
+ (unitas_survey_sheet["Street / Block Name"] == row["Street / Block Name"]) &
+ (unitas_survey_sheet["NO."] == row["NO."])
+ ]
+
+ if matched_2.shape[0] == 1:
+ continue
+
+ additional.append(row.to_dict())
+ additional = pd.DataFrame(additional)
+
+ phase_2_rows_data = []
+ for row in phase_2_worksheet.iter_rows(min_row=2, values_only=False):
+ row_data = [cell.value for cell in row] # This will get you the cell values
+ phase_2_rows_data.append(row_data)
+
+ phase2_colnames = [cell.value for cell in phase_2_worksheet[1]]
+ phase_2_surveys = pd.DataFrame(phase_2_rows_data, columns=phase2_colnames)
+ # Drop all of the occurrences of "OFFICE USE ONLY" columns
+ phase_2_surveys = phase_2_surveys.drop(columns=[c for c in phase_2_surveys.columns if "OFFICE USE ONLY" in c])
+ common_columns = list({c for c in phase_2_surveys.columns if c in additional.columns})
+ additional_filtered = additional[common_columns]
+
+ further_unitas_completed_surveys = pd.concat(
+ [phase_2_surveys, additional_filtered],
+ axis=0,
+ ignore_index=True
+ )
+
+ # We match these back to the asset list