From 0142e6fe5fcbcffc836bc139df48cf31e77545f1 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 8 Apr 2024 15:29:52 +0100
Subject: [PATCH] wip matching completed surveys back to the asset list

---
 .idea/Model.iml                               |  2 +-
 .idea/misc.xml                                |  2 +-
 .../ha_15_32/ha_analysis_batch_3.py           | 78 +++++++++++++++++++
 3 files changed, 80 insertions(+), 2 deletions(-)
diff --git a/.idea/Model.iml b/.idea/Model.iml
index 4413bb06..b0f9c00d 100644
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@@ -7,7 +7,7 @@
       <sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
       <sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
     </content>
-    <orderEntry type="jdk" jdkName="Python 3.10 (backend)" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Python 3.10 (model_data)" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="PyNamespacePackagesService">
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 6f308057..1122b380 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,7 +3,7 @@
   <component name="Black">
     <option name="sdkName" value="Python 3.10 (backend)" />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (backend)" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (model_data)" project-jdk-type="Python SDK" />
   <component name="PythonCompatibilityInspectionAdvertiser">
     <option name="version" value="3" />
   </component>
diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index b4b82d0b..de2c0e6a 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -6907,3 +6907,81 @@ def app():
         december_figures["ECO4 remaining"]
     )
     december_figures["ECO4 remaining"].sum()
+
+    # Adhoc - for UNITAS, stripping out additional surveys that have been completed
+    unitas_data = loader.data["HA50"].copy()
+    unitas_asset_list = unitas_data["asset_list"].copy()
+    unitas_survey_sheet = unitas_data["survey_list"].copy()
+    # We remove the surveyed properties from the asset sheet
+    unitas_survey_sheet = unitas_survey_sheet[~pd.isnull(unitas_survey_sheet["asset_list_row_id"])]
+    unitas_asset_list = unitas_asset_list.merge(
+        unitas_survey_sheet[["asset_list_row_id", "installation_status"]],
+        how="left",
+        on="asset_list_row_id"
+    )
+    unitas_asset_list = unitas_asset_list[pd.isnull(unitas_asset_list["installation_status"])]
+    unitas_asset_list = unitas_asset_list.drop(columns=["installation_status"])
+
+    # We read in the data for the further completed surveys
+    unitas_phase_1_workbook = openpyxl.load_workbook(
+        "local_data/ha_data/UNITAS ( STOKE) MASTER ROLLING SHEET UPDATED 8.4.24 K - no password.xlsx"
+    )
+    phase_1_worksheet = unitas_phase_1_workbook["ECO 4 - PHASE 1"]
+    phase_2_worksheet = unitas_phase_1_workbook["ECO4 - PHASE 2"]
+    phase1_colnames = [cell.value for cell in phase_1_worksheet[1]]
+    phase_1_rows_data = []
+    for row in phase_1_worksheet.iter_rows(min_row=2, values_only=False):
+        row_data = [cell.value for cell in row]  # This will get you the cell values
+        phase_1_rows_data.append(row_data)
+
+    phase_1_surveys = pd.DataFrame(phase_1_rows_data, columns=phase1_colnames)
+
+    # Correct phase 1 surveys in the same fashion as the previous approach
+    phase_1_surveys = DataLoader.correct_ha50_survey_list(phase_1_surveys.copy())
+
+    # We check all phase 1 surveys are contained in the data we had before
+    additional = []
+    for _, row in tqdm(phase_1_surveys.iterrows(), total=len(phase_1_surveys)):
+        # We look for the entry in the old survey sheet:
+        # matched_uprn = unitas_survey_sheet[unitas_survey_sheet["EPR UPRN NUMBER"] == row["UPRN"]]
+        # if matched_uprn.shape[0] == 1:
+        #     continue
+
+        matched_1 = unitas_survey_sheet[
+            (unitas_survey_sheet["Post Code"] == row["Post Code"]) &
+            (unitas_survey_sheet["NO."] == row["NO."])
+            ]
+
+        if matched_1.shape[0] == 1:
+            continue
+
+        matched_2 = unitas_survey_sheet[
+            (unitas_survey_sheet["Street / Block Name"] == row["Street / Block Name"]) &
+            (unitas_survey_sheet["NO."] == row["NO."])
+            ]
+
+        if matched_2.shape[0] == 1:
+            continue
+
+        additional.append(row.to_dict())
+    additional = pd.DataFrame(additional)
+
+    phase_2_rows_data = []
+    for row in phase_2_worksheet.iter_rows(min_row=2, values_only=False):
+        row_data = [cell.value for cell in row]  # This will get you the cell values
+        phase_2_rows_data.append(row_data)
+
+    phase2_colnames = [cell.value for cell in phase_2_worksheet[1]]
+    phase_2_surveys = pd.DataFrame(phase_2_rows_data, columns=phase2_colnames)
+    # Drop all of the occurances of "OFFICE USE ONLY" columns
+    phase_2_surveys = phase_2_surveys.drop(columns=[c for c in phase_2_surveys.columns if "OFFICE USE ONLY" in c])
+    common_columns = list({c for c in phase_2_surveys.columns if c in additional.columns})
+    additional_filtered = additional[common_columns]
+
+    further_unitas_completed_surveys = pd.concat(
+        [phase_2_surveys, additional_filtered],
+        axis=0,
+        ignore_index=True
+    )
+
+    # We match these back to the asset list