From 36bb4b0f275b402e7806f01cde788676e7090bd3 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 28 Jan 2025 15:10:23 +0000
Subject: [PATCH] pulled data needed for stonewater

---
 .idea/Model.iml                               |   2 +-
 .idea/misc.xml                                |   2 +-
 etl/access_reporting/app.py                   |  46 ++
 .../stonewater/Wave 3 Preparation.py          |  33 ++
 etl/customers/stonewater/data_cleaning.py     | 137 ++++++
 .../stonewater/potential_eco_properties.py    | 393 ++++++++++++------
 .../whlg eligibile properties.py              |   8 +
 7 files changed, 495 insertions(+), 126 deletions(-)
 create mode 100644 etl/customers/stonewater/data_cleaning.py
diff --git a/.idea/Model.iml b/.idea/Model.iml
index df6c4faa..762580d9 100644
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@@ -7,7 +7,7 @@
       <sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
       <sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
     </content>
-    <orderEntry type="jdk" jdkName="Fastapi-backend" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Stonewater-wave-3" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="PyNamespacePackagesService">
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 50cad4ca..c916a158 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,7 +3,7 @@
   <component name="Black">
     <option name="sdkName" value="Python 3.10 (backend)" />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Fastapi-backend" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Stonewater-wave-3" project-jdk-type="Python SDK" />
   <component name="PyCharmProfessionalAdvertiser">
     <option name="shown" value="true" />
   </component>
diff --git a/etl/access_reporting/app.py b/etl/access_reporting/app.py
index 830f4370..8a8254a1 100644
--- a/etl/access_reporting/app.py
+++ b/etl/access_reporting/app.py
@@ -83,8 +83,11 @@ def api_call_decorator(func):
             results = []
             page_size = kwargs.get('page_size', None)
             response_data = {}
+            n_calls = 0
 
             while url:
+                logger.info("Making call for page: " + str(n_calls + 1))
+                n_calls += 1
                 response = requests.request(http_method, url, headers=self.headers, json=data)
 
                 # Handle the response
@@ -93,6 +96,7 @@ def api_call_decorator(func):
                     if page_size:
                         results.extend(response_json.get('value', []))
                         url = response_json.get('@odata.nextLink', None)
+                        logger.info(f"Next page URL: {url}")
                     else:
                         response_data = response_json  # Capture the full response for consistency
                         break
@@ -270,6 +274,48 @@ class SharePointClient:
 
         return file_content
 
+    def download_sharepoint_folder(self, drive_id, folder_path, download_dir, excluded_file_types=None):
+        """
+        Recursively downloads all files in a SharePoint folder (and its subfolders) to a local directory.
+
+        :param drive_id: The ID of the SharePoint drive.
+        :param folder_path: The path of the folder in SharePoint.
+        :param download_dir: The local directory to save the downloaded files.
+        :param excluded_file_types: File extensions (without the dot, case-sensitive) to skip at every depth.
+        """
+
+        excluded_file_types = [] if excluded_file_types is None else excluded_file_types
+
+        # Ensure the download directory exists
+        os.makedirs(download_dir, exist_ok=True)
+
+        # List folder contents (NOTE(review): no page_size is passed here - confirm large folders are not truncated)
+        folder_contents = self.list_folder_contents(drive_id, folder_path)
+        files = folder_contents.get('value', [])
+
+        for item in files:
+            if item.get('folder'):  # Check if it's a folder
+                # Recurse into subfolders, forwarding the exclusions so they also apply to nested files
+                subfolder_path = f"{folder_path}/{item['name']}"
+                subfolder_dir = os.path.join(download_dir, item['name'])
+                self.download_sharepoint_folder(drive_id, subfolder_path, subfolder_dir, excluded_file_types)
+            else:
+                # It's a file, download it
+                file_name = item['name']
+                if file_name.split(".")[-1] in excluded_file_types:
+                    continue
+                download_url = item['@microsoft.graph.downloadUrl']
+
+                logger.info(f"Downloading file: {file_name}")
+                file_content = self.download_sharepoint_file(download_url)
+
+                # Save the file locally
+                file_path = os.path.join(download_dir, file_name)
+                with open(file_path, 'wb') as f:
+                    f.write(file_content.read())
+
+                logger.info(f"File saved to: {file_path}")
+
 
 def app():
     # Customers for WC 18/11/2024
diff --git a/etl/customers/stonewater/Wave 3 Preparation.py b/etl/customers/stonewater/Wave 3 Preparation.py
index 0f757f7b..8538188b 100644
--- a/etl/customers/stonewater/Wave 3 Preparation.py	
+++ b/etl/customers/stonewater/Wave 3 Preparation.py	
@@ -2905,5 +2905,38 @@ def identify_incorrect_packages():
         os.path.join(CUSTOMER_FOLDER_PATH, "Units with assigned packages - with flags.csv"), index=False
     )
 
+
+def revised_model():
+    """
+    This function implements the revised model for Stonewater, where we are looking at new priority postcodes
+    This work was undertaken in January 2025.
+    """
+
+    # 1) Create the new list of properties
+
+    new_priority_postcodes = pd.read_excel(
+        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Jan 2025 Project/Updated 2025 to 2030 "
+        "priority list.xlsx"
+    )
+
+    original_archetypes = pd.read_excel(
+        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Stonewater SHDF_3_0_Board Triage 22.05.24 "
+        "- Archetyped V3.1.xlsx",
+        header=4
+    )
+    original_archetypes = original_archetypes[~pd.isnull(original_archetypes["Address ID"])]
+    original_archetypes = original_archetypes[original_archetypes["Address ID"] != "Address ID"]
+    original_archetypes["Address ID"] = original_archetypes["Address ID"].astype(int)
+
+    original_archetypes = original_archetypes[
+        ["Address ID", "Archetype ID"]
+    ]
+
+    # Check every address is in the new list (NOTE(review): the assert was left unfinished; intent assumed)
+    missed = original_archetypes[
+        ~original_archetypes["Address ID"].isin(new_priority_postcodes["Address ID"].values)
+    ]["Archetype ID"].unique()
+    assert len(missed) == 0, f"Archetypes with addresses missing from the new priority list: {missed}"
+
 # if __name__ == "__main__":
 #     main()
diff --git a/etl/customers/stonewater/data_cleaning.py b/etl/customers/stonewater/data_cleaning.py
new file mode 100644
index 00000000..8751960c
--- /dev/null
+++ b/etl/customers/stonewater/data_cleaning.py
@@ -0,0 +1,137 @@
+import os
+import shutil
+from tqdm import tqdm
+
+
+def delete_large_files():
+    """
+    Deletes the photo, design and other large folders/files which we don't need from the local survey tree.
+    :return: None; folders and files under the hard-coded survey directory are removed in place.
+    """
+
+    folder_path = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Wave 2.1 Surveys"
+
+    # List the contents of this folder since in each sub-folder we have the property folders
+    contents = os.listdir(folder_path)
+
+    for subfolder in contents:
+        subfolder_path = os.path.join(folder_path, subfolder)
+        if not os.path.isdir(subfolder_path):
+            continue
+        # List the contents
+        property_folders = os.listdir(subfolder_path)
+
+        for property_name in tqdm(property_folders):
+            # Only directories are property folders; skip loose files
+            property_path = os.path.join(subfolder_path, property_name)
+            if not os.path.isdir(property_path):
+                continue
+
+            property_contents = os.listdir(property_path)
+            # We delete the folders below outright, contents included.
+            # Several spellings/numberings of the same folder are listed
+            # explicitly because names are matched exactly (including case).
+            # A listed name that exists but is not a directory is left alone.
+            for folder_to_delete in ["1. RA Property Pics", "4. Air Tightness Tests", "5. RD Design Info",
+                                     "1. RA Property PIcs", "Post EPC Photos", "4. RD Design Info",
+                                     "5. Installer Info", "6. Trustmark lodgement", "7.Post Install Inspection Photos",
+                                     "6. Trustmark Lodgement", "7. Post Inspection Photos"]:
+                if folder_to_delete not in property_contents:
+                    continue
+                folder_to_delete_path = os.path.join(property_path, folder_to_delete)
+                if os.path.isdir(folder_to_delete_path):
+                    # Delete the folder, even if it's not empty
+                    shutil.rmtree(folder_to_delete_path)
+
+            # Use the '2. RA Coordinator Info' folder, falling back to the '1.' numbering when it is absent
+            if "2. RA Coordinator Info" in property_contents:
+                coordinator_folder = "2. RA Coordinator Info"
+            else:
+                coordinator_folder = "1. RA Coordinator Info"
+            coordinator_info_path = os.path.join(property_path, coordinator_folder)
+            if not os.path.isdir(coordinator_info_path):
+                # Neither variant exists for this property; os.listdir below would raise FileNotFoundError
+                continue
+            coordinator_info_contents = os.listdir(coordinator_info_path)
+            # Remove loose .MOV and .jpg files (suffixes are matched case-sensitively)
+            for file in coordinator_info_contents:
+                if file.endswith((".MOV", ".jpg")):
+                    os.remove(os.path.join(coordinator_info_path, file))
+
+            if "Property Pics" in coordinator_info_contents:
+                # Delete folder and contents
+                shutil.rmtree(os.path.join(coordinator_info_path, "Property Pics"))
+
+
+def download_data_from_sharepoint():
+    # Downloads the "RA Coordinator Info" folder of every property under the selected regional SharePoint folders
+    # to the local survey directory. The client secret is read from SHAREPOINT_CLIENT_SECRET; never commit it.
+    from etl.access_reporting.app import SharePointClient
+
+    sharepoint_client = SharePointClient(
+        tenant_id="10d5af8b-2cfd-4882-9ccd-b96e4812dacf",
+        client_id="6832a4c5-fb8c-4082-a746-4f51e1020f0d",
+        client_secret=os.environ["SHAREPOINT_CLIENT_SECRET"],  # the previously committed secret should be rotated
+        site_id="bc925a9a-ad0b-4de9-9a3c-e61014cc7489"
+    )
+
+    sharepoint_root = "Osmosis ACD/Osmosis ACD Projects/Stonewater/Stonewater Property ID Folders"
+    contents = sharepoint_client.list_folder_contents(
+        drive_id=sharepoint_client.document_drive["id"],
+        folder_path=sharepoint_root
+    )
+
+    # Only these regional folders are pulled
+    folders_to_pull = [
+        folder for folder in contents["value"] if folder["name"] in ["3. Wiltshire", "4. Bournemouth", "5. Coventry"]
+    ]
+    for folder_to_pull in folders_to_pull:
+        # Get the contents
+        region_path = f"{sharepoint_root}/{folder_to_pull['name']}"
+        folder_contents = sharepoint_client.list_folder_contents(
+            drive_id=sharepoint_client.document_drive["id"],
+            folder_path=region_path,
+            page_size=100
+        )
+
+        property_folders = list(folder_contents["value"])
+
+        for property_folder in property_folders:
+            # We go into each property folder and get the contents
+            property_path = f"{region_path}/{property_folder['name']}"
+            property_folder_contents = sharepoint_client.list_folder_contents(
+                drive_id=sharepoint_client.document_drive["id"],
+                folder_path=property_path
+            )
+            # We look for the retrofit assessment folder:
+            property_sub_folders = [
+                f for f in property_folder_contents["value"] if "ra coordinator info" in f["name"].lower()
+            ]
+
+            if not property_sub_folders:
+                continue
+
+            # if we have this, we download the folder and store it locally (first match only)
+            property_sub_folder = property_sub_folders[0]
+            # Remote paths always use "/" (os.path.join is platform dependent); the local path uses os.path.join
+            property_folder_path = "/".join([
+                sharepoint_root,
+                folder_to_pull["name"],
+                property_folder["name"],
+                property_sub_folder["name"]
+            ])
+
+            download_dir = os.path.join(
+                "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Wave 2.1 Surveys",
+                folder_to_pull["name"],
+                property_folder["name"],
+                property_sub_folder["name"]
+            )
+
+            # We download the folder
+            sharepoint_client.download_sharepoint_folder(
+                drive_id=sharepoint_client.document_drive["id"],
+                folder_path=property_folder_path,
+                download_dir=download_dir,
+                excluded_file_types=["MOV"]
+            )
diff --git a/etl/customers/stonewater/potential_eco_properties.py b/etl/customers/stonewater/potential_eco_properties.py
index c0301e9a..bda9c30c 100644
--- a/etl/customers/stonewater/potential_eco_properties.py
+++ b/etl/customers/stonewater/potential_eco_properties.py
@@ -7,6 +7,8 @@ from tqdm import tqdm
 from dotenv import load_dotenv
 from backend.SearchEpc import SearchEpc
 from utils.s3 import read_from_s3, read_pickle_from_s3
+import msoffcrypto
+from io import BytesIO
 
 load_dotenv(dotenv_path="backend/.env")
 EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
@@ -64,6 +66,28 @@ def app():
     This code creates a list of cavity properties, for review
     """
 
+    # Read in the password protected ECO rolling master sheet
+    # TODO: This file should be deleted!
+
+    # Path to the password-protected Excel file
+    file_path = ("/Users/khalimconn-kowlessar/Downloads/STONEWATER MASTER SHEET - UPDATED 20.5.24 - K- PASSWORD "
+                 "PROTECTED.xlsx")
+    password = os.environ["STONEWATER_MASTER_SHEET_PASSWORD"]  # keep in backend/.env; never commit the password
+
+    # Open the file and decrypt it into an in-memory buffer
+    with open(file_path, "rb") as f:
+        decrypted_file = BytesIO()
+        office_file = msoffcrypto.OfficeFile(f)
+        office_file.load_key(password=password)
+        office_file.decrypt(decrypted_file)
+
+    # Read the decrypted file into a DataFrame
+    eco_rolling_master = pd.read_excel(decrypted_file, sheet_name="Sheet1", engine="openpyxl")
+
+    eco_rolling_master = eco_rolling_master[
+        ~eco_rolling_master['INSTALL/CANCELLATION DATE'].str.contains("CANCELLED", na=False)
+    ]
+
     archetyped_properties = pd.read_excel(
         "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Stonewater SHDF_3_0_Board Triage 22.05.24 - "
         "Archetyped V3.1.xlsx",
@@ -116,13 +140,16 @@ def app():
 
     features_to_merge = features[
         [
-            "Address ID", "Age", "Property Type", "Walls", "Roofs", "Glazing", "Heating", "Main Fuel", "Hot Water",
+            "Address ID", "Organisation Reference", "Age", "Property Type", "Walls", "Roofs", "Glazing", "Heating",
+            "Main Fuel",
+            "Hot Water",
             "Renewables", "Total Floor Area"
         ]
     ]
 
     stonewater_cavity_properties = archetyped_properties[
-        ["Name", "Postcode", "Osm. ID", "Address ID", "UPRN", "UDPRN", "Archetype ID", "House no", "Street name",
+        ["Name", "Postcode", "Osm. ID", "Org. ref.", "Address ID", "UPRN", "UDPRN", "Archetype ID", "House no",
+         "Street name",
          "Address line 2", "City/Town", "Is Cavity Property", "Survey shows CWI needed for Archetype"]
     ].merge(
         features_to_merge, how="left", on="Address ID"
@@ -166,77 +193,137 @@ def app():
         stonewater_cavity_properties["Reason Included"]
     )
 
+    # We flag units that were installed under ECO3
+    numeric_ids = eco_rolling_master[eco_rolling_master["STONEWATER UPRN"] != "NOT ON ASSET LIST"]
+    numeric_ids = numeric_ids[~pd.isnull(numeric_ids["STONEWATER UPRN"])]
+    numeric_ids["STONEWATER UPRN"] = numeric_ids["STONEWATER UPRN"].astype(int)
+
+    stonewater_cavity_properties["Installed under ECO3"] = stonewater_cavity_properties["Org. ref."].isin(
+        numeric_ids['STONEWATER UPRN'].values
+    )
+
+    # Which postcodes were installed under ECO3
+    priority_list_eco3 = stonewater_cavity_properties[
+        stonewater_cavity_properties["Installed under ECO3"]
+    ]["Postcode"].unique()
+
+    # These are properties that were not installed under ECO3, that have the same postcodes as properties
+    # installed under ECO3
+
+    # These are 66 properties we might want to start with as an immediate priority
+    stonewater_cavity_properties["Same Postcode as Installed under ECO3"] = (
+        ~stonewater_cavity_properties["Installed under ECO3"] & (
+        stonewater_cavity_properties["Postcode"].isin(priority_list_eco3)
+    )
+    )
+
     # We get the EPC data
-    epc_data = json.loads(
-        read_from_s3(
-            bucket_name="retrofit-data-dev",
-            s3_file_name="customers/Stonewater/clustering/epc_data.json"
-        )
-    )
-    epc_data = pd.DataFrame(epc_data)
-
-    epc_data["uprn"] = np.where(
-        epc_data["internal_id"] == 1091,
-        83143766,
-        epc_data["uprn"]
-    )
-
-    epc_data_batch_2 = read_pickle_from_s3(
-        s3_file_name="customers/Stonewater/clustering/epc_data_batch_2.pkl",
-        bucket_name="retrofit-data-dev"
-    )
-    epc_data_batch_2 = pd.DataFrame(epc_data_batch_2)
-
-    complete_epcs = pd.concat([epc_data, epc_data_batch_2])
-
-    epcs_to_merge = complete_epcs[
-        [
-            "uprn",
-            "address",
-            "postcode",
-            "property-type",
-            "built-form",
-            "inspection-date",
-            "current-energy-rating",
-            "current-energy-efficiency",
-            "roof-description",
-            "walls-description",
-            "transaction-type",
-            "secondheat-description",
-            "total-floor-area",
-            "construction-age-band",
-            "floor-height",
-            "number-habitable-rooms",
-            "mainheat-description",
-            "energy-consumption-current"
-        ]
-    ].rename(
-        columns={
-            "address": "Address",
-            "postcode": "Postcode",
-            "inspection-date": "Date of last EPC",
-            "current-energy-efficiency": "SAP score on register",
-            "current-energy-rating": "EPC rating on register",
-            "property-type": "Property Type",
-            "built-form": "Archetype",
-            "total-floor-area": "Property Floor Area",
-            "construction-age-band": "Property Age Band",
-            "floor-height": "Property Floor Height",
-            "number-habitable-rooms": "Number of Habitable Rooms",
-            "walls-description": "Wall Construction",
-            "roof-description": "Roof Construction",
-            "mainheat-description": "Heating Type",
-            "secondheat-description": "Secondary Heating",
-            "transaction-type": "Reason for last EPC",
-            "energy-consumption-current": "Heat Demand (kWh/m2)",
-        }
-    )
-    # We de-dupe, taking the newest on the date the EPC was lod
-    epcs_to_merge["Date of last EPC"] = pd.to_datetime(epcs_to_merge["Date of last EPC"])
-    epcs_to_merge = epcs_to_merge.sort_values("Date of last EPC", ascending=False)
-    epcs_to_merge = epcs_to_merge.drop_duplicates(subset="uprn")
+    # epc_data = json.loads(
+    #     read_from_s3(
+    #         bucket_name="retrofit-data-dev",
+    #         s3_file_name="customers/Stonewater/clustering/epc_data.json"
+    #     )
+    # )
+    # epc_data = pd.DataFrame(epc_data)
+    #
+    # epc_data["uprn"] = np.where(
+    #     epc_data["internal_id"] == 1091,
+    #     83143766,
+    #     epc_data["uprn"]
+    # )
+    #
+    # epc_data_batch_2 = read_pickle_from_s3(
+    #     s3_file_name="customers/Stonewater/clustering/epc_data_batch_2.pkl",
+    #     bucket_name="retrofit-data-dev"
+    # )
+    # epc_data_batch_2 = pd.DataFrame(epc_data_batch_2)
+    #
+    # complete_epcs = pd.concat([epc_data, epc_data_batch_2])
+    #
+    # epcs_to_merge = complete_epcs[
+    #     [
+    #         "uprn",
+    #         "address",
+    #         "postcode",
+    #         "property-type",
+    #         "built-form",
+    #         "inspection-date",
+    #         "current-energy-rating",
+    #         "current-energy-efficiency",
+    #         "roof-description",
+    #         "walls-description",
+    #         "transaction-type",
+    #         "secondheat-description",
+    #         "total-floor-area",
+    #         "construction-age-band",
+    #         "floor-height",
+    #         "number-habitable-rooms",
+    #         "mainheat-description",
+    #         "energy-consumption-current"
+    #     ]
+    # ].rename(
+    #     columns={
+    #         "address": "Address",
+    #         "postcode": "Postcode",
+    #         "inspection-date": "Date of last EPC",
+    #         "current-energy-efficiency": "SAP score on register",
+    #         "current-energy-rating": "EPC rating on register",
+    #         "property-type": "Property Type",
+    #         "built-form": "Archetype",
+    #         "total-floor-area": "Property Floor Area",
+    #         "construction-age-band": "Property Age Band",
+    #         "floor-height": "Property Floor Height",
+    #         "number-habitable-rooms": "Number of Habitable Rooms",
+    #         "walls-description": "Wall Construction",
+    #         "roof-description": "Roof Construction",
+    #         "mainheat-description": "Heating Type",
+    #         "secondheat-description": "Secondary Heating",
+    #         "transaction-type": "Reason for last EPC",
+    #         "energy-consumption-current": "Heat Demand (kWh/m2)",
+    #     }
+    # )
+    # # We de-dupe, taking the newest on the date the EPC was lod
+    # epcs_to_merge["Date of last EPC"] = pd.to_datetime(epcs_to_merge["Date of last EPC"])
+    # epcs_to_merge = epcs_to_merge.sort_values("Date of last EPC", ascending=False)
+    # epcs_to_merge = epcs_to_merge.drop_duplicates(subset="uprn")
 
     stonewater_cavity_properties["UPRN"] = stonewater_cavity_properties["UPRN"].astype("Int64").astype(str)
+    stonewater_cavity_properties["Reason Included"].value_counts()
+    # Find the postcodes where an Osmosis survey revealed a need for CWI
+    postcodes_found_needing_cwi = stonewater_cavity_properties[
+        stonewater_cavity_properties["Reason Included"].isin(
+            [
+                "Survey revealed potential need for CWI or extract and re-fill",
+                "Surveyed revealed potential need for CWI or extract and re-fill and is an as built cavity property",
+                "Survey showed this property needs CWI",
+                "Survey showed this property could need extract and re-fill"
+            ]
+        )
+    ]["Postcode"].unique()
+
+    stonewater_cavity_properties["Suspected Needs CWI - not surveyed"] = (
+        (
+            stonewater_cavity_properties[
+                "Postcode"].isin(
+                postcodes_found_needing_cwi)
+        ) & (
+            ~stonewater_cavity_properties[
+                "Reason Included"].isin(
+                [
+                    "Survey revealed potential need "
+                    "for CWI or extract and re-fill",
+                    "Surveyed revealed potential "
+                    "need for CWI or extract and "
+                    "re-fill and is an as built "
+                    "cavity property",
+                    "Survey showed this property "
+                    "needs CWI",
+                    "Survey showed this property "
+                    "could need extract and re-fill"
+                ]
+            )
+        )
+    )
 
     # Merge the EPCs on, with the data we need
     stonewater_cavity_properties = stonewater_cavity_properties.rename(
@@ -252,12 +339,12 @@ def app():
             "Renewables": "Parity - Renewables",
             "Total Floor Area": "Parity - Total Floor Area"
         }
-    ).merge(
-        epcs_to_merge,
-        how="left",
-        left_on="UPRN",
-        right_on="uprn"
-    )
+    )  # .merge(
+    #     epcs_to_merge,
+    #     how="left",
+    #     left_on="UPRN",
+    #     right_on="uprn"
+    # )
 
     # We now flag the additional properties in the as built list
 
@@ -288,8 +375,56 @@ def app():
     additional_properties = additional_properties.merge(house_numbers, how="left", on="Address ID")
     additional_properties["row_id"] = additional_properties["Address ID"].copy()
 
+    # Flag any units in this list that were installed under ECO3
+    additional_properties["Installed under ECO3"] = additional_properties["Organisation Reference"].isin(
+        numeric_ids['STONEWATER UPRN'].values
+    )
+
+    # Additional list ECO3
+    additional_list_eco3 = additional_properties[additional_properties["Installed under ECO3"]]["Postcode"].unique()
+
+    # These are properties that were not installed under ECO3, that have the same postcodes as properties
+    # installed under ECO3
+    # These are 297 properties we might want to start with as an immediate priority
+    additional_properties["Same Postcode as Installed under ECO3"] = (
+        ~additional_properties["Installed under ECO3"] & (
+        additional_properties["Postcode"].isin(additional_list_eco3)
+    )
+    )
+
+    # We do some additional manual checks, for ECO3 properties that were installed that didn't get matched to either
+    # dataset
+    numeric_ids["In asset list"] = numeric_ids["STONEWATER UPRN"].isin(
+        stonewater_cavity_properties['Org. ref.'].astype(int).values
+    )
+    numeric_ids["In asset list"] = numeric_ids["In asset list"] | (
+        numeric_ids["STONEWATER UPRN"].isin(
+            additional_properties['Organisation Reference'].astype(int).values
+        )
+    )
+
+    # eco3_installs_not_in_asset_list = numeric_ids[~numeric_ids["In asset list"]]
+    # # We now take samples of properties randomly and manually check the ID against the asset list
+    # print(eco3_installs_not_in_asset_list.sample(1)[["STONEWATER UPRN", "Post Code", "NO ", "Street / Block Name", ]])
+    # # Checked STONEWATER UPRN
+    # # 9862, BH15 1NR, 33, THE QUAY FOYER [x]
+    # # 12785, S01 66PN, 57, SEACOLE GARDENS [x]
+    # # 26071,  MK42 0TE,  51,  De Havilland Avenue, Shortstown [x]
+    # # 18213,  HR6 9UW, 20 Ford Street [x]
+    # # 24344, LU4 9FF, 6 SEAL CLOSE [x]
+    # # 31222,  SN14 0QZ, 7 HARDBROOK COURT [x]
+    # # 9343, SP4 7XL, 10 OAK PLACE [x]
+    # # 34730, LU5 5TN, 4 TUDOR DRIVE [x]
+    # # 7021,  BN27 2BZ, 32 BUTTS FIELD []
+    #
+    # stonewater_cavity_properties[stonewater_cavity_properties['Org. ref.'] == 7021]
+    # stonewater_cavity_properties[stonewater_cavity_properties['Postcode'] == "BN27 2BZ"]["Name"]
+    #
+    # additional_properties[additional_properties['Organisation Reference'] == 7021]
+    # additional_properties[additional_properties['Postcode'] == "BN27 2BZ"][["Address"]]
+
     # Pull the EPCs for these properties
-    additional_properties_epcs, errors = get_data(additional_properties)
+    # additional_properties_epcs, errors = get_data(additional_properties)
 
     # Save this data as a pickle
     # import pickle
@@ -297,12 +432,20 @@ def app():
     # "wb") as f:
     #     pickle.dump(additional_properties_epcs, f)
 
+    additional_properties["Suspected Needs CWI - not surveyed"] = (
+        (
+            additional_properties["Postcode"].isin(postcodes_found_needing_cwi)
+        )
+    )
+
+    additional_properties["Same Postcode as Installed under ECO3"].value_counts()
+
     # We drop Full Address
     additional_properties = additional_properties.drop(columns=["Full Address"])
     additional_properties2 = additional_properties[[
-        "row_id", "Address", "Postcode", "Address ID", "SAP", "SAP Band", "Property Type", "Walls", "Roofs", "Glazing",
-        "Heating", "Main Fuel", "Hot Water", "Renewables", "Total Floor Area",
-
+        "Address", "Postcode", "Address ID", "SAP", "SAP Band", "Property Type", "Walls", "Roofs", "Glazing",
+        "Heating", "Main Fuel", "Hot Water", "Renewables", "Total Floor Area", 'Installed under ECO3',
+        'Same Postcode as Installed under ECO3'
     ]].rename(
         columns={
             "SAP": "Parity - Predicted SAP",
@@ -318,56 +461,58 @@ def app():
             "Renewables": "Parity - Renewables",
             "Total Floor Area": "Parity - Total Floor Area"
         }
-    ).merge(
-        pd.DataFrame(additional_properties_epcs)[
-            [
-                "row_id",
-                "property-type",
-                "built-form",
-                "inspection-date",
-                "current-energy-rating",
-                "current-energy-efficiency",
-                "roof-description",
-                "walls-description",
-                "transaction-type",
-                "secondheat-description",
-                "total-floor-area",
-                "construction-age-band",
-                "floor-height",
-                "number-habitable-rooms",
-                "mainheat-description",
-                "energy-consumption-current"
-            ]
-        ].rename(
-            columns={
-                "inspection-date": "Date of last EPC",
-                "current-energy-efficiency": "SAP score on register",
-                "current-energy-rating": "EPC rating on register",
-                "property-type": "Property Type",
-                "built-form": "Archetype",
-                "total-floor-area": "Property Floor Area",
-                "construction-age-band": "Property Age Band",
-                "floor-height": "Property Floor Height",
-                "number-habitable-rooms": "Number of Habitable Rooms",
-                "walls-description": "Wall Construction",
-                "roof-description": "Roof Construction",
-                "mainheat-description": "Heating Type",
-                "secondheat-description": "Secondary Heating",
-                "transaction-type": "Reason for last EPC",
-                "energy-consumption-current": "Heat Demand (kWh/m2)",
-            }
-        ),
-        how="left",
-        on="row_id"
-    )
+    )  # .merge(
+    #     pd.DataFrame(additional_properties_epcs)[
+    #         [
+    #             "row_id",
+    #             "property-type",
+    #             "built-form",
+    #             "inspection-date",
+    #             "current-energy-rating",
+    #             "current-energy-efficiency",
+    #             "roof-description",
+    #             "walls-description",
+    #             "transaction-type",
+    #             "secondheat-description",
+    #             "total-floor-area",
+    #             "construction-age-band",
+    #             "floor-height",
+    #             "number-habitable-rooms",
+    #             "mainheat-description",
+    #             "energy-consumption-current"
+    #         ]
+    #     ].rename(
+    #         columns={
+    #             "inspection-date": "Date of last EPC",
+    #             "current-energy-efficiency": "SAP score on register",
+    #             "current-energy-rating": "EPC rating on register",
+    #             "property-type": "Property Type",
+    #             "built-form": "Archetype",
+    #             "total-floor-area": "Property Floor Area",
+    #             "construction-age-band": "Property Age Band",
+    #             "floor-height": "Property Floor Height",
+    #             "number-habitable-rooms": "Number of Habitable Rooms",
+    #             "walls-description": "Wall Construction",
+    #             "roof-description": "Roof Construction",
+    #             "mainheat-description": "Heating Type",
+    #             "secondheat-description": "Secondary Heating",
+    #             "transaction-type": "Reason for last EPC",
+    #             "energy-consumption-current": "Heat Demand (kWh/m2)",
+    #         }
+    #     ),
+    #     how="left",
+    #     on="row_id"
+    # )
 
     # We save the data locally
     stonewater_cavity_properties.to_csv(
-        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Stonewater Cavity Properties.csv",
+        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Stonewater Cavity Properties - priority "
+        "postcodes.csv",
         index=False
     )
     additional_properties2.to_csv(
-        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Stonewater Additional Cavity Properties.csv",
+        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Stonewater Additional Cavity Properties - "
+        "non-priority postcodes.csv",
         index=False
     )
     # Save the survey findings
diff --git a/etl/customers/waltham_forest/whlg eligibile properties.py b/etl/customers/waltham_forest/whlg eligibile properties.py
index fee988c1..9e1949f7 100644
--- a/etl/customers/waltham_forest/whlg eligibile properties.py	
+++ b/etl/customers/waltham_forest/whlg eligibile properties.py	
@@ -44,6 +44,10 @@ epc_data["has_conservation_restrictions"] = (
     | (epc_data["is_heritage_building"] == True)
 )
 
+whlg_eligible_postcodes["Local Authority"].value_counts()
+
+whlg_eligible_postcodes = whlg_eligible_postcodes[whlg_eligible_postcodes["Local Authority"] == "Waltham Forest"]
+
 # Pathway 1:
 # Match based on eligible postcodes
 pathway1 = epc_data[epc_data["postcode"].isin(whlg_eligible_postcodes["Postcode"].values)]
@@ -67,6 +71,10 @@ pathway1["EPC Date"] = pd.to_datetime(pathway1["EPC Date"]).dt.strftime("%Y-%m-%
 # Create a year EPC was lodged
 pathway1["EPC Year"] = pd.to_datetime(pathway1["EPC Date"]).dt.year
 
+low_epc = pathway1[pathway1["EPC Rating"].isin(["F", "G"])]
+low_epc["EPC Rating"].value_counts()
+low_epc.tail(1)[["address", "postcode"]]
+
 pathway1.to_csv(
     "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Waltham Forest WHLG - Pathway 1 Eligibility.csv",
     index=False