From 0fccc0cc10f2c96e768cc0fc3cf51cc7999ecdd4 Mon Sep 17 00:00:00 2001
From: Jun-te Kim <junte.kim@mealcraft.com>
Date: Fri, 30 Jan 2026 13:48:07 +0000
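Subject: [PATCH] Test terraform plan in CI; disable workspace and apply steps

Trigger the deploy workflow on pushes to branches matching "*", comment out
the workspace-selection and dev/prod apply steps, and pin the plan step to
dev.tfvars so the plan can be exercised without deploying. Also remove the
leftover merge-conflict markers from asset_list/app.py, update its hard-coded
input settings, apply black-style formatting, and add the dotenv dependency
to the devcontainer requirements.
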
---
 .devcontainer/asset_list/requirements.txt |   1 +
 .github/workflows/deploy_terraform.yml    |  35 ++--
 asset_list/app.py                         | 220 ++++++++++++----------
 3 files changed, 135 insertions(+), 121 deletions(-)

diff --git a/.devcontainer/asset_list/requirements.txt b/.devcontainer/asset_list/requirements.txt
index cfab95ec..0640f2c9 100644
--- a/.devcontainer/asset_list/requirements.txt
+++ b/.devcontainer/asset_list/requirements.txt
@@ -21,3 +21,4 @@ pydantic>=1.10.7,<2
 sqlmodel
 # Formatting
 black==26.1.0
+python-dotenv
diff --git a/.github/workflows/deploy_terraform.yml b/.github/workflows/deploy_terraform.yml
index a7aef225..05a667bb 100644
--- a/.github/workflows/deploy_terraform.yml
+++ b/.github/workflows/deploy_terraform.yml
@@ -3,8 +3,7 @@ name: Deploy terraform stack
 on:
   push:
     branches:
-      - dev
-      - prod
+      - "*"
 
 jobs:
   deploy:
@@ -47,22 +46,22 @@ jobs:
       - name: Terraform Init
         run: cd infrastructure/terraform && terraform init
 
-      - name: Terraform Workspace
-        run: |
-          BRANCH_NAME=$(echo "${{ github.ref }}" | sed -e "s/^refs\/heads\///")
-          cd infrastructure/terraform
-          terraform workspace select ${BRANCH_NAME} || terraform workspace new ${BRANCH_NAME}
+      # - name: Terraform Workspace
+      #   run: |
+      #     BRANCH_NAME=$(echo "${{ github.ref }}" | sed -e "s/^refs\/heads\///")
+      #     cd infrastructure/terraform
+      #     terraform workspace select ${BRANCH_NAME} || terraform workspace new ${BRANCH_NAME}
 
       - name: Terraform Plan
         run: |
           BRANCH_NAME=$(echo "${{ github.ref }}" | sed -e "s/^refs\/heads\///")
-          cd infrastructure/terraform && terraform plan -var-file=${BRANCH_NAME}.tfvars
+          cd infrastructure/terraform && terraform plan -var-file=dev.tfvars
 
-      - name: Deploy to Dev
-        if: github.ref == 'refs/heads/dev'
-        run: cd infrastructure/terraform && terraform apply -var-file=dev.tfvars -auto-approve
-        env:
-          name: dev
+      # - name: Deploy to Dev
+      #   if: github.ref == 'refs/heads/dev'
+      #   run: cd infrastructure/terraform && terraform apply -var-file=dev.tfvars -auto-approve
+      #   env:
+      #     name: dev
 
       - name: Configure AWS credentials (ProdAdmin)
         uses: aws-actions/configure-aws-credentials@v1
@@ -73,8 +72,8 @@ jobs:
         env:
           AWS_PROFILE: "ProdAdmin"
 
-      - name: Deploy to Prod
-        if: github.ref == 'refs/heads/prod'
-        run: cd infrastructure/terraform && terraform apply -var-file=prod.tfvars -auto-approve
-        env:
-          name: prod
+      # - name: Deploy to Prod
+      #   if: github.ref == 'refs/heads/prod'
+      #   run: cd infrastructure/terraform && terraform apply -var-file=prod.tfvars -auto-approve
+      #   env:
+      #     name: prod
diff --git a/asset_list/app.py b/asset_list/app.py
index e8ce408e..1c7200fd 100644
--- a/asset_list/app.py
+++ b/asset_list/app.py
@@ -12,23 +12,35 @@ from asset_list.utils import get_data
 
 from dotenv import load_dotenv
 from backend.SearchEpc import SearchEpc
+
 load_dotenv(dotenv_path="backend/.env")
-EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN", "a2Nvbm5rb3dsZXNzYXJAZ21haWwuY29tOjY5MGJiMWM0NmIyOGI5ZDUxYzAxMzQzYzNiZGNlZGJjZDNmODQwMzA=")
+EPC_AUTH_TOKEN = os.getenv(
+    "EPC_AUTH_TOKEN",
+    "a2Nvbm5rb3dsZXNzYXJAZ21haWwuY29tOjY5MGJiMWM0NmIyOGI5ZDUxYzAxMzQzYzNiZGNlZGJjZDNmODQwMzA=",
+)
 
 
-def extract_address1(asset_list, full_address_col, postcode_col, method="first_two_words"):
+def extract_address1(
+    asset_list, full_address_col, postcode_col, method="first_two_words"
+):
     if method == "first_two_words":
-        asset_list["address1_extracted"] = asset_list[full_address_col].str.split(" ").str[:2].str.join(" ")
+        asset_list["address1_extracted"] = (
+            asset_list[full_address_col].str.split(" ").str[:2].str.join(" ")
+        )
         return asset_list
 
     if method == "first_word":
-        asset_list["address1_extracted"] = asset_list[full_address_col].str.split(" ").str[0]
+        asset_list["address1_extracted"] = (
+            asset_list[full_address_col].str.split(" ").str[0]
+        )
         return asset_list
 
     if method == "house_number_extraction":
         asset_list["address1_extracted"] = asset_list.apply(
-            lambda x: SearchEpc.get_house_number(address=x[full_address_col], postcode=x[postcode_col]),
-            axis=1
+            lambda x: SearchEpc.get_house_number(
+                address=x[full_address_col], postcode=x[postcode_col]
+            ),
+            axis=1,
         )
         return asset_list
 
@@ -57,58 +69,17 @@ def app():
     EPC recommendations
     Property UPRN
     """
-<<<<<<< HEAD
-    data_folder = ("/workspaces/model/asset_list")
+    data_folder = "/workspaces/model/asset_list"
     data_filename = "assets.xlsx"
-=======
-
-    data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Hackney"
-    data_filename = "Domna SHF Wave 3.xlsx"
-    sheet_name = "Domna Wave 3"
-    postcode_column = 'Postcode'
-    address1_column = "Address 1"
-    address1_method = None
-    fulladdress_column = None
-    address_cols_to_concat = ["Address 1"]
-    missing_postcodes_method = None
-    landlord_year_built = None
-    landlord_os_uprn = "UPRN"
-    landlord_property_type = None
-    landlord_built_form = None
-    landlord_wall_construction = None
-    landlord_roof_construction = None
-    landlord_heating_system = None
-    landlord_existing_pv = None
-    landlord_property_id = "Row ID"
-    landlord_sap = None
-    outcomes_filename = None
-    outcomes_sheetname = None
-    outcomes_postcode = None
-    outcomes_houseno = None
-    outcomes_id = None
-    outcomes_address = None
-    master_filepaths = []
-    master_id_colnames = []
-    master_to_asset_list_filepath = None
-    phase = False
-    ecosurv_landlords = None
-    asset_list_header = 0
-    landlord_block_reference = None
-
-    # Peabody data for cleaning
-    data_folder = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting "
-                   "Project/data_validation")
-    data_filename = "to_standardise_uprns.xlsx"
->>>>>>> 3874da6177cbcc37f7a488bec0a06e387906653c
     sheet_name = "Sheet1"
-    postcode_column = 'Postcode'
+    postcode_column = "Post Code"
     address1_column = None
-    address1_method = 'house_number_extraction'
-    fulladdress_column = 'Address'
+    address1_method = "house_number_extraction"
+    fulladdress_column = "User Input"
     address_cols_to_concat = None
     missing_postcodes_method = None
     landlord_year_built = None
-    landlord_os_uprn = None
+    landlord_os_uprn = "UPRN"
     landlord_property_type = None
     landlord_built_form = None
     landlord_wall_construction = None
@@ -155,49 +126,62 @@ def app():
         landlord_existing_pv=landlord_existing_pv,
         landlord_sap=landlord_sap,
         landlord_block_reference=landlord_block_reference,
-        phase=phase
+        phase=phase,
     )
     asset_list.init_standardise()
 
     # We produce the new maps, which can be saved for future useage
     new_property_type_map = {
-        k: v for k, v in (
-            asset_list.variable_mappings[asset_list.landlord_property_type] if
-            asset_list.landlord_property_type else {}
+        k: v
+        for k, v in (
+            asset_list.variable_mappings[asset_list.landlord_property_type]
+            if asset_list.landlord_property_type
+            else {}
         ).items()
         if k not in PROPERTY_MAPPING
     }
     new_built_form_map = {
-        k: v for k, v in (
-            asset_list.variable_mappings[asset_list.landlord_built_form] if
-            asset_list.landlord_built_form else {}
+        k: v
+        for k, v in (
+            asset_list.variable_mappings[asset_list.landlord_built_form]
+            if asset_list.landlord_built_form
+            else {}
         ).items()
         if k not in BUILT_FORM_MAPPINGS
     }
     new_wall_map = {
-        k: v for k, v in (
-            asset_list.variable_mappings[asset_list.landlord_wall_construction] if
-            asset_list.landlord_wall_construction else {}
+        k: v
+        for k, v in (
+            asset_list.variable_mappings[asset_list.landlord_wall_construction]
+            if asset_list.landlord_wall_construction
+            else {}
         ).items()
         if k not in WALL_CONSTRUCTION_MAPPINGS
     }
     new_heating_map = {
-        k: v for k, v in (
-            asset_list.variable_mappings[asset_list.landlord_heating_system] if
-            asset_list.landlord_heating_system else {}
+        k: v
+        for k, v in (
+            asset_list.variable_mappings[asset_list.landlord_heating_system]
+            if asset_list.landlord_heating_system
+            else {}
         ).items()
         if k not in HEATING_MAPPINGS
     }
     new_existing_pv_map = {
-        k: v for k, v in (
-            asset_list.variable_mappings[asset_list.landlord_existing_pv] if asset_list.landlord_existing_pv else {}
+        k: v
+        for k, v in (
+            asset_list.variable_mappings[asset_list.landlord_existing_pv]
+            if asset_list.landlord_existing_pv
+            else {}
         ).items()
         if k not in EXISTING_PV_MAPPINGS
     }
     new_roof_construction_map = {
-        k: v for k, v in (
-            asset_list.variable_mappings[asset_list.landlord_roof_construction] if
-            asset_list.landlord_roof_construction else {}
+        k: v
+        for k, v in (
+            asset_list.variable_mappings[asset_list.landlord_roof_construction]
+            if asset_list.landlord_roof_construction
+            else {}
         ).items()
         if k not in ROOF_CONSTRUCTION_MAPPINGS
     }
@@ -211,7 +195,7 @@ def app():
         outcomes_address=outcomes_address,
         outcomes_postcode=outcomes_postcode,
         outcomes_houseno=outcomes_houseno,
-        outcomes_id=outcomes_id
+        outcomes_id=outcomes_id,
     )
 
     asset_list.flag_survey_master(
@@ -245,14 +229,16 @@ def app():
         skip = max(chunk_indexes)
 
     if any(x in folder_contents for x in downloaded_files):
-        skip = max([i for i in chunk_indexes if filename.format(i=i) in folder_contents])
+        skip = max(
+            [i for i in chunk_indexes if filename.format(i=i) in folder_contents]
+        )
 
     for i in range(0, len(asset_list.standardised_asset_list), chunk_size):
         print(f"Processing chunk {i} to {i + chunk_size}")
         if skip is not None and not force_retrieve_data:
             if i <= skip:
                 continue
-        chunk = asset_list.standardised_asset_list[i:i + chunk_size]
+        chunk = asset_list.standardised_asset_list[i : i + chunk_size]
         epc_data_chunk, errors_chunk, no_epc_chunk = get_data(
             df=chunk,
             row_id_name=asset_list.DOMNA_PROPERTY_ID,
@@ -264,7 +250,7 @@ def app():
             built_form_column=AssetList.STANDARD_BUILT_FORM,
             manual_uprn_map=manual_uprn_map,
             epc_api_only=epc_api_only,
-            epc_auth_token=EPC_AUTH_TOKEN
+            epc_auth_token=EPC_AUTH_TOKEN,
         )
 
         # We now retrieve any failed properties
@@ -287,7 +273,9 @@ def app():
 
         # Append the failed data to the main data
         # Store the chunk locally as a csv
-        pd.DataFrame(epc_data_chunk).to_csv(os.path.join(data_folder, f"Chunks/Chunk {i}.csv"), index=False)
+        pd.DataFrame(epc_data_chunk).to_csv(
+            os.path.join(data_folder, f"Chunks/Chunk {i}.csv"), index=False
+        )
         # Store the errors and no-data locally
         with open(os.path.join(data_folder, f"Chunks/Chunk {i} errors.json"), "w") as f:
             json.dump(errors_chunk, f)
@@ -318,7 +306,9 @@ def app():
 
     unique_recommendations = set()
     for _, row in recommendations_df.iterrows():
-        unique_recommendations.update([rec["improvement-summary-text"] for rec in row["recommendations"]])
+        unique_recommendations.update(
+            [rec["improvement-summary-text"] for rec in row["recommendations"]]
+        )
 
     columns = [asset_list.DOMNA_PROPERTY_ID] + list(unique_recommendations)
     transformed_data = []
@@ -338,20 +328,24 @@ def app():
     transformed_df = pd.DataFrame(transformed_data)
     for col in [
         "Floor insulation (solid floor)",
-        "Floor insulation", "Floor insulation (suspended floor)"
+        "Floor insulation",
+        "Floor insulation (suspended floor)",
     ]:
         if col not in transformed_df.columns:
             transformed_df[col] = False
     transformed_df = transformed_df[
         [
-            asset_list.DOMNA_PROPERTY_ID, "Floor insulation (solid floor)",
-            "Floor insulation", "Floor insulation (suspended floor)"
+            asset_list.DOMNA_PROPERTY_ID,
+            "Floor insulation (solid floor)",
+            "Floor insulation",
+            "Floor insulation (suspended floor)",
         ]
     ]
 
     transformed_df["epc_has_floor_recommendation"] = (
-        transformed_df["Floor insulation (solid floor)"] | transformed_df["Floor insulation"] |
-        transformed_df["Floor insulation (suspended floor)"]
+        transformed_df["Floor insulation (solid floor)"]
+        | transformed_df["Floor insulation"]
+        | transformed_df["Floor insulation (suspended floor)"]
     )
 
     # Get the find my epc data
@@ -364,21 +358,20 @@ def app():
             find_my_epc_data.append(
                 {
                     asset_list.DOMNA_PROPERTY_ID: x[asset_list.DOMNA_PROPERTY_ID],
-                    **x["find_my_epc_data"]
+                    **x["find_my_epc_data"],
                 }
             )
         else:
             find_my_epc_data.append(
-                {
-                    asset_list.DOMNA_PROPERTY_ID: x[asset_list.DOMNA_PROPERTY_ID]
-                }
+                {asset_list.DOMNA_PROPERTY_ID: x[asset_list.DOMNA_PROPERTY_ID]}
             )
 
     find_my_epc_data = pd.DataFrame(find_my_epc_data)
 
     find_my_epc_data = find_my_epc_data.merge(
         transformed_df[[asset_list.DOMNA_PROPERTY_ID, "epc_has_floor_recommendation"]],
-        how="left", on=asset_list.DOMNA_PROPERTY_ID
+        how="left",
+        on=asset_list.DOMNA_PROPERTY_ID,
     )
 
     # We check if we get the solar pv column:
@@ -388,24 +381,26 @@ def app():
     # Retrieve just the data we need
     epc_df = epc_df[
         [asset_list.DOMNA_PROPERTY_ID] + list(asset_list.EPC_API_DATA_NAMES.keys())
-        ].rename(
-        columns=asset_list.EPC_API_DATA_NAMES
-    )
+    ].rename(columns=asset_list.EPC_API_DATA_NAMES)
 
     # Look for columns not in the find my EPC data, which will have happened if we didn't
     # retrieve it in the first place
-    missed_find_epc_cols = [c for c in list(asset_list.FIND_EPC_DATA_NAMES.keys()) if c not in find_my_epc_data.columns]
+    missed_find_epc_cols = [
+        c
+        for c in list(asset_list.FIND_EPC_DATA_NAMES.keys())
+        if c not in find_my_epc_data.columns
+    ]
     if missed_find_epc_cols:
         for c in missed_find_epc_cols:
             find_my_epc_data[c] = None
 
     epc_df = epc_df.merge(
         find_my_epc_data[
-            [asset_list.DOMNA_PROPERTY_ID, "epc_has_floor_recommendation"] + list(asset_list.FIND_EPC_DATA_NAMES.keys())
-            ]
-        .rename(columns=asset_list.FIND_EPC_DATA_NAMES),
+            [asset_list.DOMNA_PROPERTY_ID, "epc_has_floor_recommendation"]
+            + list(asset_list.FIND_EPC_DATA_NAMES.keys())
+        ].rename(columns=asset_list.FIND_EPC_DATA_NAMES),
         how="left",
-        on=asset_list.DOMNA_PROPERTY_ID
+        on=asset_list.DOMNA_PROPERTY_ID,
     )
 
     asset_list.merge_data(epc_df)
@@ -422,7 +417,10 @@ def app():
     asset_list.get_work_figures()
 
     # Store as an excel
-    filename = os.path.join(data_folder, ".".join(data_filename.split(".")[:-1])) + " - Standardised.xlsx"
+    filename = (
+        os.path.join(data_folder, ".".join(data_filename.split(".")[:-1]))
+        + " - Standardised.xlsx"
+    )
     # Store the data in two tabs. One for the asset list with the EPC data and the second with the flat data
 
     # Determine inspections priority
@@ -446,26 +444,42 @@ def app():
     # )
 
     with pd.ExcelWriter(filename) as writer:
-        asset_list.standardised_asset_list.to_excel(writer, sheet_name="Standardised Asset List", index=False)
+        asset_list.standardised_asset_list.to_excel(
+            writer, sheet_name="Standardised Asset List", index=False
+        )
         if asset_list.block_analysis_df is not None:
-            asset_list.block_analysis_df.to_excel(writer, sheet_name="Block Analysis", index=False)
+            asset_list.block_analysis_df.to_excel(
+                writer, sheet_name="Block Analysis", index=False
+            )
         # If we have outcomes, we add a tab with the outcomes
         if not asset_list.outcomes_for_output.empty:
-            asset_list.outcomes_for_output.to_excel(writer, sheet_name="Outcomes", index=False)
+            asset_list.outcomes_for_output.to_excel(
+                writer, sheet_name="Outcomes", index=False
+            )
 
         if not asset_list.unmatched_submissions.empty:
-            asset_list.unmatched_submissions.to_excel(writer, sheet_name="Unmatched Submissions", index=False)
+            asset_list.unmatched_submissions.to_excel(
+                writer, sheet_name="Unmatched Submissions", index=False
+            )
 
         if not asset_list.outcomes_no_match.empty:
-            asset_list.outcomes_no_match.to_excel(writer, sheet_name="Unmatched Outcomes", index=False)
+            asset_list.outcomes_no_match.to_excel(
+                writer, sheet_name="Unmatched Outcomes", index=False
+            )
 
         if not asset_list.ecosurv_no_match.empty:
-            asset_list.ecosurv_no_match.to_excel(writer, sheet_name="Unmatched Ecosurv", index=False)
+            asset_list.ecosurv_no_match.to_excel(
+                writer, sheet_name="Unmatched Ecosurv", index=False
+            )
 
         if not asset_list.geographical_areas.empty:
-            asset_list.geographical_areas.to_excel(writer, sheet_name="Geographical Areas", index=False)
+            asset_list.geographical_areas.to_excel(
+                writer, sheet_name="Geographical Areas", index=False
+            )
 
         # Store dupes
         if asset_list.duplicated_addresses is not None:
             if not asset_list.duplicated_addresses.empty:
-                asset_list.duplicated_addresses.to_excel(writer, sheet_name="Duplicate Properties", index=False)
+                asset_list.duplicated_addresses.to_excel(
+                    writer, sheet_name="Duplicate Properties", index=False
+                )