From 110cb8070ce78823d2bd9edcca5d5d95222a9da4 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 10 Dec 2025 18:42:25 +0000
Subject: [PATCH 1/2] Increased concurrency of backend

---
 etl/customers/lincs_rural/prepare_data.py     | 71 ++++++++++++++++---
 .../data_cleanse.py                           |  6 ++
 etl/find_my_epc/RetrieveFindMyEpc.py          | 13 ++--
 serverless.yml                                |  2 +-
 4 files changed, 74 insertions(+), 18 deletions(-)
 create mode 100644 etl/customers/peabody/Nov 2025 Consulting Project/data_cleanse.py

diff --git a/etl/customers/lincs_rural/prepare_data.py b/etl/customers/lincs_rural/prepare_data.py
index db7a9087..675179a8 100644
--- a/etl/customers/lincs_rural/prepare_data.py
+++ b/etl/customers/lincs_rural/prepare_data.py
@@ -1,8 +1,15 @@
 """
 Rough script to prepare the data for Lincs Rural project
 """
+from tqdm import tqdm
 import pandas as pd
+import os
+from dotenv import load_dotenv
 from etl.find_my_epc.RetrieveFindMyEpc import RetrieveFindMyEpc
+from backend.SearchEpc import SearchEpc
+
+load_dotenv(dotenv_path="backend/.env")
+EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
 
 data = pd.read_excel(
     "/Users/khalimconn-kowlessar/Downloads/MASTER LIST EPCS UPDATED November 2025 Domna Homes.xlsx",
@@ -11,16 +18,58 @@ data = pd.read_excel(
 
 # We have property RRNs - we need UPRN
 
-for _, x in data.iterrows():
-    rrn = x["EPC Ref."]
+standardised_ara_list = []
+missed = []
+for _, x in tqdm(data.iterrows(), total=len(data)):
+    try:
+        rrn = x["EPC Ref."]
 
-    # Fetch from find my epc
-    retriever = RetrieveFindMyEpc(
-        address="",
-        postcode="",
-        rrn=rrn,
-        address_postal_town="",
-        sap_rating=x["Actual"]
-    )
+        # Fetch from find my epc
+        retriever = RetrieveFindMyEpc(
+            address="",
+            postcode="",
+            rrn=rrn,
+            address_postal_town="",
+        )
 
-    find_epc_data = retriever.retrieve_all_find_my_epc_data()
+        find_epc_data = retriever.retrieve_newest_find_my_epc_data(rrn=rrn)
+
+        # Find the UPRN
+        epc_searcher = SearchEpc(
+            address1=str(find_epc_data["address1"]),
+            postcode=str(find_epc_data["postcode"]),
+            auth_token=EPC_AUTH_TOKEN,
+            os_api_key="",
+            property_type=None,
+            fast=False,
+            full_address=",".join([find_epc_data["address1"], find_epc_data["address2"]]),
+            max_retries=5,
+        )
+        epc_searcher.find_property(skip_os=True)
+
+        # Append in the format we need
+        # Fields required for the standardised asset list:
+        standardised_ara_list.append(
+            {
+                "landlord_property_id": x["Property Ref."],
+                "landlord_property_type": epc_searcher.newest_epc.get("property-type"),
+                "landlord_built_form": epc_searcher.newest_epc.get("built-form"),
+                "landlord_heating_system": epc_searcher.newest_epc.get("mainheat-description", ""),
+                "epc_os_uprn": epc_searcher.newest_epc.get("uprn"),
+                "domna_property_id": x["Property Ref."],
+                "domna_full_address": epc_searcher.newest_epc.get(
+                    "address", ", ".join([
+                        find_epc_data["address1"],
+                        find_epc_data["address2"],
+                    ])
+                ),
+            }
+        )
+    except Exception as e:
+        missed.append({
+            "property_ref": x["Property Ref."],
+            "rrn": x["EPC Ref."],
+            "error": str(e)
+        })
+
+missed_df = pd.DataFrame(missed)
diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/data_cleanse.py b/etl/customers/peabody/Nov 2025 Consulting Project/data_cleanse.py
new file mode 100644
index 00000000..a1be533d
--- /dev/null
+++ b/etl/customers/peabody/Nov 2025 Consulting Project/data_cleanse.py	
@@ -0,0 +1,6 @@
+"""
+We have found, within the Peabody data, a large volume of properties with missing and incorrect
+UPRNs and incorrect address data. We want to flag these records and also find the missing UPRNs where we can
+
+We also have duplicate UPRNs that should be flagged
+"""
diff --git a/etl/find_my_epc/RetrieveFindMyEpc.py b/etl/find_my_epc/RetrieveFindMyEpc.py
index eb330948..cf6659f9 100644
--- a/etl/find_my_epc/RetrieveFindMyEpc.py
+++ b/etl/find_my_epc/RetrieveFindMyEpc.py
@@ -465,12 +465,13 @@ class RetrieveFindMyEpc:
         potential_rating = ratings.split(".")[1]
         current_sap = int(current_rating.split(' ')[-1])
 
-        if current_sap != self.sap_rating:
-            # This means we likely have the wrong data. If we are in this scenario, we return nothing
-            return {
-                "epc_certificate": None,
-                "page_source": None,
-            }
+        if self.sap_rating:
+            if current_sap != self.sap_rating:
+                # This means we likely have the wrong data. If we are in this scenario, we return nothing
+                return {
+                    "epc_certificate": None,
+                    "page_source": None,
+                }
 
         # Retrieve the energy consumption
         bills = address_res.find('div', {'id': 'bills-affected'})
diff --git a/serverless.yml b/serverless.yml
index d2d8f50a..38d8da89 100644
--- a/serverless.yml
+++ b/serverless.yml
@@ -66,7 +66,7 @@ functions:
       - sqs:
           arn: arn:aws:sqs:${self:provider.region}:${aws:accountId}:model-engine-queue
           batchSize: 1
-          maximumConcurrency: 5  # Heavily restricts concurrency to avoid overwhelming the ldmbda limits
+          maximumConcurrency: 10  # Heavily restricts concurrency to avoid overwhelming the lambda limits
 
 
 resources:

From 8f7e9e0bdece3a0073aff017d32ebcfa3d6050a1 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 10 Dec 2025 19:17:19 +0000
Subject: [PATCH 2/2] simplified fuel code

---
 asset_list/app.py                         | 34 +++++++++++++++++++++++
 backend/Property.py                       | 24 +++-------------
 backend/SearchEpc.py                      |  2 +-
 etl/customers/lincs_rural/prepare_data.py | 16 +++++++++++
 4 files changed, 55 insertions(+), 21 deletions(-)

diff --git a/asset_list/app.py b/asset_list/app.py
index cbb2cd93..dfd7aa46 100644
--- a/asset_list/app.py
+++ b/asset_list/app.py
@@ -59,6 +59,40 @@ def app():
     Property UPRN
     """
 
+    # Lambeth:
+    data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Lambeth/December 10th"
+    data_filename = "lambeth_sw2_leigham court estate.xlsx"
+    sheet_name = "Sheet1"
+    postcode_column = 'Postcode'
+    address1_column = "Address"
+    address1_method = None
+    fulladdress_column = None
+    address_cols_to_concat = ["Address"]
+    missing_postcodes_method = None
+    landlord_year_built = None
+    landlord_os_uprn = None
+    landlord_property_type = None
+    landlord_built_form = None
+    landlord_wall_construction = None
+    landlord_roof_construction = None
+    landlord_heating_system = None
+    landlord_existing_pv = None
+    landlord_property_id = "row_id"
+    landlord_sap = None
+    outcomes_filename = None
+    outcomes_sheetname = None
+    outcomes_postcode = None
+    outcomes_houseno = None
+    outcomes_id = None
+    outcomes_address = None
+    master_filepaths = []
+    master_id_colnames = []
+    master_to_asset_list_filepath = None
+    phase = False
+    ecosurv_landlords = None
+    asset_list_header = 0
+    landlord_block_reference = None
+
     # Maps addresses to uprn in problematic cases
     manual_uprn_map = {}
 
diff --git a/backend/Property.py b/backend/Property.py
index cbcb9aa3..31991702 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -1416,30 +1416,14 @@ class Property:
         if not self.is_ashp_valid(measures=["air_source_heat_pump"]):
             return self.current_energy_consumption
 
-        # If the property currently has an electric boiler, it will still benefit from the ASHP efficiency gain
-        remap_fuel_sources = [
-            "Natural Gas", "LPG", "Wood Logs", "Oil", "Electricity", "Coal", "Smokeless Fuel",
-            "Natural Gas + Solar Thermal", "Anthracite", "Wood Pellets", "LPG + Solar Thermal",
-            "Natural Gas (Community Scheme)"
-        ]
-
-        heating_energy_source = self.heating_energy_source
-        hot_water_energy_source = self.hot_water_energy_source
         heating_consumption = self.energy_consumption_estimates["unadjusted"]["heating"]
         hotwater_consumption = self.energy_consumption_estimates["unadjusted"]["hot_water"]
 
-        if (heating_energy_source not in remap_fuel_sources) or (
-            hot_water_energy_source not in remap_fuel_sources + ["Electricity + Solar Thermal"]
-        ):
-            raise NotImplementedError("Have not implemented estimating electrical consumption for this fuel type")
+        # Adjust the heating consumption to reflect the expected efficiency of an ASHP - broadly 3.0 COP
+        heating_consumption = heating_consumption / (assumed_ashp_efficiency / 100)
 
-        if heating_energy_source in remap_fuel_sources:
-            # Adjust the heating consumption to reflect the expected efficiency of an ASHP
-            heating_consumption = heating_consumption / (assumed_ashp_efficiency / 100)
-
-        if hot_water_energy_source in remap_fuel_sources:
-            # Adjust the hot water consumption to reflect the expected efficiency of an ASHP
-            hotwater_consumption = hotwater_consumption / (assumed_ashp_efficiency / 100)
+        # Adjust the hot water consumption to reflect the expected efficiency of an ASHP
+        hotwater_consumption = hotwater_consumption / (assumed_ashp_efficiency / 100)
 
         electric_consumption = (
             heating_consumption +
diff --git a/backend/SearchEpc.py b/backend/SearchEpc.py
index a193a65f..cb465239 100644
--- a/backend/SearchEpc.py
+++ b/backend/SearchEpc.py
@@ -563,7 +563,7 @@ class SearchEpc:
             uprn = hash(self.address1 + self.postcode)
 
         if self.fast:
-            return newest_epc, [], {}, "", "", None
+            return newest_epc, [], {}, "", "", None, ""
 
         # Retrieve postcode and address
         address_epc, postcode_epc, address_postal_town = self.format_address(newest_epc=newest_epc)
diff --git a/etl/customers/lincs_rural/prepare_data.py b/etl/customers/lincs_rural/prepare_data.py
index 675179a8..0a3be7fe 100644
--- a/etl/customers/lincs_rural/prepare_data.py
+++ b/etl/customers/lincs_rural/prepare_data.py
@@ -52,6 +52,8 @@ for _, x in tqdm(data.iterrows(), total=len(data)):
         standardised_ara_list.append(
             {
                 "landlord_property_id": x["Property Ref."],
+                "domna_address_1": find_epc_data["address1"],
+                "postcode": find_epc_data["postcode"],
                 "landlord_property_type": epc_searcher.newest_epc.get("property-type"),
                 "landlord_built_form": epc_searcher.newest_epc.get("built-form"),
                 "landlord_heating_system": epc_searcher.newest_epc.get("mainheat-description", ""),
@@ -73,3 +75,17 @@ for _, x in tqdm(data.iterrows(), total=len(data)):
         })
 
 missed_df = pd.DataFrame(missed)
+
+# Store
+standardised_ara_df = pd.DataFrame(standardised_ara_list)
+standardised_ara_df.to_excel(
+    "/Users/khalimconn-kowlessar/Downloads/lincs_rural_standardised_ara_nov_2025.xlsx",
+    index=False,
+    sheet_name="Standardised Asset List"
+)
+# Store missed
+missed_df.to_excel(
+    "/Users/khalimconn-kowlessar/Downloads/lincs_rural_missed_nov_2025.xlsx",
+    index=False,
+    sheet_name="Missed Properties"
+)