From 6076eb4f24905ad026c7a0dca9eb3d15f7678a5b Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 15 Apr 2024 14:50:00 +0100
Subject: [PATCH 01/58] building photo upload app

---
 .idea/Model.iml                               |   2 +-
 .idea/misc.xml                                |   2 +-
 etl/non_invasive_surveys/photos/README.md     |  19 +++
 etl/non_invasive_surveys/photos/app.py        | 120 ++++++++++++++++++
 .../photos/requirements.txt                   |   3 +
 5 files changed, 144 insertions(+), 2 deletions(-)
 create mode 100644 etl/non_invasive_surveys/photos/README.md
 create mode 100644 etl/non_invasive_surveys/photos/app.py
 create mode 100644 etl/non_invasive_surveys/photos/requirements.txt
diff --git a/.idea/Model.iml b/.idea/Model.iml
index 4413bb06..c75af922 100644
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@@ -7,7 +7,7 @@
       <sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
       <sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
     </content>
-    <orderEntry type="jdk" jdkName="Python 3.10 (backend)" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="non_invasive_surveys-photos" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="PyNamespacePackagesService">
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 6f308057..1f2c584d 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,7 +3,7 @@
   <component name="Black">
     <option name="sdkName" value="Python 3.10 (backend)" />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (backend)" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="non_invasive_surveys-photos" project-jdk-type="Python SDK" />
   <component name="PythonCompatibilityInspectionAdvertiser">
     <option name="version" value="3" />
   </component>
diff --git a/etl/non_invasive_surveys/photos/README.md b/etl/non_invasive_surveys/photos/README.md
new file mode 100644
index 00000000..9dbe951f
--- /dev/null
+++ b/etl/non_invasive_surveys/photos/README.md
@@ -0,0 +1,19 @@
+# Non-Intrusive Surveys - photo upload
+
+This folder contains photos taken during non-intrusive surveys. Photos are stored in folders named after the survey ID.
+
+## Getting started
+
+Install the required packages by running the following command:
+
+```bash
+pip install -r requirements.txt
+```
+
+## Usage
+
+The main application is found in the app.py file. To run the application, use the following command:
+
+```bash
+python app.py
+```
\ No newline at end of file
diff --git a/etl/non_invasive_surveys/photos/app.py b/etl/non_invasive_surveys/photos/app.py
new file mode 100644
index 00000000..1b6790f9
--- /dev/null
+++ b/etl/non_invasive_surveys/photos/app.py
@@ -0,0 +1,120 @@
+import boto3
+from PIL import Image
+from pathlib import Path
+from dotenv import load_dotenv
+
+# Inputs
+ENV_FILEPATH = "etl/non_invasive_surveys/photos/.env"
+PHOTO_DIRECTORY = "/Users/khalimconn-kowlessar/Downloads/IMMO - Dudley Pilot - non-invasive raw data"
+FOLDER_UPRN_LOOKUP = {
+    "91 Osprey Drive DY1 2JS": 90048026,
+    "195 Ashenhurst Rd DY1 2JB": 90051858,
+    "6 Beech Rd DY1 4BP": 90055152,
+    "53 Bromley DY5 4PJ": 90060989,
+    "5 Oaklands B62 0JA": 90028499,
+    "47 Fairfield Rd DY8 5UJ": 90077535,
+    "150 Huntingtree Rd B63 4HP": 90093693,
+    "27 Milton Rd DY1 2JB": 90106884,
+    "21 Wells Rd DY5 3TB": 90022227,
+    "8 Corporation Rd DY2 7PX": 90070461
+}
+
+
+def list_subdirectories(directory_path):
+    """
+    List all subdirectories within a given directory.
+
+    :param directory_path: Path to the directory.
+    :return: A list of paths to the subdirectories.
+    """
+    directory = Path(directory_path)
+    subdirectories = [subdir for subdir in directory.iterdir() if subdir.is_dir()]
+    return subdirectories
+
+
+def list_files_in_directory(directory_path, file_extension=".jpg"):
+    """
+    List all files with a specific extension within a given directory and its subdirectories.
+
+    :param directory_path: Path to the directory to scan.
+    :param file_extension: File extension to filter by.
+    :return: A list of paths to the files.
+    """
+    # Convert the directory path to a Path object if it's not already one
+    directory = Path(directory_path) if not isinstance(directory_path, Path) else directory_path
+
+    # List all files of the specified type in the directory and subdirectories
+    file_list = [file for file in directory.rglob(f'*{file_extension}')]
+
+    return file_list
+
+
+def create_images(input_path):
+    # Load the image
+    with Image.open(input_path) as img:
+        # Create a thumbnail
+        thumbnail = img.copy()
+        thumbnail.thumbnail((128, 128), Image.ANTIALIAS)  # Resize to 128x128 (or any desired size)
+        thumbnail.save('thumbnail.jpg')
+
+        # Create a 1080p version
+        full_hd = img.copy()
+        full_hd.thumbnail((1920, 1080), Image.ANTIALIAS)  # Resize to 1080p
+        full_hd.save('1080p.jpg')
+
+    # Return paths to the processed images
+    return 'thumbnail.jpg', '1080p.jpg', input_path
+
+
+def upload_to_s3(bucket_name, file_path, object_name):
+    s3_client = boto3.client('s3')
+    s3_client.upload_file(file_path, bucket_name, object_name)
+    print(f"Uploaded {object_name} to S3 bucket {bucket_name}")
+
+
+def upload_photos_to_s3(bucket_name, photo_paths):
+    # Upload each photo
+    for path in photo_paths:
+        object_name = path.split('/')[-1]  # Assuming the path format is folder/filename
+        upload_to_s3(bucket_name, path, object_name)
+
+
+def generate_cdn_url(distribution_domain, object_name):
+    return f"https://{distribution_domain}/{object_name}"
+
+
+def process_and_upload_images(input_image_path, bucket_name, distribution_domain):
+    # Create images
+    thumbnail, full_hd, original = create_images(input_image_path)
+
+    # Upload images
+    upload_photos_to_s3(bucket_name, [thumbnail, full_hd, original])
+
+    # Generate CDN links
+    cdn_links = [generate_cdn_url(distribution_domain, path.split('/')[-1]) for path in [thumbnail, full_hd, original]]
+
+    return cdn_links
+
+
+def app():
+    """
+    This application is tasked with uploading the photos, recorded during the non-invasive surveys, to s3 and the
+    database.
+    To begin with, this app will simply read the files from the local machine, however we will come up with a more
+    efficient way to do this in the future.
+
+    :return:
+    """
+
+    # List all files in the directory using pathlib
+    property_directories = list_subdirectories(PHOTO_DIRECTORY)
+
+    # For each property, we want to list all of the photos in the directory
+    for property_dir in property_directories:
+        photo_files = list_files_in_directory(property_dir)
+
+        # We now want to convert each file, and upload it to s3
+        for photo_filepath in photo_files:
+            process_and_upload_images(
+                photo_filepath, "retrofit-datalake-dev", "cdn.retrofit.com"
+            )
diff --git a/etl/non_invasive_surveys/photos/requirements.txt b/etl/non_invasive_surveys/photos/requirements.txt
new file mode 100644
index 00000000..2199a0b4
--- /dev/null
+++ b/etl/non_invasive_surveys/photos/requirements.txt
@@ -0,0 +1,3 @@
+Pillow
+boto3
+python-dotenv
\ No newline at end of file

From d3a175468330774214e4c7225157dd4481cb60cd Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 15 Apr 2024 15:20:38 +0100
Subject: [PATCH 02/58] modifying photo upload code

---
 etl/non_invasive_surveys/photos/app.py | 43 ++++++++++++++++++++------
 1 file changed, 34 insertions(+), 9 deletions(-)

diff --git a/etl/non_invasive_surveys/photos/app.py b/etl/non_invasive_surveys/photos/app.py
index 1b6790f9..ffd993a6 100644
--- a/etl/non_invasive_surveys/photos/app.py
+++ b/etl/non_invasive_surveys/photos/app.py
@@ -1,4 +1,5 @@
 import boto3
+import os
 from PIL import Image
 from pathlib import Path
 from dotenv import load_dotenv
@@ -19,6 +20,10 @@ FOLDER_UPRN_LOOKUP = {
     "8 Corporation Rd DY2 7PX": 90070461
 }
 
+load_dotenv(ENV_FILEPATH)
+CLOUDFRONT_DISTRIBUTION_DOMAIN_NAME = os.getenv("CLOUDFRONT_DISTRIBUTION_DOMAIN_NAME", None)
+CDN_BUCKET_NAME = os.getenv("CDN_BUCKET_NAME", None)
+
 
 def list_subdirectories(directory_path):
     """
@@ -49,21 +54,33 @@ def list_files_in_directory(directory_path, file_extension=".jpg"):
     return file_list
 
 
-def create_images(input_path):
+def create_images(input_path, uprn):
+    # Need to create local directory if it doesn't exist
+    os.makedirs(f"non_invasive_photos/{uprn}", exist_ok=True)
+
     # Load the image
     with Image.open(input_path) as img:
+        # Define output paths
+        thumbnail_path = f"non_invasive_photos/{uprn}/thumbnail.jpg"
+        full_hd_path = f"non_invasive_photos/{uprn}/1080p.jpg"
+        webp_path = f"non_invasive_photos/{uprn}/webp.webp"  # Save as WebP format
+
         # Create a thumbnail
         thumbnail = img.copy()
-        thumbnail.thumbnail((128, 128), Image.ANTIALIAS)  # Resize to 128x128 (or any desired size)
-        thumbnail.save('thumbnail.jpg')
+        thumbnail.thumbnail((128, 128), Image.Resampling.LANCZOS)  # High-quality downsampling
+        thumbnail.save(thumbnail_path, 'JPEG', quality=85)  # Save as JPEG with quality setting
 
         # Create a 1080p version
         full_hd = img.copy()
-        full_hd.thumbnail((1920, 1080), Image.ANTIALIAS)  # Resize to 1080p
-        full_hd.save('1080p.jpg')
+        full_hd.thumbnail((1920, 1080), Image.Resampling.LANCZOS)
+        full_hd.save(full_hd_path, 'JPEG', quality=90)  # Slightly higher quality for larger image
+
+        # Convert to WebP for better compression
+        webp = img.copy()
+        webp.save(webp_path, 'WEBP', quality=90)
 
     # Return paths to the processed images
-    return 'thumbnail.jpg', '1080p.jpg', input_path
+    return thumbnail_path, full_hd_path, webp_path
 
 
 def upload_to_s3(bucket_name, file_path, object_name):
@@ -83,9 +100,9 @@ def generate_cdn_url(distribution_domain, object_name):
     return f"https://{distribution_domain}/{object_name}"
 
 
-def process_and_upload_images(input_image_path, bucket_name, distribution_domain):
+def process_and_upload_images(uprn, input_image_path, bucket_name, distribution_domain):
     # Create images
-    thumbnail, full_hd, original = create_images(input_image_path)
+    thumbnail, full_hd, original = create_images(str(uprn), input_image_path)
 
     # Upload images
     upload_photos_to_s3(bucket_name, [thumbnail, full_hd, original])
@@ -93,6 +110,10 @@ def process_and_upload_images(input_image_path, bucket_name, distribution_domain
     # Generate CDN links
     cdn_links = [generate_cdn_url(distribution_domain, path.split('/')[-1]) for path in [thumbnail, full_hd, original]]
 
+    # Delete local files
+    for path in [thumbnail, full_hd, original]:
+        os.remove(path)
+
     return cdn_links
 
 
@@ -112,9 +133,13 @@ def app():
     # For each property, we want to list all of the photos in the directory
     for property_dir in property_directories:
         photo_files = list_files_in_directory(property_dir)
+        uprn = FOLDER_UPRN_LOOKUP[property_dir.name]
 
         # We now want to convert each file, and upload it to s3
         for photo_filepath in photo_files:
             process_and_upload_images(
-                photo_filepath, "retrofit-datalake-dev", "cdn.retrofit.com"
+                uprn=uprn,
+                input_image_path=photo_filepath,
+                bucket_name=CDN_BUCKET_NAME,
+                distribution_domain=CLOUDFRONT_DISTRIBUTION_DOMAIN_NAME
             )

From 5d3440815d7616bf3af37ca68136a73d610f071a Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 15 Apr 2024 16:33:15 +0100
Subject: [PATCH 03/58] Pushing non-invasive photos to app wip

---
 .../photos/README.md                          |  0
 .../photos/app.py                             | 28 +++++++++++--------
 .../photos/requirements.txt                   |  0
 3 files changed, 16 insertions(+), 12 deletions(-)
 rename etl/{non_invasive_surveys => non_intrusive_surveys}/photos/README.md (100%)
 rename etl/{non_invasive_surveys => non_intrusive_surveys}/photos/app.py (84%)
 rename etl/{non_invasive_surveys => non_intrusive_surveys}/photos/requirements.txt (100%)

diff --git a/etl/non_invasive_surveys/photos/README.md b/etl/non_intrusive_surveys/photos/README.md
similarity index 100%
rename from etl/non_invasive_surveys/photos/README.md
rename to etl/non_intrusive_surveys/photos/README.md
diff --git a/etl/non_invasive_surveys/photos/app.py b/etl/non_intrusive_surveys/photos/app.py
similarity index 84%
rename from etl/non_invasive_surveys/photos/app.py
rename to etl/non_intrusive_surveys/photos/app.py
index ffd993a6..c531355b 100644
--- a/etl/non_invasive_surveys/photos/app.py
+++ b/etl/non_intrusive_surveys/photos/app.py
@@ -5,7 +5,7 @@ from pathlib import Path
 from dotenv import load_dotenv
 
 # Inputs
-ENV_FILEPATH = "etl/non_invasive_surveys/photos/.env"
+ENV_FILEPATH = "etl/non_intrusive_surveys/photos/.env"
 PHOTO_DIRECTORY = "/Users/khalimconn-kowlessar/Downloads/IMMO - Dudley Pilot - non-invasive raw data"
 FOLDER_UPRN_LOOKUP = {
     "91 Osprey Drive DY1 2JS": 90048026,
@@ -55,25 +55,29 @@ def list_files_in_directory(directory_path, file_extension=".jpg"):
 
 
 def create_images(input_path, uprn):
+    # Define the base directory path
+    base_directory = f"non_intrusive_photos/{uprn}"
+    print(f"Creating directory: {base_directory}")  # Debug: print the directory to be created
+
     # Need to create local directory if it doesn't exist
-    os.makedirs(f"non_invasive_photos/{uprn}", exist_ok=True)
+    os.makedirs(base_directory, exist_ok=True)
+
+    # Define output paths
+    thumbnail_path = os.path.join(base_directory, "thumbnail.jpg")
+    full_hd_path = os.path.join(base_directory, "1080p.jpg")
+    webp_path = os.path.join(base_directory, "webp.webp")  # Save as WebP format
 
     # Load the image
     with Image.open(input_path) as img:
-        # Define output paths
-        thumbnail_path = f"non_invasive_photos/{uprn}/thumbnail.jpg"
-        full_hd_path = f"non_invasive_photos/{uprn}/1080p.jpg"
-        webp_path = f"non_invasive_photos/{uprn}/webp.webp"  # Save as WebP format
-
         # Create a thumbnail
         thumbnail = img.copy()
-        thumbnail.thumbnail((128, 128), Image.Resampling.LANCZOS)  # High-quality downsampling
-        thumbnail.save(thumbnail_path, 'JPEG', quality=85)  # Save as JPEG with quality setting
+        thumbnail.thumbnail((128, 128), Image.Resampling.LANCZOS)
+        thumbnail.save(thumbnail_path, 'JPEG', quality=85)
 
         # Create a 1080p version
         full_hd = img.copy()
         full_hd.thumbnail((1920, 1080), Image.Resampling.LANCZOS)
-        full_hd.save(full_hd_path, 'JPEG', quality=90)  # Slightly higher quality for larger image
+        full_hd.save(full_hd_path, 'JPEG', quality=90)
 
         # Convert to WebP for better compression
         webp = img.copy()
@@ -102,10 +106,10 @@ def generate_cdn_url(distribution_domain, object_name):
 
 def process_and_upload_images(uprn, input_image_path, bucket_name, distribution_domain):
     # Create images
-    thumbnail, full_hd, original = create_images(str(uprn), input_image_path)
+    thumbnail, full_hd, original = create_images(input_image_path, uprn=str(uprn))
 
     # Upload images
-    upload_photos_to_s3(bucket_name, [thumbnail, full_hd, original])
+    upload_photos_to_s3(bucket_name, photo_paths=[thumbnail, full_hd, original])
 
     # Generate CDN links
     cdn_links = [generate_cdn_url(distribution_domain, path.split('/')[-1]) for path in [thumbnail, full_hd, original]]
diff --git a/etl/non_invasive_surveys/photos/requirements.txt b/etl/non_intrusive_surveys/photos/requirements.txt
similarity index 100%
rename from etl/non_invasive_surveys/photos/requirements.txt
rename to etl/non_intrusive_surveys/photos/requirements.txt

From d6fa81939d6a0f7752728953250b3554995a5297 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 15 Apr 2024 23:41:24 +0100
Subject: [PATCH 04/58] creating new aggregations for front end

---
 .idea/Model.iml                               |   2 +-
 .idea/misc.xml                                |   2 +-
 backend/Property.py                           |   8 +-
 .../app/db/functions/portfolio_functions.py   |   3 +-
 backend/app/plan/router.py                    | 128 +++++++++++++++++-
 recommendations/Recommendations.py            |  11 +-
 6 files changed, 146 insertions(+), 8 deletions(-)

diff --git a/.idea/Model.iml b/.idea/Model.iml
index c75af922..4413bb06 100644
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@@ -7,7 +7,7 @@
       <sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
       <sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
     </content>
-    <orderEntry type="jdk" jdkName="non_invasive_surveys-photos" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Python 3.10 (backend)" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="PyNamespacePackagesService">
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 1f2c584d..6f308057 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,7 +3,7 @@
   <component name="Black">
     <option name="sdkName" value="Python 3.10 (backend)" />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="non_invasive_surveys-photos" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (backend)" project-jdk-type="Python SDK" />
   <component name="PythonCompatibilityInspectionAdvertiser">
     <option name="version" value="3" />
   </component>
diff --git a/backend/Property.py b/backend/Property.py
index a8ed9129..7b5a6bc3 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -142,6 +142,8 @@ class Property:
 
         self.current_adjusted_energy = None
         self.expected_adjusted_energy = None
+        self.current_energy_bill = None
+        self.expected_energy_bill = None
 
         self.recommendations_scoring_data = []
 
@@ -892,12 +894,16 @@ class Property:
 
         return component_data
 
-    def set_adjusted_energy(self, current_adjusted_energy, expected_adjusted_energy):
+    def set_adjusted_energy(
+        self, current_adjusted_energy, expected_adjusted_energy, current_energy_bill, expected_energy_bill
+    ):
         """
         Stores these values for usage later
         """
         self.current_adjusted_energy = current_adjusted_energy
         self.expected_adjusted_energy = expected_adjusted_energy
+        self.current_energy_bill = current_energy_bill
+        self.expected_energy_bill = expected_energy_bill
 
     def set_windows_count(self):
         """
diff --git a/backend/app/db/functions/portfolio_functions.py b/backend/app/db/functions/portfolio_functions.py
index ead8280f..69203368 100644
--- a/backend/app/db/functions/portfolio_functions.py
+++ b/backend/app/db/functions/portfolio_functions.py
@@ -4,7 +4,7 @@ from backend.app.db.models.portfolio import Portfolio
 
 
 def aggregate_portfolio_recommendations(
-    session, portfolio_id: int, total_valuation_increase: float, labour_days: float
+    session, portfolio_id: int, total_valuation_increase: float, labour_days: float, aggregated_data: dict
 ):
     # Aggregate multiple fields
     aggregates = (
@@ -27,6 +27,7 @@ def aggregate_portfolio_recommendations(
         "energy_savings": aggregates.energy_savings or 0,
         "co2_equivalent_savings": aggregates.co2_equivalent_savings or 0,
         "energy_cost_savings": aggregates.energy_cost_savings or 0,
+        **aggregated_data
     }
 
     # Get the portfolio and update the fields
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 49e14872..b8b2d5c8 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -1,3 +1,4 @@
+import json
 from datetime import datetime
 
 from tqdm import tqdm
@@ -57,6 +58,109 @@ def patch_epc(patch, epc_records):
     return epc_records
 
 
+def extract_portfolio_aggregation_data(
+    input_properties, total_valuation_increase, recommendations, new_epc_bands
+):
+    # We aggregate a number of metrics for the portfolio:
+    # 1) A breakdown of the number of properties in each EPC band
+    #    a) before retrofit
+    #    b) after retrofit
+    # 2) Number of units
+    # 3) Co2/unit
+    #    a) before retrofit
+    #    b) after retrofit
+    # 4) Energy bills/unit
+    #    a) before retrofit
+    #    b) after retrofit
+    # 5) Average valuation improvement/unit
+    # 6) Total cost
+    # 7) Cost per unit
+    # 8) £ per CO2 saved
+    # 9) £ per SAP point
+
+    # We need to construct the underlying data for this
+
+    # Helper function to reformat the EPC data
+    def reformat_epc_data(epc_counts):
+        # Define all possible EPC bands in the required order
+        epc_bands = ["G", "F", "E", "D", "C", "B", "A"]
+
+        # Create the formatted data list by checking each band in the order
+        formatted_data = []
+        for band in epc_bands:
+            # Get the count from the dictionary, defaulting to 0 if not present
+            count = epc_counts.get(band, 0)
+            # Append the formatted dictionary to the list
+            formatted_data.append({"name": band, band: count})
+
+        return formatted_data
+
+    n_units = len(input_properties)
+
+    agg_data = []
+    for p in input_properties:
+        # Get the recommendations for the property
+        property_recommendations = recommendations.get(p.id, [])
+        if not property_recommendations:
+            continue
+        # Get just the default recommendations
+        default_recommendations = [r for r in property_recommendations if r["default"]]
+
+        # We can now calculate multiple outputs based on default recommendations
+        carbon_savings = sum([r["co2_equivalent_savings"] for r in default_recommendations])
+
+        pre_retrofit_co2 = p.data["co2-emissions-current"]
+        post_retrofit_co2 = pre_retrofit_co2 - carbon_savings
+
+        pre_retrofit_energy_bill = p.current_energy_bill
+        post_retrofit_energy_bill = p.expected_energy_bill
+
+        cost = sum([r["total"] for r in default_recommendations])
+        sap_point_improvement = sum([r["sap_points"] for r in default_recommendations])
+
+        agg_data.append({
+            "pre_retrofit_epc": p.data["current-energy-rating"],
+            "post_retrofit_epc": new_epc_bands[p.id],
+            "pre_retrofit_co2": pre_retrofit_co2,
+            "post_retrofit_co2": post_retrofit_co2,
+            "pre_retrofit_energy_bill": pre_retrofit_energy_bill,
+            "post_retrofit_energy_bill": post_retrofit_energy_bill,
+            "cost": cost,
+            "sap_point_improvement": sap_point_improvement
+        })
+
+    agg_data = pd.DataFrame(agg_data)
+
+    n_units_to_retrofit = len(agg_data)
+
+    valuation_improvment_per_unit = total_valuation_increase / n_units_to_retrofit
+
+    total_carbon_saved = agg_data["pre_retrofit_co2"].sum() - agg_data["post_retrofit_co2"].sum()
+    total_sap_points = agg_data["sap_point_improvement"].sum()
+
+    aggregation_data = {
+        "epc_breakdown_pre_retrofit": json.dumps(
+            reformat_epc_data(agg_data["pre_retrofit_epc"].value_counts().to_dict())
+        ),
+        "epc_breakdown_post_retrofit": json.dumps(
+            reformat_epc_data(agg_data["post_retrofit_epc"].value_counts().to_dict())
+        ),
+        "number_of_properties": n_units,
+        "n_units_to_retrofit": n_units_to_retrofit,
+        "co2_per_unit_pre_retrofit": agg_data["pre_retrofit_co2"].mean(),
+        "co2_per_unit_post_retrofit": agg_data["post_retrofit_co2"].mean(),
+        "energy_bill_per_unit_pre_retrofit": agg_data["pre_retrofit_energy_bill"].mean(),
+        "energy_bill_per_unit_post_retrofit": agg_data["post_retrofit_energy_bill"].mean(),
+        "valuation_improvement_per_unit": valuation_improvment_per_unit,
+        "total_cost": agg_data["cost"].sum(),
+        "cost_per_unit": agg_data["cost"].mean(),
+        "cost_per_co2_saved": agg_data["cost"].sum() / total_carbon_saved,
+        "cost_per_sap_point": agg_data["cost"].sum() / total_sap_points
+    }
+
+    return aggregation_data
+
+
 router = APIRouter(
     prefix="/plan",
     tags=["plan"],
@@ -243,7 +347,13 @@ async def trigger_plan(body: PlanTriggerRequest):
 
             property_instance = [p for p in input_properties if p.id == property_id][0]
 
-            recommendations_with_impact, current_adjusted_energy, expected_adjusted_energy = (
+            (
+                recommendations_with_impact,
+                current_adjusted_energy,
+                expected_adjusted_energy,
+                current_energy_bill,
+                expected_energy_bill
+            ) = (
                 Recommendations.calculate_recommendation_impact(
                     property_instance=property_instance,
                     all_predictions=all_predictions,
@@ -254,7 +364,9 @@ async def trigger_plan(body: PlanTriggerRequest):
             # Store the resulting adjusted energy in the property instance
             property_instance.set_adjusted_energy(
                 current_adjusted_energy=current_adjusted_energy,
-                expected_adjusted_energy=expected_adjusted_energy
+                expected_adjusted_energy=expected_adjusted_energy,
+                current_energy_bill=current_energy_bill,
+                expected_energy_bill=expected_energy_bill
             )
 
             input_measures = prepare_input_measures(recommendations_with_impact, body.goal)
@@ -316,6 +428,7 @@ async def trigger_plan(body: PlanTriggerRequest):
         logger.info("Uploading recommendations to the database")
         property_valuation_increases = []
         session.commit()
+        new_epc_bands = {}
         for i in range(0, len(input_properties), BATCH_SIZE):
             try:
                 # Take a slice of the input_properties list to make a batch
@@ -327,6 +440,7 @@ async def trigger_plan(body: PlanTriggerRequest):
                     total_sap_points = sum([r["sap_points"] for r in default_recommendations])
                     new_sap_points = float(p.data["current-energy-efficiency"]) + total_sap_points
                     new_epc = sap_to_epc(new_sap_points)
+                    new_epc_bands[p.id] = new_epc
 
                     valuations = PropertyValuation.estimate(property_instance=p, target_epc=new_epc)
 
@@ -392,11 +506,19 @@ async def trigger_plan(body: PlanTriggerRequest):
             [sum(r["labour_days"] for r in rec_group if r["default"]) for p_id, rec_group in recommendations.items()]
         ))
 
+        aggregated_data = extract_portfolio_aggregation_data(
+            input_properties=input_properties,
+            total_valuation_increase=total_valuation_increase,
+            recommendations=recommendations,
+            new_epc_bands=new_epc_bands
+        )
+
         aggregate_portfolio_recommendations(
             session,
             portfolio_id=body.portfolio_id,
             total_valuation_increase=total_valuation_increase,
-            labour_days=labour_days
+            labour_days=labour_days,
+            aggregated_data=aggregated_data
         )
 
         # Commit final changes
diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py
index 68fead16..659b41a8 100644
--- a/recommendations/Recommendations.py
+++ b/recommendations/Recommendations.py
@@ -281,6 +281,9 @@ class Recommendations:
             current_adjusted_energy - expected_adjusted_energy
         )
 
+        current_energy_bill = AnnualBillSavings.calculate_annual_bill(current_adjusted_energy)
+        expected_energy_bill = AnnualBillSavings.calculate_annual_bill(expected_adjusted_energy)
+
         for recommendations_by_type in property_recommendations:
             for rec in recommendations_by_type:
 
@@ -355,4 +358,10 @@ class Recommendations:
                     rec["heat_demand"] is None) or (rec["energy_cost_savings"] is None):
                     raise ValueError("sap points, co2 or heat demand is missing")
 
-        return property_recommendations, current_adjusted_energy, expected_adjusted_energy
+        return (
+            property_recommendations,
+            current_adjusted_energy,
+            expected_adjusted_energy,
+            current_energy_bill,
+            expected_energy_bill
+        )

From cc6277c191dea07ce1a8a26b8083e1eebdd2887b Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 15 Apr 2024 23:52:10 +0100
Subject: [PATCH 05/58] extended outputs

---
 backend/app/plan/router.py | 29 ++++++++++++++++++++---------
 1 file changed, 20 insertions(+), 9 deletions(-)

diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index b8b2d5c8..f7a825db 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -115,6 +115,9 @@ def extract_portfolio_aggregation_data(
         pre_retrofit_energy_bill = p.current_energy_bill
         post_retrofit_energy_bill = p.expected_energy_bill
 
+        pre_retrofit_energy_consumption = p.current_adjusted_energy
+        post_retrofit_energy_consumption = p.expected_adjusted_energy
+
         cost = sum([r["total"] for r in default_recommendations])
         sap_point_improvement = sum([r["sap_points"] for r in default_recommendations])
 
@@ -125,6 +128,8 @@ def extract_portfolio_aggregation_data(
             "post_retrofit_co2": post_retrofit_co2,
             "pre_retrofit_energy_bill": pre_retrofit_energy_bill,
             "post_retrofit_energy_bill": post_retrofit_energy_bill,
+            "pre_retrofit_energy_consumption": pre_retrofit_energy_consumption,
+            "post_retrofit_energy_consumption": post_retrofit_energy_consumption,
             "cost": cost,
             "sap_point_improvement": sap_point_improvement
         })
@@ -138,6 +143,9 @@ def extract_portfolio_aggregation_data(
     total_carbon_saved = agg_data["pre_retrofit_co2"].sum() - agg_data["post_retrofit_co2"].sum()
     total_sap_points = agg_data["sap_point_improvement"].sum()
 
+    def format_money(amount):
+        return f"£{amount:,.0f}"
+
     aggregation_data = {
         "epc_breakdown_pre_retrofit": json.dumps(
             reformat_epc_data(agg_data["pre_retrofit_epc"].value_counts().to_dict())
@@ -147,15 +155,18 @@ def extract_portfolio_aggregation_data(
         ),
         "number_of_properties": n_units,
         "n_units_to_retrofit": n_units_to_retrofit,
-        "co2_per_unit_pre_retrofit": agg_data["pre_retrofit_co2"].mean(),
-        "co2_per_unit_post_retrofit": agg_data["post_retrofit_co2"].mean(),
-        "energy_bill_per_unit_pre_retrofit": agg_data["pre_retrofit_energy_bill"].mean(),
-        "energy_bill_per_unit_post_retrofit": agg_data["post_retrofit_energy_bill"].mean(),
-        "valuation_improvement_per_unit": valuation_improvment_per_unit,
-        "total_cost": agg_data["cost"].sum(),
-        "cost_per_unit": agg_data["cost"].mean(),
-        "cost_per_co2_saved": agg_data["cost"].sum() / total_carbon_saved,
-        "cost_per_sap_point": agg_data["cost"].sum() / total_sap_points
+        "co2_per_unit_pre_retrofit": str(round(agg_data["pre_retrofit_co2"].mean(), 2)) + "t",
+        "co2_per_unit_post_retrofit": str(round(agg_data["post_retrofit_co2"].mean(), 2)) + "t",
+        "energy_bill_per_unit_pre_retrofit": format_money(agg_data["pre_retrofit_energy_bill"].mean()),
+        "energy_bill_per_unit_post_retrofit": format_money(agg_data["post_retrofit_energy_bill"].mean()),
+        "energy_consumption_per_unit_pre_retrofit": str(
+            round(agg_data["pre_retrofit_energy_consumption"].mean())) + "kWh",
+        "energy_consumption_per_unit_post_retrofit": str(
+            round(agg_data["post_retrofit_energy_consumption"].mean())) + "kWh",
+        "valuation_improvement_per_unit": format_money(valuation_improvment_per_unit),
+        "cost_per_unit": format_money(agg_data["cost"].mean()),
+        "cost_per_co2_saved": format_money(agg_data["cost"].sum() / total_carbon_saved),
+        "cost_per_sap_point": format_money(agg_data["cost"].sum() / total_sap_points)
     }
 
     return aggregation_data

From 83d472a7108019fb7ea9f21c9196a5abba154ad0 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 16 Apr 2024 03:05:26 +0100
Subject: [PATCH 06/58] debugging

---
 backend/app/db/models/portfolio.py | 15 +++++++++++++++
 backend/app/plan/router.py         | 18 ++++++++++++------
 recommendations/Recommendations.py |  3 +++
 3 files changed, 30 insertions(+), 6 deletions(-)

diff --git a/backend/app/db/models/portfolio.py b/backend/app/db/models/portfolio.py
index 830866e6..aa0146c0 100644
--- a/backend/app/db/models/portfolio.py
+++ b/backend/app/db/models/portfolio.py
@@ -45,6 +45,21 @@ class Portfolio(Base):
     labour_days = Column(Float)
     created_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc))
     updated_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc))
+    # Aggregations for summary
+    epc_breakdown_pre_retrofit = Column(Text)
+    epc_breakdown_post_retrofit = Column(Text)
+    n_units_to_retrofit = Column(Integer)
+    co2_per_unit_pre_retrofit = Column(Text)
+    co2_per_unit_post_retrofit = Column(Text)
+    energy_bill_per_unit_pre_retrofit = Column(Text)
+    energy_bill_per_unit_post_retrofit = Column(Text)
+    energy_consumption_per_unit_pre_retrofit = Column(Text)
+    energy_consumption_per_unit_post_retrofit = Column(Text)
+    valuation_improvement_per_unit = Column(Text)
+    cost_per_unit = Column(Text)
+    cost_per_co2_saved = Column(Text)
+    cost_per_sap_point = Column(Text)
+    valuation_return_on_investment = Column(Text)
 
 
 class PropertyCreationStatus(enum.Enum):
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index f7a825db..661858b7 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -99,10 +99,9 @@ def extract_portfolio_aggregation_data(
 
     agg_data = []
     for p in input_properties:
-        # Get the recommendations for the property
+        # Get the recommendations for the property - we include all properties, even ones without recommendations
         property_recommendations = recommendations.get(p.id, [])
-        if not property_recommendations:
-            continue
+
         # Get just the default recommendations
         default_recommendations = [r for r in property_recommendations if r["default"]]
 
@@ -113,11 +112,16 @@ def extract_portfolio_aggregation_data(
         post_retrofit_co2 = pre_retrofit_co2 - carbon_savings
 
         pre_retrofit_energy_bill = p.current_energy_bill
-        post_retrofit_energy_bill = p.expected_energy_bill
+        post_retrofit_energy_bill = p.current_energy_bill - sum(
+            [r["energy_cost_savings"] for r in default_recommendations]
+        )
 
         pre_retrofit_energy_consumption = p.current_adjusted_energy
-        post_retrofit_energy_consumption = p.expected_adjusted_energy
+        post_retrofit_energy_consumption = p.current_adjusted_energy - sum(
+            [r["adjusted_heat_demand"] for r in default_recommendations]
+        )
 
+        # Add up the cost and SAP point improvement of the default recommendations
         cost = sum([r["total"] for r in default_recommendations])
         sap_point_improvement = sum([r["sap_points"] for r in default_recommendations])
 
@@ -166,7 +170,9 @@ def extract_portfolio_aggregation_data(
         "valuation_improvement_per_unit": format_money(valuation_improvment_per_unit),
         "cost_per_unit": format_money(agg_data["cost"].mean()),
         "cost_per_co2_saved": format_money(agg_data["cost"].sum() / total_carbon_saved),
-        "cost_per_sap_point": format_money(agg_data["cost"].sum() / total_sap_points)
+        "cost_per_sap_point": format_money(agg_data["cost"].sum() / total_sap_points),
+        "valuation_return_on_investment": str(round(total_valuation_increase / agg_data["cost"].sum(), 2))
+        # TODO: Could we add 10yr carbon credits value?
     }
 
     return aggregation_data
diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py
index 659b41a8..e626ecfa 100644
--- a/recommendations/Recommendations.py
+++ b/recommendations/Recommendations.py
@@ -272,6 +272,8 @@ class Recommendations:
             current_epc_rating=property_instance.data["current-energy-rating"],
         )
 
+        # TODO: This isn't quite right as this is based on EVERY possible measure, not just the ones that are
+        #       actually implemented
         expected_adjusted_energy = AnnualBillSavings.adjust_energy_to_metered(
             epc_energy_consumption=expected_heat_demand,
             current_epc_rating=property_instance.data["current-energy-rating"],
@@ -281,6 +283,7 @@ class Recommendations:
             current_adjusted_energy - expected_adjusted_energy
         )
 
+        # TODO: We should determine if the home is gas & electricity or just electricity
         current_energy_bill = AnnualBillSavings.calculate_annual_bill(current_adjusted_energy)
         expected_energy_bill = AnnualBillSavings.calculate_annual_bill(expected_adjusted_energy)
 

From 0f7e815379eacb6d76100a25186cd38e23d9b8c3 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 16 Apr 2024 11:18:36 +0100
Subject: [PATCH 07/58] updating text for valuation improvement

---
 backend/app/plan/router.py | 49 +++++++++++++++++++++++++++++++-------
 1 file changed, 41 insertions(+), 8 deletions(-)

diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 661858b7..45d87dd3 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -59,7 +59,7 @@ def patch_epc(patch, epc_records):
 
 
 def extract_portfolio_aggregation_data(
-    input_properties, total_valuation_increase, recommendations, new_epc_bands
+    input_properties, total_valuation_increase, recommendations, new_epc_bands, property_value_increase_ranges
 ):
     # We aggregate a number of metrics for the portfolio:
     # 1) A breakdown of the number of properties in each EPC band
@@ -69,7 +69,7 @@ def extract_portfolio_aggregation_data(
     # 3) Co2/unit
     #    a) before retrofit
     #    b) after retrofit
-    # 4) Energy bulls/unit
+    # 4) Energy bill/unit
     #    a) before retrofit
     #    b) after retrofit
     # 5) Average valuation improvement/unit
@@ -105,6 +105,8 @@ def extract_portfolio_aggregation_data(
         # Get just the default recommendations
         default_recommendations = [r for r in property_recommendations if r["default"]]
 
+        has_recommendations = len(default_recommendations) > 0
+
         # We can now calculate multiple outputs based on default recommendations
         carbon_savings = sum([r["co2_equivalent_savings"] for r in default_recommendations])
 
@@ -125,6 +127,15 @@ def extract_portfolio_aggregation_data(
         cost = sum([r["total"] for r in default_recommendations])
         sap_point_improvement = sum([r["sap_points"] for r in default_recommendations])
 
+        lower_bound_valuation_uplift = (
+            property_value_increase_ranges[p.id]["lower_bound_increased_value"] -
+            property_value_increase_ranges[p.id]["current_value"]
+        )
+        upper_bound_valuation_uplift = (
+            property_value_increase_ranges[p.id]["upper_bound_increased_value"] -
+            property_value_increase_ranges[p.id]["current_value"]
+        )
+
         agg_data.append({
             "pre_retrofit_epc": p.data["current-energy-rating"],
             "post_retrofit_epc": new_epc_bands[p.id],
@@ -135,14 +146,22 @@ def extract_portfolio_aggregation_data(
             "pre_retrofit_energy_consumption": pre_retrofit_energy_consumption,
             "post_retrofit_energy_consumption": post_retrofit_energy_consumption,
             "cost": cost,
-            "sap_point_improvement": sap_point_improvement
+            "sap_point_improvement": sap_point_improvement,
+            "lower_bound_valuation_uplift": lower_bound_valuation_uplift,
+            "upper_bound_valuation_uplift": upper_bound_valuation_uplift,
+            "has_recommendations": has_recommendations
         })
 
     agg_data = pd.DataFrame(agg_data)
 
-    n_units_to_retrofit = len(agg_data)
+    n_units_to_retrofit = agg_data["has_recommendations"].sum()
 
-    valuation_improvment_per_unit = total_valuation_increase / n_units_to_retrofit
+    valuation_improvement_lower_bound_per_unit = (
+        agg_data["lower_bound_valuation_uplift"].mean()
+    )
+    valuation_improvement_upper_bound_per_unit = (
+        agg_data["upper_bound_valuation_uplift"].mean()
+    )
 
     total_carbon_saved = agg_data["pre_retrofit_co2"].sum() - agg_data["post_retrofit_co2"].sum()
     total_sap_points = agg_data["sap_point_improvement"].sum()
@@ -150,6 +169,17 @@ def extract_portfolio_aggregation_data(
     def format_money(amount):
         return f"£{amount:,.0f}"
 
+    valuation_improvment_per_unit = format_money(
+        total_valuation_increase / n_units) + (f" ({format_money(valuation_improvement_lower_bound_per_unit)} - "
+                                               f"{format_money(valuation_improvement_upper_bound_per_unit)})")
+
+    valuation_return_on_investment = (
+        str(round(total_valuation_increase / agg_data["cost"].sum(), 2)) +
+        f" ("
+        f"{agg_data['lower_bound_valuation_uplift'].sum() / agg_data['cost'].sum():,.2f} - "
+        f"{agg_data['upper_bound_valuation_uplift'].sum() / agg_data['cost'].sum():,.2f})"
+    )
+
     aggregation_data = {
         "epc_breakdown_pre_retrofit": json.dumps(
             reformat_epc_data(agg_data["pre_retrofit_epc"].value_counts().to_dict())
@@ -167,11 +197,11 @@ def extract_portfolio_aggregation_data(
             round(agg_data["pre_retrofit_energy_consumption"].mean())) + "kWh",
         "energy_consumption_per_unit_post_retrofit": str(
             round(agg_data["post_retrofit_energy_consumption"].mean())) + "kWh",
-        "valuation_improvement_per_unit": format_money(valuation_improvment_per_unit),
+        "valuation_improvement_per_unit": valuation_improvment_per_unit,
         "cost_per_unit": format_money(agg_data["cost"].mean()),
         "cost_per_co2_saved": format_money(agg_data["cost"].sum() / total_carbon_saved),
         "cost_per_sap_point": format_money(agg_data["cost"].sum() / total_sap_points),
-        "valuation_return_on_investment": str(round(total_valuation_increase / agg_data["cost"].sum(), 2))
+        "valuation_return_on_investment": valuation_return_on_investment,
         # TODO: Could we add 10yr carbon credits value?
     }
 
@@ -446,6 +476,7 @@ async def trigger_plan(body: PlanTriggerRequest):
         property_valuation_increases = []
         session.commit()
         new_epc_bands = {}
+        property_value_increase_ranges = {}
         for i in range(0, len(input_properties), BATCH_SIZE):
             try:
                 # Take a slice of the input_properties list to make a batch
@@ -460,6 +491,7 @@ async def trigger_plan(body: PlanTriggerRequest):
                     new_epc_bands[p.id] = new_epc
 
                     valuations = PropertyValuation.estimate(property_instance=p, target_epc=new_epc)
+                    property_value_increase_ranges[p.id] = valuations
 
                     # Your existing operations
                     property_details_epc = p.get_property_details_epc(
@@ -527,7 +559,8 @@ async def trigger_plan(body: PlanTriggerRequest):
             input_properties=input_properties,
             total_valuation_increase=total_valuation_increase,
             recommendations=recommendations,
-            new_epc_bands=new_epc_bands
+            new_epc_bands=new_epc_bands,
+            property_value_increase_ranges=property_value_increase_ranges
         )
 
         aggregate_portfolio_recommendations(

From 02399667798370cab35608dc5edac17db7de1960 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 16 Apr 2024 11:32:15 +0100
Subject: [PATCH 08/58] setting up non-invasive recommendations

---
 etl/customers/immo/pilot/asset_list.py | 29 +++++++++++++++++++++++++-
 1 file changed, 28 insertions(+), 1 deletion(-)

diff --git a/etl/customers/immo/pilot/asset_list.py b/etl/customers/immo/pilot/asset_list.py
index e587cc25..614fa8a0 100644
--- a/etl/customers/immo/pilot/asset_list.py
+++ b/etl/customers/immo/pilot/asset_list.py
@@ -21,6 +21,7 @@ council_tax_bands = pd.DataFrame(council_tax_bands)
 
 # This is information we need to override on the EPC itself, for instance if a new survey has been conducted and
 # that has not reached the API
+# For 53 Bromley, the non-invasives found the walls to be partially filled
 patches = [
     {
         'address': '6 Beech Road', 'postcode': 'DY1 4BP',
@@ -42,7 +43,11 @@ patches = [
         'energy-consumption-current': '491',
         'co2-emissions-current': '5.0',
         'potential-energy-efficiency': '87'
-    }
+    },
+    {
+        'address': '53 Bromley', 'postcode': 'DY5 4PJ',
+        'walls-description': 'Cavity wall, partial insulation',
+    },
 ]
 
 # This is information that is found as a result of the non-invasives, that mean that certain measures
@@ -56,6 +61,19 @@ already_installed = [
     }
 ]
 
+non_invasive_recommendations = [
+    {'address': '8 Corporation Road', 'postcode': 'DY2 7PX', 'recommendations': []},
+    {'address': '21 Wells Road', 'postcode': 'DY5 3TB', 'recommendations': ['cavity_extract_and_refill']},
+    {'address': '27 Milton Road', 'postcode': 'WV14 8HZ', 'recommendations': ['cavity_extract_and_refill']},
+    {'address': '195 Ashenhurst Road', 'postcode': 'DY1 2JB', 'recommendations': ['cavity_extract_and_refill']},
+    {'address': '53 Bromley', 'postcode': 'DY5 4PJ', 'recommendations': ['cavity_surveyed_as_filled_is_partial']},
+    {'address': '91 Osprey Drive', 'postcode': 'DY1 2JS', 'recommendations': ['cavity_extract_and_refill']},
+    {'address': '47 Fairfield Road', 'postcode': 'DY8 5UJ', 'recommendations': ['cavity_extract_and_refill']},
+    {'address': '150 Huntingtree Road', 'postcode': 'B63 4HP', 'recommendations': ['cavity_extract_and_refill']},
+    {'address': '6 Beech Road', 'postcode': 'DY1 4BP', 'recommendations': []},
+    {'address': '5 Oaklands', 'postcode': 'B62 0JA', 'recommendations': ['cavity_extract_and_refill']},
+]
+
 
 def app():
     raw_asset_list = read_excel_from_s3(
@@ -102,6 +120,14 @@ def app():
         file_name=patches_filename
     )
 
+    # Store non-invasive recommendations in S3
+    non_invasive_recommendations_filename = f"{USER_ID}/{PORTFOLIO_ID}/non_invasive_recommendations.json"
+    save_csv_to_s3(
+        dataframe=pd.DataFrame(non_invasive_recommendations),
+        bucket_name="retrofit-plan-inputs-dev",
+        file_name=non_invasive_recommendations_filename
+    )
+
     # EPC C portoflio
     body = {
         "portfolio_id": str(PORTFOLIO_ID),
@@ -111,6 +137,7 @@ def app():
         "trigger_file_path": filename,
         "already_installed_file_path": already_installed_filename,
         "patches_file_path": patches_filename,
+        "non_invasive_recommendations_file_path": non_invasive_recommendations_filename,
         "budget": None,
     }
     print(body)

From b3e7675488b7004cc98f171b8d78793188345148 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 16 Apr 2024 11:38:58 +0100
Subject: [PATCH 09/58] added non-invasive recommendations to property class

---
 backend/Property.py                    |  7 ++++++-
 backend/app/plan/router.py             | 13 +++++++++++++
 backend/app/plan/schemas.py            |  1 +
 etl/customers/immo/pilot/asset_list.py |  1 +
 4 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/backend/Property.py b/backend/Property.py
index 7b5a6bc3..2d1dbd5d 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -61,7 +61,8 @@ class Property:
     n_bedrooms = None
 
     def __init__(
-        self, id, postcode, address, epc_record, already_installed=None, **kwargs
+        self, id, postcode, address, epc_record, already_installed=None, property_non_invasive_recommendations=None,
+        **kwargs
     ):
 
         self.epc_record = epc_record
@@ -80,6 +81,10 @@ class Property:
         # cost and instead, provide a message that the measure has already been installed
 
         self.already_installed = ast.literal_eval(already_installed['already_installed']) if already_installed else []
+        self.non_invasive_recommendations = (
+            ast.literal_eval(property_non_invasive_recommendations['recommendations']) if
+            property_non_invasive_recommendations else []
+        )
 
         self.uprn = epc_record.get("uprn")
         self.full_sap_epc = epc_record.get("full_sap_epc")
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 45d87dd3..e5a2aa79 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -242,6 +242,12 @@ async def trigger_plan(body: PlanTriggerRequest):
                 bucket_name=get_settings().PLAN_TRIGGER_BUCKET, filepath=body.already_installed_file_path
             )
 
+        non_invasive_recommendations = []
+        if body.non_invasive_recommendations_file_path:
+            non_invasive_recommendations = read_csv_from_s3(
+                bucket_name=get_settings().PLAN_TRIGGER_BUCKET, filepath=body.non_invasive_recommendations_file_path
+            )
+
         cleaning_data = read_dataframe_from_s3_parquet(
             bucket_name=get_settings().DATA_BUCKET, file_key="sap_change_model/cleaning_dataset.parquet",
         )
@@ -297,6 +303,12 @@ async def trigger_plan(body: PlanTriggerRequest):
                 x for x in already_installed if
                 (x["address"] == config["address"]) and (x["postcode"] == config["postcode"])
             ), {})
+
+            property_non_invasive_recommendations = next((
+                x for x in non_invasive_recommendations if
+                (x["address"] == config["address"]) and (x["postcode"] == config["postcode"])
+            ), {})
+
             input_properties.append(
                 Property(
                     id=property_id,
@@ -304,6 +316,7 @@ async def trigger_plan(body: PlanTriggerRequest):
                     postcode=epc_searcher.postcode_clean,
                     epc_record=prepared_epc,
                     already_installed=property_already_installed,
+                    non_invasive_recommendations=property_non_invasive_recommendations,
                     **Property.extract_kwargs(config)
                 )
             )
diff --git a/backend/app/plan/schemas.py b/backend/app/plan/schemas.py
index 76eb49d2..59c0ebef 100644
--- a/backend/app/plan/schemas.py
+++ b/backend/app/plan/schemas.py
@@ -11,6 +11,7 @@ class PlanTriggerRequest(BaseModel):
     trigger_file_path: str
     already_installed_file_path: Optional[str] = None
     patches_file_path: Optional[str] = None
+    non_invasive_recommendations_file_path: Optional[str] = None
     exclusions: Optional[conlist(str, min_items=1)] = None
 
     # Pre-defined list of possibilities for exclusions
diff --git a/etl/customers/immo/pilot/asset_list.py b/etl/customers/immo/pilot/asset_list.py
index 614fa8a0..57fa5957 100644
--- a/etl/customers/immo/pilot/asset_list.py
+++ b/etl/customers/immo/pilot/asset_list.py
@@ -151,6 +151,7 @@ def app():
         "trigger_file_path": filename,
         "already_installed_file_path": already_installed_filename,
         "patches_file_path": patches_filename,
+        "non_invasive_recommendations_file_path": non_invasive_recommendations_filename,
         "budget": None,
     }
     print(body)

From 0c1fb0360fa1473d4123e3a41c3a82f65d9a3512 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 16 Apr 2024 11:50:02 +0100
Subject: [PATCH 10/58] fixed patching of partial cwi description

---
 backend/app/plan/router.py             | 2 ++
 etl/customers/immo/pilot/asset_list.py | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index e5a2aa79..7200d2ef 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -52,6 +52,8 @@ def patch_epc(patch, epc_records):
     """
 
     for patch_variable, patch_value in patch.items():
+        if patch_value == "":
+            continue
         if patch_variable in epc_records["original_epc"]:
             epc_records["original_epc"][patch_variable] = patch_value
 
diff --git a/etl/customers/immo/pilot/asset_list.py b/etl/customers/immo/pilot/asset_list.py
index 57fa5957..6329a2be 100644
--- a/etl/customers/immo/pilot/asset_list.py
+++ b/etl/customers/immo/pilot/asset_list.py
@@ -46,7 +46,7 @@ patches = [
     },
     {
         'address': '53 Bromley', 'postcode': 'DY5 4PJ',
-        'walls-description': 'Cavity wall, partial insulation',
+        'walls-description': 'Cavity wall, partial insulation (assumed)',
     },
 ]
 

From 4cf4d67ac91610d19e418aa33ae794a37c1be505 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 16 Apr 2024 13:21:14 +0100
Subject: [PATCH 11/58] Added cavity extraction and re-fill recommendation and
 costing

---
 backend/Property.py                    | 17 ++++++++++++++---
 backend/app/plan/router.py             | 14 ++++++++------
 recommendations/Costs.py               | 13 ++++++++++++-
 recommendations/Recommendations.py     | 19 ++++++++++++++-----
 recommendations/WallRecommendations.py | 17 +++++++++++++++--
 5 files changed, 63 insertions(+), 17 deletions(-)

diff --git a/backend/Property.py b/backend/Property.py
index 2d1dbd5d..2e6cbbb6 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -61,7 +61,7 @@ class Property:
     n_bedrooms = None
 
     def __init__(
-        self, id, postcode, address, epc_record, already_installed=None, property_non_invasive_recommendations=None,
+        self, id, postcode, address, epc_record, already_installed=None, non_invasive_recommendations=None,
         **kwargs
     ):
 
@@ -82,8 +82,8 @@ class Property:
 
         self.already_installed = ast.literal_eval(already_installed['already_installed']) if already_installed else []
         self.non_invasive_recommendations = (
-            ast.literal_eval(property_non_invasive_recommendations['recommendations']) if
-            property_non_invasive_recommendations else []
+            ast.literal_eval(non_invasive_recommendations['recommendations']) if
+            non_invasive_recommendations else []
         )
 
         self.uprn = epc_record.get("uprn")
@@ -284,6 +284,7 @@ class Property:
                     recommendation_record=recommendation_record,
                     recommendations=previous_phase_representatives + [rec],
                     primary_recommendation_id=rec["recommendation_id"],
+                    non_invasive_recommendations=self.non_invasive_recommendations,
                 )
                 self.recommendations_scoring_data.append(scoring_dict)
 
@@ -293,6 +294,7 @@ class Property:
         recommendation_record,
         recommendations: list,
         primary_recommendation_id: int,
+        non_invasive_recommendations: list = None,
     ):
         """
         This function will iterate through a list of recommendations and apply a simulation for each recommendation
@@ -301,10 +303,12 @@ class Property:
         :param recommendation_record: The record of the property, which will be updated
         :param recommendations: The list of recommendations to apply
         :param primary_recommendation_id: The id of the primary recommendation, which is used to identify the record
+        :param non_invasive_recommendations: The list of non-invasive recommendations
         :return: The updated recommendation record
         """
 
         output = recommendation_record.copy()
+        non_invasive_recommendations = [] if non_invasive_recommendations is None else non_invasive_recommendations
 
         for col in [
             "walls_insulation_thickness",
@@ -323,6 +327,13 @@ class Property:
                 "external_wall_insulation",
                 "cavity_wall_insulation",
             ]:
+
+                # # If we have a non-invasive recommendation that the cavity wall is partially filled, we skip the
+                # # cavity wall insulation recommendation (since on the EPC, the property will look like how it did
+                # # before any works)
+                # if "cavity_surveyed_as_filled_is_partial" in non_invasive_recommendations:
+                #     continue
+
                 # The upgrade made here is to the u-value of the walls and the description of the
                 # insulation thickness
                 output["walls_thermal_transmittance_ending"] = recommendation[
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 7200d2ef..9854abe8 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -171,11 +171,13 @@ def extract_portfolio_aggregation_data(
     def format_money(amount):
         return f"£{amount:,.0f}"
 
-    valuation_improvment_per_unit = format_money(
-        total_valuation_increase / n_units) + (f" ({format_money(valuation_improvement_lower_bound_per_unit)} - "
-                                               f"{format_money(valuation_improvement_upper_bound_per_unit)})")
+    valuation_improvment_per_unit = str(
+        format_money(
+            total_valuation_increase / n_units) + (f" ({format_money(valuation_improvement_lower_bound_per_unit)} - "
+                                                   f"{format_money(valuation_improvement_upper_bound_per_unit)})")
+    )
 
-    valuation_return_on_investment = (
+    valuation_return_on_investment = str(
         str(round(total_valuation_increase / agg_data["cost"].sum(), 2)) +
         f" ("
         f"{agg_data['lower_bound_valuation_uplift'].sum() / agg_data['cost'].sum():,.2f} - "
@@ -189,8 +191,8 @@ def extract_portfolio_aggregation_data(
         "epc_breakdown_post_retrofit": json.dumps(
             reformat_epc_data(agg_data["post_retrofit_epc"].value_counts().to_dict())
         ),
-        "number_of_properties": n_units,
-        "n_units_to_retrofit": n_units_to_retrofit,
+        "number_of_properties": int(n_units),
+        "n_units_to_retrofit": int(n_units_to_retrofit),
         "co2_per_unit_pre_retrofit": str(round(agg_data["pre_retrofit_co2"].mean(), 2)) + "t",
         "co2_per_unit_post_retrofit": str(round(agg_data["post_retrofit_co2"].mean(), 2)) + "t",
         "energy_bill_per_unit_pre_retrofit": format_money(agg_data["pre_retrofit_energy_bill"].mean()),
diff --git a/recommendations/Costs.py b/recommendations/Costs.py
index 0e67b352..852bb11f 100644
--- a/recommendations/Costs.py
+++ b/recommendations/Costs.py
@@ -91,6 +91,10 @@ DOUBLE_RADIATOR_COST = 300
 FLUE_COST = 600
 PIPEWORK_COST = 750  # Min cost is £500
 
+# This is the cost per meter squared for cavity extraction
+# https://www.checkatrade.com/blog/cost-guides/cavity-wall-insulation-removal-cost/
+CAVITY_EXTRACTION_COST = 21.5
+
 
 class Costs:
     """
@@ -173,7 +177,7 @@ class Costs:
         if not self.labour_adjustment_factor:
             raise ValueError("Labour adjustment factor not found")
 
-    def cavity_wall_insulation(self, wall_area, material):
+    def cavity_wall_insulation(self, wall_area, material, is_extraction_and_refill=False):
         """
         Calculates the total cost for cavity wall insulation based on material and labor costs,
         including contingency, preliminaries, profit, and VAT.
@@ -208,6 +212,13 @@ class Costs:
         # Assume a team of 2
         labour_days = (labour_hours / 8) / 2
 
+        if is_extraction_and_refill:
+            # bump up the cost of the work
+            total_cost = total_cost + CAVITY_EXTRACTION_COST * wall_area
+            # Additional 2 days work
+            labour_hours = labour_hours + (2 * 8)
+            labour_days = labour_days + 2
+
         return {
             "total": total_cost,
             "subtotal": subtotal_before_vat,
diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py
index e626ecfa..5960d7be 100644
--- a/recommendations/Recommendations.py
+++ b/recommendations/Recommendations.py
@@ -149,12 +149,14 @@ class Recommendations:
         property_recommendations = self.insert_temp_recommendation_id(property_recommendations)
 
         # We also need to create the representative recommendations for each recommendation type
-        property_representative_recommendations = self.create_representative_recommendations(property_recommendations)
+        property_representative_recommendations = self.create_representative_recommendations(
+            property_recommendations, non_invasive_recommendations=self.property_instance.non_invasive_recommendations
+        )
 
         return property_recommendations, property_representative_recommendations
 
     @staticmethod
-    def create_representative_recommendations(property_recommendations):
+    def create_representative_recommendations(property_recommendations, non_invasive_recommendations):
         """
         This method will create a representative recommendation for each recommendation type
         In order to create a representative recommendation, we choose the recommendation that has:
@@ -169,6 +171,13 @@ class Recommendations:
 
         for recommendations_by_type in property_recommendations:
 
+            # If the property was initially surveyed as filled, but the cavity was only partially filled, we don't
+            # want to include the cavity wall insulation recommendation in the defaults
+            # if (recommendations_by_type[0].get("type") == "cavity_wall_insulation") and (
+            #     "cavity_surveyed_as_filled_is_partial" in non_invasive_recommendations
+            # ):
+            #     continue
+
             if recommendations_by_type[0].get("type") == "mechanical_ventilation":
                 continue
 
@@ -238,13 +247,13 @@ class Recommendations:
 
         property_sap_predictions = all_predictions["sap_change_predictions"][
             all_predictions["sap_change_predictions"]["property_id"] == str(property_instance.id)
-            ]
+            ].copy()
         property_heat_predictions = all_predictions["heat_demand_predictions"][
             all_predictions["heat_demand_predictions"]["property_id"] == str(property_instance.id)
-            ]
+            ].copy()
         property_carbon_predictions = all_predictions["carbon_change_predictions"][
             all_predictions["carbon_change_predictions"]["property_id"] == str(property_instance.id)
-            ]
+            ].copy()
 
         property_recommendations = recommendations[property_instance.id].copy()
 
diff --git a/recommendations/WallRecommendations.py b/recommendations/WallRecommendations.py
index feb2620b..20fc453c 100644
--- a/recommendations/WallRecommendations.py
+++ b/recommendations/WallRecommendations.py
@@ -113,7 +113,9 @@ class WallRecommendations(Definitions):
         insulation_thickness = self.property.walls["insulation_thickness"]
 
         # We check if the wall is already insulated and if so, we exit
-        if (insulation_thickness in ["average", "above average"]) or self.property.walls["is_filled_cavity"]:
+        if ((insulation_thickness in ["average", "above average"]) or self.property.walls["is_filled_cavity"]) and (
+            "cavity_extract_and_refill" not in self.property.non_invasive_recommendations
+        ):
             return
 
         if u_value:
@@ -216,15 +218,26 @@ class WallRecommendations(Definitions):
             if new_u_value <= self.BUILDING_REGULATIONS_PART_L_CAVITY_WALL_MAX_U_VALUE:
                 lowest_selected_u_value = update_lowest_selected_u_value(lowest_selected_u_value, new_u_value)
 
+                is_extraction_and_refill = "cavity_extract_and_refill" in self.property.non_invasive_recommendations
+
                 cost_result = self.costs.cavity_wall_insulation(
                     wall_area=self.property.insulation_wall_area,
                     material=material.to_dict(),
+                    is_extraction_and_refill=is_extraction_and_refill
                 )
 
                 already_installed = "cavity_wall_insulation" in self.property.already_installed
                 if already_installed:
                     cost_result = override_costs(cost_result)
 
+                if is_extraction_and_refill:
+                    description = f"Extract and refill cavity wall insulation with {material['description']}"
+                else:
+                    description = self._make_description(material)
+
+                # Floor the new u-value at 0.31, the best (lowest) value our installers can achieve
+                new_u_value = max(0.31, new_u_value)
+
                 recommendations.append(
                     {
                         "phase": phase,
@@ -237,7 +250,7 @@ class WallRecommendations(Definitions):
                             )
                         ],
                         "type": "cavity_wall_insulation",
-                        "description": self._make_description(material),
+                        "description": description,
                         "starting_u_value": u_value,
                         "new_u_value": new_u_value,
                         "sap_points": None,

From fb6ab43b76c6e40b9ccc6d263fc040985fb63034 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 18 Apr 2024 11:07:15 +0100
Subject: [PATCH 12/58] minor initial scoping

---
 .idea/Model.iml                               |  2 +-
 .idea/misc.xml                                |  2 +-
 etl/customers/gla_croydon_demo/asset_list.py  |  3 ---
 .../vander_elliot/initial_scoping.py          | 23 +++++++++++++++++++
 4 files changed, 25 insertions(+), 5 deletions(-)
 create mode 100644 etl/customers/vander_elliot/initial_scoping.py

diff --git a/.idea/Model.iml b/.idea/Model.iml
index 4413bb06..b0f9c00d 100644
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@@ -7,7 +7,7 @@
       <sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
       <sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
     </content>
-    <orderEntry type="jdk" jdkName="Python 3.10 (backend)" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Python 3.10 (model_data)" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="PyNamespacePackagesService">
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 6f308057..1122b380 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,7 +3,7 @@
   <component name="Black">
     <option name="sdkName" value="Python 3.10 (backend)" />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (backend)" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (model_data)" project-jdk-type="Python SDK" />
   <component name="PythonCompatibilityInspectionAdvertiser">
     <option name="version" value="3" />
   </component>
diff --git a/etl/customers/gla_croydon_demo/asset_list.py b/etl/customers/gla_croydon_demo/asset_list.py
index 7dde8926..52e9422c 100644
--- a/etl/customers/gla_croydon_demo/asset_list.py
+++ b/etl/customers/gla_croydon_demo/asset_list.py
@@ -34,9 +34,6 @@ def app():
         low_memory=False
     )
 
-    z = epc_data.groupby(["WALLS_DESCRIPTION", "WALLS_ENERGY_EFF"]).size().reset_index(name="count")
-    z = z[z["MAINHEAT_DESCRIPTION"] == "Boiler and radiators, mains gas"]
-
     # Filter on entries where we have a UPRN
     epc_data = epc_data[~pd.isnull(epc_data["UPRN"])]
 
diff --git a/etl/customers/vander_elliot/initial_scoping.py b/etl/customers/vander_elliot/initial_scoping.py
new file mode 100644
index 00000000..de212c7c
--- /dev/null
+++ b/etl/customers/vander_elliot/initial_scoping.py
@@ -0,0 +1,23 @@
+import pandas as pd
+from utils.s3 import save_csv_to_s3
+
+
+def app():
+    # Check how many properties there are at EPC F/G in Birmingham
+    epc_data = pd.read_csv(
+        "local_data/all-domestic-certificates/domestic-E08000025-Birmingham/certificates.csv",
+        low_memory=False
+    )
+
+    # Filter on entries where we have a UPRN
+    epc_data = epc_data[~pd.isnull(epc_data["UPRN"])]
+
+    # Get the newest EPC for each UPRN. We use LODGEMENT_DATE as a proxy for this
+    epc_data["LODGEMENT_DATE"] = pd.to_datetime(epc_data["LODGEMENT_DATE"])
+
+    epc_data = epc_data.sort_values("LODGEMENT_DATE", ascending=False).drop_duplicates("UPRN")
+
+    epc_data = epc_data[epc_data["CURRENT_ENERGY_RATING"].isin(["F", "G"])]
+
+    one_years_ago = pd.Timestamp.now() - pd.DateOffset(days=int(1 * 365))
+    epc_data = epc_data[epc_data["LODGEMENT_DATE"] >= one_years_ago]

From e000c87cad98963e8c734a5cf8990a5a7b713217 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 18 Apr 2024 12:16:13 +0100
Subject: [PATCH 13/58] added patches for immo pilot 2

---
 etl/customers/immo/pilot/asset_list_2.py | 126 +++++++++++++++++++++++
 1 file changed, 126 insertions(+)
 create mode 100644 etl/customers/immo/pilot/asset_list_2.py

diff --git a/etl/customers/immo/pilot/asset_list_2.py b/etl/customers/immo/pilot/asset_list_2.py
new file mode 100644
index 00000000..f722a490
--- /dev/null
+++ b/etl/customers/immo/pilot/asset_list_2.py
@@ -0,0 +1,126 @@
+import pandas as pd
+from utils.s3 import read_excel_from_s3
+from utils.s3 import save_csv_to_s3
+
+USER_ID = 8
+PORTFOLIO_ID = 72
+
+# For
+patches = [
+    {
+        'address': '116 Parkes Hall Road',
+        'postcode': 'DY1 3RJ',
+        'walls-description': 'Cavity wall, filled cavity',
+        'walls-energy-eff': 'Average',
+        'roof-description': 'Pitched, 270 mm loft insulation',
+        'roof-energy-eff': 'Good',
+        'windows-description': 'Fully double glazed',
+        'windows-energy-eff': 'Good',
+        'mainheat-description': 'Boiler and radiators, mains gas',
+        'mainheat-energy-eff': 'Good',
+        'mainheatcont-description': 'Programmer, room thermostat and TRVs',
+        'mainheatc-energy-eff': 'Good',
+        'lighting-description': 'Low energy lighting in 27% of fixed outlets',
+        'lighting-energy-eff': 'Good',
+        'floor-description': 'Solid, no insulation (assumed)',
+        'secondheat-description': 'None',
+        'current-energy-efficiency': '73',
+        'current-energy-rating': 'C',
+        'energy-consumption-current': '184',
+        'co2-emissions-current': '2.4',
+        'potential-energy-efficiency': '88',
+        'total-floor-area': '73',
+        'construction-age-band': 'England and Wales: 1930-1949',
+        'property-type': 'House',
+        'built-form': 'Mid-Terrace',
+    }
+]
+
+# This is information that is found as a result of the non-invasives, that mean that certain measures
+# have been installed already. To reflect this in the front end, it is included in the recommendation, however
+# the cost is removed and instead, a message is presented saying that the measure is already installed.
+already_installed = []
+
+non_invasive_recommendations = []
+
+
+def app():
+    raw_asset_list = read_excel_from_s3(
+        bucket_name="retrofit-datalake-dev",
+        file_key="customers/Immo/Dudley Asset List - Hestia - pilot2.xlsx",
+        header_row=0
+    )
+
+    raw_asset_list = raw_asset_list[raw_asset_list["in_pilot"]].copy()
+
+    # Extract address and postcode
+    raw_asset_list["address"] = raw_asset_list["Full Address"].str.split(",").str[0]
+    raw_asset_list["postcode"] = raw_asset_list["Full Address"].str.split(",").str[-1].str.strip()
+
+    # We're provided with number of bathrooms and number of bedrooms.
+    asset_list = raw_asset_list.rename(
+        columns={
+            "No. of Beds": "n_bedrooms",
+            "No. of WC's": "n_bathrooms"
+        }
+    )
+
+    # Store the asset list in s3
+    filename = f"{USER_ID}/{PORTFOLIO_ID}/pilot.csv"
+    save_csv_to_s3(
+        dataframe=asset_list,
+        bucket_name="retrofit-plan-inputs-dev",
+        file_name=filename
+    )
+
+    # Store overrides in s3
+    already_installed_filename = f"{USER_ID}/{PORTFOLIO_ID}/already_installed.json"
+    save_csv_to_s3(
+        dataframe=pd.DataFrame(already_installed),
+        bucket_name="retrofit-plan-inputs-dev",
+        file_name=already_installed_filename
+    )
+
+    # Store patches in s3
+    patches_filename = f"{USER_ID}/{PORTFOLIO_ID}/patches.json"
+    save_csv_to_s3(
+        dataframe=pd.DataFrame(patches),
+        bucket_name="retrofit-plan-inputs-dev",
+        file_name=patches_filename
+    )
+
+    # Store non-invasive recommendations in S3
+    non_invasive_recommendations_filename = f"{USER_ID}/{PORTFOLIO_ID}/non_invasive_recommendations.json"
+    save_csv_to_s3(
+        dataframe=pd.DataFrame(non_invasive_recommendations),
+        bucket_name="retrofit-plan-inputs-dev",
+        file_name=non_invasive_recommendations_filename
+    )
+
+    # EPC C portfolio
+    body = {
+        "portfolio_id": str(PORTFOLIO_ID),
+        "housing_type": "Private",
+        "goal": "Increase EPC",
+        "goal_value": "C",
+        "trigger_file_path": filename,
+        "already_installed_file_path": already_installed_filename,
+        "patches_file_path": patches_filename,
+        "non_invasive_recommendations_file_path": non_invasive_recommendations_filename,
+        "budget": None,
+    }
+    print(body)
+
+    # EPC B portfolio
+    body = {
+        "portfolio_id": str(PORTFOLIO_ID + 1),
+        "housing_type": "Private",
+        "goal": "Increase EPC",
+        "goal_value": "B",
+        "trigger_file_path": filename,
+        "already_installed_file_path": already_installed_filename,
+        "patches_file_path": patches_filename,
+        "non_invasive_recommendations_file_path": non_invasive_recommendations_filename,
+        "budget": None,
+    }
+    print(body)

From acada27061d09f47ac76ecd2785c95eb39e741d3 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 18 Apr 2024 15:16:46 +0100
Subject: [PATCH 14/58] rounding up roof coverage %

---
 backend/SearchEpc.py                      |  9 +++++++--
 backend/app/plan/router.py                | 11 +++++++++--
 backend/ml_models/Valuation.py            |  8 ++++++++
 etl/customers/immo/pilot/asset_list_2.py  | 21 ++++++++++++++++++---
 etl/epc/Record.py                         |  2 +-
 recommendations/SolarPvRecommendations.py | 10 +++++++---
 6 files changed, 50 insertions(+), 11 deletions(-)

diff --git a/backend/SearchEpc.py b/backend/SearchEpc.py
index cc2ee4a9..44178792 100644
--- a/backend/SearchEpc.py
+++ b/backend/SearchEpc.py
@@ -709,8 +709,13 @@ class SearchEpc:
                 self.full_sap_epc = {}
 
                 # Finally, set a standardised address 1 and postcode
-                self.address_clean = self.ordnance_survey_client.address_os
-                self.postcode_clean = self.ordnance_survey_client.postcode_os
+                self.address_clean = (
+                    self.ordnance_survey_client.address_os if self.ordnance_survey_client.address_os else self.address1
+                )
+                self.postcode_clean = (
+                    self.ordnance_survey_client.postcode_os if self.ordnance_survey_client.postcode_os else
+                    self.postcode
+                )
             return
 
         os_response = self.ordnance_survey_client.get_places_api()
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 9854abe8..a8464ee6 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -52,6 +52,10 @@ def patch_epc(patch, epc_records):
     """
 
     for patch_variable, patch_value in patch.items():
+
+        if patch_variable in ["address", "postcode"]:
+            continue
+
         if patch_value == "":
             continue
         if patch_variable in epc_records["original_epc"]:
@@ -268,9 +272,12 @@ async def trigger_plan(body: PlanTriggerRequest):
                 postcode=config["postcode"],
                 uprn=uprn,
                 auth_token=get_settings().EPC_AUTH_TOKEN,
-                os_api_key=get_settings().ORDNANCE_SURVEY_API_KEY
+                os_api_key=get_settings().ORDNANCE_SURVEY_API_KEY,
             )
-            epc_searcher.find_property()
+            epc_searcher.ordnance_survey_client.built_form = config.get("built_form", None)
+            epc_searcher.ordnance_survey_client.property_type = config.get("property_type", None)
+            # For the moment, our OS API access is unavailable, so we skip and interpolate
+            epc_searcher.find_property(skip_os=True)
             # Create a record in db
             property_id, is_new = create_property(
                 session, body.portfolio_id, epc_searcher.address_clean, epc_searcher.postcode_clean, epc_searcher.uprn
diff --git a/backend/ml_models/Valuation.py b/backend/ml_models/Valuation.py
index 251c016a..39ea5a98 100644
--- a/backend/ml_models/Valuation.py
+++ b/backend/ml_models/Valuation.py
@@ -63,6 +63,14 @@ class PropertyValuation:
         90093693: 279_000,  # Based on Zoopla
         90055152: 149_000,  # Based on Zoopla
         90028499: 238_000,  # Based on Zoopla
+        # IMMO Dudley Pilot 2- search by going to https://www.zoopla.co.uk/property/uprn/{uprn}/
+        90039318: 177_000,  # Based on Zoopla
+        90038384: 170_000,  # Based on Zoopla
+        90105380: 185_000,  # Based on Zoopla
+        90124001: 165_000,  # Based on Zoopla
+        90013980: 148_000,  # Based on Zoopla
+        90087154: 184_000,  # Based on Zoopla
+        90046817: 167_000,  # Based on Zoopla
     }
 
     # We base our valuation uplifts on a number of sources
diff --git a/etl/customers/immo/pilot/asset_list_2.py b/etl/customers/immo/pilot/asset_list_2.py
index f722a490..121e7a81 100644
--- a/etl/customers/immo/pilot/asset_list_2.py
+++ b/etl/customers/immo/pilot/asset_list_2.py
@@ -10,6 +10,7 @@ patches = [
     {
         'address': '116 Parkes Hall Road',
         'postcode': 'DY1 3RJ',
+        'uprn': '90046817',
         'walls-description': 'Cavity wall, filled cavity',
         'walls-energy-eff': 'Average',
         'roof-description': 'Pitched, 270 mm loft insulation',
@@ -21,7 +22,7 @@ patches = [
         'mainheatcont-description': 'Programmer, room thermostat and TRVs',
         'mainheatc-energy-eff': 'Good',
         'lighting-description': 'Low energy lighting in 27% of fixed outlets',
-        'lighting-energy-eff': 'Good',
+        'lighting-energy-eff': 'Average',
         'floor-description': 'Solid, no insulation (assumed)',
         'secondheat-description': 'None',
         'current-energy-efficiency': '73',
@@ -39,7 +40,11 @@ patches = [
 # This is information that is found as a result of the non-invasives, that mean that certain measures
 # have been installed already. To reflect this in the front end, it is included in the recommendation, however
 # the cost is removed and instead, a message is presented saying that the measure is already installed.
-already_installed = []
+already_installed = [
+    {
+        'address': '28 Sangwin Road', 'postcode': 'WV14 9EQ', "already_installed": ["loft_insulation"]
+    }
+]
 
 non_invasive_recommendations = []
 
@@ -58,13 +63,23 @@ def app():
     raw_asset_list["postcode"] = raw_asset_list["Full Address"].str.split(",").str[-1].str.strip()
 
     # We're provided with number of bathrooms and number of bedrooms.
+    # The UPRNs are not the official ones
     asset_list = raw_asset_list.rename(
         columns={
             "No. of Beds": "n_bedrooms",
-            "No. of WC's": "n_bathrooms"
+            "No. of WC's": "n_bathrooms",
+            'Property Type': 'property_type',
+            'Architype': 'built_form'
         }
     )
 
+    # Remap the values
+    asset_list["built_form"] = asset_list["built_form"].map({
+        "SEMI DETACHED": "Semi-Detached",
+        "MID TERRACE": "Mid-Terrace",
+        "END TERRACE": "End-Terrace",
+    })
+
     # Store the asset list in s3
     filename = f"{USER_ID}/{PORTFOLIO_ID}/pilot.csv"
     save_csv_to_s3(
diff --git a/etl/epc/Record.py b/etl/epc/Record.py
index e74330a2..9a965c6a 100644
--- a/etl/epc/Record.py
+++ b/etl/epc/Record.py
@@ -191,7 +191,7 @@ class EPCRecord:
         This method will clean the records using the data processor
         """
         epc_data_processor = EPCDataProcessor(
-            data=self.epc_record_as_dataframe("prepared_epc"),
+            data=self.epc_record_as_dataframe("prepared_epc").copy(),
             run_mode="newdata",
             cleaning_averages=self.cleaning_data,
         )
diff --git a/recommendations/SolarPvRecommendations.py b/recommendations/SolarPvRecommendations.py
index 58cf9735..b44557ab 100644
--- a/recommendations/SolarPvRecommendations.py
+++ b/recommendations/SolarPvRecommendations.py
@@ -56,14 +56,18 @@ class SolarPvRecommendations:
         if not is_valid_property_type or not is_valid_roof_type or not has_no_existing_solar_pv:
             return
 
+        solar_pv_percentage = self.property.solar_pv_percentage
+        # We round up to the nearest 10%
+        solar_pv_percentage = np.ceil(solar_pv_percentage * 10) / 10
+
         # For the solar recommendations, we produce the following scenarios:
         # 1) Solar panels only, we present a high, medium and low coverage
         # 2) With and without battery
         roof_coverage_scenarios = [
-            self.property.solar_pv_percentage - 0.1, self.property.solar_pv_percentage,
+            solar_pv_percentage - 0.1, solar_pv_percentage,
         ]
-        if self.property.solar_pv_percentage <= 0.4:
-            roof_coverage_scenarios.append(self.property.solar_pv_percentage + 0.1)
+        if solar_pv_percentage <= 0.4:
+            roof_coverage_scenarios.append(solar_pv_percentage + 0.1)
         # We make sure we haven't gone too low or high - we allow no more than 60% coverage
         roof_coverage_scenarios = [v for v in roof_coverage_scenarios if 0 <= v <= 0.6]
         # If we only have two scenarios, we add a coverage scenario 10% less than the smallest

From db2586061598471f182fc338668618dfd4109a61 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 18 Apr 2024 16:01:41 +0100
Subject: [PATCH 15/58] Completed pilot 2

---
 etl/customers/immo/pilot/asset_list_2.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/etl/customers/immo/pilot/asset_list_2.py b/etl/customers/immo/pilot/asset_list_2.py
index 121e7a81..1b4fad9a 100644
--- a/etl/customers/immo/pilot/asset_list_2.py
+++ b/etl/customers/immo/pilot/asset_list_2.py
@@ -43,6 +43,15 @@ patches = [
 already_installed = [
     {
         'address': '28 Sangwin Road', 'postcode': 'WV14 9EQ', "already_installed": ["loft_insulation"]
+    },
+    {
+        'address': '51 Hillwood Road', 'postcode': 'B62 8NQ', "already_installed": ["loft_insulation"]
+    },
+    {
+        'address': '47 Watsons Close', 'postcode': 'DY2 7HL', "already_installed": ["loft_insulation"]
+    },
+    {
+        'address': '44 Hatfield Road', 'postcode': 'DY9 7LW', "already_installed": ["loft_insulation"]
     }
 ]
 

From 3593b7ae9ebd4245985a2dabc80446b23f00d84e Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 19 Apr 2024 13:54:04 +0100
Subject: [PATCH 16/58] Added boiler upgrade recommendation

---
 etl/customers/gla_croydon_demo/asset_list.py |  5 ++--
 recommendations/Costs.py                     | 12 ++------
 recommendations/HeatingRecommender.py        | 31 +++++++++-----------
 3 files changed, 20 insertions(+), 28 deletions(-)

diff --git a/etl/customers/gla_croydon_demo/asset_list.py b/etl/customers/gla_croydon_demo/asset_list.py
index 7dde8926..1655979b 100644
--- a/etl/customers/gla_croydon_demo/asset_list.py
+++ b/etl/customers/gla_croydon_demo/asset_list.py
@@ -34,8 +34,9 @@ def app():
         low_memory=False
     )
 
-    z = epc_data.groupby(["WALLS_DESCRIPTION", "WALLS_ENERGY_EFF"]).size().reset_index(name="count")
-    z = z[z["MAINHEAT_DESCRIPTION"] == "Boiler and radiators, mains gas"]
+    z = epc_data[epc_data["MAINHEAT_DESCRIPTION"] == "Boiler and radiators, mains gas"]
+    z["HOTWATER_DESCRIPTION"].value_counts()
+    z["MAIN_FUEL"].value_counts()
 
     # Filter on entries where we have a UPRN
     epc_data = epc_data[~pd.isnull(epc_data["UPRN"])]
diff --git a/recommendations/Costs.py b/recommendations/Costs.py
index 852bb11f..d7a8ad2f 100644
--- a/recommendations/Costs.py
+++ b/recommendations/Costs.py
@@ -67,18 +67,12 @@ LOW_CARBON_COMBI_BOILER = 2200
 # https://www.greenmatch.co.uk/boilers/35kw-boiler
 # https://www.greenmatch.co.uk/boilers/40kw-boiler
 # These are exclusive of installation costs
-COMBI_BOILER_COSTS = {
+CONDENSING_BOILER_COSTS = {
     "30kw": 1550,
     "35kw": 1610,
     "40kw": 1625
 }
 
-CONVENTIONAL_BOILER_COSTS = {
-    "30kw": 1117,
-    "35kw": 1546,
-    "40kw": 1776
-}
-
 # Assumes 3 hours to remove each heater (including re-decorating)
 ROOM_HEATER_REMOVAL_COST = 120
 ROOM_HEATER_REMOVAL_LABOUR_HOURS = 3
@@ -1179,7 +1173,7 @@ class Costs:
         estimated_radiators = max(total_radiators_based_on_power, base_radiators + additional_radiators)
         return round(estimated_radiators)
 
-    def boiler(self, is_combi, size, exising_room_heaters, system_change, n_heated_rooms, n_rooms):
+    def boiler(self, size, exising_room_heaters, system_change, n_heated_rooms, n_rooms):
         """
         Based on a basic estimate of median value £2600 to install a low carbon combi boiler
         First time central heating vosts can als be found here:
@@ -1187,7 +1181,7 @@ class Costs:
         :return:
         """
 
-        unit_cost = COMBI_BOILER_COSTS[size] if is_combi else CONVENTIONAL_BOILER_COSTS[size]
+        unit_cost = CONDENSING_BOILER_COSTS[size]
         # The unit cost is the cost without VAT
         # We now need to estimate the cost of the works
         labour_days = 2
diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py
index 432dc6a6..2423901a 100644
--- a/recommendations/HeatingRecommender.py
+++ b/recommendations/HeatingRecommender.py
@@ -312,7 +312,15 @@ class HeatingRecommender:
         simulation_config = {}
         boiler_costs = {}
         boiler_recommendation = {}
-        if self.property.data["mainheat-energy-eff"] in ["Very Poor", "Poor", "Average"]:
+
+        has_inefficient_space_heating = self.property.data["mainheat-energy-eff"] in ["Very Poor", "Poor", "Average"]
+
+        has_inefficient_mains_water = (
+            self.property.hotwater["clean_description"] in ["From main system"] and
+            self.property.data["hot-water-energy-eff"] in ["Very Poor", "Poor", "Average"]
+        )
+
+        if has_inefficient_space_heating or has_inefficient_mains_water:
             boiler_size = self.estimate_boiler_size(
                 property_type=self.property.data["property-type"],
                 built_form=self.property.data["built-form"],
@@ -321,22 +329,12 @@ class HeatingRecommender:
                 num_heated_rooms=self.property.data["number-heated-rooms"],
             )
 
-            # We recommend a combi boiler under the following conditions
-            # 1) If there are 4 or fewer rooms (we don't use heqted rooms because none of the rooms could be
-            #    heated if there is no existing heating system).
-            # 2) There 1 or fewer bathrooms
-            # Otherwise, we recommend a gas condensing boiler, which will server a larger property, that has multiple
-            # bathrooms
-            is_combi = (
-                (self.property.number_of_rooms <= 4) and
-                (self.property.n_bathrooms in [None, 0, 1])
-            )
-            if is_combi:
-                description = "Upgrade to a new combi boiler"
-            else:
-                description = "Upgrade to a new gas condensing boiler"
+            description = "Upgrade to a new condensing boiler"
 
-            simulation_config = {"mainheat_energy_eff_ending": "Good"}
+            simulation_config = {
+                "mainheat_energy_eff_ending": "Good",
+                "hot_water_energy_eff_ending": "Good"
+            }
             if system_change:
                 # Installation of a boiler improves the hot water system so we need to reflect this in
                 # the outcome of the recommendation
@@ -363,7 +361,6 @@ class HeatingRecommender:
                 }
 
             boiler_costs = self.costs.boiler(
-                is_combi=is_combi,
                 size=f"{boiler_size}kw",
                 exising_room_heaters=exising_room_heaters,
                 system_change=system_change,

From 391cb356ee12270aa9f5a4ffeff6a917f07ff05e Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 19 Apr 2024 14:07:47 +0100
Subject: [PATCH 17/58] debugging recommendation when we have independent
 boiler upgrade and heating controls

---
 recommendations/HeatingRecommender.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py
index 2423901a..aa5cabdb 100644
--- a/recommendations/HeatingRecommender.py
+++ b/recommendations/HeatingRecommender.py
@@ -394,9 +394,13 @@ class HeatingRecommender:
         controls_recommender.recommend(heating_description="Boiler and radiators, mains gas")
         # We may have 2 recommendations from the heating controls
 
-        if not controls_recommender.recommendation:
+        if not controls_recommender.recommendation and not boiler_recommendation:
             return
 
+        if not system_change and len(boiler_recommendation):
+            # If there is not a system change, we add the boiler recommendation at point.
+            self.recommendations.append(boiler_recommendation)
+
         if system_change:
             # We combine the heating and controls recommendations, in the case of a system change
             combined_recommendations = []

From 8bd899bcba8739b3232ec254fa799ff8497efb0f Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 19 Apr 2024 16:43:13 +0100
Subject: [PATCH 18/58] debugging structure of heating recommendations

---
 backend/app/plan/router.py            | 1 +
 recommendations/HeatingRecommender.py | 8 ++++----
 recommendations/Recommendations.py    | 9 +++++++--
 3 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index a8464ee6..06d1aadf 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -380,6 +380,7 @@ async def trigger_plan(body: PlanTriggerRequest):
 
         logger.info("Preparing data for scoring in sap change api")
         recommendations_scoring_data = pd.DataFrame(recommendations_scoring_data)
+
         recommendations_scoring_data = recommendations_scoring_data.drop(
             columns=["rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending",
                      "carbon_ending"]
diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py
index aa5cabdb..fe5cdd46 100644
--- a/recommendations/HeatingRecommender.py
+++ b/recommendations/HeatingRecommender.py
@@ -399,7 +399,7 @@ class HeatingRecommender:
 
         if not system_change and len(boiler_recommendation):
             # If there is not a system change, we add the boiler recommendation at point.
-            self.recommendations.append(boiler_recommendation)
+            self.recommendations.append([boiler_recommendation])
 
         if system_change:
             # We combine the heating and controls recommendations, in the case of a system change
@@ -417,12 +417,12 @@ class HeatingRecommender:
                 combined_recommendations.extend(combined_recommendation)
 
             # Overwrite the existing boiler recommendation
-            self.recommendations.extend(combined_recommendations)
+            self.recommendations.append(combined_recommendations)
         else:
             # We increment the recommendation phase, since the heating controls are separate from the boiler upgrade
             # but we'll only upgrade if we have a heating recommendation
             has_heating_recommendation = any(
-                recommendation["type"] == "heating" for recommendation in self.recommendations
+                rec["type"] == "heating" for recommendation in self.recommendations for rec in recommendation
             )
             if has_heating_recommendation:
                 recommendation_phase += 1
@@ -431,6 +431,6 @@ class HeatingRecommender:
             for recommendation in controls_recommender.recommendation:
                 recommendation["phase"] = recommendation_phase
 
-            self.recommendations.extend(controls_recommender.recommendation)
+            self.recommendations.append(controls_recommender.recommendation)
 
         return
diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py
index 5960d7be..aba75ad9 100644
--- a/recommendations/Recommendations.py
+++ b/recommendations/Recommendations.py
@@ -111,11 +111,16 @@ class Recommendations:
         if "heating" not in self.exclusions:
             self.heating_recommender.recommend(phase=phase)
             if self.heating_recommender.recommendations:
-                property_recommendations.append(self.heating_recommender.recommendations)
+                if len(self.heating_recommender.recommendations) == 1:
+                    property_recommendations.append(self.heating_recommender.recommendations)
+                else:
+                    property_recommendations.extend(self.heating_recommender.recommendations)
                 # We check if we have distinct heating and heating controls recommendations
                 # If so, we increment by 2 (one of the heating system, one for the heating controls)
                 # otherwise we incremenet by 1
-                max_used_phase = max([rec["phase"] for rec in self.heating_recommender.recommendations])
+                max_used_phase = max(
+                    [rec["phase"] for recs in self.heating_recommender.recommendations for rec in recs]
+                )
                 amount_to_increment = max_used_phase - phase + 1
                 phase += amount_to_increment
 

From 7bdf2147badefd9f43250ac0eedc933f6378b842 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 19 Apr 2024 18:38:16 +0100
Subject: [PATCH 19/58] restructured output of heating and heating control
 recommendations

---
 backend/app/plan/router.py            | 20 ++++++++++----------
 recommendations/HeatingRecommender.py | 16 +++++++++-------
 recommendations/Recommendations.py    | 19 +++++++++++++------
 3 files changed, 32 insertions(+), 23 deletions(-)

diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 06d1aadf..ebaf482d 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -282,16 +282,16 @@ async def trigger_plan(body: PlanTriggerRequest):
             property_id, is_new = create_property(
                 session, body.portfolio_id, epc_searcher.address_clean, epc_searcher.postcode_clean, epc_searcher.uprn
             )
-            if not is_new:
-                continue
-
-            create_property_targets(
-                session,
-                property_id=property_id,
-                portfolio_id=body.portfolio_id,
-                epc_target=body.goal_value,
-                heat_demand_target=None
-            )
+            # if not is_new:
+            #     continue
+            #
+            # create_property_targets(
+            #     session,
+            #     property_id=property_id,
+            #     portfolio_id=body.portfolio_id,
+            #     epc_target=body.goal_value,
+            #     heat_demand_target=None
+            # )
 
             epc_records = {
                 'original_epc': epc_searcher.newest_epc.copy(),
diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py
index fe5cdd46..537125a1 100644
--- a/recommendations/HeatingRecommender.py
+++ b/recommendations/HeatingRecommender.py
@@ -15,7 +15,8 @@ class HeatingRecommender:
         self.property = property_instance
         self.costs = Costs(self.property)
 
-        self.recommendations = []
+        self.heating_recommendations = []
+        self.heating_control_recommendations = []
 
     def recommend(self, phase=0):
 
@@ -23,7 +24,8 @@ class HeatingRecommender:
         #       the boiler, but instead flushing the system will make it run more efficiently. There is a cost for this
         #       in the Costs class, stored as SYSTEM_FLUSH_COST
 
-        self.recommendations = []
+        self.heating_recommendations = []
+        self.heating_control_recommendations = []
         # This first iteration of the recommender will provide very basic recommendation
         # We recommend heating controls based on the main heating system
 
@@ -254,7 +256,7 @@ class HeatingRecommender:
             system_change=system_change
         )
 
-        self.recommendations.extend(recommendations)
+        self.heating_recommendations.extend(recommendations)
 
     @staticmethod
     def estimate_boiler_size(property_type, built_form, floor_area, floor_height, num_heated_rooms):
@@ -399,7 +401,7 @@ class HeatingRecommender:
 
         if not system_change and len(boiler_recommendation):
             # If there is not a system change, we add the boiler recommendation at point.
-            self.recommendations.append([boiler_recommendation])
+            self.heating_recommendations.extend([boiler_recommendation])
 
         if system_change:
             # We combine the heating and controls recommendations, in the case of a system change
@@ -417,12 +419,12 @@ class HeatingRecommender:
                 combined_recommendations.extend(combined_recommendation)
 
             # Overwrite the existing boiler recommendation
-            self.recommendations.append(combined_recommendations)
+            self.heating_recommendations.extend(combined_recommendations)
         else:
             # We increment the recommendation phase, since the heating controls are separate from the boiler upgrade
             # but we'll only upgrade if we have a heating recommendation
             has_heating_recommendation = any(
-                rec["type"] == "heating" for recommendation in self.recommendations for rec in recommendation
+                rec["type"] == "heating" for rec in self.heating_recommendations
             )
             if has_heating_recommendation:
                 recommendation_phase += 1
@@ -431,6 +433,6 @@ class HeatingRecommender:
             for recommendation in controls_recommender.recommendation:
                 recommendation["phase"] = recommendation_phase
 
-            self.recommendations.append(controls_recommender.recommendation)
+            self.heating_control_recommendations.extend(controls_recommender.recommendation)
 
         return
diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py
index aba75ad9..06dc2d61 100644
--- a/recommendations/Recommendations.py
+++ b/recommendations/Recommendations.py
@@ -110,16 +110,23 @@ class Recommendations:
         # Heating and Electical systems
         if "heating" not in self.exclusions:
             self.heating_recommender.recommend(phase=phase)
-            if self.heating_recommender.recommendations:
-                if len(self.heating_recommender.recommendations) == 1:
-                    property_recommendations.append(self.heating_recommender.recommendations)
-                else:
-                    property_recommendations.extend(self.heating_recommender.recommendations)
+            if (
+                self.heating_recommender.heating_recommendations or
+                self.heating_recommender.heating_control_recommendations
+            ):
+                if self.heating_recommender.heating_recommendations:
+                    property_recommendations.append(self.heating_recommender.heating_recommendations)
+
+                if self.heating_recommender.heating_control_recommendations:
+                    property_recommendations.append(self.heating_recommender.heating_control_recommendations)
+
                 # We check if we have distinct heating and heating controls recommendations
                 # If so, we increment by 2 (one of the heating system, one for the heating controls)
                 # otherwise we incremenet by 1
                 max_used_phase = max(
-                    [rec["phase"] for recs in self.heating_recommender.recommendations for rec in recs]
+                    [rec["phase"] for rec in
+                     self.heating_recommender.heating_recommendations +
+                     self.heating_recommender.heating_control_recommendations]
                 )
                 amount_to_increment = max_used_phase - phase + 1
                 phase += amount_to_increment

From 5a879572f46fba68fc136f2d0681805119e60ccb Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 23 Apr 2024 15:34:29 +0100
Subject: [PATCH 20/58] final modifications for immo pilot

---
 etl/customers/immo/pilot/asset_list_2.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/etl/customers/immo/pilot/asset_list_2.py b/etl/customers/immo/pilot/asset_list_2.py
index 1b4fad9a..52260f57 100644
--- a/etl/customers/immo/pilot/asset_list_2.py
+++ b/etl/customers/immo/pilot/asset_list_2.py
@@ -51,7 +51,9 @@ already_installed = [
         'address': '47 Watsons Close', 'postcode': 'DY2 7HL', "already_installed": ["loft_insulation"]
     },
     {
-        'address': '44 Hatfield Road', 'postcode': 'DY9 7LW', "already_installed": ["loft_insulation"]
+        'address': '44 Hatfield Road',
+        'postcode': 'DY9 7LW',
+        "already_installed": ["loft_insulation", "cavity_wall_insulation"]
     }
 ]
 

From 7a275deb6df6a231bde60d64d78ba3b04ab32f38 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 23 Apr 2024 17:12:39 +0100
Subject: [PATCH 21/58] route march code

---
 .idea/Model.iml                      |  2 +-
 .idea/misc.xml                       |  2 +-
 etl/customers/guiness/route_march.py | 98 ++++++++++++++++++++++++++++
 3 files changed, 100 insertions(+), 2 deletions(-)
 create mode 100644 etl/customers/guiness/route_march.py

diff --git a/.idea/Model.iml b/.idea/Model.iml
index 4413bb06..b0f9c00d 100644
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@@ -7,7 +7,7 @@
       <sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
       <sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
     </content>
-    <orderEntry type="jdk" jdkName="Python 3.10 (backend)" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Python 3.10 (model_data)" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="PyNamespacePackagesService">
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 6f308057..1122b380 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,7 +3,7 @@
   <component name="Black">
     <option name="sdkName" value="Python 3.10 (backend)" />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (backend)" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (model_data)" project-jdk-type="Python SDK" />
   <component name="PythonCompatibilityInspectionAdvertiser">
     <option name="version" value="3" />
   </component>
diff --git a/etl/customers/guiness/route_march.py b/etl/customers/guiness/route_march.py
new file mode 100644
index 00000000..28f350d3
--- /dev/null
+++ b/etl/customers/guiness/route_march.py
@@ -0,0 +1,98 @@
+import os
+
+import pandas as pd
+from tqdm import tqdm
+
+from dotenv import load_dotenv
+from utils.s3 import read_excel_from_s3
+from backend.SearchEpc import SearchEpc
+from epc_api.client import EpcClient
+from utils.s3 import save_csv_to_s3
+
+load_dotenv(dotenv_path="backend/.env")
+EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
+
+
+def app():
+    """
+    This app is satisfying an ad hoc request to retrieve EPC data for properties owned by Guiness, to help plan the
+    route march
+
+    These properties were provided to us by Ecosurv
+    :return:
+    """
+    asset_list = read_excel_from_s3(
+        bucket_name="retrofit-datalake-dev",
+        file_key="customers/guiness/TGP CW Properties PV.xlsx",
+        header_row=0
+    )
+
+    epc_data = []
+    for _, guiness_property in tqdm(asset_list.iterrows(), total=len(asset_list)):
+
+        searcher = SearchEpc(
+            address1=str(guiness_property["Address"]),
+            postcode=guiness_property["POSTCODES"],
+            auth_token=EPC_AUTH_TOKEN,
+            os_api_key="",
+            property_type=None,
+            fast=True
+        )
+        # Force the skipping of estimating the EPC
+        searcher.ordnance_survey_client.property_type = None
+        searcher.ordnance_survey_client.built_form = None
+
+        searcher.find_property(skip_os=True)
+        if searcher.newest_epc is None:
+            continue
+
+        epc = {
+            "asset_list_address": guiness_property["Address"],
+            "asset_list_postcode": guiness_property["POSTCODES"],
+            **searcher.newest_epc.copy()
+        }
+
+        epc_data.append(epc)
+
+    epc_df = pd.DataFrame(epc_data)
+
+    # Retrieve just the data we need
+    epc_df = epc_df[
+        [
+            "asset_list_address",
+            "asset_list_postcode",
+            "uprn",
+            "property-type",
+            "built-form",
+            "inspection-date",
+            "current-energy-rating",
+            "current-energy-efficiency",
+            "roof-description",
+            "walls-description",
+            "transaction-type"
+        ]
+    ]
+
+    asset_list = asset_list.merge(
+        epc_df, how="left", left_on=["Address", "POSTCODES"], right_on=["asset_list_address", "asset_list_postcode"]
+    )
+
+    # De-dupe on the address and postcode, since 137 Badger Avenue was duplicated
+    asset_list = asset_list.drop_duplicates(subset=["Address", "POSTCODES"])
+    asset_list = asset_list.drop(columns=["asset_list_address", "asset_list_postcode"])
+
+    # Rename the columns
+    asset_list = asset_list.rename(columns={
+        "property-type": "Property Type",
+        "built-form": "Archetype",
+        "inspection-date": "Last EPC Inspection Date",
+        "current-energy-rating": "Last survey EPC Rating",
+        "current-energy-efficiency": "Last survey SAP Score",
+        "roof-description": "Roof Construction",
+        "walls-description": "Wall Construction",
+        "transaction-type": "Last EPC Reason"
+    })
+
+    # Store as an excel
+    filename = "Guiness EPC data.xlsx"
+    asset_list.to_excel(filename, index=False)

From 03ca16bfc5c94d8325f5c20e5a82aabbb66e014d Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 26 Apr 2024 14:06:48 +0100
Subject: [PATCH 22/58] Added rightmove property valuation increase estimates

---
 .idea/Model.iml                              |  2 +-
 .idea/misc.xml                               |  2 +-
 backend/app/plan/router.py                   | 20 +++----
 backend/ml_models/Valuation.py               | 39 +++++++++++-
 etl/customers/gla_croydon_demo/asset_list.py |  4 --
 etl/customers/goldman/asset_list.py          | 63 ++++++++++++++++++++
 etl/customers/goldman/epc_f_g_properties.py  | 25 ++++++++
 recommendations/HeatingRecommender.py        |  1 -
 8 files changed, 137 insertions(+), 19 deletions(-)
 create mode 100644 etl/customers/goldman/asset_list.py
 create mode 100644 etl/customers/goldman/epc_f_g_properties.py

diff --git a/.idea/Model.iml b/.idea/Model.iml
index b0f9c00d..4413bb06 100644
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@@ -7,7 +7,7 @@
       <sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
       <sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
     </content>
-    <orderEntry type="jdk" jdkName="Python 3.10 (model_data)" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Python 3.10 (backend)" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="PyNamespacePackagesService">
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 1122b380..6f308057 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,7 +3,7 @@
   <component name="Black">
     <option name="sdkName" value="Python 3.10 (backend)" />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (model_data)" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (backend)" project-jdk-type="Python SDK" />
   <component name="PythonCompatibilityInspectionAdvertiser">
     <option name="version" value="3" />
   </component>
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index ebaf482d..06d1aadf 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -282,16 +282,16 @@ async def trigger_plan(body: PlanTriggerRequest):
             property_id, is_new = create_property(
                 session, body.portfolio_id, epc_searcher.address_clean, epc_searcher.postcode_clean, epc_searcher.uprn
             )
-            # if not is_new:
-            #     continue
-            #
-            # create_property_targets(
-            #     session,
-            #     property_id=property_id,
-            #     portfolio_id=body.portfolio_id,
-            #     epc_target=body.goal_value,
-            #     heat_demand_target=None
-            # )
+            if not is_new:
+                continue
+
+            create_property_targets(
+                session,
+                property_id=property_id,
+                portfolio_id=body.portfolio_id,
+                epc_target=body.goal_value,
+                heat_demand_target=None
+            )
 
             epc_records = {
                 'original_epc': epc_searcher.newest_epc.copy(),
diff --git a/backend/ml_models/Valuation.py b/backend/ml_models/Valuation.py
index 39ea5a98..5c781979 100644
--- a/backend/ml_models/Valuation.py
+++ b/backend/ml_models/Valuation.py
@@ -71,6 +71,14 @@ class PropertyValuation:
         90013980: 148_000,  # Based on Zoopla
         90087154: 184_000,  # Based on Zoopla
         90046817: 167_000,  # Based on Zoopla
+        # Goldman Sachs Pilot for inrto - search by going to https://www.zoopla.co.uk/property/uprn/{uprn}/
+        100070358888: 153_000,  # Based on Zoopla
+        10090436544: 282_000,  # Based on Zoopla
+        100070365751: 177_000,  # Based on Zoopla
+        10095952767: 168_000,  # Based on Zoopla
+        100070520130: 177_000,  # Based on Zoopla
+        100070333957: 185_000,  # Based on Zoopla
+        100070543258: 211_000,  # Based on Zoopla
     }
 
     # We base our valuation uplifts on a number of sources
@@ -108,6 +116,29 @@ class PropertyValuation:
         # {"start": "D", "end": "A", "increase_percentage": 0.017},
     ]
 
+    # Found here: https://www.rightmove.co.uk/news/articles/property-news/green-premium-epc-ratings/
+    # F -> C is + 15%
+    # E -> C is +7%
+    # D -> C is +3%
+    RIGHTMOVE_MAPPING = [
+        {"start": "G", "end": "C", "increase_percentage": 0.15},
+        {"start": "G", "end": "B", "increase_percentage": 0.15},
+        {"start": "G", "end": "A", "increase_percentage": 0.15},
+
+        {"start": "F", "end": "C", "increase_percentage": 0.15},
+        {"start": "F", "end": "B", "increase_percentage": 0.15},
+        {"start": "F", "end": "A", "increase_percentage": 0.15},
+
+        {"start": "E", "end": "C", "increase_percentage": 0.07},
+        {"start": "E", "end": "B", "increase_percentage": 0.07},
+        {"start": "E", "end": "A", "increase_percentage": 0.07},
+
+        {"start": "D", "end": "C", "increase_percentage": 0.03},
+        {"start": "D", "end": "B", "increase_percentage": 0.03},
+        {"start": "D", "end": "A", "increase_percentage": 0.03},
+
+    ]
+
     EPC_BANDS = ["G", "F", "E", "D", "C", "B", "A"]
 
     @classmethod
@@ -159,14 +190,18 @@ class PropertyValuation:
 
         msm_increase, lloyds_increase = cls.get_increase(epc_band_range)
 
-        # We now use the knight frank and nationwide data to get further valuation evidence, if we have it
+        # We now use the knight frank, nationwide and Rightmove data to get further valuation evidence, if we have it
         kf_increase = [x for x in cls.KNIGHT_FRANK_MAPPING if x["start"] == current_epc and x["end"] == target_epc]
         nw_increase = [x for x in cls.NATIONWIDE_MAPPING if x["start"] == current_epc and x["end"] == target_epc]
+        rm_increase = [x for x in cls.RIGHTMOVE_MAPPING if x["start"] == current_epc and x["end"] == target_epc]
 
         kf_increase = kf_increase[0]["increase_percentage"] if kf_increase else None
         nw_increase = nw_increase[0]["increase_percentage"] if nw_increase else None
+        rm_increase = rm_increase[0]["increase_percentage"] if rm_increase else None
 
-        all_increases = [x for x in [msm_increase, lloyds_increase, kf_increase, nw_increase] if x is not None]
+        all_increases = [
+            x for x in [msm_increase, lloyds_increase, kf_increase, nw_increase, rm_increase] if x is not None
+        ]
 
         max_increase = max(all_increases)
         min_increase = min(all_increases)
diff --git a/etl/customers/gla_croydon_demo/asset_list.py b/etl/customers/gla_croydon_demo/asset_list.py
index 1655979b..52e9422c 100644
--- a/etl/customers/gla_croydon_demo/asset_list.py
+++ b/etl/customers/gla_croydon_demo/asset_list.py
@@ -34,10 +34,6 @@ def app():
         low_memory=False
     )
 
-    z = epc_data[epc_data["MAINHEAT_DESCRIPTION"] == "Boiler and radiators, mains gas"]
-    z["HOTWATER_DESCRIPTION"].value_counts()
-    z["MAIN_FUEL"].value_counts()
-
     # Filter on entries where we have a UPRN
     epc_data = epc_data[~pd.isnull(epc_data["UPRN"])]
 
diff --git a/etl/customers/goldman/asset_list.py b/etl/customers/goldman/asset_list.py
new file mode 100644
index 00000000..afe3c64c
--- /dev/null
+++ b/etl/customers/goldman/asset_list.py
@@ -0,0 +1,63 @@
+import pandas as pd
+from utils.s3 import read_excel_from_s3
+from utils.s3 import save_csv_to_s3
+
+PORTFOLIO_ID = 75
+USER_ID = 8
+
+
+def app():
+    asset_list = [
+        {
+            "address": "19 Emily Gardens",
+            "postcode": "B16 0ED",
+        },
+        {
+            "address": "Flat 6 41 Bradford Street",
+            "postcode": "B5 6HX",
+        },
+        {
+            "address": "197 FIELD LANE",
+            "postcode": "B32 4HL",
+        },
+        {
+            "address": "FLAT 4 108 SUMMER ROAD",
+            "postcode": "B23 6DY",
+        },
+        {
+            "address": "1, St. Benedicts Road",
+            "postcode": "B10 9DP",
+        },
+        {
+            "address": "29 COOKSEY LANE",
+            "postcode": "B44 9QL",
+        },
+        {
+            "address": "40 TRITTIFORD ROAD",
+            "postcode": "B13 0HG",
+        }
+    ]
+
+    asset_list = pd.DataFrame(asset_list)
+
+    # Store the asset list in s3
+    filename = f"{USER_ID}/{PORTFOLIO_ID}/pilot.csv"
+    save_csv_to_s3(
+        dataframe=asset_list,
+        bucket_name="retrofit-plan-inputs-dev",
+        file_name=filename
+    )
+
+    # EPC C portfolio
+    body = {
+        "portfolio_id": str(PORTFOLIO_ID),
+        "housing_type": "Private",
+        "goal": "Increase EPC",
+        "goal_value": "B",
+        "trigger_file_path": filename,
+        "already_installed_file_path": "",
+        "patches_file_path": "",
+        "non_invasive_recommendations_file_path": "",
+        "budget": None,
+    }
+    print(body)
diff --git a/etl/customers/goldman/epc_f_g_properties.py b/etl/customers/goldman/epc_f_g_properties.py
new file mode 100644
index 00000000..28197126
--- /dev/null
+++ b/etl/customers/goldman/epc_f_g_properties.py
@@ -0,0 +1,25 @@
+import pandas as pd
+
+
+def app():
+    """
+    Pulling the list of EPC G & F properties in Birmingham for Goldman Sachs
+    """
+    epc_data = pd.read_csv(
+        "local_data/all-domestic-certificates/domestic-E08000025-Birmingham/certificates.csv",
+        low_memory=False
+    )
+
+    epc_data = epc_data[~pd.isnull(epc_data["UPRN"])]
+    epc_data["UPRN"] = epc_data["UPRN"].astype(int).astype(str)
+
+    # Get the newest EPC for each UPRN. We use LODGEMENT_DATETIME as a proxy for this
+    epc_data["LODGEMENT_DATETIME"] = pd.to_datetime(epc_data["LODGEMENT_DATETIME"], format='mixed')
+
+    epc_data = epc_data.sort_values("LODGEMENT_DATETIME", ascending=False).drop_duplicates("UPRN")
+
+    # Get G & F properties
+    epc_data = epc_data[epc_data["CURRENT_ENERGY_RATING"].isin(["G", "F"])]
+
+    # Save as an excel
+    epc_data.to_excel("Birmingham EPC F & G Properties.xlsx", index=False)
diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py
index 537125a1..8988d2a6 100644
--- a/recommendations/HeatingRecommender.py
+++ b/recommendations/HeatingRecommender.py
@@ -359,7 +359,6 @@ class HeatingRecommender:
                     **heating_simulation_config,
                     **hotwater_simulation_config,
                     **fuel_simulation_config,
-                    "hot_water_energy_eff_ending": "Good"
                 }
 
             boiler_costs = self.costs.boiler(

From 155a8c568c595207e4d69cd2f766eeec4b5129f1 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 30 Apr 2024 17:41:33 +0100
Subject: [PATCH 23/58] working through the air source heat pump
 recommendations, added route march code for livewest

---
 .idea/Model.iml                               |   2 +-
 .idea/misc.xml                                |   2 +-
 .../AirSourceHeatPumpEfficiency.py            |  46 +++++-
 etl/customers/livewest/route_march.py         | 135 +++++++++++++++++
 .../places_for_people/route_march.py          | 137 ++++++++++++++++++
 recommendations/Costs.py                      |  29 ++++
 recommendations/HeatingControlRecommender.py  |   3 +
 recommendations/HeatingRecommender.py         | 127 +++++++++++++++-
 .../tests/test_air_source_heat_pump.py        |  77 ++++++++++
 9 files changed, 546 insertions(+), 12 deletions(-)
 create mode 100644 etl/customers/livewest/route_march.py
 create mode 100644 etl/customers/places_for_people/route_march.py
 create mode 100644 recommendations/tests/test_air_source_heat_pump.py

diff --git a/.idea/Model.iml b/.idea/Model.iml
index 4413bb06..b0f9c00d 100644
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@@ -7,7 +7,7 @@
       <sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
       <sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
     </content>
-    <orderEntry type="jdk" jdkName="Python 3.10 (backend)" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Python 3.10 (model_data)" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="PyNamespacePackagesService">
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 6f308057..1122b380 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,7 +3,7 @@
   <component name="Black">
     <option name="sdkName" value="Python 3.10 (backend)" />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (backend)" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (model_data)" project-jdk-type="Python SDK" />
   <component name="PythonCompatibilityInspectionAdvertiser">
     <option name="version" value="3" />
   </component>
diff --git a/etl/air_source_heat_pump/AirSourceHeatPumpEfficiency.py b/etl/air_source_heat_pump/AirSourceHeatPumpEfficiency.py
index 2ba82e77..044cc830 100644
--- a/etl/air_source_heat_pump/AirSourceHeatPumpEfficiency.py
+++ b/etl/air_source_heat_pump/AirSourceHeatPumpEfficiency.py
@@ -21,6 +21,8 @@ class AirSourceHeatPumpEfficiency:
 
     def create_dataset(self):
         logger.info("Creating solar photo supply dataset")
+
+        all_counts = []
         for dir in tqdm(self.file_directories):
             filepath = dir / "certificates.csv"
             df = pd.read_csv(filepath, low_memory=False)
@@ -44,9 +46,15 @@ class AirSourceHeatPumpEfficiency:
             df = df[
                 df["MAINHEAT_DESCRIPTION"].str.contains("air source heat pump", case=False, na=False)
             ]
+
+            # Drop rows that have a missing PROPERTY_TYPE, BUILT_FORM, CONSTRUCTION_AGE_BAND, TOTAL_FLOOR_AREA
+            for col in ["PROPERTY_TYPE", "BUILT_FORM", "CONSTRUCTION_AGE_BAND", "TOTAL_FLOOR_AREA"]:
+                df = df[~pd.isnull(df[col])]
             # Get the columns we're interested in
             df = df[
                 [
+                    "PROPERTY_TYPE",
+                    "BUILT_FORM",
                     "MAINHEAT_DESCRIPTION",
                     "MAINHEAT_ENERGY_EFF",
                     "MAINHEATCONT_DESCRIPTION",
@@ -60,6 +68,8 @@ class AirSourceHeatPumpEfficiency:
 
             counts = df.groupby(
                 [
+                    "PROPERTY_TYPE",
+                    "BUILT_FORM",
                     "MAINHEAT_DESCRIPTION",
                     "MAINHEAT_ENERGY_EFF",
                     "MAINHEATCONT_DESCRIPTION",
@@ -71,8 +81,34 @@ class AirSourceHeatPumpEfficiency:
                 ]
             ).size().reset_index(name="count")
 
-            # Drop rows that have a missing PROPERTY_TYPE, BUILT_FORM, CONSTRUCTION_AGE_BAND, TOTAL_FLOOR_AREA
-            for col in ["PROPERTY_TYPE", "BUILT_FORM", "CONSTRUCTION_AGE_BAND", "TOTAL_FLOOR_AREA"]:
-                df = df[~pd.isnull(df[col])]
-            # Take newest LODGEMENT_DATE per UPRN
-            df = df.sort_values(by="LODGEMENT_DATE", ascending=False).drop_duplicates(subset=["UPRN"])
+            all_counts.append(counts)
+
+        all_counts = pd.concat(all_counts)
+
+        all_counts_agg = all_counts.groupby(
+            [
+                "PROPERTY_TYPE",
+                "BUILT_FORM",
+                "MAINHEAT_DESCRIPTION",
+                "MAINHEAT_ENERGY_EFF",
+                "MAINHEATCONT_DESCRIPTION",
+                "MAINHEATC_ENERGY_EFF",
+                "MAIN_FUEL",
+                "HOTWATER_DESCRIPTION",
+                "HOT_WATER_ENERGY_EFF",
+                "MAINS_GAS_FLAG"
+            ]
+        )["count"].sum().reset_index()
+
+        all_counts_agg.groupby("PROPERTY_TYPE")["count"].sum()
+        # In houses, 68% of the cases where we see air source heat pumps are in detached and semi-detached houses
+        all_counts_agg[all_counts_agg["PROPERTY_TYPE"] == "House"]["BUILT_FORM"].value_counts(normalize=True)
+
+        all_counts_agg[all_counts_agg["PROPERTY_TYPE"] == "Flat"]["BUILT_FORM"].value_counts()
+
+        # In Bungalows, 74% of cases where we see air source heat pumps are in detached and semi-detached houses
+        all_counts_agg[all_counts_agg["PROPERTY_TYPE"] == "Bungalow"]["BUILT_FORM"].value_counts(normalize=True)
+
+        # TODO: Research options for mid and end-terrace houses
+        # TODO: Research the options for flats - we see them appear in flats, but practically speaking, how does the
+        #       install process work?
diff --git a/etl/customers/livewest/route_march.py b/etl/customers/livewest/route_march.py
new file mode 100644
index 00000000..713ee56a
--- /dev/null
+++ b/etl/customers/livewest/route_march.py
@@ -0,0 +1,135 @@
+import os
+
+import pandas as pd
+from tqdm import tqdm
+
+from dotenv import load_dotenv
+from utils.s3 import read_excel_from_s3
+from backend.SearchEpc import SearchEpc
+from epc_api.client import EpcClient
+from utils.s3 import save_csv_to_s3
+
+load_dotenv(dotenv_path="backend/.env")
+EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
+
+
+def route_march_may_2024():
+    """
+    This code pulls supplementary data for a route march that is expected to happen in May 2024. This code
+    was authored on the 30th April 2024.
+    """
+
+    asset_list = read_excel_from_s3(
+        bucket_name="retrofit-datalake-dev",
+        file_key="customers/Livewest/Livewest proposed route march Apr-May 2024.xlsx",
+        header_row=1
+    )
+    asset_list = pd.read_excel("/Users/khalimconn-kowlessar/Downloads/Livewest proposed route march Apr-May 2024.xlsx")
+
+    epc_data = []
+    for _, unit in tqdm(asset_list.iterrows(), total=len(asset_list)):
+
+        lst = [unit["NO"], unit["ADDRESS 1"], unit["ADDRESS 2"], unit["ADDRESS 3"], unit["POSTCODE"]]
+        lst = [str(x).strip() for x in lst if not pd.isnull(x)]
+
+        full_address = ", ".join(lst)
+
+        searcher = SearchEpc(
+            address1=str(unit["NO"]),
+            postcode=unit["POSTCODE"],
+            auth_token=EPC_AUTH_TOKEN,
+            os_api_key="",
+            property_type=None,
+            fast=True,
+            full_address=full_address
+        )
+        # Force the skipping of estimating the EPC
+        searcher.ordnance_survey_client.property_type = None
+        searcher.ordnance_survey_client.built_form = None
+
+        searcher.find_property(skip_os=True)
+        if searcher.newest_epc is None:
+            # We try with a different address 1
+            add1 = str(unit["NO"]).lower()
+            add1 = (
+                add1
+                .replace("flat", "")
+                .replace("ft", "")
+                .replace("t", "").strip()
+            )
+
+            searcher = SearchEpc(
+                address1=add1,
+                postcode=unit["POSTCODE"],
+                auth_token=EPC_AUTH_TOKEN,
+                os_api_key="",
+                property_type=None,
+                fast=True,
+                full_address=full_address
+            )
+            # Force the skipping of estimating the EPC
+            searcher.ordnance_survey_client.property_type = None
+            searcher.ordnance_survey_client.built_form = None
+
+            searcher.find_property(skip_os=True)
+
+            if searcher.newest_epc is None:
+                continue
+
+        epc = {
+            "asset_list_house_no": unit["NO"],
+            "asset_list_address1": unit["ADDRESS 1"],
+            "asset_list_postcode": unit["POSTCODE"],
+            **searcher.newest_epc.copy()
+        }
+
+        epc_data.append(epc)
+
+    epc_df = pd.DataFrame(epc_data)
+
+    #
+
+    # Retrieve just the data we need
+    epc_df = epc_df[
+        [
+            "asset_list_house_no",
+            "asset_list_address1",
+            "asset_list_postcode",
+            "uprn",
+            "address",
+            "property-type",
+            "built-form",
+            "inspection-date",
+            "current-energy-rating",
+            "current-energy-efficiency",
+            "roof-description",
+            "walls-description",
+            "transaction-type"
+        ]
+    ].rename(columns={"address": "Matched EPC Address"})
+
+    asset_list = asset_list.merge(
+        epc_df,
+        how="left",
+        left_on=["NO", "ADDRESS 1", "POSTCODE"],
+        right_on=["asset_list_house_no", "asset_list_address1", "asset_list_postcode"]
+    )
+
+    asset_list = asset_list.drop_duplicates(subset=["NO", "ADDRESS 1", "POSTCODE"])
+    asset_list = asset_list.drop(columns=["asset_list_house_no", "asset_list_address1", "asset_list_postcode"])
+
+    # Rename the columns
+    asset_list = asset_list.rename(columns={
+        "property-type": "Property Type",
+        "built-form": "Archetype",
+        "inspection-date": "Last EPC Inspection Date",
+        "current-energy-rating": "Last survey EPC Rating",
+        "current-energy-efficiency": "Last survey SAP Score",
+        "roof-description": "Roof Construction",
+        "walls-description": "Wall Construction",
+        "transaction-type": "Last EPC Reason"
+    })
+
+    # Store as an excel
+    filename = "Livewest EPC data.xlsx"
+    asset_list.to_excel(filename, index=False)
diff --git a/etl/customers/places_for_people/route_march.py b/etl/customers/places_for_people/route_march.py
new file mode 100644
index 00000000..c38c71d3
--- /dev/null
+++ b/etl/customers/places_for_people/route_march.py
@@ -0,0 +1,137 @@
+import os
+
+import pandas as pd
+from tqdm import tqdm
+
+from dotenv import load_dotenv
+from utils.s3 import read_excel_from_s3
+from backend.SearchEpc import SearchEpc
+from epc_api.client import EpcClient
+from utils.s3 import save_csv_to_s3
+
+load_dotenv(dotenv_path="backend/.env")
+EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
+
+
+def app():
+    """
+    This app satisfies an ad hoc request to retrieve EPC data for properties owned by Places For People, to help
+    plan the route march
+
+    These properties were provided to us by Ecosurv
+    :return:
+    """
+    asset_list = read_excel_from_s3(
+        bucket_name="retrofit-datalake-dev",
+        file_key="customers/Places For People/PFP ROUTE MARCH PHASE 1.xlsx",
+        header_row=1
+    )
+
+    epc_data = []
+    for _, pfp_property in tqdm(asset_list.iterrows(), total=len(asset_list)):
+
+        lst = [
+            pfp_property["ADDRESS"],
+            pfp_property["ADDRESS.1"],
+            pfp_property["ADDRESS.2"],
+            pfp_property["POSTCODE"]
+        ]
+        lst = [str(x).strip() for x in lst if not pd.isnull(x)]
+
+        full_address = ", ".join(lst)
+
+        searcher = SearchEpc(
+            address1=str(pfp_property["ADDRESS"]),
+            postcode=pfp_property["POSTCODE"],
+            auth_token=EPC_AUTH_TOKEN,
+            os_api_key="",
+            property_type=None,
+            fast=True,
+            full_address=full_address
+        )
+        # Force the skipping of estimating the EPC
+        searcher.ordnance_survey_client.property_type = None
+        searcher.ordnance_survey_client.built_form = None
+
+        searcher.find_property(skip_os=True)
+        if searcher.newest_epc is None:
+            # We try with a different address 1
+            add1 = str(pfp_property["ADDRESS"]).lower()
+            add1 = add1.replace("ft", "").replace("t", "").strip()
+
+            searcher = SearchEpc(
+                address1=add1,
+                postcode=pfp_property["POSTCODE"],
+                auth_token=EPC_AUTH_TOKEN,
+                os_api_key="",
+                property_type=None,
+                fast=True,
+                full_address=full_address
+            )
+            # Force the skipping of estimating the EPC
+            searcher.ordnance_survey_client.property_type = None
+            searcher.ordnance_survey_client.built_form = None
+
+            searcher.find_property(skip_os=True)
+
+            if searcher.newest_epc is None:
+                continue
+
+        epc = {
+            "asset_list_address": pfp_property["ADDRESS"],
+            "asset_list_address1": pfp_property["ADDRESS.1"],
+            "asset_list_postcode": pfp_property["POSTCODE"],
+            **searcher.newest_epc.copy()
+        }
+
+        epc_data.append(epc)
+
+    epc_df = pd.DataFrame(epc_data)
+
+    # 702
+
+    # Retrieve just the data we need
+    epc_df = epc_df[
+        [
+            "asset_list_address",
+            "asset_list_address1",
+            "asset_list_postcode",
+            "uprn",
+            "address",
+            "property-type",
+            "built-form",
+            "inspection-date",
+            "current-energy-rating",
+            "current-energy-efficiency",
+            "roof-description",
+            "walls-description",
+            "transaction-type"
+        ]
+    ].rename(columns={"address": "Matched EPC Address"})
+
+    asset_list = asset_list.merge(
+        epc_df,
+        how="left",
+        left_on=["ADDRESS", "ADDRESS.1", "POSTCODE"],
+        right_on=["asset_list_address", "asset_list_address1", "asset_list_postcode"]
+    )
+
+    # De-dupe on the address and postcode, since 137 Badger Avenue was duplicated
+    asset_list = asset_list.drop_duplicates(subset=["ADDRESS", "ADDRESS.1", "POSTCODE"])
+    asset_list = asset_list.drop(columns=["asset_list_address", "asset_list_address1", "asset_list_postcode"])
+
+    # Rename the columns
+    asset_list = asset_list.rename(columns={
+        "property-type": "Property Type",
+        "built-form": "Archetype",
+        "inspection-date": "Last EPC Inspection Date",
+        "current-energy-rating": "Last survey EPC Rating",
+        "current-energy-efficiency": "Last survey SAP Score",
+        "roof-description": "Roof Construction",
+        "walls-description": "Wall Construction",
+        "transaction-type": "Last EPC Reason"
+    })
+
+    # Store as an excel
+    filename = "Places For People EPC data.xlsx"
+    asset_list.to_excel(filename, index=False)
diff --git a/recommendations/Costs.py b/recommendations/Costs.py
index d7a8ad2f..113bb6f8 100644
--- a/recommendations/Costs.py
+++ b/recommendations/Costs.py
@@ -37,6 +37,24 @@ MCS_SOLAR_PV_COST_DATA = {
     "average_cost_per_kwh-Northern Ireland": 2126.09,
 }
 
+# This data is based on the MCS database
+MCS_AIR_SOURCE_HEAT_PUMP_COST_DATA = {
+    "Outer London": None,
+    "Inner London": None,
+    "South East England": None,
+    "South West England": None,
+    "East of England": None,
+    "East Midlands": None,
+    "West Midlands": None,
+    "North East England": None,
+    "North West England": None,
+    "Yorkshire and the Humber": None,
+    "Wales": None,
+    "Scotland": None,
+    "Northern Ireland": None,
+}
+BOILER_UPGRADE_SCHEME_ASHP_VALUE = 7500
+
 # This is based on quotes from installers
 BATTERY_COST = 3500
 
@@ -1240,3 +1258,14 @@ class Costs:
             "labour_hours": labour_hours,
             "labour_days": labour_days,
         }
+
+    def air_source_heat_pump(self):
+        """
+        Based on the region and type of property, this function will produce a cost estimation for an air source heat
+        pump. This cost will include the boiler upgrade scheme grant
+
+        :return:
+        """
+
+        regional_cost = MCS_AIR_SOURCE_HEAT_PUMP_COST_DATA[self.region]
+        pass
diff --git a/recommendations/HeatingControlRecommender.py b/recommendations/HeatingControlRecommender.py
index d24ad811..76da6c37 100644
--- a/recommendations/HeatingControlRecommender.py
+++ b/recommendations/HeatingControlRecommender.py
@@ -35,6 +35,9 @@ class HeatingControlRecommender:
 
             return
 
+        if heating_description in ["Air source heat pump, radiators, electric"]:
+            self.recommend_time_temperature_zone_controls()
+
     def recommend_room_heaters_electric_controls(self):
         """
         If the home has Room heaters, electric, we start by identifying potential heating controls that could
diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py
index 8988d2a6..b197d817 100644
--- a/recommendations/HeatingRecommender.py
+++ b/recommendations/HeatingRecommender.py
@@ -1,6 +1,4 @@
-import pandas as pd
-
-from recommendations.Costs import Costs
+from recommendations.Costs import Costs, BOILER_UPGRADE_SCHEME_ASHP_VALUE
 from recommendations.recommendation_utils import check_simulation_difference, override_costs
 from backend.Property import Property
 from etl.epc_clean.epc_attributes.MainheatAttributes import MainHeatAttributes
@@ -18,7 +16,14 @@ class HeatingRecommender:
         self.heating_recommendations = []
         self.heating_control_recommendations = []
 
-    def recommend(self, phase=0):
+    def recommend(self, has_cavity_and_loft_recommendations, phase=0):
+        """
+        Produces heating recommendations
+        :param has_cavity_and_loft_recommendations: boolean indicating if we have produced a cavity or loft insulation
+        recommendation. If so, the air source heat pump description notes that the property must have an insulated
+        cavity and 270mm+ loft insulation to qualify for the boiler upgrade scheme grant
+        :param phase: indicates the phase of the retrofit programme
+        """
 
         # TODO: We could have a system flush recommendation for an existing boiler, where there is no need to replace
         #       the boiler, but instead flushing the system will make it run more efficiently. There is a cost for this
@@ -81,8 +86,120 @@ class HeatingRecommender:
                 phase=phase, system_change=system_change, exising_room_heaters=exising_room_heaters
             )
 
+        # We recommend air source heat pumps
+        # Heat pumps are suitable for all property types:
+        # https://energysavingtrust.org.uk/from-flats-to-terraced-houses-heat-pumps-are-suitable-for-all-property-types/
+        # Just seems least probable for flats, so we'll allow houses and bungalows
+        # In the future, we'll allow overrides, so that non-intrusive surveys can contradict these conditions
+        # and either allow or prevent the recommendation of an air source heat pump
+
+        suitable_property_types = self.property.data["property-type"] in ["House", "Bungalow"]
+        has_air_source_heat_pump = self.property.main_heating["has_air_source_heat_pump"]
+
+        if suitable_property_types and not has_air_source_heat_pump:
+            self.recommend_air_source_heat_pump(
+                phase=phase, has_cavity_and_loft_recommendations=has_cavity_and_loft_recommendations
+            )
+
         return
 
+    def recommend_air_source_heat_pump(self, phase, has_cavity_and_loft_recommendations):
+        """
+        This method will implement the recommendation for an air source heat pump
+        This is ultimately an overhaul to the heating system and so is recommended as an alternative to other
+        heating system recommendations
+        :return:
+        """
+
+        controls_recommender = HeatingControlRecommender(self.property)
+        controls_recommender.recommend(heating_description="Air source heat pump, radiators, electric")
+
+        ashp_costs = self.costs.air_source_heat_pump()
+        # We add the costs of the heating controls, onto each key in the costs dictionary
+        if controls_recommender.recommendation:
+            for key in ashp_costs:
+                ashp_costs[key] += controls_recommender.recommendation[0][key]
+
+        already_installed = "air_source_heat_pump" in self.property.already_installed
+        if already_installed:
+            ashp_costs = override_costs(ashp_costs)
+            description = "The property already has an air source heat pump, no further action needed."
+        else:
+            if controls_recommender.recommendation:
+                description = ("Install an air source heat pump, and upgrade heating controls to Smart Thermostats, "
+                               "room sensors and smart radiator valves (time & temperature zone control) ")
+            else:
+                description = "Install an air source heat pump."
+
+            # If the property does not have existing cavity and loft insulation, we include a note that the cost
+            # includes the boiler upgrade scheme and that the cavity and loft need to be treated, to ensure access
+            # to the funding
+            if has_cavity_and_loft_recommendations:
+                description = description + (f" The cost of works includes the £"
+                                             f"{BOILER_UPGRADE_SCHEME_ASHP_VALUE} boiler upgrade scheme grant. "
+                                             f"You must ensure that the property has an insulated cavity and "
+                                             f"270mm+ loft insulation to qualify for the grant")
+            else:
+                description = description + (f" The cost of works includes the £"
+                                             f"{BOILER_UPGRADE_SCHEME_ASHP_VALUE} boiler upgrade scheme grant")
+
+        simulation_config = {
+            "mainheat_energy_eff_ending": "Good",
+            "hot_water_energy_eff_ending": "Good"
+        }
+        # The air source heat pump also supplies the hot water ("From main system"), so we need to reflect this in
+        # the outcome of the recommendation
+        heating_ending_config = MainHeatAttributes("Air source heat pump, radiators, electric").process()
+        hotwater_ending_config = HotWaterAttributes("From main system").process()
+
+        # If the property does not currently have electric main fuel, we'll simulate the change
+        fuel_ending_config = {}
+        if self.property.main_fuel["fuel_type"] != "electricity":
+            fuel_ending_config = MainFuelAttributes("electricity (not community)").process()
+
+        # Check the simulation differences
+        heating_simulation_config = check_simulation_difference(
+            new_config=heating_ending_config, old_config=self.property.main_heating
+        )
+        hotwater_simulation_config = check_simulation_difference(
+            new_config=hotwater_ending_config, old_config=self.property.hotwater
+        )
+        fuel_simulation_config = check_simulation_difference(
+            new_config=fuel_ending_config, old_config=self.property.main_fuel
+        )
+
+        simulation_config = {
+            **simulation_config,
+            **heating_simulation_config,
+            **hotwater_simulation_config,
+            **fuel_simulation_config,
+        }
+
+        if controls_recommender.recommendation:
+            # We should have just the single recommendation for heat controls, which is time
+            # and temperature zone controls
+            simulation_config = {
+                **simulation_config,
+                **controls_recommender.recommendation[0]["simulation_config"]
+            }
+
+        ashp_recommendation = {
+            "phase": phase,
+            "parts": [
+                # TODO
+            ],
+            "type": "heating",
+            "description": description,
+            "starting_u_value": None,
+            "new_u_value": None,
+            "sap_points": None,
+            "already_installed": already_installed,
+            "simulation_config": simulation_config,
+            **ashp_costs
+        }
+
+        self.heating_recommendations.append(ashp_recommendation)
+
     @staticmethod
     def check_simulation_difference(old_config, new_config):
         """
@@ -146,7 +263,7 @@ class HeatingRecommender:
 
                 recommendation_description = f"{description} and {controls_description}"
 
-            already_installed = "cavity_wall_insulation" in self.property.already_installed
+            already_installed = "heating_controls" in self.property.already_installed
             if already_installed:
                 total_costs = override_costs(total_costs)
                 recommendation_description = "Heating system has already been upgraded, no further action needed."
diff --git a/recommendations/tests/test_air_source_heat_pump.py b/recommendations/tests/test_air_source_heat_pump.py
new file mode 100644
index 00000000..d80afc6e
--- /dev/null
+++ b/recommendations/tests/test_air_source_heat_pump.py
@@ -0,0 +1,77 @@
+from backend.Property import Property
+from recommendations.HeatingRecommender import HeatingRecommender
+from etl.epc.Record import EPCRecord
+
+
+class TestAirSourceHeatPump:
+
+    def test_eligible(self):
+        # This tests a house, which will be suitable for an air source heat pump
+        epc_record = EPCRecord()
+        epc_record.prepared_epc = {
+            "county": "Broxbourne",
+            "mainheat-energy-eff": "Good",
+            "hot-water-energy-eff": "Good",
+            "mainheatc-energy-eff": "Good",
+            "number-heated-rooms": 5,
+            "property-type": "House",
+            "built-form": "Semi-Detached"
+        }
+
+        property_instance = Property(id=0, address="fake", postcode="fake", epc_record=epc_record)
+        property_instance.main_heating = {
+            'original_description': 'Boiler and radiators, mains gas',
+            "clean_description": "Boiler and radiators, mains gas",
+            'has_radiators': True,
+            'has_fan_coil_units': False, 'has_pipes_in_screed_above_insulation': False,
+            'has_pipes_in_insulated_timber_floor': False, 'has_pipes_in_concrete_slab': False, 'has_boiler': True,
+            'has_air_source_heat_pump': False,
+            'has_room_heaters': False, 'has_electric_storage_heaters': False,
+            'has_warm_air': False,
+            'has_electric_underfloor_heating': False,
+            'has_electric_ceiling_heating': False, 'has_community_scheme': False,
+            'has_ground_source_heat_pump': False, 'has_no_system_present': False,
+            'has_portable_electric_heaters': False,
+            'has_water_source_heat_pump': False, 'has_electric': False,
+            'has_mains_gas': True, 'has_wood_logs': False,
+            'has_coal': False, 'has_oil': False, 'has_wood_pellets': False,
+            'has_anthracite': False,
+            'has_dual_fuel_mineral_and_wood': False, 'has_smokeless_fuel': False,
+            'has_lpg': False, 'has_assumed': False,
+            'has_electricaire': False, 'has_assumed_for_most_rooms': False,
+            'has_underfloor_heating': False,
+            "has_electric_heat_pumps": False,
+            "has_micro-cogeneration": False
+        }
+        property_instance.main_fuel = {
+            'original_description': 'mains gas (not community)', 'fuel_type': 'mains gas',
+            'tariff_type': None,
+            'is_community': False, 'no_individual_heating_or_community_network': False,
+            'complex_fuel_type': None
+        }
+        property_instance.hotwater = {
+            'original_description': 'From main system',
+            'clean_description': 'From main system',
+            'heater_type': None,
+            'system_type': 'from main system',
+            'thermostat_characteristics': None, 'heating_scope': None,
+            'energy_recovery': None, 'tariff_type': None,
+            'extra_features': None, 'chp_systems': None, 'distribution_system': None,
+            'no_system_present': None,
+            'assumed': False, "appliance": None
+        }
+        property_instance.main_heating_controls = {
+            'original_description': 'Programmer, room thermostat and TRVs',
+            'thermostatic_control': 'room thermostat', 'charging_system': None, 'switch_system': 'programmer',
+            'no_control': None, 'dhw_control': None, 'community_heating': None, 'multiple_room_thermostats': False,
+            'auxiliary_systems': None, 'trvs': 'trvs', 'rate_control': None
+
+        }
+
+        recommender = HeatingRecommender(property_instance=property_instance)
+
+        assert not recommender.heating_recommendations
+
+        recommender.recommend(phase=0)
+
+        assert recommender.recommendation is None

From cce9c64fdc029b0f3fa35445f5784ad5698b7b29 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 2 May 2024 00:37:36 +0100
Subject: [PATCH 24/58] Adding company ownership matching code for goldman poc

---
 backend/SearchEpc.py                        | 34 ++++----
 etl/customers/goldman/property_ownership.py | 87 +++++++++++++++++++++
 etl/customers/livewest/route_march.py       |  3 +-
 3 files changed, 104 insertions(+), 20 deletions(-)
 create mode 100644 etl/customers/goldman/property_ownership.py

diff --git a/backend/SearchEpc.py b/backend/SearchEpc.py
index 44178792..06eea258 100644
--- a/backend/SearchEpc.py
+++ b/backend/SearchEpc.py
@@ -193,33 +193,31 @@ class SearchEpc:
     @classmethod
     def get_house_number(cls, address: str) -> str | None:
         """
-        This method will use the usaddress library to parse an address and extract the house number
-        :return:
+        This method uses the usaddress library to parse an address and extract the primary house or flat number.
         """
+        try:
+            parsed = usaddress.parse(address)
+            # First, try to get the 'OccupancyIdentifier' if 'OccupancyType' is detected
+            for part, type_ in parsed:
+                if type_ == 'OccupancyIdentifier':
+                    return part  # This assumes the first 'OccupancyIdentifier' after 'OccupancyType' is the primary
+                    # number
 
-        parsed = usaddress.parse(address)
-        parsed_house_number = [x for x in parsed if (x[1] == "AddressNumber")]
-        parsed_house_number = parsed_house_number[0][0] if parsed_house_number else None
+            # Fallback to 'AddressNumber' if no 'OccupancyIdentifier' is found
+            address_number = next((part for part, type_ in parsed if type_ == 'AddressNumber'), None)
+            if address_number:
+                return address_number.replace(",", "")  # Remove any trailing commas
 
-        if parsed_house_number is None:
-            # Because usaddress isn't optimal for parsing addresses with some prefixes such as 'Flat',
-            # we also add a custom approach
-
-            # Pattern to look for 'Flat' or 'Apartment' followed by a number, or just a number at the beginning
+            # Further fallback to custom regex (in case usaddress completely fails)
             pattern = r'(?i)(?:flat|apartment)\s*(\d+)|^\s*(\d+)'
-
             match = re.search(pattern, address)
-
             if match:
-                # Return the first non-None group found
                 return next(g for g in match.groups() if g is not None)
-            else:
-                return None
 
-        # Remove training commas
-        parsed_house_number = parsed_house_number.replace(",", "")
+        except Exception as e:
+            print(f"Error parsing address: {e}")
 
-        return parsed_house_number
+        return None
 
     @staticmethod
     def extract_numeric_housenumber_part(house_number: str | None) -> int | None:
diff --git a/etl/customers/goldman/property_ownership.py b/etl/customers/goldman/property_ownership.py
new file mode 100644
index 00000000..17db71b2
--- /dev/null
+++ b/etl/customers/goldman/property_ownership.py
@@ -0,0 +1,87 @@
+import pandas as pd
+from tqdm import tqdm
+from backend.SearchEpc import SearchEpc
+
+
+def aggregate_matches(matching_lookup, company_ownership):
+    df = matching_lookup.merge(company_ownership, how="left", on="Title Number")
+    counts = (
+        df.groupby(["Company Registration No. (1)", "Proprietor Name (1)"])["UPRN"]
+        .count()
+        .reset_index(name="number_of_properties")
+    )
+    counts = counts.sort_values("number_of_properties", ascending=False)
+
+    return counts
+
+
+def app():
+    """
+    This script is for scoping property ownership for EPC F & G rated properties in Birmingham, for Goldman Sachs
+    """
+
+    properties = pd.read_excel("Birmingham EPC F & G Properties.xlsx")
+    company_ownership = pd.read_csv("/Users/khalimconn-kowlessar/Downloads/CCOD_FULL_2024_04.csv")
+    # Filter on relevant postcodes
+    company_ownership = company_ownership[
+        company_ownership["Postcode"].str.lower().isin(properties["POSTCODE"].str.lower().unique())]
+
+    # Now we filter properties the other way around
+    properties = properties[properties["POSTCODE"].str.lower().isin(company_ownership["Postcode"].str.lower().unique())]
+    # We end up with 7.4k entries on a postcode match; however, we now need to do a direct address match
+
+    ignore_title_numbers = [
+        "WM922695",  # Land at the back of 17 Plumstead Road, Birmingham (B44 0EA): relates to WM154788
+        "WM426374",  # land on the south side of 15 Carlyle Road, Edgbaston, Birmingham (B16 9BH): relates to WM537591
+        "WM44948",
+    ]
+    company_ownership = company_ownership[~company_ownership["Title Number"].isin(ignore_title_numbers)]
+    # Remove entries where the address begins with the term "land adjoining":
+
+    company_ownership = company_ownership[~company_ownership["Property Address"].str.startswith("land adjoining")]
+
+    freehold_matching_lookup = []
+    leasehold_matching_lookup = []
+    for _, address in tqdm(properties.iterrows(), total=len(properties)):
+        filtered = company_ownership[
+            company_ownership["Postcode"].str.lower() == address["POSTCODE"].lower()
+            ].copy()
+
+        filtered["house_number"] = filtered["Property Address"].apply(SearchEpc.get_house_number)
+        house_no = SearchEpc.get_house_number(address["ADDRESS1"])
+
+        filtered = filtered[filtered["house_number"] == house_no]
+
+        if filtered.empty:
+            continue
+
+        filtered_freehold = filtered[filtered["Tenure"] == "Freehold"]
+        filtered_leasehold = filtered[filtered["Tenure"] == "Leasehold"]
+
+        if filtered_freehold.shape[0] > 1:
+            raise ValueError("Multiple freehold matches")
+
+        if filtered_leasehold.shape[0] > 1:
+            raise ValueError("Multiple leasehold matches")
+
+        if not filtered_leasehold.empty:
+            leasehold_matching_lookup.append(
+                {
+                    "UPRN": address["UPRN"],
+                    "Title Number": filtered_leasehold["Title Number"].values[0]
+                }
+            )
+
+        if not filtered_freehold.empty:
+            freehold_matching_lookup.append(
+                {
+                    "UPRN": address["UPRN"],
+                    "Title Number": filtered_freehold["Title Number"].values[0]
+                }
+            )
+
+    freehold_matching_lookup = pd.DataFrame(freehold_matching_lookup)
+    leasehold_matching_lookup = pd.DataFrame(leasehold_matching_lookup)
+
+    freehold_aggregate = aggregate_matches(freehold_matching_lookup, company_ownership)
+    leasehold_aggregate = aggregate_matches(leasehold_matching_lookup, company_ownership)
diff --git a/etl/customers/livewest/route_march.py b/etl/customers/livewest/route_march.py
index 713ee56a..9e69fd43 100644
--- a/etl/customers/livewest/route_march.py
+++ b/etl/customers/livewest/route_march.py
@@ -22,9 +22,8 @@ def route_march_may_2024():
     asset_list = read_excel_from_s3(
         bucket_name="retrofit-datalake-dev",
         file_key="customers/Livewest/Livewest proposed route march Apr-May 2024.xlsx",
-        header_row=1
+        header_row=0
     )
-    asset_list = pd.read_excel("/Users/khalimconn-kowlessar/Downloads/Livewest proposed route march Apr-May 2024.xlsx")
 
     epc_data = []
     for _, unit in tqdm(asset_list.iterrows(), total=len(asset_list)):

From 76ef5c897a2471473058a39d765f55e452a82db5 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 2 May 2024 00:41:47 +0100
Subject: [PATCH 25/58] handling genuine dual leasehold ownership

---
 etl/customers/goldman/property_ownership.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/etl/customers/goldman/property_ownership.py b/etl/customers/goldman/property_ownership.py
index 17db71b2..4a6faede 100644
--- a/etl/customers/goldman/property_ownership.py
+++ b/etl/customers/goldman/property_ownership.py
@@ -42,6 +42,7 @@ def app():
 
     freehold_matching_lookup = []
     leasehold_matching_lookup = []
+    shared_leasehold_match = []
     for _, address in tqdm(properties.iterrows(), total=len(properties)):
         filtered = company_ownership[
             company_ownership["Postcode"].str.lower() == address["POSTCODE"].lower()
@@ -62,9 +63,10 @@ def app():
             raise ValueError("Multiple freehold matches")
 
         if filtered_leasehold.shape[0] > 1:
-            raise ValueError("Multiple leasehold matches")
-
-        if not filtered_leasehold.empty:
+            matched = filtered_leasehold[["Title Number"]].copy()
+            matched.insert(0, "UPRN", address["UPRN"])
+            shared_leasehold_match.append(matched)
+        elif not filtered_leasehold.empty:
             leasehold_matching_lookup.append(
                 {
                     "UPRN": address["UPRN"],

From 5cb35e1d9eb3beec22d772293208fef09c18fbba Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 2 May 2024 18:33:25 +0100
Subject: [PATCH 26/58] working on property ownership pipeline

---
 backend/SearchEpc.py                          |  13 +-
 etl/customers/goldman/property_ownership.py   | 369 ++++++++++++++++--
 etl/customers/vander_elliot/__init__.py       |   0
 .../vander_elliot/single_property_pilot.py    |  56 +++
 recommendations/HeatingRecommender.py         |  14 +-
 recommendations/Recommendations.py            |   2 +-
 recommendations/SolarPvRecommendations.py     |   2 +-
 7 files changed, 418 insertions(+), 38 deletions(-)
 create mode 100644 etl/customers/vander_elliot/__init__.py
 create mode 100644 etl/customers/vander_elliot/single_property_pilot.py

diff --git a/backend/SearchEpc.py b/backend/SearchEpc.py
index 06eea258..db9ec4ff 100644
--- a/backend/SearchEpc.py
+++ b/backend/SearchEpc.py
@@ -196,6 +196,13 @@ class SearchEpc:
         This method uses the usaddress library to parse an address and extract the primary house or flat number.
         """
         try:
+
+            # Custom regex to catch a broad range of cases
+            pattern = r'(?i)(?:flat|apartment)\s*(\d+)|^\s*(\d+)'
+            match = re.search(pattern, address)
+            if match:
+                return next(g for g in match.groups() if g is not None)
+
             parsed = usaddress.parse(address)
             # First, try to get the 'OccupancyIdentifier' if 'OccupancyType' is detected
             for part, type_ in parsed:
@@ -208,12 +215,6 @@ class SearchEpc:
             if address_number:
                 return address_number.replace(",", "")  # Remove any trailing commas
 
-            # Further fallback to custom regex (in case usaddress completely fails)
-            pattern = r'(?i)(?:flat|apartment)\s*(\d+)|^\s*(\d+)'
-            match = re.search(pattern, address)
-            if match:
-                return next(g for g in match.groups() if g is not None)
-
         except Exception as e:
             print(f"Error parsing address: {e}")
 
diff --git a/etl/customers/goldman/property_ownership.py b/etl/customers/goldman/property_ownership.py
index 4a6faede..abc2645d 100644
--- a/etl/customers/goldman/property_ownership.py
+++ b/etl/customers/goldman/property_ownership.py
@@ -1,27 +1,248 @@
+import re
 import pandas as pd
 from tqdm import tqdm
+import Levenshtein
 from backend.SearchEpc import SearchEpc
 
+# Average value of a property in the Midlands in 2024 was £238,000. Since these are EPC F & G properties, we assume
+# £213,165 since they trade at a discount. This is based on the Rightmove study where moving from an EPC F/G -> C has a
+# +15% impact on valuation and D -> C has a +3% impact on valuation.
+# The mode EPC rating is D, so we associate the £238k valuation with an EPC D property
+# Therefore value_of_F * 1.15 = value_of_D * 1.03
+# Therefore value_of_F = value_of_D * 1.03/1.15 = 238k * (1.03/1.15) = 213165
+PROPERTY_VALUE_ESTIMATE = 213_165
 
-def aggregate_matches(matching_lookup, company_ownership):
-    df = matching_lookup.merge(company_ownership, how="left", on="Title Number")
+
+def aggregate_matches(matching_lookup, company_ownership, properties):
+    df = matching_lookup.merge(
+        company_ownership, how="left", on="Title Number"
+    ).merge(
+        properties[["UPRN", "LOCAL_AUTHORITY_LABEL"]], how="left", on="UPRN"
+    )
     counts = (
-        df.groupby(["Company Registration No. (1)", "Proprietor Name (1)"])["UPRN"]
+        df.groupby(["Company Registration No. (1)", "Proprietor Name (1)", "LOCAL_AUTHORITY_LABEL"])["UPRN"]
         .count()
         .reset_index(name="number_of_properties")
     )
     counts = counts.sort_values("number_of_properties", ascending=False)
 
-    return counts
+    pivot_counts = counts.pivot_table(
+        index=["Company Registration No. (1)", "Proprietor Name (1)"],  # Rows: companies and proprietors
+        columns="LOCAL_AUTHORITY_LABEL",  # Columns: each local authority
+        values="number_of_properties",  # The counts of properties
+        fill_value=0  # Fill missing values with 0 (where there are no properties owned)
+    ).reset_index()
+
+    total_counts = (
+        df.groupby(["Company Registration No. (1)", "Proprietor Name (1)"])["UPRN"]
+        .count()
+        .reset_index(name="total_number_of_properties")
+    )
+
+    pivot_counts = pivot_counts.merge(
+        total_counts, how="left", on=["Company Registration No. (1)", "Proprietor Name (1)"]
+    )
+
+    pivot_counts = pivot_counts.sort_values("total_number_of_properties", ascending=False)
+
+    pivot_counts["approx_value"] = PROPERTY_VALUE_ESTIMATE * pivot_counts["total_number_of_properties"]
+    pivot_counts["cumulative_value"] = pivot_counts["approx_value"].cumsum()
+
+    return pivot_counts
+
+
+def find_f_g_properties(paths):
+    data = []
+    for path in tqdm(paths):
+        epc_data = pd.read_csv(path, low_memory=False)
+
+        epc_data = epc_data[~pd.isnull(epc_data["UPRN"])]
+        epc_data["UPRN"] = epc_data["UPRN"].astype(int).astype(str)
+
+        # Get the newest EPC for each UPRN. We use LODGEMENT_DATETIME as a proxy for this
+        epc_data["LODGEMENT_DATETIME"] = pd.to_datetime(epc_data["LODGEMENT_DATETIME"], format='mixed')
+
+        epc_data = epc_data.sort_values("LODGEMENT_DATETIME", ascending=False).drop_duplicates("UPRN")
+
+        # Get G & F properties
+        epc_data = epc_data[epc_data["CURRENT_ENERGY_RATING"].isin(["G", "F"])]
+        data.append(epc_data)
+
+    data = pd.concat(data)
+
+    # Save as an excel
+    data.to_excel("EPC F & G Properties.xlsx", index=False)
+
+
+def remove_text_in_brackets(address: str) -> str:
+    """
+    Removes any text within parentheses, including the parentheses themselves.
+
+    Parameters:
+    - address (str): The address string to clean.
+
+    Returns:
+    - str: The cleaned address with text in parentheses removed.
+    """
+    # Regex to find and remove content in parentheses
+    cleaned_address = re.sub(r'\s*\([^)]*\)', '', address)
+    return cleaned_address
+
+
+def extract_numeric_part(house_number: str) -> str:
+    """
+    Extracts only the numeric part from a house number that may contain letters.
+
+    Parameters:
+    - house_number (str): The house number string possibly containing letters.
+
+    Returns:
+    - str: The numeric part of the house number.
+    """
+    # Use regular expression to replace all non-digit characters with nothing
+    numeric_part = re.sub(r'\D', '', house_number)
+    return numeric_part
+
+
+def levenstein_match(matching_string, df, address_col):
+    match_to = df[address_col].tolist()
+    # Strip out punctuation and spaces
+    match_to = [re.sub(r'[^\w\s]', '', x) for x in match_to]
+    match_to = [x.replace(" ", "") for x in match_to]
+
+    # Perform matching between full key and match_to
+    distances = [Levenshtein.distance(matching_string, s) for s in match_to]
+    best_match_index = distances.index(min(distances))
+    # We might want to consider a threshold for the distance; however, for the moment
+    # we always take the closest match, regardless of how large its distance is
+    df = df.iloc[best_match_index:best_match_index + 1]
+
+    return df
+
+
+def extract_range_from_house_number(house_number_range: str):
+    """
+    Detects if the house number includes a numeric range (formatted as 'x-y') and extracts all values within this range.
+    Non-numeric strings containing hyphens are ignored.
+
+    Parameters:
+    - house_number_range (str): The house number string that might contain a range.
+
+    Returns:
+    - list of str: A list of all numbers within the range if it is a range; otherwise, returns None.
+    """
+
+    if not house_number_range:
+        return None
+
+    if '-' in house_number_range:
+        parts = house_number_range.split('-')
+        if len(parts) == 2 and parts[0].isdigit() and parts[1].isdigit():
+            # Both parts are numeric, so it's a valid range
+            start, end = map(int, parts)  # Convert parts to integers
+            return [str(x) for x in range(start, end + 1)]
+        else:
+            # Not a valid numeric range
+            return None
+    else:
+        # No hyphen present or not a range
+        return None
+
+
+def is_in_range(row, house_no):
+    """ Check if the house number is within the range provided in the row. """
+    if row and any(house_no == num for num in row):
+        return True
+    return False
+
+
+def remove_duplicate_matches(matching_lookup, properties, company_ownership):
+    duplicated_titles = matching_lookup[matching_lookup["Title Number"].duplicated()]["Title Number"].unique()
+
+    to_drop = []
+    for dupe_title in duplicated_titles:
+        dupe_data = matching_lookup[matching_lookup["Title Number"] == dupe_title].copy()
+        matched_addresses = dupe_data.merge(
+            properties[["UPRN", "ADDRESS"]].rename(columns={"ADDRESS": "epc_address"}),
+            how="left", on="UPRN"
+        ).merge(
+            company_ownership[["Title Number", "Property Address"]],
+            how="left", on="Title Number"
+        )
+        # We perform levenstein to get the best match
+        best_match = levenstein_match(
+            matching_string=matched_addresses["Property Address"].values[0],
+            df=matched_addresses,
+            address_col="epc_address"
+        )
+        matches_to_drop = matched_addresses[
+            ~matched_addresses["UPRN"].isin(best_match["UPRN"].values)
+        ]
+
+        to_drop.append(
+            matches_to_drop[["UPRN", "Title Number"]].copy()
+        )
+
+    to_drop = pd.concat(to_drop)
+
+    if not to_drop.empty:
+        merged = pd.merge(matching_lookup, to_drop, on=['UPRN', 'Title Number'], how='left', indicator=True)
+        merged[merged['_merge'] == 'left_only'].drop(columns=['_merge'])
+
+        return merged
+
+    return matching_lookup
 
 
 def app():
     """
     This script is for scoping property ownership for EPC F & G rated properties in Birmingam, for Goldman Sachs
     """
+    # paths = [
+    #     "local_data/all-domestic-certificates/domestic-E08000025-Birmingham/certificates.csv",
+    #     "local_data/all-domestic-certificates/domestic-E08000031-Wolverhampton/certificates.csv",
+    #     "local_data/all-domestic-certificates/domestic-E08000026-Coventry/certificates.csv",
+    #     "local_data/all-domestic-certificates/domestic-E06000016-Leicester/certificates.csv",
+    #     "local_data/all-domestic-certificates/domestic-E06000015-Derby/certificates.csv",
+    #     "local_data/all-domestic-certificates/domestic-E06000021-Stoke-on-Trent/certificates.csv",
+    #     "local_data/all-domestic-certificates/domestic-E06000018-Nottingham/certificates.csv",
+    #     "local_data/all-domestic-certificates/domestic-E07000154-Northampton/certificates.csv",
+    #     "local_data/all-domestic-certificates/domestic-E06000061-North-Northamptonshire/certificates.csv",
+    #     "local_data/all-domestic-certificates/domestic-E06000062-West-Northamptonshire/certificates.csv",
+    #     "local_data/all-domestic-certificates/domestic-E07000152-East-Northamptonshire/certificates.csv",
+    #     "local_data/all-domestic-certificates/domestic-E07000155-South-Northamptonshire/certificates.csv",
+    #     #
+    #     "local_data/all-domestic-certificates/domestic-E08000027-Dudley/certificates.csv",
+    #     "local_data/all-domestic-certificates/domestic-E08000029-Solihull/certificates.csv",
+    #     "local_data/all-domestic-certificates/domestic-E07000234-Bromsgrove/certificates.csv",
+    #     "local_data/all-domestic-certificates/domestic-E08000030-Walsall/certificates.csv",
+    #     "local_data/all-domestic-certificates/domestic-E08000028-Sandwell/certificates.csv",
+    #     "local_data/all-domestic-certificates/domestic-E06000019-Herefordshire-County-of/certificates.csv",
+    #     "local_data/all-domestic-certificates/domestic-E06000020-Telford-and-Wrekin/certificates.csv",
+    #     "local_data/all-domestic-certificates/domestic-E07000218-North-Warwickshire/certificates.csv",
+    #     "local_data/all-domestic-certificates/domestic-E07000222-Warwick/certificates.csv",
+    #     "local_data/all-domestic-certificates/domestic-E07000237-Worcester/certificates.csv",
+    #     # East midlands
+    #     "local_data/all-domestic-certificates/domestic-E07000035-Derbyshire-Dales/certificates.csv",
+    #     "local_data/all-domestic-certificates/domestic-E07000038-North-East-Derbyshire/certificates.csv",
+    #     "local_data/all-domestic-certificates/domestic-E07000039-South-Derbyshire/certificates.csv",
+    #     "local_data/all-domestic-certificates/domestic-E06000012-North-East-Lincolnshire/certificates.csv",
+    #     "local_data/all-domestic-certificates/domestic-E06000013-North-Lincolnshire/certificates.csv",
+    #     "local_data/all-domestic-certificates/domestic-E07000138-Lincoln/certificates.csv",
+    #     "local_data/all-domestic-certificates/domestic-E07000134-North-West-Leicestershire/certificates.csv",
+    #     "local_data/all-domestic-certificates/domestic-E06000017-Rutland/certificates.csv",
+    # ]
+    # paths = list(set(paths))
+    # find_f_g_properties(paths)
 
-    properties = pd.read_excel("Birmingham EPC F & G Properties.xlsx")
+    properties = pd.read_excel("EPC F & G Properties.xlsx")
     company_ownership = pd.read_csv("/Users/khalimconn-kowlessar/Downloads/CCOD_FULL_2024_04.csv")
+    company_ownership["is_overseas"] = False
+    overseas_company_ownership = pd.read_csv("/Users/khalimconn-kowlessar/Downloads/OCOD_FULL_2024_04 2.csv")
+    overseas_company_ownership["is_overseas"] = True
+
+    company_ownership = pd.concat([company_ownership, overseas_company_ownership])
+
     # FIlter on relevant postcodes
     company_ownership = company_ownership[
         company_ownership["Postcode"].str.lower().isin(properties["POSTCODE"].str.lower().unique())]
@@ -29,6 +250,10 @@ def app():
     # Now we filter properties the other way around
     properties = properties[properties["POSTCODE"].str.lower().isin(company_ownership["Postcode"].str.lower().unique())]
     # We end up with 7.4k entires on a postcode match, however we need to now do a direct address match
+    # Keep only privately rented and owner-occupied properties
+    properties = properties[
+        properties["TENURE"].isin(["rental (private)", "Rented (private)", "owner-occupied", "Owner-occupied"])
+    ]
 
     ignore_title_numbers = [
         "WM922695",  # Land at the back of 17 Plumstead Road, Birmingham (B44 0EA): relates to WM154788
@@ -36,22 +261,78 @@ def app():
         "WM44948",
     ]
     company_ownership = company_ownership[~company_ownership["Title Number"].isin(ignore_title_numbers)]
-    # Remove entries where the address begins with the term "land adjoining":
 
-    company_ownership = company_ownership[~company_ownership["Property Address"].str.startswith("land adjoining")]
+    # Remove entries where the address begins with the term "land adjoining", or other records that don't reference the
+    # the property itself
+    starting_terms = [
+        "land adjoining", "land on the", "land to the rear of", "land and buildings on the",
+        "garage adjoining", "car park adjoining", "the land adjoining", "land and buildings adjoining",
+        "all royal mines"
+    ]
+    for starting_term in starting_terms:
+        company_ownership = company_ownership[
+            ~company_ownership["Property Address"].str.lower().str.startswith()
+        ]
 
-    freehold_matching_lookup = []
-    leasehold_matching_lookup = []
+    biggest_ownership = (
+        company_ownership
+        .groupby(["Company Registration No. (1)", "Proprietor Name (1)"])["Title Number"]
+        .count()
+        .reset_index(name="n_owned_properties")
+    )
+    biggest_ownership = biggest_ownership.sort_values("n_owned_properties", ascending=False)
+
+    freehold_matching_lookup = []  # 634
+    leasehold_matching_lookup = []  # 86
     shared_leasehold_match = []
+    shared_freehold_match = []
     for _, address in tqdm(properties.iterrows(), total=len(properties)):
+        match_type = "exact"
         filtered = company_ownership[
             company_ownership["Postcode"].str.lower() == address["POSTCODE"].lower()
             ].copy()
 
-        filtered["house_number"] = filtered["Property Address"].apply(SearchEpc.get_house_number)
+        # Remove postcode and remove trailing commas
+        filtered["house_number"] = (
+            filtered["Property Address"]
+            .apply(remove_text_in_brackets)
+            .apply(SearchEpc.get_house_number)
+            .str.lower()
+            .str.replace(",", "")
+        )
         house_no = SearchEpc.get_house_number(address["ADDRESS1"])
+        if house_no is not None:
+            house_no = house_no.replace(",", "")
 
-        filtered = filtered[filtered["house_number"] == house_no]
+        if house_no is None:
+            # It's hard for us to get a reliable match
+            # filtered = filtered[filtered["Property Address"].str.contains(address["ADDRESS1"])]
+            # if filtered.shape[0] > 1:
+            #     raise Exception("No valid - maybe we should do levenstein?")
+            continue
+
+        else:
+
+            if house_no not in filtered["house_number"].values:
+                # If this happens, we check house_number for a x-y range of addresses
+                filtered["house_number_range"] = filtered["house_number"].apply(extract_range_from_house_number)
+                # If we have found a house number range, we check if the house number is in the range and if not,
+                # we drop the row
+                filtered['is_in_range'] = filtered['house_number_range'].apply(lambda x: is_in_range(x, house_no))
+
+                if filtered['is_in_range'].any():
+                    # If house_no is found in any range, keep only rows where it is in range
+                    filtered = filtered[filtered['is_in_range']]
+                else:
+                    # If house_no is not found in any range, filter out rows where 'house_number_range' is not None
+                    filtered = filtered[filtered['house_number_range'].isnull()]
+
+                # Strip out letters from house_no and house_number
+                house_no = extract_numeric_part(house_no)
+                filtered["house_number"] = filtered["house_number"].astype(str).apply(extract_numeric_part)
+                match_type = "approximate"
+
+            filtered = filtered[filtered["house_number"] == house_no]
 
         if filtered.empty:
             continue
@@ -60,7 +341,17 @@ def app():
         filtered_leasehold = filtered[filtered["Tenure"] == "Leasehold"]
 
         if filtered_freehold.shape[0] > 1:
-            raise ValueError("Multiple freehold matches")
+            matched = filtered_leasehold[["Title Number"]].copy()
+            matched.insert(0, "UPRN", address["UPRN"])
+            shared_freehold_match.append(matched)
+        elif not filtered_freehold.empty:
+            freehold_matching_lookup.append(
+                {
+                    "UPRN": address["UPRN"],
+                    "Title Number": filtered_freehold["Title Number"].values[0],
+                    "match_type": match_type,
+                }
+            )
 
         if filtered_leasehold.shape[0] > 1:
             matched = filtered_leasehold[["Title Number"]].copy()
@@ -70,20 +361,52 @@ def app():
             leasehold_matching_lookup.append(
                 {
                     "UPRN": address["UPRN"],
-                    "Title Number": filtered_leasehold["Title Number"].values[0]
-                }
-            )
-
-        if not filtered_freehold.empty:
-            freehold_matching_lookup.append(
-                {
-                    "UPRN": address["UPRN"],
-                    "Title Number": filtered_freehold["Title Number"].values[0]
+                    "Title Number": filtered_leasehold["Title Number"].values[0],
+                    "match_type": match_type,
                 }
             )
 
     freehold_matching_lookup = pd.DataFrame(freehold_matching_lookup)
     leasehold_matching_lookup = pd.DataFrame(leasehold_matching_lookup)
+    shared_leasehold_match = pd.concat(shared_leasehold_match)
 
-    freehold_aggregate = aggregate_matches(freehold_matching_lookup, company_ownership)
-    leasehold_aggregate = aggregate_matches(leasehold_matching_lookup, company_ownership)
+    # The approximate matches aren't very good
+    freehold_matching_lookup = freehold_matching_lookup[freehold_matching_lookup["match_type"] == "exact"]
+    leasehold_matching_lookup = leasehold_matching_lookup[leasehold_matching_lookup["match_type"] == "exact"]
+
+    # There are some cases where we have duplicates
+    freehold_matching_lookup = remove_duplicate_matches(freehold_matching_lookup, properties, company_ownership)
+    leasehold_matching_lookup = remove_duplicate_matches(leasehold_matching_lookup, properties, company_ownership)
+
+    matched_addresses = freehold_matching_lookup.merge(
+        properties[["UPRN", "ADDRESS"]].rename(columns={"ADDRESS": "epc_address"}),
+        how="left", on="UPRN"
+    ).merge(
+        company_ownership[["Title Number", "Property Address"]],
+        how="left", on="Title Number"
+    )
+
+    # shared_freehold_match = pd.DataFrame(shared_freehold_match)
+    # Store these files
+    freehold_matching_lookup.to_excel("freehold_matching_lookup.xlsx")
+    leasehold_matching_lookup.to_excel("leasehold_matching_lookup.xlsx")
+    shared_leasehold_match.to_excel("shared_leasehold_match.xlsx")
+    # shared_freehold_match.to_excel("shared_freehold_match.xlsx")
+
+    freehold_aggregate = aggregate_matches(freehold_matching_lookup, company_ownership, properties)
+    leasehold_aggregate = aggregate_matches(leasehold_matching_lookup, company_ownership, properties)
+
+    combined_aggregate = aggregate_matches(
+        pd.concat([freehold_matching_lookup, leasehold_matching_lookup]), company_ownership, properties
+    )
+
+    investment_20m = combined_aggregate[combined_aggregate["cumulative_value"] <= 20_500_000]
+    investment_50m = combined_aggregate[combined_aggregate["cumulative_value"] <= 51_000_000]
+
+    z = company_ownership[
+        (company_ownership["Company Registration No. (1)"] == freehold_aggregate["Company Registration No. (1)"].values[
+            0]) &
+        (company_ownership["Title Number"].isin(freehold_matching_lookup["Title Number"].values))
+        ]
+
+    df = freehold_matching_lookup.merge(company_ownership, how="left", on="Title Number")
diff --git a/etl/customers/vander_elliot/__init__.py b/etl/customers/vander_elliot/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/etl/customers/vander_elliot/single_property_pilot.py b/etl/customers/vander_elliot/single_property_pilot.py
new file mode 100644
index 00000000..99624dfc
--- /dev/null
+++ b/etl/customers/vander_elliot/single_property_pilot.py
@@ -0,0 +1,56 @@
+import pandas as pd
+from utils.s3 import read_excel_from_s3
+from utils.s3 import save_csv_to_s3
+
+PORTFOLIO_ID = 77
+USER_ID = 8
+
+patches = [
+    {
+        "address": "79 Perryn Road",
+        "postcode": "W3 7LT",
+        "roof-description": "Pitched, no insulation (assumed)"
+    }
+]
+
+
+def app():
+    asset_list = [
+        {
+            'uprn': 12103117,
+            "address": "79 Perryn Road",
+            "postcode": "W3 7LT",
+        },
+
+    ]
+
+    asset_list = pd.DataFrame(asset_list)
+
+    # Store the asset list in s3
+    filename = f"{USER_ID}/{PORTFOLIO_ID}/pilot.csv"
+    save_csv_to_s3(
+        dataframe=asset_list,
+        bucket_name="retrofit-plan-inputs-dev",
+        file_name=filename
+    )
+
+    # Store patches in s3
+    patches_filename = f"{USER_ID}/{PORTFOLIO_ID}/patches.json"
+    save_csv_to_s3(
+        dataframe=pd.DataFrame(patches),
+        bucket_name="retrofit-plan-inputs-dev",
+        file_name=patches_filename
+    )
+
+    body = {
+        "portfolio_id": str(PORTFOLIO_ID),
+        "housing_type": "Private",
+        "goal": "Increase EPC",
+        "goal_value": "B",
+        "trigger_file_path": filename,
+        "already_installed_file_path": "",
+        "patches_file_path": patches_filename,
+        "non_invasive_recommendations_file_path": "",
+        "budget": None,
+    }
+    print(body)
diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py
index b197d817..b42a9d5b 100644
--- a/recommendations/HeatingRecommender.py
+++ b/recommendations/HeatingRecommender.py
@@ -93,13 +93,13 @@ class HeatingRecommender:
         # In the future, we'll allow overrides, so that non-intrusive surveys can contradict these conditions
         # and either allow or prevent the recommendation of an air source heat pump
 
-        suitable_property_types = self.property.data["property-type"] in ["House", "Bungalow"]
-        has_air_source_heat_pump = self.property.main_heating["has_air_source_heat_pump"]
-
-        if suitable_property_types and not has_air_source_heat_pump:
-            self.recommend_air_source_heat_pump(
-                phase=phase, has_cavity_and_loft_recommendations=has_cavity_and_loft_recommendations
-            )
+        # suitable_property_types = self.property.data["property-type"] in ["House", "Bungalow"]
+        # has_air_source_heat_pump = self.property.main_heating["has_air_source_heat_pump"]
+        #
+        # if suitable_property_types and not has_air_source_heat_pump:
+        #     self.recommend_air_source_heat_pump(
+        #         phase=phase, has_cavity_and_loft_recommendations=has_cavity_and_loft_recommendations
+        #     )
 
         return
 
diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py
index 06dc2d61..1a6d7a1c 100644
--- a/recommendations/Recommendations.py
+++ b/recommendations/Recommendations.py
@@ -109,7 +109,7 @@ class Recommendations:
 
         # Heating and Electical systems
         if "heating" not in self.exclusions:
-            self.heating_recommender.recommend(phase=phase)
+            self.heating_recommender.recommend(phase=phase, has_cavity_and_loft_recommendations=None)
             if (
                 self.heating_recommender.heating_recommendations or
                 self.heating_recommender.heating_control_recommendations
diff --git a/recommendations/SolarPvRecommendations.py b/recommendations/SolarPvRecommendations.py
index b44557ab..58d4b123 100644
--- a/recommendations/SolarPvRecommendations.py
+++ b/recommendations/SolarPvRecommendations.py
@@ -44,7 +44,7 @@ class SolarPvRecommendations:
         :return:
         """
 
-        is_valid_property_type = self.property.data["property-type"] in ["House", "Bungalow"]
+        is_valid_property_type = self.property.data["property-type"] in ["House", "Bungalow", "Maisonette"]
         is_valid_roof_type = (
             self.property.roof["is_flat"] or self.property.roof["is_pitched"] or self.property.roof["is_roof_room"]
         )

From 9f9799cfa8a65a2714a91bd47a68dc57538758d0 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 3 May 2024 13:35:49 +0100
Subject: [PATCH 27/58] finishing property ownership

---
 etl/customers/goldman/property_ownership.py | 27 +++------------------
 1 file changed, 4 insertions(+), 23 deletions(-)

diff --git a/etl/customers/goldman/property_ownership.py b/etl/customers/goldman/property_ownership.py
index abc2645d..89e7c976 100644
--- a/etl/customers/goldman/property_ownership.py
+++ b/etl/customers/goldman/property_ownership.py
@@ -255,13 +255,6 @@ def app():
         properties["TENURE"].isin(["rental (private)", "Rented (private)", "owner-occupied", "Owner-occupied"])
     ]
 
-    ignore_title_numbers = [
-        "WM922695",  # Land at the back of 17 Plumstead Road, Birmingham (B44 0EA): relates to WM154788
-        "WM426374",  # land on the south side of 15 Carlyle Road, Edgbaston, Birmingham (B16 9BH): relates to WM537591
-        "WM44948",
-    ]
-    company_ownership = company_ownership[~company_ownership["Title Number"].isin(ignore_title_numbers)]
-
     # Remove entries where the address begins with the term "land adjoining", or other records that don't reference the
     # the property itself
     starting_terms = [
@@ -271,17 +264,9 @@ def app():
     ]
     for starting_term in starting_terms:
         company_ownership = company_ownership[
-            ~company_ownership["Property Address"].str.lower().str.startswith()
+            ~company_ownership["Property Address"].str.lower().str.startswith(starting_term)
         ]
 
-    biggest_ownership = (
-        company_ownership
-        .groupby(["Company Registration No. (1)", "Proprietor Name (1)"])["Title Number"]
-        .count()
-        .reset_index(name="n_owned_properties")
-    )
-    biggest_ownership = biggest_ownership.sort_values("n_owned_properties", ascending=False)
-
     freehold_matching_lookup = []  # 634
     leasehold_matching_lookup = []  # 86
     shared_leasehold_match = []
@@ -400,13 +385,9 @@ def app():
         pd.concat([freehold_matching_lookup, leasehold_matching_lookup]), company_ownership, properties
     )
 
+    df = pd.concat([freehold_matching_lookup, leasehold_matching_lookup])
+
     investment_20m = combined_aggregate[combined_aggregate["cumulative_value"] <= 20_500_000]
     investment_50m = combined_aggregate[combined_aggregate["cumulative_value"] <= 51_000_000]
 
-    z = company_ownership[
-        (company_ownership["Company Registration No. (1)"] == freehold_aggregate["Company Registration No. (1)"].values[
-            0]) &
-        (company_ownership["Title Number"].isin(freehold_matching_lookup["Title Number"].values))
-        ]
-
-    df = freehold_matching_lookup.merge(company_ownership, how="left", on="Title Number")
+    properties["WALLS_DESCRIPTION"].value_counts(normalize=True)

From 7ec795f5bb247d5a441501e64b5e4a9b61a0d53d Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 3 May 2024 15:46:03 +0100
Subject: [PATCH 28/58] completing ashp recommendations

---
 .idea/Model.iml                       |  2 +-
 .idea/misc.xml                        |  2 +-
 recommendations/Costs.py              | 48 ++++++++++++++++++---------
 recommendations/HeatingRecommender.py | 30 +++++++++--------
 recommendations/Recommendations.py    | 11 +++++-
 5 files changed, 60 insertions(+), 33 deletions(-)

diff --git a/.idea/Model.iml b/.idea/Model.iml
index b0f9c00d..4413bb06 100644
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@@ -7,7 +7,7 @@
       <sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
       <sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
     </content>
-    <orderEntry type="jdk" jdkName="Python 3.10 (model_data)" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Python 3.10 (backend)" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="PyNamespacePackagesService">
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 1122b380..6f308057 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,7 +3,7 @@
   <component name="Black">
     <option name="sdkName" value="Python 3.10 (backend)" />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (model_data)" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (backend)" project-jdk-type="Python SDK" />
   <component name="PythonCompatibilityInspectionAdvertiser">
     <option name="version" value="3" />
   </component>
diff --git a/recommendations/Costs.py b/recommendations/Costs.py
index 113bb6f8..fd3c1692 100644
--- a/recommendations/Costs.py
+++ b/recommendations/Costs.py
@@ -37,21 +37,22 @@ MCS_SOLAR_PV_COST_DATA = {
     "average_cost_per_kwh-Northern Ireland": 2126.09,
 }
 
-# This data is based on the MCS database
+# This data is based on the MCS database. We use the larger of the 2023 and 2024 averages,
+# to be conservative
 MCS_AIR_SOURCE_HEAT_PUMP_COST_DATA = {
-    "Outer London": None,
-    "Inner London": None,
-    "South East England": None,
-    "South West England": None,
-    "East of England": None,
-    "East Midlands": None,
-    "West Midlands": None,
-    "North East England": None,
-    "North West England": None,
-    "Yorkshire and the Humber": None,
-    "Wales": None,
-    "Scotland": None,
-    "Northern Ireland": None,
+    "Outer London": 13220,
+    "Inner London": 13220,
+    "South East England": 13547,
+    "South West England": 12776,
+    "East of England": 12585,
+    "East Midlands": 12239,
+    "West Midlands": 13182,
+    "North East England": 11829,
+    "North West England": 11714,
+    "Yorkshire and the Humber": 11919,
+    "Wales": 13701,
+    "Scotland": 12586,
+    "Northern Ireland": 12000,  # There are hardly any air source heat pump installs going on in Northern Ireland
 }
 BOILER_UPGRADE_SCHEME_ASHP_VALUE = 7500
 
@@ -1264,8 +1265,23 @@ class Costs:
         Based on the region and type of property, this function will produce a cost estimation for an air source heat
         pump. This cost will include the boiler upgrade scheme grant
 
-        :return:
         """
 
+        # This is the average cost of a project; we add some additional contingency on top
         regional_cost = MCS_AIR_SOURCE_HEAT_PUMP_COST_DATA[self.region]
-        pass
+
+        total_cost = regional_cost * (1 + self.CONTINGENCY) - BOILER_UPGRADE_SCHEME_ASHP_VALUE
+        subtotal_before_vat = total_cost / (1 + self.VAT_RATE)
+        vat = total_cost - subtotal_before_vat
+
+        # We assume 3 days installation
+        labour_days = 3
+        labour_hours = labour_days * 8
+
+        return {
+            "total": total_cost,
+            "subtotal": subtotal_before_vat,
+            "vat": vat,
+            "labour_hours": labour_hours,
+            "labour_days": labour_days,
+        }
diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py
index b42a9d5b..a51803f2 100644
--- a/recommendations/HeatingRecommender.py
+++ b/recommendations/HeatingRecommender.py
@@ -16,10 +16,10 @@ class HeatingRecommender:
         self.heating_recommendations = []
         self.heating_control_recommendations = []
 
-    def recommend(self, has_cavity_and_loft_recommendations, phase=0):
+    def recommend(self, has_cavity_or_loft_recommendations, phase=0):
         """
         Produces heating recommendations
-        :param has_cavity_and_loft_recommendations: boolean indicating if we have produced a cavity or loft insulation
+        :param has_cavity_or_loft_recommendations: boolean indicating if we have produced a cavity or loft insulation
         recommendation. If there are cavity or loft recommendations, the property would need to complete those measures
         before being able to get the boiler upgrade scheme benefits. The messaging in the front end would be to
         :param phase: indicates the phase of the retrofit programme
@@ -93,17 +93,17 @@ class HeatingRecommender:
         # In the future, we'll allow overrides, so that non-intrusive surveys can contradict these conditions
         # and either allow or prevent the recommendation of an air source heat pump
 
-        # suitable_property_types = self.property.data["property-type"] in ["House", "Bungalow"]
-        # has_air_source_heat_pump = self.property.main_heating["has_air_source_heat_pump"]
-        #
-        # if suitable_property_types and not has_air_source_heat_pump:
-        #     self.recommend_air_source_heat_pump(
-        #         phase=phase, has_cavity_and_loft_recommendations=has_cavity_and_loft_recommendations
-        #     )
+        suitable_property_type = self.property.data["property-type"] in ["House", "Bungalow"]
+        has_air_source_heat_pump = self.property.main_heating["has_air_source_heat_pump"]
+
+        if suitable_property_type and not has_air_source_heat_pump:
+            self.recommend_air_source_heat_pump(
+                phase=phase, has_cavity_or_loft_recommendations=has_cavity_or_loft_recommendations
+            )
 
         return
 
-    def recommend_air_source_heat_pump(self, phase, has_cavity_and_loft_recommendations):
+    def recommend_air_source_heat_pump(self, phase, has_cavity_or_loft_recommendations):
         """
         This method will implement the recommendation for an air source heat pump
         This is ultimately an overhaul to the heating system and so is recommended as an alternative to other
@@ -127,20 +127,20 @@ class HeatingRecommender:
         else:
             if controls_recommender.recommendation:
                 description = ("Install an air source heat pump, and upgrade heating controls to Smart Thermostats, "
-                               "room sensors and smart radiator valves (time & temperature zone control) ")
+                               "room sensors and smart radiator valves (time & temperature zone control).")
             else:
                 description = "Install an air source heat pump."
 
             # If the property does not have existing cavity and loft insulation, we include a note that the cost
             # includes the boiler upgrade scheme and that the cavity and loft need to be treated, to ensure access
             # to the funding
-            if has_cavity_and_loft_recommendations:
-                description = description + (f" The cost of works includes the £"
+            if has_cavity_or_loft_recommendations:
+                description = description + (f" The cost includes the £"
                                              f"{BOILER_UPGRADE_SCHEME_ASHP_VALUE} boiler upgrade scheme grant. "
                                              f"You must ensure that the property has an insulated cavity and "
                                              f"270mm+ loft insulation to qualify for the grant")
             else:
-                description = description + (f" The cost of works includes the £"
+                description = description + (f" The cost includes the £"
                                              f"{BOILER_UPGRADE_SCHEME_ASHP_VALUE} boiler upgrade scheme grant")
 
         simulation_config = {
@@ -178,6 +178,8 @@ class HeatingRecommender:
         if controls_recommender.recommendation:
             # We should have just the single recommendation for heat controls, which is time
             # and temperature zone controls
+            if len(controls_recommender.recommendation) != 1:
+                raise NotImplementedError("More than one heat controls recommendation for air source heat pump")
             simulation_config = {
                 **simulation_config,
                 **controls_recommender.recommendation[0]["simulation_config"]
diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py
index 1a6d7a1c..0942ab12 100644
--- a/recommendations/Recommendations.py
+++ b/recommendations/Recommendations.py
@@ -109,7 +109,16 @@ class Recommendations:
 
         # Heating and Electical systems
         if "heating" not in self.exclusions:
-            self.heating_recommender.recommend(phase=phase, has_cavity_and_loft_recommendations=None)
+
+            cavity_or_loft_recommendations = [
+                r for r in self.wall_recomender.recommendations + self.roof_recommender.recommendations
+                if r["type"] in ["cavity_wall_insulation", "loft_insulation"]
+            ]
+            has_cavity_or_loft_recommendations = len(cavity_or_loft_recommendations) > 0
+
+            self.heating_recommender.recommend(
+                phase=phase, has_cavity_or_loft_recommendations=has_cavity_or_loft_recommendations
+            )
             if (
                 self.heating_recommender.heating_recommendations or
                 self.heating_recommender.heating_control_recommendations

From f21221d721049444c82bce084199421aab19ce23 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 3 May 2024 16:08:14 +0100
Subject: [PATCH 29/58] working on ashp recommendations

---
 recommendations/Recommendations.py | 25 +++++++++++++++++++++----
 1 file changed, 21 insertions(+), 4 deletions(-)

diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py
index 0942ab12..c8113cdc 100644
--- a/recommendations/Recommendations.py
+++ b/recommendations/Recommendations.py
@@ -123,11 +123,28 @@ class Recommendations:
                 self.heating_recommender.heating_recommendations or
                 self.heating_recommender.heating_control_recommendations
             ):
-                if self.heating_recommender.heating_recommendations:
-                    property_recommendations.append(self.heating_recommender.heating_recommendations)
 
-                if self.heating_recommender.heating_control_recommendations:
-                    property_recommendations.append(self.heating_recommender.heating_control_recommendations)
+                # We split into first and second phase recommendations
+                first_phase_recommendations = [
+                    r for r in (
+                        self.heating_recommender.heating_recommendations +
+                        self.heating_recommender.heating_control_recommendations
+                    )
+                    if r["phase"] == phase
+                ]
+                second_phase_recommendations = [
+                    r for r in (
+                        self.heating_recommender.heating_recommendations +
+                        self.heating_recommender.heating_control_recommendations
+                    )
+                    if r["phase"] == phase + 1
+                ]
+
+                if first_phase_recommendations:
+                    property_recommendations.append(first_phase_recommendations)
+
+                if second_phase_recommendations:
+                    property_recommendations.append(second_phase_recommendations)
 
                 # We check if we have distinct heating and heating controls recommendations
                 # If so, we increment by 2 (one of the heating system, one for the heating controls)

From 56472f201e9dee48d8fa31b9dced73acc7fcc37d Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 7 May 2024 16:56:14 +0100
Subject: [PATCH 30/58] Added ashp unit tests

---
 .idea/Model.iml                               |   2 +-
 .idea/misc.xml                                |   2 +-
 etl/customers/goldman/property_ownership.py   |  14 +
 .../tests/test_air_source_heat_pump.py        | 867 ++++++++++++++++++
 4 files changed, 883 insertions(+), 2 deletions(-)

diff --git a/.idea/Model.iml b/.idea/Model.iml
index 4413bb06..b0f9c00d 100644
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@@ -7,7 +7,7 @@
       <sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
       <sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
     </content>
-    <orderEntry type="jdk" jdkName="Python 3.10 (backend)" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Python 3.10 (model_data)" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="PyNamespacePackagesService">
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 6f308057..1122b380 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,7 +3,7 @@
   <component name="Black">
     <option name="sdkName" value="Python 3.10 (backend)" />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (backend)" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (model_data)" project-jdk-type="Python SDK" />
   <component name="PythonCompatibilityInspectionAdvertiser">
     <option name="version" value="3" />
   </component>
diff --git a/etl/customers/goldman/property_ownership.py b/etl/customers/goldman/property_ownership.py
index 89e7c976..24922f68 100644
--- a/etl/customers/goldman/property_ownership.py
+++ b/etl/customers/goldman/property_ownership.py
@@ -391,3 +391,17 @@ def app():
     investment_50m = combined_aggregate[combined_aggregate["cumulative_value"] <= 51_000_000]
 
     properties["WALLS_DESCRIPTION"].value_counts(normalize=True)
+
+
+def company_aggregation():
+    company_ownership = pd.read_csv("/Users/khalimconn-kowlessar/Downloads/CCOD_FULL_2024_04.csv")
+    aggregation = (
+        company_ownership
+        .groupby(["Proprietor Name (1)", "Company Registration No. (1)"])
+        ["Property Address"]
+        .count()
+        .reset_index(name="Number of Properties")
+    )
+    aggregation = aggregation.sort_values("Number of Properties", ascending=False)
+
+    aggregation.to_excel("Company ownership aggregation.xlsx")
diff --git a/recommendations/tests/test_air_source_heat_pump.py b/recommendations/tests/test_air_source_heat_pump.py
index d80afc6e..0d69b10d 100644
--- a/recommendations/tests/test_air_source_heat_pump.py
+++ b/recommendations/tests/test_air_source_heat_pump.py
@@ -1,6 +1,154 @@
+import pandas as pd
+import msgpack
+from datetime import datetime
+
+from utils.s3 import read_dataframe_from_s3_parquet, read_from_s3
 from backend.Property import Property
 from recommendations.HeatingRecommender import HeatingRecommender
+from recommendations.Recommendations import Recommendations
 from etl.epc.Record import EPCRecord
+from etl.solar.SolarPhotoSupply import SolarPhotoSupply
+from backend.ml_models.api import ModelApi
+
+
+def find_examples():
+    """ Some scrappy helper code to find EPC examples"""
+    # Let's look for some testing data, where the only thing different pre and post is the installation of an
+    # air source heat pump
+    data = read_dataframe_from_s3_parquet(
+        bucket_name="retrofit-data-dev",
+        file_key="sap_change_model/2024-03-24-15-51-13/dataset_no_cleaning.parquet"
+    )
+
+    # Firstly, take records where before there was no air source heat pump and afterwards there was
+    data = data[
+        data["has_air_source_heat_pump_ending"] & ~data["has_air_source_heat_pump"]
+        ]
+
+    # Start with a property that has a boiler
+    data = data[data["has_boiler"]]
+
+    static_columns = [
+        # Walls
+        'walls_thermal_transmittance_ending',
+        'is_filled_cavity_ending',
+        'is_park_home_ending',
+        'walls_insulation_thickness_ending',
+        'external_insulation_ending',
+        'internal_insulation_ending',
+        # Floors
+        # 'floor_thermal_transmittance_ending',  # Don't subset on this, because it changes based on floor area
+        'floor_insulation_thickness_ending',
+        # Roof
+        'roof_thermal_transmittance_ending',
+        'is_at_rafters_ending',
+        'roof_insulation_thickness_ending',
+        # Hot water - air source heat pump will change the hot water system (probably from whatever it was -> main)
+        # 'heater_type_ending',
+        # 'system_type_ending',
+        # 'thermostat_characteristics_ending',
+        # 'heating_scope_ending',
+        # 'energy_recovery_ending',
+        # 'hotwater_tariff_type_ending',
+        # 'extra_features_ending',
+        # 'chp_systems_ending',
+        # 'distribution_system_ending',
+        # 'no_system_present_ending',
+        # 'appliance_ending',
+        # Heating - Will change when installing an ASHP
+        # 'has_radiators_ending',
+        # 'has_fan_coil_units_ending',
+        # 'has_pipes_in_screed_above_insulation_ending',
+        # 'has_pipes_in_insulated_timber_floor_ending',
+        # 'has_pipes_in_concrete_slab_ending',
+        # 'has_boiler_ending',
+        # 'has_air_source_heat_pump_ending',  # We want the air source heat pump to change
+        # 'has_room_heaters_ending',
+        # 'has_electric_storage_heaters_ending',
+        # 'has_warm_air_ending',
+        # 'has_electric_underfloor_heating_ending',
+        # 'has_electric_ceiling_heating_ending',
+        # 'has_community_scheme_ending',
+        # 'has_ground_source_heat_pump_ending',
+        # 'has_no_system_present_ending',
+        # 'has_portable_electric_heaters_ending',
+        # 'has_water_source_heat_pump_ending',
+        # 'has_electric_heat_pump_ending',
+        # 'has_micro-cogeneration_ending',
+        # 'has_solar_assisted_heat_pump_ending',
+        # 'has_exhaust_source_heat_pump_ending',
+        # 'has_community_heat_pump_ending',
+        # 'has_electric_ending',
+        # 'has_mains_gas_ending',
+        # 'has_wood_logs_ending', 'has_coal_ending', 'has_oil_ending',
+        # 'has_wood_pellets_ending', 'has_anthracite_ending', 'has_dual_fuel_mineral_and_wood_ending',
+        # 'has_smokeless_fuel_ending', 'has_lpg_ending', 'has_b30k_ending', 'has_electricaire_ending',
+        # 'has_assumed_for_most_rooms_ending', 'has_underfloor_heating_ending',
+        # 'thermostatic_control_ending',
+        # 'charging_system_ending',
+        # 'switch_system_ending',
+        # 'no_control_ending',
+        # 'dhw_control_ending',
+        # 'community_heating_ending',
+        # 'multiple_room_thermostats_ending',
+        # 'auxiliary_systems_ending',
+        # 'trvs_ending',
+        # 'rate_control_ending',
+        # Window
+        'glazing_type_ending',
+        # Fuel - could change with ASHP
+        # 'fuel_type_ending',
+        # 'main-fuel_tariff_type_ending',
+        # 'is_community_ending',
+        # 'no_individual_heating_or_community_network_ending',
+        # 'complex_fuel_type_ending',
+
+        'mechanical_ventilation_ending', 'secondheat_description_ending', 'glazed_type_ending',
+        'multi_glaze_proportion_ending', 'low_energy_lighting_ending', 'number_open_fireplaces_ending',
+        'solar_water_heating_flag_ending',
+        'photo_supply_ending',
+        'energy_tariff_ending',
+        'extension_count_ending',
+        'total_floor_area_ending',
+        # 'hot_water_energy_eff_ending',
+        'floor_energy_eff_ending',
+        'windows_energy_eff_ending',
+        'walls_energy_eff_ending',
+        'sheating_energy_eff_ending',
+        'roof_energy_eff_ending',
+        # 'mainheat_energy_eff_ending',
+        # 'mainheatc_energy_eff_ending',
+        'lighting_energy_eff_ending',
+        'number_habitable_rooms_ending',
+        'number_heated_rooms_ending',
+    ]
+
+    for col in static_columns:
+
+        base_starting = col.split("_ending")[0]
+        if base_starting + "_starting" in data.columns:
+            starting_col = base_starting + "_starting"
+        else:
+            starting_col = base_starting
+        # Filter
+        print("Column: %s" % col)
+        print("Starting size: %s" % data.shape[0])
+        data = data[data[starting_col] == data[col]]
+        print("Ending size: %s" % data.shape[0])
+
+        z = data[['uprn', col, starting_col]]
+
+    # Great example UPRNs
+    # 100030969273
+    # 10034685399 - Completely transforms the heating and hot water systems in the home (goes from oil -> electricity)
+    # 100091200828 - goes from a liquid petroleum gas boiler to ashp
+
+    # Look for starting with a gas boiler
+    data[
+        data["has_boiler"] & data["has_radiators"] & data["has_mains_gas"] & ~data["has_boiler_ending"]
+        ]
+
+    # UPRN: 100011776843
 
 
 class TestAirSourceHeatPump:
@@ -75,3 +223,722 @@ class TestAirSourceHeatPump:
         recommender.recommend(phase=0)
 
         assert recommender.recommendation is None
+
+    def test_air_source_heat_pump_gas_boiler_starting(self):
+        starting_epc = {
+            'low-energy-fixed-light-count': '', 'address': '430 Gidlow Lane', 'uprn-source': 'Energy Assessor',
+            'floor-height': '2.62', 'heating-cost-potential': '599', 'unheated-corridor-length': '',
+            'hot-water-cost-potential': '67', 'construction-age-band': 'England and Wales: 1950-1966',
+            'potential-energy-rating': 'C', 'mainheat-energy-eff': 'Good', 'windows-env-eff': 'Good',
+            'lighting-energy-eff': 'Very Good', 'environment-impact-potential': '72',
+            'glazed-type': 'double glazing installed during or after 2002', 'heating-cost-current': '913',
+            'address3': '', 'mainheatcont-description': 'Programmer, no room thermostat', 'sheating-energy-eff': 'N/A',
+            'property-type': 'House', 'local-authority-label': 'Wigan', 'fixed-lighting-outlets-count': '9',
+            'energy-tariff': 'Single', 'mechanical-ventilation': 'natural', 'hot-water-cost-current': '210',
+            'county': '', 'postcode': 'WN6 8RG', 'solar-water-heating-flag': 'N', 'constituency': 'E14001039',
+            'co2-emissions-potential': '2.6', 'number-heated-rooms': '4',
+            'floor-description': 'Solid, no insulation (assumed)', 'energy-consumption-potential': '180',
+            'local-authority': 'E08000010', 'built-form': 'Mid-Terrace', 'number-open-fireplaces': '0',
+            'windows-description': 'Fully double glazed', 'glazed-area': 'Normal', 'inspection-date': '2022-02-15',
+            'mains-gas-flag': 'Y', 'co2-emiss-curr-per-floor-area': '78', 'address1': '430 Gidlow Lane',
+            'heat-loss-corridor': '', 'flat-storey-count': '', 'constituency-label': 'Wigan',
+            'roof-energy-eff': 'Very Poor', 'total-floor-area': '80.0', 'building-reference-number': '10002334112',
+            'environment-impact-current': '38', 'co2-emissions-current': '6.2',
+            'roof-description': 'Pitched, no insulation (assumed)', 'floor-energy-eff': 'N/A',
+            'number-habitable-rooms': '4', 'address2': '', 'hot-water-env-eff': 'Poor', 'posttown': 'WIGAN',
+            'mainheatc-energy-eff': 'Very Poor', 'main-fuel': 'mains gas (not community)',
+            'lighting-env-eff': 'Very Good', 'windows-energy-eff': 'Good', 'floor-env-eff': 'N/A',
+            'sheating-env-eff': 'N/A', 'lighting-description': 'Low energy lighting in all fixed outlets',
+            'roof-env-eff': 'Very Poor', 'walls-energy-eff': 'Average', 'photo-supply': '0.0',
+            'lighting-cost-potential': '67', 'mainheat-env-eff': 'Good', 'multi-glaze-proportion': '100',
+            'main-heating-controls': '', 'lodgement-datetime': '2022-02-23 16:39:41', 'flat-top-storey': '',
+            'current-energy-rating': 'E', 'secondheat-description': 'Room heaters, mains gas',
+            'walls-env-eff': 'Average', 'transaction-type': 'ECO assessment', 'uprn': '100011776843',
+            'current-energy-efficiency': '45', 'energy-consumption-current': '441',
+            'mainheat-description': 'Boiler and radiators, mains gas', 'lighting-cost-current': '67',
+            'lodgement-date': '2022-02-23', 'extension-count': '1', 'mainheatc-env-eff': 'Very Poor',
+            'lmk-key': '46cb404438a6d88ddff8965cab8b3027ec15c32d93e0b6a5f0381a5109b9bb0d', 'wind-turbine-count': '0',
+            'tenure': 'Owner-occupied', 'floor-level': '', 'potential-energy-efficiency': '77',
+            'hot-water-energy-eff': 'Poor', 'low-energy-lighting': '100',
+            'walls-description': 'Cavity wall, filled cavity',
+            'hotwater-description': 'From main system, no cylinder thermostat'
+        }
+
+        ending_epc = {
+            'low-energy-fixed-light-count': '', 'address': '430 Gidlow Lane', 'uprn-source': 'Energy Assessor',
+            'floor-height': '2.62', 'heating-cost-potential': '803', 'unheated-corridor-length': '',
+            'hot-water-cost-potential': '292', 'construction-age-band': 'England and Wales: 1950-1966',
+            'potential-energy-rating': 'C', 'mainheat-energy-eff': 'Very Good', 'windows-env-eff': 'Good',
+            'lighting-energy-eff': 'Very Good', 'environment-impact-potential': '78',
+            'glazed-type': 'double glazing installed during or after 2002', 'heating-cost-current': '861',
+            'address3': '', 'mainheatcont-description': 'Time and temperature zone control',
+            'sheating-energy-eff': 'N/A', 'property-type': 'House', 'local-authority-label': 'Wigan',
+            'fixed-lighting-outlets-count': '9', 'energy-tariff': 'Single', 'mechanical-ventilation': 'natural',
+            'hot-water-cost-current': '434', 'county': '', 'postcode': 'WN6 8RG', 'solar-water-heating-flag': 'N',
+            'constituency': 'E14001039', 'co2-emissions-potential': '2.0', 'number-heated-rooms': '4',
+            'floor-description': 'Solid, no insulation (assumed)', 'energy-consumption-potential': '147',
+            'local-authority': 'E08000010', 'built-form': 'Mid-Terrace', 'number-open-fireplaces': '0',
+            'windows-description': 'Fully double glazed', 'glazed-area': 'Normal', 'inspection-date': '2022-05-11',
+            'mains-gas-flag': 'Y', 'co2-emiss-curr-per-floor-area': '43', 'address1': '430 Gidlow Lane',
+            'heat-loss-corridor': '', 'flat-storey-count': '', 'constituency-label': 'Wigan',
+            'roof-energy-eff': 'Very Poor', 'total-floor-area': '80.0', 'building-reference-number': '10002334112',
+            'environment-impact-current': '63', 'co2-emissions-current': '3.4',
+            'roof-description': 'Pitched, no insulation (assumed)', 'floor-energy-eff': 'N/A',
+            'number-habitable-rooms': '4', 'address2': '', 'hot-water-env-eff': 'Poor', 'posttown': 'WIGAN',
+            'mainheatc-energy-eff': 'Very Good', 'main-fuel': 'electricity (not community)',
+            'lighting-env-eff': 'Very Good', 'windows-energy-eff': 'Good', 'floor-env-eff': 'N/A',
+            'sheating-env-eff': 'N/A', 'lighting-description': 'Low energy lighting in all fixed outlets',
+            'roof-env-eff': 'Very Poor', 'walls-energy-eff': 'Average', 'photo-supply': '0.0',
+            'lighting-cost-potential': '67', 'mainheat-env-eff': 'Very Good', 'multi-glaze-proportion': '100',
+            'main-heating-controls': '', 'lodgement-datetime': '2022-06-06 13:01:20', 'flat-top-storey': '',
+            'current-energy-rating': 'E', 'secondheat-description': 'Room heaters, mains gas',
+            'walls-env-eff': 'Average', 'transaction-type': 'ECO assessment', 'uprn': '100011776843',
+            'current-energy-efficiency': '53', 'energy-consumption-current': '252',
+            'mainheat-description': 'Air source heat pump, radiators, electric', 'lighting-cost-current': '67',
+            'lodgement-date': '2022-06-06', 'extension-count': '1', 'mainheatc-env-eff': 'Very Good',
+            'lmk-key': '672d5947f3d4a55d97255af71651d6127a939418fa66a687070af77e0ba90df2', 'wind-turbine-count': '0',
+            'tenure': 'Owner-occupied', 'floor-level': '', 'potential-energy-efficiency': '70',
+            'hot-water-energy-eff': 'Very Poor', 'low-energy-lighting': '100',
+            'walls-description': 'Cavity wall, filled cavity', 'hotwater-description': 'From main system'
+        }
+
+        # differences = []
+        # for k, v in ending_epc.items():
+        #     if v != starting_epc[k]:
+        #         differences.append(
+        #             {
+        #                 "variable": k,
+        #                 "starting_value": starting_epc[k],
+        #                 "ending_value": v
+        #             }
+        #         )
+        # differences = pd.DataFrame(differences)
+        #
+        # diffs = differences[
+        #     differences["variable"].isin(
+        #         [
+        #             "mainheat-energy-eff",
+        #             "mainheatcont-description",
+        #             "mainheatc-energy-eff",
+        #             "main-fuel",
+        #             "mainheat-env-eff",
+        #             "mainheat-description",
+        #             "hot-water-energy-eff",
+        #             "hotwater-description"
+        #         ]
+        #     )
+        # ]
+
+        cleaning_data = read_dataframe_from_s3_parquet(
+            bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet",
+        )
+
+        cleaned = read_from_s3(
+            s3_file_name="cleaned_epc_data/cleaned.bson",
+            bucket_name="retrofit-data-dev"
+        )
+        cleaned = msgpack.unpackb(cleaned, raw=False)
+
+        photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket="retrofit-data-dev")
+
+        epc = EPCRecord(
+            epc_records={
+                'original_epc': starting_epc,
+                'full_sap_epc': {},
+                'old_data': []
+            },
+            run_mode="newdata",
+            cleaning_data=cleaning_data
+        )
+
+        home = Property(
+            id=0,
+            address="",
+            postcode="",
+            epc_record=epc,
+            already_installed={},
+            non_invasive_recommendations={},
+        )
+        home.in_conservation_area = False
+        home.is_listed = False
+        home.is_heritage = False
+        home.restricted_measures = True
+        home.get_components(
+            cleaned=cleaned,
+            photo_supply_lookup=photo_supply_lookup,
+            floor_area_decile_thresholds=floor_area_decile_thresholds
+        )
+
+        recommender = HeatingRecommender(property_instance=home)
+        recommender.recommend_air_source_heat_pump(phase=0, has_cavity_or_loft_recommendations=False)
+
+        # Patch - for this property, the hot water energy efficiency is very poor. It's not clear why this is,
+        # but we insert this for this test
+        recommender.heating_recommendations[0]["simulation_config"]["hot_water_energy_eff_ending"] = "Very Poor"
+
+        property_recommendations = Recommendations.insert_temp_recommendation_id([recommender.heating_recommendations])
+
+        assert len(recommender.heating_recommendations) == 1
+
+        home.create_base_difference_epc_record(cleaned_lookup=cleaned)
+        home.adjust_difference_record_with_recommendations(
+            property_recommendations, []
+        )
+
+        scoring_data = pd.DataFrame(home.recommendations_scoring_data).drop(
+            columns=["rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending",
+                     "carbon_ending"]
+        )
+
+        model_api = ModelApi(portfolio_id="ashp-test", timestamp=datetime.now().isoformat())
+        model_api.MODEL_PREFIXES = ["sap_change_predictions"]
+
+        predictions_dict = model_api.predict_all(
+            df=scoring_data,
+            bucket="retrofit-data-dev",
+            prediction_buckets={
+                "sap_change_predictions": "retrofit-sap-predictions-dev",
+            }
+        )
+        assert predictions_dict["sap_change_predictions"]["predictions"].values[0] == 52.2
+
+    def test_air_source_heat_pump_gas_boiler_starting_2(self):
+        """
+        This property seems to have minuscule movement in SAP - just 2 points
+        :return:
+        """
+
+        starting_epc = {
+            'low-energy-fixed-light-count': '', 'address': '31 Whinney Hill Park', 'uprn-source': 'Energy Assessor',
+            'floor-height': '2.3', 'heating-cost-potential': '394', 'unheated-corridor-length': '',
+            'hot-water-cost-potential': '48', 'construction-age-band': 'England and Wales: 1967-1975',
+            'potential-energy-rating': 'B', 'mainheat-energy-eff': 'Good', 'windows-env-eff': 'Average',
+            'lighting-energy-eff': 'Good', 'environment-impact-potential': '87',
+            'glazed-type': 'double glazing, unknown install date', 'heating-cost-current': '487', 'address3': '',
+            'mainheatcont-description': 'Programmer, room thermostat and TRVs', 'sheating-energy-eff': 'N/A',
+            'property-type': 'Bungalow', 'local-authority-label': 'Calderdale', 'fixed-lighting-outlets-count': '5',
+            'energy-tariff': 'Single', 'mechanical-ventilation': 'natural', 'hot-water-cost-current': '86',
+            'county': '', 'postcode': 'HD6 2PX', 'solar-water-heating-flag': 'N', 'constituency': 'E14000614',
+            'co2-emissions-potential': '0.8', 'number-heated-rooms': '2',
+            'floor-description': 'Solid, no insulation (assumed)', 'energy-consumption-potential': '105',
+            'local-authority': 'E08000033', 'built-form': 'End-Terrace', 'number-open-fireplaces': '0',
+            'windows-description': 'Fully double glazed', 'glazed-area': 'Normal', 'inspection-date': '2021-11-25',
+            'mains-gas-flag': 'Y', 'co2-emiss-curr-per-floor-area': '56', 'address1': '31 Whinney Hill Park',
+            'heat-loss-corridor': '', 'flat-storey-count': '', 'constituency-label': 'Calder Valley',
+            'roof-energy-eff': 'Good', 'total-floor-area': '44.0', 'building-reference-number': '10001772583',
+            'environment-impact-current': '62', 'co2-emissions-current': '2.5',
+            'roof-description': 'Pitched, 250 mm loft insulation', 'floor-energy-eff': 'N/A',
+            'number-habitable-rooms': '2', 'address2': '', 'hot-water-env-eff': 'Good', 'posttown': 'BRIGHOUSE',
+            'mainheatc-energy-eff': 'Good', 'main-fuel': 'mains gas (not community)', 'lighting-env-eff': 'Good',
+            'windows-energy-eff': 'Average', 'floor-env-eff': 'N/A', 'sheating-env-eff': 'N/A',
+            'lighting-description': 'Low energy lighting in 60% of fixed outlets', 'roof-env-eff': 'Good',
+            'walls-energy-eff': 'Average', 'photo-supply': '0.0', 'lighting-cost-potential': '40',
+            'mainheat-env-eff': 'Good', 'multi-glaze-proportion': '100', 'main-heating-controls': '',
+            'lodgement-datetime': '2021-11-25 11:39:35', 'flat-top-storey': '', 'current-energy-rating': 'D',
+            'secondheat-description': 'Room heaters, electric', 'walls-env-eff': 'Average',
+            'transaction-type': 'rental', 'uprn': '100051304421', 'current-energy-efficiency': '62',
+            'energy-consumption-current': '322', 'mainheat-description': 'Boiler and radiators, mains gas',
+            'lighting-cost-current': '56', 'lodgement-date': '2021-11-25', 'extension-count': '0',
+            'mainheatc-env-eff': 'Good', 'lmk-key': '077f70657e9c3f1f0ce5392798398398616b159493b2a8ca2338961596631c27',
+            'wind-turbine-count': '0', 'tenure': 'Rented (social)', 'floor-level': '',
+            'potential-energy-efficiency': '86', 'hot-water-energy-eff': 'Good', 'low-energy-lighting': '60',
+            'walls-description': 'Cavity wall, filled cavity', 'hotwater-description': 'From main system'
+        }
+
+        ending_epc = {
+            'low-energy-fixed-light-count': '', 'address': '31 Whinney Hill Park',
+            'uprn-source': 'Energy Assessor', 'floor-height': '2.3', 'heating-cost-potential': '277',
+            'unheated-corridor-length': '', 'hot-water-cost-potential': '266',
+            'construction-age-band': 'England and Wales: 1967-1975', 'potential-energy-rating': 'B',
+            'mainheat-energy-eff': 'Very Good', 'windows-env-eff': 'Average', 'lighting-energy-eff': 'Good',
+            'environment-impact-potential': '90', 'glazed-type': 'double glazing, unknown install date',
+            'heating-cost-current': '331', 'address3': '',
+            'mainheatcont-description': 'Programmer and room thermostat', 'sheating-energy-eff': 'N/A',
+            'property-type': 'Bungalow', 'local-authority-label': 'Calderdale',
+            'fixed-lighting-outlets-count': '5', 'energy-tariff': 'Single',
+            'mechanical-ventilation': 'natural', 'hot-water-cost-current': '404', 'county': '',
+            'postcode': 'HD6 2PX', 'solar-water-heating-flag': 'N', 'constituency': 'E14000614',
+            'co2-emissions-potential': '0.7', 'number-heated-rooms': '2',
+            'floor-description': 'Solid, no insulation (assumed)', 'energy-consumption-potential': '92',
+            'local-authority': 'E08000033', 'built-form': 'End-Terrace', 'number-open-fireplaces': '0',
+            'windows-description': 'Fully double glazed', 'glazed-area': 'Normal',
+            'inspection-date': '2021-11-25', 'mains-gas-flag': 'Y', 'co2-emiss-curr-per-floor-area': '48',
+            'address1': '31 Whinney Hill Park', 'heat-loss-corridor': '', 'flat-storey-count': '',
+            'constituency-label': 'Calder Valley', 'roof-energy-eff': 'Good', 'total-floor-area': '44.0',
+            'building-reference-number': '10001772583', 'environment-impact-current': '68',
+            'co2-emissions-current': '2.1', 'roof-description': 'Pitched, 250 mm loft insulation',
+            'floor-energy-eff': 'N/A', 'number-habitable-rooms': '2', 'address2': '',
+            'hot-water-env-eff': 'Poor', 'posttown': 'BRIGHOUSE', 'mainheatc-energy-eff': 'Average',
+            'main-fuel': 'electricity (not community)', 'lighting-env-eff': 'Good',
+            'windows-energy-eff': 'Average', 'floor-env-eff': 'N/A', 'sheating-env-eff': 'N/A',
+            'lighting-description': 'Low energy lighting in 60% of fixed outlets', 'roof-env-eff': 'Good',
+            'walls-energy-eff': 'Average', 'photo-supply': '0.0', 'lighting-cost-potential': '40',
+            'mainheat-env-eff': 'Very Good', 'multi-glaze-proportion': '100', 'main-heating-controls': '',
+            'lodgement-datetime': '2022-03-23 16:06:21', 'flat-top-storey': '', 'current-energy-rating': 'D',
+            'secondheat-description': 'Room heaters, electric', 'walls-env-eff': 'Average',
+            'transaction-type': 'rental', 'uprn': '100051304421', 'current-energy-efficiency': '64',
+            'energy-consumption-current': '283',
+            'mainheat-description': 'Air source heat pump, radiators, electric',
+            'lighting-cost-current': '57', 'lodgement-date': '2022-03-23', 'extension-count': '0',
+            'mainheatc-env-eff': 'Average',
+            'lmk-key': '6296248141447b53426a40f1c39da17dad5f4786485db55ee38737891111a4d4',
+            'wind-turbine-count': '0', 'tenure': 'Rented (social)', 'floor-level': '',
+            'potential-energy-efficiency': '89', 'hot-water-energy-eff': 'Very Poor',
+            'low-energy-lighting': '60', 'walls-description': 'Cavity wall, filled cavity',
+            'hotwater-description': 'From main system'
+        }
+
+        # differences = []
+        # for k, v in ending_epc.items():
+        #     if v != starting_epc[k]:
+        #         differences.append(
+        #             {
+        #                 "variable": k,
+        #                 "starting_value": starting_epc[k],
+        #                 "ending_value": v
+        #             }
+        #         )
+        # differences = pd.DataFrame(differences)
+        #
+        # diffs = differences[
+        #     differences["variable"].isin(
+        #         [
+        #             "mainheat-energy-eff",
+        #             "mainheatcont-description",
+        #             "mainheatc-energy-eff",
+        #             "main-fuel",
+        #             "mainheat-env-eff",
+        #             "mainheat-description",
+        #             "hot-water-energy-eff",
+        #             "hotwater-description"
+        #         ]
+        #     )
+        # ]
+
+        cleaning_data = read_dataframe_from_s3_parquet(
+            bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet",
+        )
+
+        cleaned = read_from_s3(
+            s3_file_name="cleaned_epc_data/cleaned.bson",
+            bucket_name="retrofit-data-dev"
+        )
+        cleaned = msgpack.unpackb(cleaned, raw=False)
+
+        photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket="retrofit-data-dev")
+
+        epc = EPCRecord(
+            epc_records={
+                'original_epc': starting_epc,
+                'full_sap_epc': {},
+                'old_data': []
+            },
+            run_mode="newdata",
+            cleaning_data=cleaning_data
+        )
+
+        home = Property(
+            id=0,
+            address="",
+            postcode="",
+            epc_record=epc,
+            already_installed={},
+            non_invasive_recommendations={},
+        )
+        home.in_conservation_area = False
+        home.is_listed = False
+        home.is_heritage = False
+        home.restricted_measures = True
+        home.get_components(
+            cleaned=cleaned,
+            photo_supply_lookup=photo_supply_lookup,
+            floor_area_decile_thresholds=floor_area_decile_thresholds
+        )
+
+        recommender = HeatingRecommender(property_instance=home)
+        recommender.recommend_air_source_heat_pump(phase=0, has_cavity_or_loft_recommendations=False)
+        property_recommendations = Recommendations.insert_temp_recommendation_id([recommender.heating_recommendations])
+
+        assert len(recommender.heating_recommendations) == 1
+
+        home.create_base_difference_epc_record(cleaned_lookup=cleaned)
+        home.adjust_difference_record_with_recommendations(
+            property_recommendations, []
+        )
+
+        scoring_data = pd.DataFrame(home.recommendations_scoring_data).drop(
+            columns=["rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending",
+                     "carbon_ending"]
+        )
+
+        model_api = ModelApi(portfolio_id="ashp-test", timestamp=datetime.now().isoformat())
+        model_api.MODEL_PREFIXES = ["sap_change_predictions"]
+
+        predictions_dict = model_api.predict_all(
+            df=scoring_data,
+            bucket="retrofit-data-dev",
+            prediction_buckets={
+                "sap_change_predictions": "retrofit-sap-predictions-dev",
+            }
+        )
+        assert predictions_dict["sap_change_predictions"]["predictions"].values[0] == 69.3
+
+        # In reality, this property's heating controls were downgraded in the ending EPC, so we test a manual patch of this
+        patched_simulation_config = {
+            'mainheat_energy_eff_ending': "Very Good",
+            'hot_water_energy_eff_ending': 'Very Poor',
+            'has_boiler_ending': False,
+            'has_air_source_heat_pump_ending': True,
+            'has_electric_ending': True,
+            'has_mains_gas_ending': False,
+            'fuel_type_ending': 'electricity',
+            'trvs_ending': None,
+            "mainheatc_energy_eff_ending": 'Average'
+        }
+
+        # PATCHING
+        property_recommendations_patch = Recommendations.insert_temp_recommendation_id(
+            [recommender.heating_recommendations]
+        )
+        property_recommendations_patch[0][0]["simulation_config"] = patched_simulation_config
+
+        home.create_base_difference_epc_record(cleaned_lookup=cleaned)
+        home.adjust_difference_record_with_recommendations(
+            property_recommendations_patch, []
+        )
+
+        scoring_data_patch = pd.DataFrame(home.recommendations_scoring_data).drop(
+            columns=["rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending",
+                     "carbon_ending"]
+        )
+
+        model_api = ModelApi(portfolio_id="ashp-test", timestamp=datetime.now().isoformat())
+        model_api.MODEL_PREFIXES = ["sap_change_predictions"]
+
+        predictions_dict_patch = model_api.predict_all(
+            df=scoring_data_patch,
+            bucket="retrofit-data-dev",
+            prediction_buckets={
+                "sap_change_predictions": "retrofit-sap-predictions-dev",
+            }
+        )
+        # The patched prediction is within 0.3 of the ending EPC's score (64), so the model is working
+        assert predictions_dict_patch["sap_change_predictions"]["predictions"].values[0] == 64.3
+        assert ending_epc["current-energy-efficiency"] == '64'
+
+    def test_air_source_heat_pump_lpg_boiler(self):
+        starting_epc = {
+            'low-energy-fixed-light-count': '', 'address': 'Holly Lodge, The Drive, Perry',
+            'uprn-source': 'Energy Assessor', 'floor-height': '2.8', 'heating-cost-potential': '1628',
+            'unheated-corridor-length': '', 'hot-water-cost-potential': '175',
+            'construction-age-band': 'England and Wales: 1950-1966', 'potential-energy-rating': 'D',
+            'mainheat-energy-eff': 'Poor', 'windows-env-eff': 'Average', 'lighting-energy-eff': 'Average',
+            'environment-impact-potential': '70', 'glazed-type': 'double glazing, unknown install date',
+            'heating-cost-current': '2158', 'address3': 'Perry',
+            'mainheatcont-description': 'No time or thermostatic control of room temperature',
+            'sheating-energy-eff': 'N/A', 'property-type': 'Bungalow', 'local-authority-label': 'Huntingdonshire',
+            'fixed-lighting-outlets-count': '12', 'energy-tariff': 'Single', 'mechanical-ventilation': 'natural',
+            'hot-water-cost-current': '257', 'county': 'Cambridgeshire', 'postcode': 'PE28 0SX',
+            'solar-water-heating-flag': 'N', 'constituency': 'E14000757', 'co2-emissions-potential': '3.3',
+            'number-heated-rooms': '5', 'floor-description': 'Solid, no insulation (assumed)',
+            'energy-consumption-potential': '128', 'local-authority': 'E07000011', 'built-form': 'Semi-Detached',
+            'number-open-fireplaces': '0', 'windows-description': 'Fully double glazed', 'glazed-area': 'Normal',
+            'inspection-date': '2023-08-31', 'mains-gas-flag': 'N', 'co2-emiss-curr-per-floor-area': '51',
+            'address1': 'Holly Lodge', 'heat-loss-corridor': '', 'flat-storey-count': '',
+            'constituency-label': 'Huntingdon', 'roof-energy-eff': 'Good', 'total-floor-area': '117.0',
+            'building-reference-number': '10005199915', 'environment-impact-current': '50',
+            'co2-emissions-current': '5.9', 'roof-description': 'Pitched, 270 mm loft insulation',
+            'floor-energy-eff': 'N/A', 'number-habitable-rooms': '5', 'address2': 'The Drive',
+            'hot-water-env-eff': 'Good', 'posttown': 'HUNTINGDON', 'mainheatc-energy-eff': 'Very Poor',
+            'main-fuel': 'LPG (not community)', 'lighting-env-eff': 'Average', 'windows-energy-eff': 'Average',
+            'floor-env-eff': 'N/A', 'sheating-env-eff': 'N/A',
+            'lighting-description': 'Low energy lighting in 33% of fixed outlets', 'roof-env-eff': 'Good',
+            'walls-energy-eff': 'Average', 'photo-supply': '0.0', 'lighting-cost-potential': '166',
+            'mainheat-env-eff': 'Good', 'multi-glaze-proportion': '100', 'main-heating-controls': '',
+            'lodgement-datetime': '2023-10-30 13:46:54', 'flat-top-storey': '', 'current-energy-rating': 'F',
+            'secondheat-description': 'Room heaters, electric', 'walls-env-eff': 'Average',
+            'transaction-type': 'ECO assessment', 'uprn': '100091200828', 'current-energy-efficiency': '32',
+            'energy-consumption-current': '243', 'mainheat-description': 'Boiler and radiators, LPG',
+            'lighting-cost-current': '277', 'lodgement-date': '2023-10-30', 'extension-count': '0',
+            'mainheatc-env-eff': 'Very Poor',
+            'lmk-key': 'f1d3bd4b8b50bc9b006231ccb158537c408523b748b3f4ef7e98cd03b144afa5', 'wind-turbine-count': '0',
+            'tenure': 'Owner-occupied', 'floor-level': '', 'potential-energy-efficiency': '56',
+            'hot-water-energy-eff': 'Poor', 'low-energy-lighting': '33',
+            'walls-description': 'Cavity wall, filled cavity', 'hotwater-description': 'From main system'
+        }
+
+        ending_epc = {
+            'low-energy-fixed-light-count': '', 'address': 'Holly Lodge, The Drive, Perry',
+            'uprn-source': 'Energy Assessor', 'floor-height': '2.8', 'heating-cost-potential': '917',
+            'unheated-corridor-length': '', 'hot-water-cost-potential': '328',
+            'construction-age-band': 'England and Wales: 1950-1966', 'potential-energy-rating': 'A',
+            'mainheat-energy-eff': 'Very Good', 'windows-env-eff': 'Average', 'lighting-energy-eff': 'Average',
+            'environment-impact-potential': '96', 'glazed-type': 'double glazing, unknown install date',
+            'heating-cost-current': '1098', 'address3': 'Perry',
+            'mainheatcont-description': 'Programmer, TRVs and bypass', 'sheating-energy-eff': 'N/A',
+            'property-type': 'Bungalow', 'local-authority-label': 'Huntingdonshire',
+            'fixed-lighting-outlets-count': '12', 'energy-tariff': 'Single', 'mechanical-ventilation': 'natural',
+            'hot-water-cost-current': '328', 'county': 'Cambridgeshire', 'postcode': 'PE28 0SX',
+            'solar-water-heating-flag': 'N', 'constituency': 'E14000757', 'co2-emissions-potential': '0.3',
+            'number-heated-rooms': '5', 'floor-description': 'Solid, no insulation (assumed)',
+            'energy-consumption-potential': '16', 'local-authority': 'E07000011', 'built-form': 'Semi-Detached',
+            'number-open-fireplaces': '0', 'windows-description': 'Fully double glazed', 'glazed-area': 'Normal',
+            'inspection-date': '2023-10-05', 'mains-gas-flag': 'N', 'co2-emiss-curr-per-floor-area': '6',
+            'address1': 'Holly Lodge', 'heat-loss-corridor': '', 'flat-storey-count': '',
+            'constituency-label': 'Huntingdon', 'roof-energy-eff': 'Good', 'total-floor-area': '117.0',
+            'building-reference-number': '10005199915', 'environment-impact-current': '92',
+            'co2-emissions-current': '0.7', 'roof-description': 'Pitched, 270 mm loft insulation',
+            'floor-energy-eff': 'N/A', 'number-habitable-rooms': '5', 'address2': 'The Drive',
+            'hot-water-env-eff': 'Very Good', 'posttown': 'HUNTINGDON', 'mainheatc-energy-eff': 'Average',
+            'main-fuel': 'electricity (not community)', 'lighting-env-eff': 'Average', 'windows-energy-eff': 'Average',
+            'floor-env-eff': 'N/A', 'sheating-env-eff': 'N/A',
+            'lighting-description': 'Low energy lighting in 33% of fixed outlets', 'roof-env-eff': 'Good',
+            'walls-energy-eff': 'Average', 'photo-supply': '', 'lighting-cost-potential': '166',
+            'mainheat-env-eff': 'Very Good', 'multi-glaze-proportion': '100', 'main-heating-controls': '',
+            'lodgement-datetime': '2023-11-01 16:29:16', 'flat-top-storey': '', 'current-energy-rating': 'A',
+            'secondheat-description': 'Room heaters, electric', 'walls-env-eff': 'Average',
+            'transaction-type': 'ECO assessment', 'uprn': '100091200828', 'current-energy-efficiency': '92',
+            'energy-consumption-current': '37', 'mainheat-description': 'Air source heat pump, radiators, electric',
+            'lighting-cost-current': '277', 'lodgement-date': '2023-11-01', 'extension-count': '0',
+            'mainheatc-env-eff': 'Average',
+            'lmk-key': 'cb7f2838b727907767c8c2a385cd22f722b1e4745463391d910d228e52124515', 'wind-turbine-count': '0',
+            'tenure': 'Owner-occupied', 'floor-level': '', 'potential-energy-efficiency': '95',
+            'hot-water-energy-eff': 'Good', 'low-energy-lighting': '33',
+            'walls-description': 'Cavity wall, filled cavity', 'hotwater-description': 'From main system'
+        }
+
+        cleaning_data = read_dataframe_from_s3_parquet(
+            bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet",
+        )
+
+        cleaned = read_from_s3(
+            s3_file_name="cleaned_epc_data/cleaned.bson",
+            bucket_name="retrofit-data-dev"
+        )
+        cleaned = msgpack.unpackb(cleaned, raw=False)
+
+        photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket="retrofit-data-dev")
+
+        epc = EPCRecord(
+            epc_records={
+                'original_epc': starting_epc,
+                'full_sap_epc': {},
+                'old_data': []
+            },
+            run_mode="newdata",
+            cleaning_data=cleaning_data
+        )
+
+        home = Property(
+            id=0,
+            address="",
+            postcode="",
+            epc_record=epc,
+            already_installed={},
+            non_invasive_recommendations={},
+        )
+        home.in_conservation_area = False
+        home.is_listed = False
+        home.is_heritage = False
+        home.restricted_measures = True
+        home.get_components(
+            cleaned=cleaned,
+            photo_supply_lookup=photo_supply_lookup,
+            floor_area_decile_thresholds=floor_area_decile_thresholds
+        )
+
+        recommender = HeatingRecommender(property_instance=home)
+        recommender.recommend_air_source_heat_pump(phase=0, has_cavity_or_loft_recommendations=False)
+        property_recommendations = Recommendations.insert_temp_recommendation_id([recommender.heating_recommendations])
+
+        assert len(recommender.heating_recommendations) == 1
+
+        home.create_base_difference_epc_record(cleaned_lookup=cleaned)
+        home.adjust_difference_record_with_recommendations(
+            property_recommendations, []
+        )
+
+        scoring_data = pd.DataFrame(home.recommendations_scoring_data).drop(
+            columns=["rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending",
+                     "carbon_ending"]
+        )
+
+        model_api = ModelApi(portfolio_id="ashp-test", timestamp=datetime.now().isoformat())
+        model_api.MODEL_PREFIXES = ["sap_change_predictions"]
+
+        predictions_dict = model_api.predict_all(
+            df=scoring_data,
+            bucket="retrofit-data-dev",
+            prediction_buckets={
+                "sap_change_predictions": "retrofit-sap-predictions-dev",
+            }
+        )
+        # We predict a huge uplift but not quite as much as the EPC, due to some distinct differences between our
+        # recommendation and the EPC
+        assert predictions_dict["sap_change_predictions"]["predictions"].values[0] == 81.3
+        assert ending_epc['current-energy-efficiency'] == '92'
+
+        # PATCH
+        # We patch the simulation config to reflect the ending EPC, to see how close we get to the ending EPC's score
+        patched_simulation_config = {
+            'mainheat_energy_eff_ending': "Very Good",
+            'hot_water_energy_eff_ending': 'Good',
+            'has_boiler_ending': False,
+            'has_air_source_heat_pump_ending': True,
+            'has_electric_ending': True,
+            'has_lpg_ending': False,
+            'fuel_type_ending': 'electricity',
+            'switch_system_ending': 'programmer',
+            'no_control_ending': None,
+            'auxiliary_systems_ending': 'bypass',
+            'trvs_ending': 'trvs',
+            "mainheatc_energy_eff_ending": 'Average'
+        }
+
+        # PATCHING
+        property_recommendations_patch = Recommendations.insert_temp_recommendation_id(
+            [recommender.heating_recommendations]
+        )
+        property_recommendations_patch[0][0]["simulation_config"] = patched_simulation_config
+
+        home.create_base_difference_epc_record(cleaned_lookup=cleaned)
+        home.adjust_difference_record_with_recommendations(
+            property_recommendations_patch, []
+        )
+
+        scoring_data_patch = pd.DataFrame(home.recommendations_scoring_data).drop(
+            columns=["rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending",
+                     "carbon_ending"]
+        )
+
+        model_api = ModelApi(portfolio_id="ashp-test", timestamp=datetime.now().isoformat())
+        model_api.MODEL_PREFIXES = ["sap_change_predictions"]
+
+        predictions_dict_patch = model_api.predict_all(
+            df=scoring_data_patch,
+            bucket="retrofit-data-dev",
+            prediction_buckets={
+                "sap_change_predictions": "retrofit-sap-predictions-dev",
+            }
+        )
+
+        assert predictions_dict_patch["sap_change_predictions"]["predictions"].values[0] == 88.9
+        # We still underpredict but the improvement is notable
+
+    def test_offgrid(self):
+        """
+        We test on a property we've worked with before, where we compare two options
+        a) Upgrading to a boiler
+        b) Upgrading to a heat pump
+        :return:
+        """
+
+        starting_epc = {
+            'low-energy-fixed-light-count': '', 'address': '6 Beech Road', 'uprn-source': 'Energy Assessor',
+            'floor-height': '2.4', 'heating-cost-potential': '612', 'unheated-corridor-length': '',
+            'hot-water-cost-potential': '123', 'construction-age-band': 'England and Wales: 1930-1949',
+            'potential-energy-rating': 'B', 'mainheat-energy-eff': 'Very Poor', 'windows-env-eff': 'Good',
+            'lighting-energy-eff': 'Good', 'environment-impact-potential': '87',
+            'glazed-type': 'double glazing installed during or after 2002', 'heating-cost-current': '2278',
+            'address3': '', 'mainheatcont-description': 'Appliance thermostats', 'sheating-energy-eff': 'N/A',
+            'property-type': 'House', 'local-authority-label': 'Dudley', 'fixed-lighting-outlets-count': '9',
+            'energy-tariff': 'Single', 'mechanical-ventilation': 'natural', 'hot-water-cost-current': '604',
+            'county': '', 'postcode': 'DY1 4BP', 'solar-water-heating-flag': 'N', 'constituency': 'E14000671',
+            'co2-emissions-potential': '1.0', 'number-heated-rooms': '4',
+            'floor-description': 'Solid, no insulation (assumed)', 'energy-consumption-potential': '93',
+            'local-authority': 'E08000027', 'built-form': 'End-Terrace', 'number-open-fireplaces': '0',
+            'windows-description': 'Fully double glazed', 'glazed-area': 'Normal', 'inspection-date': '2024-03-13',
+            'mains-gas-flag': 'Y', 'co2-emiss-curr-per-floor-area': '83', 'address1': '6 Beech Road',
+            'heat-loss-corridor': '', 'flat-storey-count': '', 'constituency-label': 'Dudley North',
+            'roof-energy-eff': 'Very Poor', 'total-floor-area': '60.0', 'building-reference-number': '10005780080',
+            'environment-impact-current': '41', 'co2-emissions-current': '5.0',
+            'roof-description': 'Pitched, 12 mm loft insulation', 'floor-energy-eff': 'N/A',
+            'number-habitable-rooms': '4', 'address2': '', 'hot-water-env-eff': 'Poor', 'posttown': 'DUDLEY',
+            'mainheatc-energy-eff': 'Good', 'main-fuel': 'electricity (not community)', 'lighting-env-eff': 'Good',
+            'windows-energy-eff': 'Good', 'floor-env-eff': 'N/A', 'sheating-env-eff': 'N/A',
+            'lighting-description': 'Low energy lighting in 67% of fixed outlets', 'roof-env-eff': 'Very Poor',
+            'walls-energy-eff': 'Average', 'photo-supply': '0.0', 'lighting-cost-potential': '113',
+            'mainheat-env-eff': 'Poor', 'multi-glaze-proportion': '100', 'main-heating-controls': '',
+            'lodgement-datetime': '2024-03-13 11:29:11', 'flat-top-storey': '', 'current-energy-rating': 'F',
+            'secondheat-description': 'None', 'walls-env-eff': 'Average', 'transaction-type': 'rental',
+            'uprn': '90055152', 'current-energy-efficiency': '32', 'energy-consumption-current': '491',
+            'mainheat-description': 'Room heaters, electric', 'lighting-cost-current': '113',
+            'lodgement-date': '2024-03-13', 'extension-count': '1', 'mainheatc-env-eff': 'Good',
+            'lmk-key': '78ddf851b660e599a0894924d0e6b503980f5e0ad1aa711f8411718dc2989c44', 'wind-turbine-count': '0',
+            'tenure': 'Rented (social)', 'floor-level': '', 'potential-energy-efficiency': '87',
+            'hot-water-energy-eff': 'Very Poor', 'low-energy-lighting': '67',
+            'walls-description': 'Cavity wall, filled cavity',
+            'hotwater-description': 'Electric immersion, standard tariff'
+        }
+
+        cleaning_data = read_dataframe_from_s3_parquet(
+            bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet",
+        )
+
+        cleaned = read_from_s3(
+            s3_file_name="cleaned_epc_data/cleaned.bson",
+            bucket_name="retrofit-data-dev"
+        )
+        cleaned = msgpack.unpackb(cleaned, raw=False)
+
+        photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket="retrofit-data-dev")
+
+        epc = EPCRecord(
+            epc_records={
+                'original_epc': starting_epc,
+                'full_sap_epc': {},
+                'old_data': []
+            },
+            run_mode="newdata",
+            cleaning_data=cleaning_data
+        )
+
+        home = Property(
+            id=0,
+            address="",
+            postcode="",
+            epc_record=epc,
+            already_installed={},
+            non_invasive_recommendations={},
+        )
+        home.in_conservation_area = False
+        home.is_listed = False
+        home.is_heritage = False
+        home.restricted_measures = True
+        home.get_components(
+            cleaned=cleaned,
+            photo_supply_lookup=photo_supply_lookup,
+            floor_area_decile_thresholds=floor_area_decile_thresholds
+        )
+
+        recommender = HeatingRecommender(property_instance=home)
+        recommender.recommend_air_source_heat_pump(phase=0, has_cavity_or_loft_recommendations=False)
+        recommender.recommend_boiler_upgrades(phase=0, system_change=True, exising_room_heaters=False)
+
+        assert len(recommender.heating_recommendations) == 3
+
+        property_recommendations = Recommendations.insert_temp_recommendation_id([recommender.heating_recommendations])
+
+        home.create_base_difference_epc_record(cleaned_lookup=cleaned)
+        home.adjust_difference_record_with_recommendations(
+            property_recommendations, []
+        )
+
+        scoring_data = pd.DataFrame(home.recommendations_scoring_data).drop(
+            columns=["rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending",
+                     "carbon_ending"]
+        )
+
+        model_api = ModelApi(portfolio_id="ashp-test", timestamp=datetime.now().isoformat())
+        model_api.MODEL_PREFIXES = ["sap_change_predictions"]
+
+        predictions_dict = model_api.predict_all(
+            df=scoring_data,
+            bucket="retrofit-data-dev",
+            prediction_buckets={
+                "sap_change_predictions": "retrofit-sap-predictions-dev",
+            }
+        )
+
+        # The ASHP isn't better under SAP, compared to a gas boiler with good heat controls
+        assert predictions_dict["sap_change_predictions"]["predictions"].tolist() == [66.9, 65.5, 65.9]

From f0936bd1d48e70e0afc726d9e34e44de61b92ab8 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 7 May 2024 17:46:51 +0100
Subject: [PATCH 31/58] Added an extra test for solar

---
 .../tests/test_solar_pv_recommendations.py    | 161 ++++++++++++++++++
 1 file changed, 161 insertions(+)

diff --git a/recommendations/tests/test_solar_pv_recommendations.py b/recommendations/tests/test_solar_pv_recommendations.py
index 5481cb17..e912f373 100644
--- a/recommendations/tests/test_solar_pv_recommendations.py
+++ b/recommendations/tests/test_solar_pv_recommendations.py
@@ -2,6 +2,13 @@ import pytest
 from recommendations.SolarPvRecommendations import SolarPvRecommendations
 from backend.Property import Property
 from etl.epc.Record import EPCRecord
+import pandas as pd
+from datetime import datetime
+from utils.s3 import read_dataframe_from_s3_parquet, read_from_s3
+from etl.solar.SolarPhotoSupply import SolarPhotoSupply
+from recommendations.Recommendations import Recommendations
+from backend.ml_models.api import ModelApi
+import msgpack
 
 
 class TestSolarPvRecommendations:
@@ -82,3 +89,157 @@ class TestSolarPvRecommendations:
                 'photo_supply': 4000
             }
         ]
+
+    def test_model(self):
+        """
+        Scores the 50% coverage solar PV recommendations for 27 Cromwell Street with the SAP change model
+        :return: None, the test asserts the predicted SAP scores and the rating on the later lodgement
+        """
+        # Earlier lodgement (2021-12-01, photo-supply 0.0): the record the Property is built from
+        starting_epc = {
+            'low-energy-fixed-light-count': '', 'address': '27 Cromwell Street', 'uprn-source': 'Energy Assessor',
+            'floor-height': '2.5', 'heating-cost-potential': '443', 'unheated-corridor-length': '',
+            'hot-water-cost-potential': '53', 'construction-age-band': 'England and Wales: before 1900',
+            'potential-energy-rating': 'B', 'mainheat-energy-eff': 'Good', 'windows-env-eff': 'Average',
+            'lighting-energy-eff': 'Very Poor', 'environment-impact-potential': '85',
+            'glazed-type': 'double glazing installed before 2002', 'heating-cost-current': '904', 'address3': '',
+            'mainheatcont-description': 'Programmer, room thermostat and TRVs', 'sheating-energy-eff': 'N/A',
+            'property-type': 'House', 'local-authority-label': 'West Lindsey', 'fixed-lighting-outlets-count': '10',
+            'energy-tariff': 'Single', 'mechanical-ventilation': 'natural', 'hot-water-cost-current': '79',
+            'county': 'Lincolnshire', 'postcode': 'DN21 1DH', 'solar-water-heating-flag': 'N',
+            'constituency': 'E14000707', 'co2-emissions-potential': '1.5', 'number-heated-rooms': '5',
+            'floor-description': 'Suspended, no insulation (assumed)', 'energy-consumption-potential': '92',
+            'local-authority': 'E07000142', 'built-form': 'Mid-Terrace', 'number-open-fireplaces': '0',
+            'windows-description': 'Fully double glazed', 'glazed-area': 'Normal', 'inspection-date': '2021-11-17',
+            'mains-gas-flag': 'Y', 'co2-emiss-curr-per-floor-area': '61', 'address1': '27 Cromwell Street',
+            'heat-loss-corridor': '', 'flat-storey-count': '', 'constituency-label': 'Gainsborough',
+            'roof-energy-eff': 'Very Poor', 'total-floor-area': '89.0', 'building-reference-number': '10001989430',
+            'environment-impact-current': '47', 'co2-emissions-current': '5.4',
+            'roof-description': 'Pitched, no insulation (assumed)', 'floor-energy-eff': 'N/A',
+            'number-habitable-rooms': '5', 'address2': '', 'hot-water-env-eff': 'Good', 'posttown': 'GAINSBOROUGH',
+            'mainheatc-energy-eff': 'Good', 'main-fuel': 'mains gas (not community)', 'lighting-env-eff': 'Very Poor',
+            'windows-energy-eff': 'Average', 'floor-env-eff': 'N/A', 'sheating-env-eff': 'N/A',
+            'lighting-description': 'No low energy lighting', 'roof-env-eff': 'Very Poor',
+            'walls-energy-eff': 'Very Poor', 'photo-supply': '0.0', 'lighting-cost-potential': '72',
+            'mainheat-env-eff': 'Good', 'multi-glaze-proportion': '100', 'main-heating-controls': '',
+            'lodgement-datetime': '2021-12-01 10:12:23', 'flat-top-storey': '', 'current-energy-rating': 'E',
+            'secondheat-description': 'Room heaters, mains gas', 'walls-env-eff': 'Very Poor',
+            'transaction-type': 'ECO assessment', 'uprn': '100030949912', 'current-energy-efficiency': '54',
+            'energy-consumption-current': '346', 'mainheat-description': 'Boiler and radiators, mains gas',
+            'lighting-cost-current': '144', 'lodgement-date': '2021-12-01', 'extension-count': '2',
+            'mainheatc-env-eff': 'Good', 'lmk-key': '3ec5533af02ec78361c1f9bea8dd2e878c2c6fa6cf59e5cc505c3eeb038e0f91',
+            'wind-turbine-count': '0', 'tenure': 'Owner-occupied', 'floor-level': '',
+            'potential-energy-efficiency': '86', 'hot-water-energy-eff': 'Good', 'low-energy-lighting': '0',
+            'walls-description': 'Solid brick, as built, no insulation (assumed)',
+            'hotwater-description': 'From main system'
+        }
+        # Later lodgement for the same uprn (2021-12-21, photo-supply 50.0), only referenced by the final assert
+        ending_epc = {
+            'low-energy-fixed-light-count': '', 'address': '27 Cromwell Street', 'uprn-source': 'Energy Assessor',
+            'floor-height': '2.5', 'heating-cost-potential': '443', 'unheated-corridor-length': '',
+            'hot-water-cost-potential': '53', 'construction-age-band': 'England and Wales: before 1900',
+            'potential-energy-rating': 'B', 'mainheat-energy-eff': 'Good', 'windows-env-eff': 'Average',
+            'lighting-energy-eff': 'Very Poor', 'environment-impact-potential': '86',
+            'glazed-type': 'double glazing installed before 2002', 'heating-cost-current': '904', 'address3': '',
+            'mainheatcont-description': 'Programmer, room thermostat and TRVs', 'sheating-energy-eff': 'N/A',
+            'property-type': 'House', 'local-authority-label': 'West Lindsey', 'fixed-lighting-outlets-count': '10',
+            'energy-tariff': 'Single', 'mechanical-ventilation': 'natural', 'hot-water-cost-current': '79',
+            'county': 'Lincolnshire', 'postcode': 'DN21 1DH', 'solar-water-heating-flag': 'N',
+            'constituency': 'E14000707', 'co2-emissions-potential': '1.4', 'number-heated-rooms': '5',
+            'floor-description': 'Suspended, no insulation (assumed)', 'energy-consumption-potential': '84',
+            'local-authority': 'E07000142', 'built-form': 'Mid-Terrace', 'number-open-fireplaces': '0',
+            'windows-description': 'Fully double glazed', 'glazed-area': 'Normal', 'inspection-date': '2021-12-21',
+            'mains-gas-flag': 'Y', 'co2-emiss-curr-per-floor-area': '49', 'address1': '27 Cromwell Street',
+            'heat-loss-corridor': '', 'flat-storey-count': '', 'constituency-label': 'Gainsborough',
+            'roof-energy-eff': 'Very Poor', 'total-floor-area': '89.0', 'building-reference-number': '10001989430',
+            'environment-impact-current': '55', 'co2-emissions-current': '4.4',
+            'roof-description': 'Pitched, no insulation (assumed)', 'floor-energy-eff': 'N/A',
+            'number-habitable-rooms': '5', 'address2': '', 'hot-water-env-eff': 'Good', 'posttown': 'GAINSBOROUGH',
+            'mainheatc-energy-eff': 'Good', 'main-fuel': 'mains gas (not community)', 'lighting-env-eff': 'Very Poor',
+            'windows-energy-eff': 'Average', 'floor-env-eff': 'N/A', 'sheating-env-eff': 'N/A',
+            'lighting-description': 'No low energy lighting', 'roof-env-eff': 'Very Poor',
+            'walls-energy-eff': 'Very Poor', 'photo-supply': '50.0', 'lighting-cost-potential': '72',
+            'mainheat-env-eff': 'Good', 'multi-glaze-proportion': '100', 'main-heating-controls': '',
+            'lodgement-datetime': '2021-12-21 17:33:09', 'flat-top-storey': '', 'current-energy-rating': 'D',
+            'secondheat-description': 'Room heaters, mains gas', 'walls-env-eff': 'Very Poor',
+            'transaction-type': 'ECO assessment', 'uprn': '100030949912', 'current-energy-efficiency': '65',
+            'energy-consumption-current': '277', 'mainheat-description': 'Boiler and radiators, mains gas',
+            'lighting-cost-current': '144', 'lodgement-date': '2021-12-21', 'extension-count': '2',
+            'mainheatc-env-eff': 'Good', 'lmk-key': 'b0b19583c59afbc69db12f4d6c98cd8837e80da3214d577c426eb3e672d424fc',
+            'wind-turbine-count': '0', 'tenure': 'Owner-occupied', 'floor-level': '',
+            'potential-energy-efficiency': '88', 'hot-water-energy-eff': 'Good', 'low-energy-lighting': '0',
+            'walls-description': 'Solid brick, as built, no insulation (assumed)',
+            'hotwater-description': 'From main system'
+        }
+        # Reference data is read from the retrofit-data-dev s3 bucket
+        cleaning_data = read_dataframe_from_s3_parquet(
+            bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet",
+        )
+
+        cleaned = read_from_s3(
+            s3_file_name="cleaned_epc_data/cleaned.bson",
+            bucket_name="retrofit-data-dev"
+        )
+        cleaned = msgpack.unpackb(cleaned, raw=False)
+
+        photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket="retrofit-data-dev")
+
+        epc = EPCRecord(
+            epc_records={
+                'original_epc': starting_epc,
+                'full_sap_epc': {},
+                'old_data': []
+            },
+            run_mode="newdata",
+            cleaning_data=cleaning_data
+        )
+
+        home = Property(
+            id=0,
+            address="",
+            postcode="",
+            epc_record=epc,
+            already_installed={},
+            non_invasive_recommendations={},
+        )
+        home.in_conservation_area = False
+        home.is_listed = False
+        home.is_heritage = False
+        home.restricted_measures = True
+        home.get_components(
+            cleaned=cleaned,
+            photo_supply_lookup=photo_supply_lookup,
+            floor_area_decile_thresholds=floor_area_decile_thresholds
+        )
+
+        recommender = SolarPvRecommendations(property_instance=home)
+        recommender.recommend(phase=0)
+
+        coverage_50_percent = [x for x in recommender.recommendation if x["photo_supply"] == 50]
+        assert len(coverage_50_percent) == 2
+
+        property_recommendations = Recommendations.insert_temp_recommendation_id([coverage_50_percent])
+
+        home.create_base_difference_epc_record(cleaned_lookup=cleaned)
+        home.adjust_difference_record_with_recommendations(
+            property_recommendations, []
+        )
+
+        scoring_data = pd.DataFrame(home.recommendations_scoring_data).drop(
+            columns=["rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending",
+                     "carbon_ending"]
+        )
+        # Only the sap_change_predictions prefix is scored. NOTE(review): "ashp-test" looks copied from an ASHP test
+        model_api = ModelApi(portfolio_id="ashp-test", timestamp=datetime.now().isoformat())
+        model_api.MODEL_PREFIXES = ["sap_change_predictions"]
+
+        predictions_dict = model_api.predict_all(
+            df=scoring_data,
+            bucket="retrofit-data-dev",
+            prediction_buckets={
+                "sap_change_predictions": "retrofit-sap-predictions-dev",
+            }
+        )
+
+        assert predictions_dict["sap_change_predictions"]["predictions"].tolist() == [65.9, 65.9]
+        assert ending_epc["current-energy-efficiency"] == '65'

From 732f3eb356e61b444f7fff002d7f22f13051d5c3 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 7 May 2024 17:59:30 +0100
Subject: [PATCH 32/58] Added additional test for solar

---
 .../tests/test_solar_pv_recommendations.py    | 164 ++++++++++++++++++
 1 file changed, 164 insertions(+)

diff --git a/recommendations/tests/test_solar_pv_recommendations.py b/recommendations/tests/test_solar_pv_recommendations.py
index e912f373..fbbfe3a1 100644
--- a/recommendations/tests/test_solar_pv_recommendations.py
+++ b/recommendations/tests/test_solar_pv_recommendations.py
@@ -243,3 +243,167 @@ class TestSolarPvRecommendations:
 
         assert predictions_dict["sap_change_predictions"]["predictions"].tolist() == [65.9, 65.9]
         assert ending_epc["current-energy-efficiency"] == '65'
+
+    def test_model2(self):
+        """
+        Scores the 40% coverage solar PV recommendations for 6 Kenmare Crescent with the SAP change model.
+
+        Both lodgements for uprn 100030952942 are hardcoded below rather than fetched through SearchEpc,
+        so the test needs no EPC api credentials and no auth token has to live in this file.
+        The starting EPC (photo-supply 0.0) is the record the Property is built from, the ending EPC
+        (photo-supply 40.0) is only referenced by the final asserts.
+        The reference data is still read from the retrofit-data-dev s3 bucket.
+
+        NOTE(review): an EPC api auth token was previously committed inline here - treat it as exposed.
+        :return:
+        """
+
+        ending_epc = {
+            'low-energy-fixed-light-count': '', 'address': '6 Kenmare Crescent',
+            'uprn-source': 'Energy Assessor', 'floor-height': '2.49', 'heating-cost-potential': '464',
+            'unheated-corridor-length': '', 'hot-water-cost-potential': '46',
+            'construction-age-band': 'England and Wales: 1967-1975', 'potential-energy-rating': 'B',
+            'mainheat-energy-eff': 'Good', 'windows-env-eff': 'Average', 'lighting-energy-eff': 'Very Good',
+            'environment-impact-potential': '91', 'glazed-type': 'not defined', 'heating-cost-current': '535',
+            'address3': '', 'mainheatcont-description': 'Programmer, room thermostat and TRVs',
+            'sheating-energy-eff': 'N/A', 'property-type': 'Bungalow',
+            'local-authority-label': 'West Lindsey', 'fixed-lighting-outlets-count': '9',
+            'energy-tariff': 'Single', 'mechanical-ventilation': 'natural', 'hot-water-cost-current': '69',
+            'county': 'Lincolnshire', 'postcode': 'DN21 1PR', 'solar-water-heating-flag': 'N',
+            'constituency': 'E14000707', 'co2-emissions-potential': '0.7', 'number-heated-rooms': '3',
+            'floor-description': 'Suspended, no insulation (assumed)', 'energy-consumption-potential': '56',
+            'local-authority': 'E07000142', 'built-form': 'Semi-Detached', 'number-open-fireplaces': '0',
+            'windows-description': 'Fully double glazed', 'glazed-area': 'Much More Than Typical',
+            'inspection-date': '2022-08-24', 'mains-gas-flag': 'Y', 'co2-emiss-curr-per-floor-area': '18',
+            'address1': '6 Kenmare Crescent', 'heat-loss-corridor': '', 'flat-storey-count': '',
+            'constituency-label': 'Gainsborough', 'roof-energy-eff': 'Very Good', 'total-floor-area': '66.0',
+            'building-reference-number': '10002845316', 'environment-impact-current': '85',
+            'co2-emissions-current': '1.2', 'roof-description': 'Pitched, 300 mm loft insulation',
+            'floor-energy-eff': 'N/A', 'number-habitable-rooms': '3', 'address2': '',
+            'hot-water-env-eff': 'Good', 'posttown': 'GAINSBOROUGH', 'mainheatc-energy-eff': 'Good',
+            'main-fuel': 'mains gas (not community)', 'lighting-env-eff': 'Very Good',
+            'windows-energy-eff': 'Average', 'floor-env-eff': 'N/A', 'sheating-env-eff': 'N/A',
+            'lighting-description': 'Low energy lighting in all fixed outlets', 'roof-env-eff': 'Very Good',
+            'walls-energy-eff': 'Average', 'photo-supply': '40.0', 'lighting-cost-potential': '65',
+            'mainheat-env-eff': 'Good', 'multi-glaze-proportion': '100', 'main-heating-controls': '',
+            'lodgement-datetime': '2022-08-24 15:39:42', 'flat-top-storey': '', 'current-energy-rating': 'B',
+            'secondheat-description': 'Room heaters, electric', 'walls-env-eff': 'Average',
+            'transaction-type': 'ECO assessment', 'uprn': '100030952942', 'current-energy-efficiency': '87',
+            'energy-consumption-current': '100', 'mainheat-description': 'Boiler and radiators, mains gas',
+            'lighting-cost-current': '65', 'lodgement-date': '2022-08-24', 'extension-count': '0',
+            'mainheatc-env-eff': 'Good',
+            'lmk-key': 'e20be883431b1fed15db7fa1f52634fb7655d2b80c2fdad37df779f93ec4dafd',
+            'wind-turbine-count': '0', 'tenure': 'Owner-occupied', 'floor-level': '',
+            'potential-energy-efficiency': '91', 'hot-water-energy-eff': 'Good', 'low-energy-lighting': '100',
+            'walls-description': 'Cavity wall, filled cavity', 'hotwater-description': 'From main system'
+        }
+        starting_epc = {
+            'low-energy-fixed-light-count': '', 'address': '6 Kenmare Crescent', 'uprn-source': 'Energy Assessor',
+            'floor-height': '2.49', 'heating-cost-potential': '464', 'unheated-corridor-length': '',
+            'hot-water-cost-potential': '46', 'construction-age-band': 'England and Wales: 1967-1975',
+            'potential-energy-rating': 'B', 'mainheat-energy-eff': 'Good', 'windows-env-eff': 'Average',
+            'lighting-energy-eff': 'Very Good', 'environment-impact-potential': '85', 'glazed-type': 'not defined',
+            'heating-cost-current': '535', 'address3': '',
+            'mainheatcont-description': 'Programmer, room thermostat and TRVs', 'sheating-energy-eff': 'N/A',
+            'property-type': 'Bungalow', 'local-authority-label': 'West Lindsey', 'fixed-lighting-outlets-count': '9',
+            'energy-tariff': 'Single', 'mechanical-ventilation': 'natural', 'hot-water-cost-current': '69',
+            'county': 'Lincolnshire', 'postcode': 'DN21 1PR', 'solar-water-heating-flag': 'N',
+            'constituency': 'E14000707', 'co2-emissions-potential': '1.2', 'number-heated-rooms': '3',
+            'floor-description': 'Suspended, no insulation (assumed)', 'energy-consumption-potential': '102',
+            'local-authority': 'E07000142', 'built-form': 'Semi-Detached', 'number-open-fireplaces': '0',
+            'windows-description': 'Fully double glazed', 'glazed-area': 'Much More Than Typical',
+            'inspection-date': '2022-05-31', 'mains-gas-flag': 'Y', 'co2-emiss-curr-per-floor-area': '40',
+            'address1': '6 Kenmare Crescent', 'heat-loss-corridor': '', 'flat-storey-count': '',
+            'constituency-label': 'Gainsborough', 'roof-energy-eff': 'Very Good', 'total-floor-area': '66.0',
+            'building-reference-number': '10002845316', 'environment-impact-current': '68',
+            'co2-emissions-current': '2.6', 'roof-description': 'Pitched, 300 mm loft insulation',
+            'floor-energy-eff': 'N/A', 'number-habitable-rooms': '3', 'address2': '', 'hot-water-env-eff': 'Good',
+            'posttown': 'GAINSBOROUGH', 'mainheatc-energy-eff': 'Good', 'main-fuel': 'mains gas (not community)',
+            'lighting-env-eff': 'Very Good', 'windows-energy-eff': 'Average', 'floor-env-eff': 'N/A',
+            'sheating-env-eff': 'N/A', 'lighting-description': 'Low energy lighting in all fixed outlets',
+            'roof-env-eff': 'Very Good', 'walls-energy-eff': 'Average', 'photo-supply': '0.0',
+            'lighting-cost-potential': '65', 'mainheat-env-eff': 'Good', 'multi-glaze-proportion': '100',
+            'main-heating-controls': '', 'lodgement-datetime': '2022-06-15 08:38:02', 'flat-top-storey': '',
+            'current-energy-rating': 'D', 'secondheat-description': 'Room heaters, electric',
+            'walls-env-eff': 'Average', 'transaction-type': 'ECO assessment', 'uprn': '100030952942',
+            'current-energy-efficiency': '68', 'energy-consumption-current': '227',
+            'mainheat-description': 'Boiler and radiators, mains gas', 'lighting-cost-current': '65',
+            'lodgement-date': '2022-06-15', 'extension-count': '0', 'mainheatc-env-eff': 'Good',
+            'lmk-key': 'ce181970b7077cb9b4626242bfb010b30a0e48541b5f22427e81f1adbeeec4f2', 'wind-turbine-count': '0',
+            'tenure': 'Owner-occupied', 'floor-level': '', 'potential-energy-efficiency': '85',
+            'hot-water-energy-eff': 'Good', 'low-energy-lighting': '100',
+            'walls-description': 'Cavity wall, filled cavity', 'hotwater-description': 'From main system'
+        }
+
+        cleaning_data = read_dataframe_from_s3_parquet(
+            bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet",
+        )
+
+        cleaned = read_from_s3(
+            s3_file_name="cleaned_epc_data/cleaned.bson",
+            bucket_name="retrofit-data-dev"
+        )
+        cleaned = msgpack.unpackb(cleaned, raw=False)
+
+        photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket="retrofit-data-dev")
+
+        epc = EPCRecord(
+            epc_records={
+                'original_epc': starting_epc,
+                'full_sap_epc': {},
+                'old_data': []
+            },
+            run_mode="newdata",
+            cleaning_data=cleaning_data
+        )
+
+        home = Property(
+            id=0,
+            address="",
+            postcode="",
+            epc_record=epc,
+            already_installed={},
+            non_invasive_recommendations={},
+        )
+        home.in_conservation_area = False
+        home.is_listed = False
+        home.is_heritage = False
+        home.restricted_measures = True
+        home.get_components(
+            cleaned=cleaned,
+            photo_supply_lookup=photo_supply_lookup,
+            floor_area_decile_thresholds=floor_area_decile_thresholds
+        )
+
+        recommender = SolarPvRecommendations(property_instance=home)
+        recommender.recommend(phase=0)
+
+        coverage_40_percent = [x for x in recommender.recommendation if x["photo_supply"] == 40]
+        assert len(coverage_40_percent) == 2
+
+        property_recommendations = Recommendations.insert_temp_recommendation_id([coverage_40_percent])
+
+        home.create_base_difference_epc_record(cleaned_lookup=cleaned)
+        home.adjust_difference_record_with_recommendations(
+            property_recommendations, []
+        )
+
+        scoring_data = pd.DataFrame(home.recommendations_scoring_data).drop(
+            columns=["rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending",
+                     "carbon_ending"]
+        )
+
+        model_api = ModelApi(portfolio_id="ashp-test", timestamp=datetime.now().isoformat())
+        model_api.MODEL_PREFIXES = ["sap_change_predictions"]
+
+        predictions_dict = model_api.predict_all(
+            df=scoring_data,
+            bucket="retrofit-data-dev",
+            prediction_buckets={
+                "sap_change_predictions": "retrofit-sap-predictions-dev",
+            }
+        )
+
+        assert predictions_dict["sap_change_predictions"]["predictions"].tolist() == [87.1, 87.1]
+        assert ending_epc["current-energy-efficiency"] == '87'
+        assert starting_epc["current-energy-efficiency"] == '68'

From cc170e1fa3d7cfdbb138538b318581ee0df0d2b1 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 9 May 2024 16:32:22 +0100
Subject: [PATCH 33/58] set up vander elliot asset list

---
 .idea/Model.iml                      |   2 +-
 .idea/misc.xml                       |   2 +-
 backend/ml_models/Valuation.py       |  11 +++
 etl/customers/vander_elliot/pilot.py | 105 +++++++++++++++++++++++++++
 4 files changed, 118 insertions(+), 2 deletions(-)
 create mode 100644 etl/customers/vander_elliot/pilot.py

diff --git a/.idea/Model.iml b/.idea/Model.iml
index b0f9c00d..4413bb06 100644
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@@ -7,7 +7,7 @@
       <sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
       <sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
     </content>
-    <orderEntry type="jdk" jdkName="Python 3.10 (model_data)" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Python 3.10 (backend)" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="PyNamespacePackagesService">
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 1122b380..6f308057 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,7 +3,7 @@
   <component name="Black">
     <option name="sdkName" value="Python 3.10 (backend)" />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (model_data)" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (backend)" project-jdk-type="Python SDK" />
   <component name="PythonCompatibilityInspectionAdvertiser">
     <option name="version" value="3" />
   </component>
diff --git a/backend/ml_models/Valuation.py b/backend/ml_models/Valuation.py
index 5c781979..cfd775e7 100644
--- a/backend/ml_models/Valuation.py
+++ b/backend/ml_models/Valuation.py
@@ -79,6 +79,17 @@ class PropertyValuation:
         100070520130: 177_000,  # Based on Zoopla
         100070333957: 185_000,  # Based on Zoopla
         100070543258: 211_000,  # Based on Zoopla
+        # Vander Elliot Pilot - search by going to https://www.zoopla.co.uk/property/uprn/{uprn}/
+        41018850: 104_000,  # Based on Zoopla
+        38237316: 74_000,  # Based on Zoopla
+        38237317: 74_000,  # Based on Zoopla
+        41052320: 70_000,  # Based on Zoopla
+        41052321: 70_000,  # Based on Zoopla
+        41052322: 38_000,  # Based on Zoopla
+        41222759: 38_000,  # Based on Zoopla
+        41222760: 46_000,  # Based on Zoopla
+        41222761: 270_000,  # Based on Zoopla
+        41212534: 38_000,  # Based on Zoopla
     }
 
     # We base our valuation uplifts on a number of sources
diff --git a/etl/customers/vander_elliot/pilot.py b/etl/customers/vander_elliot/pilot.py
new file mode 100644
index 00000000..3c52869b
--- /dev/null
+++ b/etl/customers/vander_elliot/pilot.py
@@ -0,0 +1,105 @@
+import pandas as pd
+from utils.s3 import save_csv_to_s3
+
+EPC_C_PORTFOLIO_ID = 78
+EPC_B_PORTFOLIO_ID = 79
+USER_ID = 8
+
+
+def app():
+    """
+    Builds the 10 property asset list for the pilot, stores it in s3 and prints both portfolio request bodies
+    :return:
+    """
+
+    asset_list = [
+        {
+            "address": "79 Clare Road",
+            "postcode": "L20 9LZ",
+            "uprn": 41018850,  # 3 bedroom property
+        },
+        {
+            "address": "Flat 1, 29 Bedford Road",
+            "postcode": "L4 5PS",
+            "uprn": 38237316  # Single dwelling converted into two flats
+        },
+        {
+            "address": "Flat 2, 29 Bedford Road",
+            "postcode": "L4 5PS",
+            "uprn": 38237317  # Single dwelling converted into two flats
+        },
+        # 7 Flats above a domestic unit
+        {
+            "address": "Flat 1, 2 Linacre Lane",
+            "postcode": "L20 5AH",
+            "uprn": 41052320
+        },
+        {
+            "address": "Flat 2, 2 Linacre Lane",
+            "postcode": "L20 5AH",
+            "uprn": 41052321,
+        },
+        {
+            "address": "Flat 3, 2 Linacre Lane",
+            "postcode": "L20 5AH",
+            "uprn": 41052322,
+        },
+        {
+            "address": "Flat 4, 2 Linacre Lane",
+            "postcode": "L20 5AH",
+            "uprn": 41222759,
+        },
+        {
+            "address": "Flat 1, 4 Linacre Lane",
+            "postcode": "L20 5AH",
+            "uprn": 41222760,
+        },
+        {
+            "address": "Flat 2, 4 Linacre Lane",
+            "postcode": "L20 5AH",
+            "uprn": 41222761,
+        },
+        {
+            "address": "Flat 3, 4 Linacre Lane",
+            "postcode": "L20 5AH",
+            "uprn": 41212534,
+        },
+    ]
+
+    asset_list = pd.DataFrame(asset_list)
+
+    # Store the asset list in s3, keyed by the user id and the EPC C portfolio id
+    filename = f"{USER_ID}/{EPC_C_PORTFOLIO_ID}/pilot.csv"
+    save_csv_to_s3(
+        dataframe=asset_list,
+        bucket_name="retrofit-plan-inputs-dev",
+        file_name=filename
+    )
+
+    # EPC C portfolio - the body is only printed here, nothing is submitted by this function
+    body = {
+        "portfolio_id": str(EPC_C_PORTFOLIO_ID),
+        "housing_type": "Private",
+        "goal": "Increase EPC",
+        "goal_value": "C",
+        "trigger_file_path": filename,
+        "already_installed_file_path": "",
+        "patches_file_path": "",
+        "non_invasive_recommendations_file_path": "",
+        "budget": None,
+    }
+    print(body)
+
+    # EPC B portfolio - reuses the asset list file stored under the EPC C portfolio path
+    body = {
+        "portfolio_id": str(EPC_B_PORTFOLIO_ID),
+        "housing_type": "Private",
+        "goal": "Increase EPC",
+        "goal_value": "B",
+        "trigger_file_path": filename,
+        "already_installed_file_path": "",
+        "patches_file_path": "",
+        "non_invasive_recommendations_file_path": "",
+        "budget": None,
+    }
+    print(body)

From 8d783c0e6d2dd2fc83fdafa52414fe451c7e4124 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 20 May 2024 18:05:47 +0100
Subject: [PATCH 34/58] added audit scenarios for ha analysis

---
 .idea/Model.iml                               |   2 +-
 .idea/misc.xml                                |   2 +-
 backend/apis/GoogleSolarApi.py                | 334 ++++++++++++++++++
 .../places_for_people/route_march.py          | 165 ++++++++-
 .../ha_15_32/ha_analysis_batch_3.py           | 204 ++++++-----
 5 files changed, 618 insertions(+), 89 deletions(-)
 create mode 100644 backend/apis/GoogleSolarApi.py

diff --git a/.idea/Model.iml b/.idea/Model.iml
index 4413bb06..b0f9c00d 100644
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@@ -7,7 +7,7 @@
       <sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
       <sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
     </content>
-    <orderEntry type="jdk" jdkName="Python 3.10 (backend)" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Python 3.10 (model_data)" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="PyNamespacePackagesService">
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 6f308057..1122b380 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,7 +3,7 @@
   <component name="Black">
     <option name="sdkName" value="Python 3.10 (backend)" />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (backend)" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (model_data)" project-jdk-type="Python SDK" />
   <component name="PythonCompatibilityInspectionAdvertiser">
     <option name="version" value="3" />
   </component>
diff --git a/backend/apis/GoogleSolarApi.py b/backend/apis/GoogleSolarApi.py
new file mode 100644
index 00000000..86324c58
--- /dev/null
+++ b/backend/apis/GoogleSolarApi.py
@@ -0,0 +1,334 @@
+from backend.Property import Property
+from backend.SearchEpc import SearchEpc
+from etl.epc.Record import EPCRecord
+from dotenv import load_dotenv
+from utils.s3 import read_dataframe_from_s3_parquet
+import os
+import requests
+
+load_dotenv(dotenv_path="backend/.env")
+EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
+
+# This is for 6 Laura Close, Tintagel, PL34 0EB (the same property that Cotswold Energy used)
+uprn = 100040099104
+
+cleaning_data = read_dataframe_from_s3_parquet(
+    bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet",
+)
+
+searcher = SearchEpc(address1="6 Laura Close", postcode="PL34 0EB", uprn=uprn, auth_token=EPC_AUTH_TOKEN, os_api_key="")
+
+searcher.find_property(skip_os=True)
+
+epc_records = {
+    'original_epc': searcher.newest_epc.copy(),
+    'full_sap_epc': searcher.full_sap_epc.copy(),
+    'old_data': searcher.older_epcs.copy(),
+}
+
+epc = EPCRecord(
+    epc_records=epc_records,
+    run_mode="newdata",
+    cleaning_data=cleaning_data
+)
+
+uprn_filenames = read_dataframe_from_s3_parquet(
+    bucket_name="retrofit-data-dev", file_key="spatial/filename_meta.parquet"
+)
+
+p = Property(
+    id=0,
+    address=searcher.address_clean,
+    postcode=searcher.postcode_clean,
+    epc_record=epc,
+    already_installed={},
+    non_invasive_recommendations={},
+)
+
+p.get_spatial_data(uprn_filenames)
+
+longitude = p.spatial["longitude"]
+latitude = p.spatial["latitude"]
+
+api_key = "AIzaSyCIz8Psu5h-1txuDX0rQpUTgkvdj8yohqU"
+url = 'https://solar.googleapis.com/v1/solarPotential'
+params = {
+    'location.latitude': f'{latitude:.5f}',
+    'location.longitude': f'{longitude:.5f}',
+    'requiredQuality': "MEDIUM",
+    'key': api_key
+}
+
+insights_url = 'https://solar.googleapis.com/v1/buildingInsights:findClosest'
+
+# Make the GET request to the Solar API
+insights_response = requests.get(insights_url, params=params)
+insights_data = insights_response.json()
+
+solar_potential = insights_data["solarPotential"]
+
+from pprint import pprint
+
+pprint(solar_potential)
+
+# This is the size of the panels used in the calculation - 400 watt
+solar_potential["panelCapacityWatts"]
+# Height of the panels used
+solar_potential["panelHeightMeters"]
+# Width of the panels used
+solar_potential["panelWidthMeters"]
+
+solar_potential["wholeRoofStats"]
+
+# Copy of response for testing:
+# {'name': 'buildings/ChIJ2yC6t4KEa0gRh2TIssogI7k', 'center': {'latitude': 50.667375, 'longitude': -4.7416833},
+# 'imageryDate': {'year': 2021, 'month': 7, 'day': 19}, 'regionCode': 'GB', 'solarPotential': {'maxArrayPanelsCount':
+# 39, 'maxArrayAreaMeters2': 76.578636, 'maxSunshineHoursPerYear': 1172.0627, 'carbonOffsetFactorKgPerMwh':
+# 478.99942, 'wholeRoofStats': {'areaMeters2': 129.65686, 'sunshineQuantiles': [537, 738.3836, 805.62445, 842.6802,
+# 909.8431, 972.15234, 1036.1013, 1092.051, 1135.8192, 1163.1444, 1193.6012], 'groundAreaMeters2': 112.33},
+# 'roofSegmentStats': [{'pitchDegrees': 31.443022, 'azimuthDegrees': 218.25331, 'stats': {'areaMeters2': 44.08321,
+# 'sunshineQuantiles': [614, 940.86975, 982.39124, 1057.0664, 1109.6869, 1137.5837, 1152.9211, 1163.1106, 1168.2212,
+# 1170.8883, 1193.6012], 'groundAreaMeters2': 37.61}, 'center': {'latitude': 50.6673664, 'longitude':
+# -4.741714099999999}, 'boundingBox': {'sw': {'latitude': 50.6673354, 'longitude': -4.741777}, 'ne': {'latitude':
+# 50.6674029, 'longitude': -4.7416472}}, 'planeHeightAtCenterMeters': 93.0221}, {'pitchDegrees': 34.39779,
+# 'azimuthDegrees': 31.74401, 'stats': {'areaMeters2': 44.622986, 'sunshineQuantiles': [537, 671.49774, 733.84985,
+# 780.82733, 801.4026, 814.0189, 824.0077, 847.77484, 895.08295, 950.1469, 1123.3503], 'groundAreaMeters2': 36.82},
+# 'center': {'latitude': 50.6673966, 'longitude': -4.7416813}, 'boundingBox': {'sw': {'latitude': 50.667361,
+# 'longitude': -4.7417497}, 'ne': {'latitude': 50.6674303, 'longitude': -4.741615599999999}},
+# 'planeHeightAtCenterMeters': 92.87593}, {'pitchDegrees': 3.0681775, 'azimuthDegrees': 301.1099, 'stats': {
+# 'areaMeters2': 17.074476, 'sunshineQuantiles': [644.71136, 731.0546, 782.89813, 842.7107, 908.55585, 966.6212,
+# 1010.6367, 1038.2543, 1053.2788, 1090.6831, 1128.0178], 'groundAreaMeters2': 17.050001}, 'center': {'latitude':
+# 50.66740850000001, 'longitude': -4.7416025}, 'boundingBox': {'sw': {'latitude': 50.6673895, 'longitude':
+# -4.7416436}, 'ne': {'latitude': 50.667431199999996, 'longitude': -4.7415572}}, 'planeHeightAtCenterMeters':
+# 90.630356}, {'pitchDegrees': 27.093596, 'azimuthDegrees': 132.60162, 'stats': {'areaMeters2': 13.501617,
+# 'sunshineQuantiles': [749, 976.85345, 1059.0062, 1081.6173, 1097.4441, 1110.3171, 1128.2186, 1133.9421, 1142.068,
+# 1148.2168, 1157.632], 'groundAreaMeters2': 12.02}, 'center': {'latitude': 50.667315699999996, 'longitude':
+# -4.741675400000001}, 'boundingBox': {'sw': {'latitude': 50.667291399999996, 'longitude': -4.7417066},
+# 'ne': {'latitude': 50.6673372, 'longitude': -4.741648400000001}}, 'planeHeightAtCenterMeters': 92.36334},
+# {'pitchDegrees': 31.666294, 'azimuthDegrees': 308.42334, 'stats': {'areaMeters2': 10.374564, 'sunshineQuantiles': [
+# 617.9507, 752.2504, 847.66315, 872.0505, 881.26227, 900.9639, 933.3188, 967.4747, 1000.8129, 1038.3002, 1105.545],
+# 'groundAreaMeters2': 8.83}, 'center': {'latitude': 50.6673295, 'longitude': -4.7417128}, 'boundingBox': {'sw': {
+# 'latitude': 50.6673134, 'longitude': -4.7417422}, 'ne': {'latitude': 50.6673413, 'longitude': -4.7416775}},
+# 'planeHeightAtCenterMeters': 92.31146}], 'solarPanelConfigs': [{'panelsCount': 4, 'yearlyEnergyDcKwh': 1867.1516,
+# 'roofSegmentSummaries': [{'pitchDegrees': 31.443022, 'azimuthDegrees': 218.25331, 'panelsCount': 4,
+# 'yearlyEnergyDcKwh': 1867.1515, 'segmentIndex': 0}]}, {'panelsCount': 5, 'yearlyEnergyDcKwh': 2335.0068,
+# 'roofSegmentSummaries': [{'pitchDegrees': 31.443022, 'azimuthDegrees': 218.25331, 'panelsCount': 5,
+# 'yearlyEnergyDcKwh': 2335.0068, 'segmentIndex': 0}]}, {'panelsCount': 6, 'yearlyEnergyDcKwh': 2799.8508,
+# 'roofSegmentSummaries': [{'pitchDegrees': 31.443022, 'azimuthDegrees': 218.25331, 'panelsCount': 6,
+# 'yearlyEnergyDcKwh': 2799.8508, 'segmentIndex': 0}]}, {'panelsCount': 7, 'yearlyEnergyDcKwh': 3264.6506,
+# 'roofSegmentSummaries': [{'pitchDegrees': 31.443022, 'azimuthDegrees': 218.25331, 'panelsCount': 7,
+# 'yearlyEnergyDcKwh': 3264.6506, 'segmentIndex': 0}]}, {'panelsCount': 8, 'yearlyEnergyDcKwh': 3726.2405,
+# 'roofSegmentSummaries': [{'pitchDegrees': 31.443022, 'azimuthDegrees': 218.25331, 'panelsCount': 8,
+# 'yearlyEnergyDcKwh': 3726.2405, 'segmentIndex': 0}]}, {'panelsCount': 9, 'yearlyEnergyDcKwh': 4187.721,
+# 'roofSegmentSummaries': [{'pitchDegrees': 31.443022, 'azimuthDegrees': 218.25331, 'panelsCount': 9,
+# 'yearlyEnergyDcKwh': 4187.721, 'segmentIndex': 0}]}, {'panelsCount': 10, 'yearlyEnergyDcKwh': 4646.094,
+# 'roofSegmentSummaries': [{'pitchDegrees': 31.443022, 'azimuthDegrees': 218.25331, 'panelsCount': 10,
+# 'yearlyEnergyDcKwh': 4646.094, 'segmentIndex': 0}]}, {'panelsCount': 11, 'yearlyEnergyDcKwh': 5103.777,
+# 'roofSegmentSummaries': [{'pitchDegrees': 31.443022, 'azimuthDegrees': 218.25331, 'panelsCount': 10,
+# 'yearlyEnergyDcKwh': 4646.094, 'segmentIndex': 0}, {'pitchDegrees': 27.093596, 'azimuthDegrees': 132.60162,
+# 'panelsCount': 1, 'yearlyEnergyDcKwh': 457.68268, 'segmentIndex': 3}]}, {'panelsCount': 12, 'yearlyEnergyDcKwh':
+# 5559.845, 'roofSegmentSummaries': [{'pitchDegrees': 31.443022, 'azimuthDegrees': 218.25331, 'panelsCount': 10,
+# 'yearlyEnergyDcKwh': 4646.094, 'segmentIndex': 0}, {'pitchDegrees': 27.093596, 'azimuthDegrees': 132.60162,
+# 'panelsCount': 2, 'yearlyEnergyDcKwh': 913.7509, 'segmentIndex': 3}]}, {'panelsCount': 13, 'yearlyEnergyDcKwh':
+# 6013.053, 'roofSegmentSummaries': [{'pitchDegrees': 31.443022, 'azimuthDegrees': 218.25331, 'panelsCount': 11,
+# 'yearlyEnergyDcKwh': 5099.302, 'segmentIndex': 0}, {'pitchDegrees': 27.093596, 'azimuthDegrees': 132.60162,
+# 'panelsCount': 2, 'yearlyEnergyDcKwh': 913.7509, 'segmentIndex': 3}]}, {'panelsCount': 14, 'yearlyEnergyDcKwh':
+# 6461.664, 'roofSegmentSummaries': [{'pitchDegrees': 31.443022, 'azimuthDegrees': 218.25331, 'panelsCount': 12,
+# 'yearlyEnergyDcKwh': 5547.9126, 'segmentIndex': 0}, {'pitchDegrees': 27.093596, 'azimuthDegrees': 132.60162,
+# 'panelsCount': 2, 'yearlyEnergyDcKwh': 913.7509, 'segmentIndex': 3}]}, {'panelsCount': 15, 'yearlyEnergyDcKwh':
+# 6902.33, 'roofSegmentSummaries': [{'pitchDegrees': 31.443022, 'azimuthDegrees': 218.25331, 'panelsCount': 12,
+# 'yearlyEnergyDcKwh': 5547.9126, 'segmentIndex': 0}, {'pitchDegrees': 27.093596, 'azimuthDegrees': 132.60162,
+# 'panelsCount': 3, 'yearlyEnergyDcKwh': 1354.4171, 'segmentIndex': 3}]}, {'panelsCount': 16, 'yearlyEnergyDcKwh':
+# 7321.6436, 'roofSegmentSummaries': [{'pitchDegrees': 31.443022, 'azimuthDegrees': 218.25331, 'panelsCount': 12,
+# 'yearlyEnergyDcKwh': 5547.9126, 'segmentIndex': 0}, {'pitchDegrees': 3.0681775, 'azimuthDegrees': 301.1099,
+# 'panelsCount': 1, 'yearlyEnergyDcKwh': 419.31348, 'segmentIndex': 2}, {'pitchDegrees': 27.093596, 'azimuthDegrees':
+# 132.60162, 'panelsCount': 3, 'yearlyEnergyDcKwh': 1354.4171, 'segmentIndex': 3}]}, {'panelsCount': 17,
+# 'yearlyEnergyDcKwh': 7740.388, 'roofSegmentSummaries': [{'pitchDegrees': 31.443022, 'azimuthDegrees': 218.25331,
+# 'panelsCount': 12, 'yearlyEnergyDcKwh': 5547.9126, 'segmentIndex': 0}, {'pitchDegrees': 3.0681775,
+# 'azimuthDegrees': 301.1099, 'panelsCount': 2, 'yearlyEnergyDcKwh': 838.0579, 'segmentIndex': 2}, {'pitchDegrees':
+# 27.093596, 'azimuthDegrees': 132.60162, 'panelsCount': 3, 'yearlyEnergyDcKwh': 1354.4171, 'segmentIndex': 3}]},
+# {'panelsCount': 18, 'yearlyEnergyDcKwh': 8154.265, 'roofSegmentSummaries': [{'pitchDegrees': 31.443022,
+# 'azimuthDegrees': 218.25331, 'panelsCount': 13, 'yearlyEnergyDcKwh': 5961.7896, 'segmentIndex': 0},
+# {'pitchDegrees': 3.0681775, 'azimuthDegrees': 301.1099, 'panelsCount': 2, 'yearlyEnergyDcKwh': 838.0579,
+# 'segmentIndex': 2}, {'pitchDegrees': 27.093596, 'azimuthDegrees': 132.60162, 'panelsCount': 3, 'yearlyEnergyDcKwh':
+# 1354.4171, 'segmentIndex': 3}]}, {'panelsCount': 19, 'yearlyEnergyDcKwh': 8566.032, 'roofSegmentSummaries': [{
+# 'pitchDegrees': 31.443022, 'azimuthDegrees': 218.25331, 'panelsCount': 14, 'yearlyEnergyDcKwh': 6373.556,
+# 'segmentIndex': 0}, {'pitchDegrees': 3.0681775, 'azimuthDegrees': 301.1099, 'panelsCount': 2, 'yearlyEnergyDcKwh':
+# 838.0579, 'segmentIndex': 2}, {'pitchDegrees': 27.093596, 'azimuthDegrees': 132.60162, 'panelsCount': 3,
+# 'yearlyEnergyDcKwh': 1354.4171, 'segmentIndex': 3}]}, {'panelsCount': 20, 'yearlyEnergyDcKwh': 8976.624,
+# 'roofSegmentSummaries': [{'pitchDegrees': 31.443022, 'azimuthDegrees': 218.25331, 'panelsCount': 15,
+# 'yearlyEnergyDcKwh': 6784.1484, 'segmentIndex': 0}, {'pitchDegrees': 3.0681775, 'azimuthDegrees': 301.1099,
+# 'panelsCount': 2, 'yearlyEnergyDcKwh': 838.0579, 'segmentIndex': 2}, {'pitchDegrees': 27.093596, 'azimuthDegrees':
+# 132.60162, 'panelsCount': 3, 'yearlyEnergyDcKwh': 1354.4171, 'segmentIndex': 3}]}, {'panelsCount': 21,
+# 'yearlyEnergyDcKwh': 9380.78, 'roofSegmentSummaries': [{'pitchDegrees': 31.443022, 'azimuthDegrees': 218.25331,
+# 'panelsCount': 15, 'yearlyEnergyDcKwh': 6784.1484, 'segmentIndex': 0}, {'pitchDegrees': 3.0681775,
+# 'azimuthDegrees': 301.1099, 'panelsCount': 3, 'yearlyEnergyDcKwh': 1242.214, 'segmentIndex': 2}, {'pitchDegrees':
+# 27.093596, 'azimuthDegrees': 132.60162, 'panelsCount': 3, 'yearlyEnergyDcKwh': 1354.4171, 'segmentIndex': 3}]},
+# {'panelsCount': 22, 'yearlyEnergyDcKwh': 9784.078, 'roofSegmentSummaries': [{'pitchDegrees': 31.443022,
+# 'azimuthDegrees': 218.25331, 'panelsCount': 15, 'yearlyEnergyDcKwh': 6784.1484, 'segmentIndex': 0},
+# {'pitchDegrees': 3.0681775, 'azimuthDegrees': 301.1099, 'panelsCount': 4, 'yearlyEnergyDcKwh': 1645.5122,
+# 'segmentIndex': 2}, {'pitchDegrees': 27.093596, 'azimuthDegrees': 132.60162, 'panelsCount': 3, 'yearlyEnergyDcKwh':
+# 1354.4171, 'segmentIndex': 3}]}, {'panelsCount': 23, 'yearlyEnergyDcKwh': 10162.354, 'roofSegmentSummaries': [{
+# 'pitchDegrees': 31.443022, 'azimuthDegrees': 218.25331, 'panelsCount': 15, 'yearlyEnergyDcKwh': 6784.1484,
+# 'segmentIndex': 0}, {'pitchDegrees': 3.0681775, 'azimuthDegrees': 301.1099, 'panelsCount': 4, 'yearlyEnergyDcKwh':
+# 1645.5122, 'segmentIndex': 2}, {'pitchDegrees': 27.093596, 'azimuthDegrees': 132.60162, 'panelsCount': 3,
+# 'yearlyEnergyDcKwh': 1354.4171, 'segmentIndex': 3}, {'pitchDegrees': 31.666294, 'azimuthDegrees': 308.42334,
+# 'panelsCount': 1, 'yearlyEnergyDcKwh': 378.2754, 'segmentIndex': 4}]}, {'panelsCount': 24, 'yearlyEnergyDcKwh':
+# 10535.894, 'roofSegmentSummaries': [{'pitchDegrees': 31.443022, 'azimuthDegrees': 218.25331, 'panelsCount': 15,
+# 'yearlyEnergyDcKwh': 6784.1484, 'segmentIndex': 0}, {'pitchDegrees': 3.0681775, 'azimuthDegrees': 301.1099,
+# 'panelsCount': 5, 'yearlyEnergyDcKwh': 2019.0519, 'segmentIndex': 2}, {'pitchDegrees': 27.093596, 'azimuthDegrees':
+# 132.60162, 'panelsCount': 3, 'yearlyEnergyDcKwh': 1354.4171, 'segmentIndex': 3}, {'pitchDegrees': 31.666294,
+# 'azimuthDegrees': 308.42334, 'panelsCount': 1, 'yearlyEnergyDcKwh': 378.2754, 'segmentIndex': 4}]}, {'panelsCount':
+# 25, 'yearlyEnergyDcKwh': 10901.273, 'roofSegmentSummaries': [{'pitchDegrees': 31.443022, 'azimuthDegrees':
+# 218.25331, 'panelsCount': 15, 'yearlyEnergyDcKwh': 6784.1484, 'segmentIndex': 0}, {'pitchDegrees': 3.0681775,
+# 'azimuthDegrees': 301.1099, 'panelsCount': 5, 'yearlyEnergyDcKwh': 2019.0519, 'segmentIndex': 2}, {'pitchDegrees':
+# 27.093596, 'azimuthDegrees': 132.60162, 'panelsCount': 3, 'yearlyEnergyDcKwh': 1354.4171, 'segmentIndex': 3},
+# {'pitchDegrees': 31.666294, 'azimuthDegrees': 308.42334, 'panelsCount': 2, 'yearlyEnergyDcKwh': 743.65497,
+# 'segmentIndex': 4}]}, {'panelsCount': 26, 'yearlyEnergyDcKwh': 11242.756, 'roofSegmentSummaries': [{'pitchDegrees':
+# 31.443022, 'azimuthDegrees': 218.25331, 'panelsCount': 15, 'yearlyEnergyDcKwh': 6784.1484, 'segmentIndex': 0},
+# {'pitchDegrees': 34.39779, 'azimuthDegrees': 31.74401, 'panelsCount': 1, 'yearlyEnergyDcKwh': 341.4827,
+# 'segmentIndex': 1}, {'pitchDegrees': 3.0681775, 'azimuthDegrees': 301.1099, 'panelsCount': 5, 'yearlyEnergyDcKwh':
+# 2019.0519, 'segmentIndex': 2}, {'pitchDegrees': 27.093596, 'azimuthDegrees': 132.60162, 'panelsCount': 3,
+# 'yearlyEnergyDcKwh': 1354.4171, 'segmentIndex': 3}, {'pitchDegrees': 31.666294, 'azimuthDegrees': 308.42334,
+# 'panelsCount': 2, 'yearlyEnergyDcKwh': 743.65497, 'segmentIndex': 4}]}, {'panelsCount': 27, 'yearlyEnergyDcKwh':
+# 11579.401, 'roofSegmentSummaries': [{'pitchDegrees': 31.443022, 'azimuthDegrees': 218.25331, 'panelsCount': 15,
+# 'yearlyEnergyDcKwh': 6784.1484, 'segmentIndex': 0}, {'pitchDegrees': 34.39779, 'azimuthDegrees': 31.74401,
+# 'panelsCount': 2, 'yearlyEnergyDcKwh': 678.1277, 'segmentIndex': 1}, {'pitchDegrees': 3.0681775, 'azimuthDegrees':
+# 301.1099, 'panelsCount': 5, 'yearlyEnergyDcKwh': 2019.0519, 'segmentIndex': 2}, {'pitchDegrees': 27.093596,
+# 'azimuthDegrees': 132.60162, 'panelsCount': 3, 'yearlyEnergyDcKwh': 1354.4171, 'segmentIndex': 3}, {'pitchDegrees':
+# 31.666294, 'azimuthDegrees': 308.42334, 'panelsCount': 2, 'yearlyEnergyDcKwh': 743.65497, 'segmentIndex': 4}]},
+# {'panelsCount': 28, 'yearlyEnergyDcKwh': 11919.106, 'roofSegmentSummaries': [{'pitchDegrees': 31.443022,
+# 'azimuthDegrees': 218.25331, 'panelsCount': 15, 'yearlyEnergyDcKwh': 6784.1484, 'segmentIndex': 0},
+# {'pitchDegrees': 34.39779, 'azimuthDegrees': 31.74401, 'panelsCount': 3, 'yearlyEnergyDcKwh': 1017.83356,
+# 'segmentIndex': 1}, {'pitchDegrees': 3.0681775, 'azimuthDegrees': 301.1099, 'panelsCount': 5, 'yearlyEnergyDcKwh':
+# 2019.0519, 'segmentIndex': 2}, {'pitchDegrees': 27.093596, 'azimuthDegrees': 132.60162, 'panelsCount': 3,
+# 'yearlyEnergyDcKwh': 1354.4171, 'segmentIndex': 3}, {'pitchDegrees': 31.666294, 'azimuthDegrees': 308.42334,
+# 'panelsCount': 2, 'yearlyEnergyDcKwh': 743.65497, 'segmentIndex': 4}]}, {'panelsCount': 29, 'yearlyEnergyDcKwh':
+# 12255.358, 'roofSegmentSummaries': [{'pitchDegrees': 31.443022, 'azimuthDegrees': 218.25331, 'panelsCount': 15,
+# 'yearlyEnergyDcKwh': 6784.1484, 'segmentIndex': 0}, {'pitchDegrees': 34.39779, 'azimuthDegrees': 31.74401,
+# 'panelsCount': 4, 'yearlyEnergyDcKwh': 1354.0854, 'segmentIndex': 1}, {'pitchDegrees': 3.0681775, 'azimuthDegrees':
+# 301.1099, 'panelsCount': 5, 'yearlyEnergyDcKwh': 2019.0519, 'segmentIndex': 2}, {'pitchDegrees': 27.093596,
+# 'azimuthDegrees': 132.60162, 'panelsCount': 3, 'yearlyEnergyDcKwh': 1354.4171, 'segmentIndex': 3}, {'pitchDegrees':
+# 31.666294, 'azimuthDegrees': 308.42334, 'panelsCount': 2, 'yearlyEnergyDcKwh': 743.65497, 'segmentIndex': 4}]},
+# {'panelsCount': 30, 'yearlyEnergyDcKwh': 12586.448, 'roofSegmentSummaries': [{'pitchDegrees': 31.443022,
+# 'azimuthDegrees': 218.25331, 'panelsCount': 15, 'yearlyEnergyDcKwh': 6784.1484, 'segmentIndex': 0},
+# {'pitchDegrees': 34.39779, 'azimuthDegrees': 31.74401, 'panelsCount': 5, 'yearlyEnergyDcKwh': 1685.1748,
+# 'segmentIndex': 1}, {'pitchDegrees': 3.0681775, 'azimuthDegrees': 301.1099, 'panelsCount': 5, 'yearlyEnergyDcKwh':
+# 2019.0519, 'segmentIndex': 2}, {'pitchDegrees': 27.093596, 'azimuthDegrees': 132.60162, 'panelsCount': 3,
+# 'yearlyEnergyDcKwh': 1354.4171, 'segmentIndex': 3}, {'pitchDegrees': 31.666294, 'azimuthDegrees': 308.42334,
+# 'panelsCount': 2, 'yearlyEnergyDcKwh': 743.65497, 'segmentIndex': 4}]}, {'panelsCount': 31, 'yearlyEnergyDcKwh':
+# 12911.502, 'roofSegmentSummaries': [{'pitchDegrees': 31.443022, 'azimuthDegrees': 218.25331, 'panelsCount': 15,
+# 'yearlyEnergyDcKwh': 6784.1484, 'segmentIndex': 0}, {'pitchDegrees': 34.39779, 'azimuthDegrees': 31.74401,
+# 'panelsCount': 6, 'yearlyEnergyDcKwh': 2010.2289, 'segmentIndex': 1}, {'pitchDegrees': 3.0681775, 'azimuthDegrees':
+# 301.1099, 'panelsCount': 5, 'yearlyEnergyDcKwh': 2019.0519, 'segmentIndex': 2}, {'pitchDegrees': 27.093596,
+# 'azimuthDegrees': 132.60162, 'panelsCount': 3, 'yearlyEnergyDcKwh': 1354.4171, 'segmentIndex': 3}, {'pitchDegrees':
+# 31.666294, 'azimuthDegrees': 308.42334, 'panelsCount': 2, 'yearlyEnergyDcKwh': 743.65497, 'segmentIndex': 4}]},
+# {'panelsCount': 32, 'yearlyEnergyDcKwh': 13233.139, 'roofSegmentSummaries': [{'pitchDegrees': 31.443022,
+# 'azimuthDegrees': 218.25331, 'panelsCount': 15, 'yearlyEnergyDcKwh': 6784.1484, 'segmentIndex': 0},
+# {'pitchDegrees': 34.39779, 'azimuthDegrees': 31.74401, 'panelsCount': 7, 'yearlyEnergyDcKwh': 2331.8652,
+# 'segmentIndex': 1}, {'pitchDegrees': 3.0681775, 'azimuthDegrees': 301.1099, 'panelsCount': 5, 'yearlyEnergyDcKwh':
+# 2019.0519, 'segmentIndex': 2}, {'pitchDegrees': 27.093596, 'azimuthDegrees': 132.60162, 'panelsCount': 3,
+# 'yearlyEnergyDcKwh': 1354.4171, 'segmentIndex': 3}, {'pitchDegrees': 31.666294, 'azimuthDegrees': 308.42334,
+# 'panelsCount': 2, 'yearlyEnergyDcKwh': 743.65497, 'segmentIndex': 4}]}, {'panelsCount': 33, 'yearlyEnergyDcKwh':
+# 13554.602, 'roofSegmentSummaries': [{'pitchDegrees': 31.443022, 'azimuthDegrees': 218.25331, 'panelsCount': 15,
+# 'yearlyEnergyDcKwh': 6784.1484, 'segmentIndex': 0}, {'pitchDegrees': 34.39779, 'azimuthDegrees': 31.74401,
+# 'panelsCount': 8, 'yearlyEnergyDcKwh': 2653.3286, 'segmentIndex': 1}, {'pitchDegrees': 3.0681775, 'azimuthDegrees':
+# 301.1099, 'panelsCount': 5, 'yearlyEnergyDcKwh': 2019.0519, 'segmentIndex': 2}, {'pitchDegrees': 27.093596,
+# 'azimuthDegrees': 132.60162, 'panelsCount': 3, 'yearlyEnergyDcKwh': 1354.4171, 'segmentIndex': 3}, {'pitchDegrees':
+# 31.666294, 'azimuthDegrees': 308.42334, 'panelsCount': 2, 'yearlyEnergyDcKwh': 743.65497, 'segmentIndex': 4}]},
+# {'panelsCount': 34, 'yearlyEnergyDcKwh': 13893.903, 'roofSegmentSummaries': [{'pitchDegrees': 31.443022,
+# 'azimuthDegrees': 218.25331, 'panelsCount': 15, 'yearlyEnergyDcKwh': 6784.1484, 'segmentIndex': 0},
+# {'pitchDegrees': 34.39779, 'azimuthDegrees': 31.74401, 'panelsCount': 9, 'yearlyEnergyDcKwh': 2992.6301,
+# 'segmentIndex': 1}, {'pitchDegrees': 3.0681775, 'azimuthDegrees': 301.1099, 'panelsCount': 5, 'yearlyEnergyDcKwh':
+# 2019.0519, 'segmentIndex': 2}, {'pitchDegrees': 27.093596, 'azimuthDegrees': 132.60162, 'panelsCount': 3,
+# 'yearlyEnergyDcKwh': 1354.4171, 'segmentIndex': 3}, {'pitchDegrees': 31.666294, 'azimuthDegrees': 308.42334,
+# 'panelsCount': 2, 'yearlyEnergyDcKwh': 743.65497, 'segmentIndex': 4}]}, {'panelsCount': 35, 'yearlyEnergyDcKwh':
+# 14221.166, 'roofSegmentSummaries': [{'pitchDegrees': 31.443022, 'azimuthDegrees': 218.25331, 'panelsCount': 15,
+# 'yearlyEnergyDcKwh': 6784.1484, 'segmentIndex': 0}, {'pitchDegrees': 34.39779, 'azimuthDegrees': 31.74401,
+# 'panelsCount': 10, 'yearlyEnergyDcKwh': 3319.893, 'segmentIndex': 1}, {'pitchDegrees': 3.0681775, 'azimuthDegrees':
+# 301.1099, 'panelsCount': 5, 'yearlyEnergyDcKwh': 2019.0519, 'segmentIndex': 2}, {'pitchDegrees': 27.093596,
+# 'azimuthDegrees': 132.60162, 'panelsCount': 3, 'yearlyEnergyDcKwh': 1354.4171, 'segmentIndex': 3}, {'pitchDegrees':
+# 31.666294, 'azimuthDegrees': 308.42334, 'panelsCount': 2, 'yearlyEnergyDcKwh': 743.65497, 'segmentIndex': 4}]},
+# {'panelsCount': 36, 'yearlyEnergyDcKwh': 14536.154, 'roofSegmentSummaries': [{'pitchDegrees': 31.443022,
+# 'azimuthDegrees': 218.25331, 'panelsCount': 15, 'yearlyEnergyDcKwh': 6784.1484, 'segmentIndex': 0},
+# {'pitchDegrees': 34.39779, 'azimuthDegrees': 31.74401, 'panelsCount': 11, 'yearlyEnergyDcKwh': 3634.8809,
+# 'segmentIndex': 1}, {'pitchDegrees': 3.0681775, 'azimuthDegrees': 301.1099, 'panelsCount': 5, 'yearlyEnergyDcKwh':
+# 2019.0519, 'segmentIndex': 2}, {'pitchDegrees': 27.093596, 'azimuthDegrees': 132.60162, 'panelsCount': 3,
+# 'yearlyEnergyDcKwh': 1354.4171, 'segmentIndex': 3}, {'pitchDegrees': 31.666294, 'azimuthDegrees': 308.42334,
+# 'panelsCount': 2, 'yearlyEnergyDcKwh': 743.65497, 'segmentIndex': 4}]}, {'panelsCount': 37, 'yearlyEnergyDcKwh':
+# 14850.317, 'roofSegmentSummaries': [{'pitchDegrees': 31.443022, 'azimuthDegrees': 218.25331, 'panelsCount': 15,
+# 'yearlyEnergyDcKwh': 6784.1484, 'segmentIndex': 0}, {'pitchDegrees': 34.39779, 'azimuthDegrees': 31.74401,
+# 'panelsCount': 12, 'yearlyEnergyDcKwh': 3949.0444, 'segmentIndex': 1}, {'pitchDegrees': 3.0681775,
+# 'azimuthDegrees': 301.1099, 'panelsCount': 5, 'yearlyEnergyDcKwh': 2019.0519, 'segmentIndex': 2}, {'pitchDegrees':
+# 27.093596, 'azimuthDegrees': 132.60162, 'panelsCount': 3, 'yearlyEnergyDcKwh': 1354.4171, 'segmentIndex': 3},
+# {'pitchDegrees': 31.666294, 'azimuthDegrees': 308.42334, 'panelsCount': 2, 'yearlyEnergyDcKwh': 743.65497,
+# 'segmentIndex': 4}]}, {'panelsCount': 38, 'yearlyEnergyDcKwh': 15160.658, 'roofSegmentSummaries': [{'pitchDegrees':
+# 31.443022, 'azimuthDegrees': 218.25331, 'panelsCount': 15, 'yearlyEnergyDcKwh': 6784.1484, 'segmentIndex': 0},
+# {'pitchDegrees': 34.39779, 'azimuthDegrees': 31.74401, 'panelsCount': 13, 'yearlyEnergyDcKwh': 4259.385,
+# 'segmentIndex': 1}, {'pitchDegrees': 3.0681775, 'azimuthDegrees': 301.1099, 'panelsCount': 5, 'yearlyEnergyDcKwh':
+# 2019.0519, 'segmentIndex': 2}, {'pitchDegrees': 27.093596, 'azimuthDegrees': 132.60162, 'panelsCount': 3,
+# 'yearlyEnergyDcKwh': 1354.4171, 'segmentIndex': 3}, {'pitchDegrees': 31.666294, 'azimuthDegrees': 308.42334,
+# 'panelsCount': 2, 'yearlyEnergyDcKwh': 743.65497, 'segmentIndex': 4}]}, {'panelsCount': 39, 'yearlyEnergyDcKwh':
+# 15438.986, 'roofSegmentSummaries': [{'pitchDegrees': 31.443022, 'azimuthDegrees': 218.25331, 'panelsCount': 15,
+# 'yearlyEnergyDcKwh': 6784.1484, 'segmentIndex': 0}, {'pitchDegrees': 34.39779, 'azimuthDegrees': 31.74401,
+# 'panelsCount': 14, 'yearlyEnergyDcKwh': 4537.713, 'segmentIndex': 1}, {'pitchDegrees': 3.0681775, 'azimuthDegrees':
+# 301.1099, 'panelsCount': 5, 'yearlyEnergyDcKwh': 2019.0519, 'segmentIndex': 2}, {'pitchDegrees': 27.093596,
+# 'azimuthDegrees': 132.60162, 'panelsCount': 3, 'yearlyEnergyDcKwh': 1354.4171, 'segmentIndex': 3}, {'pitchDegrees':
+# 31.666294, 'azimuthDegrees': 308.42334, 'panelsCount': 2, 'yearlyEnergyDcKwh': 743.65497, 'segmentIndex': 4}]}],
+# 'panelCapacityWatts': 400, 'panelHeightMeters': 1.879, 'panelWidthMeters': 1.045, 'panelLifetimeYears': 20,
+# 'buildingStats': {'areaMeters2': 138.38115, 'sunshineQuantiles': [537, 728.5604, 799.23975, 833.99713, 900.88086,
+# 959.65875, 1024.2743, 1086.1285, 1132.8774, 1162.1904, 1193.6012], 'groundAreaMeters2': 117.16}, 'solarPanels': [{
+# 'center': {'latitude': 50.667371499999994, 'longitude': -4.7417235}, 'orientation': 'LANDSCAPE',
+# 'yearlyEnergyDcKwh': 468.5037, 'segmentIndex': 0}, {'center': {'latitude': 50.6673614, 'longitude': -4.7417023},
+# 'orientation': 'LANDSCAPE', 'yearlyEnergyDcKwh': 467.61072, 'segmentIndex': 0}, {'center': {'latitude':
+# 50.667365100000005, 'longitude': -4.7417311}, 'orientation': 'LANDSCAPE', 'yearlyEnergyDcKwh': 465.55005,
+# 'segmentIndex': 0}, {'center': {'latitude': 50.6673512, 'longitude': -4.741681000000001}, 'orientation':
+# 'LANDSCAPE', 'yearlyEnergyDcKwh': 465.48712, 'segmentIndex': 0}, {'center': {'latitude': 50.667357599999995,
+# 'longitude': -4.7416734}, 'orientation': 'LANDSCAPE', 'yearlyEnergyDcKwh': 467.8553, 'segmentIndex': 0},
+# {'center': {'latitude': 50.6673779, 'longitude': -4.741715999999999}, 'orientation': 'LANDSCAPE',
+# 'yearlyEnergyDcKwh': 464.84396, 'segmentIndex': 0}, {'center': {'latitude': 50.6673678, 'longitude': -4.7416947},
+# 'orientation': 'LANDSCAPE', 'yearlyEnergyDcKwh': 464.79984, 'segmentIndex': 0}, {'center': {'latitude': 50.6673549,
+# 'longitude': -4.7417098}, 'orientation': 'LANDSCAPE', 'yearlyEnergyDcKwh': 461.58975, 'segmentIndex': 0},
+# {'center': {'latitude': 50.6673816, 'longitude': -4.7417448}, 'orientation': 'LANDSCAPE', 'yearlyEnergyDcKwh':
+# 461.48065, 'segmentIndex': 0}, {'center': {'latitude': 50.6673881, 'longitude': -4.7417372}, 'orientation':
+# 'LANDSCAPE', 'yearlyEnergyDcKwh': 458.3733, 'segmentIndex': 0}, {'center': {'latitude': 50.6673149, 'longitude':
+# -4.7416768}, 'orientation': 'LANDSCAPE', 'yearlyEnergyDcKwh': 457.68268, 'segmentIndex': 3}, {'center': {
+# 'latitude': 50.6673204, 'longitude': -4.7416867}, 'orientation': 'LANDSCAPE', 'yearlyEnergyDcKwh': 456.06827,
+# 'segmentIndex': 3}, {'center': {'latitude': 50.667375199999995, 'longitude': -4.7417524}, 'orientation':
+# 'LANDSCAPE', 'yearlyEnergyDcKwh': 453.20776, 'segmentIndex': 0}, {'center': {'latitude': 50.667364, 'longitude':
+# -4.7416659}, 'orientation': 'LANDSCAPE', 'yearlyEnergyDcKwh': 448.61087, 'segmentIndex': 0}, {'center': {
+# 'latitude': 50.6673094, 'longitude': -4.741666899999999}, 'orientation': 'LANDSCAPE', 'yearlyEnergyDcKwh':
+# 440.66626, 'segmentIndex': 3}, {'center': {'latitude': 50.667403799999995, 'longitude': -4.741588900000001},
+# 'orientation': 'LANDSCAPE', 'yearlyEnergyDcKwh': 419.31348, 'segmentIndex': 2}, {'center': {'latitude':
+# 50.66740850000001, 'longitude': -4.7416016999999995}, 'orientation': 'LANDSCAPE', 'yearlyEnergyDcKwh': 418.74448,
+# 'segmentIndex': 2}, {'center': {'latitude': 50.6673688, 'longitude': -4.7417599}, 'orientation': 'LANDSCAPE',
+# 'yearlyEnergyDcKwh': 413.877, 'segmentIndex': 0}, {'center': {'latitude': 50.667348499999996, 'longitude':
+# -4.7417174}, 'orientation': 'LANDSCAPE', 'yearlyEnergyDcKwh': 411.76657, 'segmentIndex': 0}, {'center': {
+# 'latitude': 50.6673587, 'longitude': -4.7417387}, 'orientation': 'LANDSCAPE', 'yearlyEnergyDcKwh': 410.5925,
+# 'segmentIndex': 0}, {'center': {'latitude': 50.6673992, 'longitude': -4.7415761}, 'orientation': 'LANDSCAPE',
+# 'yearlyEnergyDcKwh': 404.15607, 'segmentIndex': 2}, {'center': {'latitude': 50.6674132, 'longitude': -4.7416145},
+# 'orientation': 'LANDSCAPE', 'yearlyEnergyDcKwh': 403.29822, 'segmentIndex': 2}, {'center': {'latitude': 50.6673324,
+# 'longitude': -4.7417015}, 'orientation': 'PORTRAIT', 'yearlyEnergyDcKwh': 378.2754, 'segmentIndex': 4}, {'center':
+# {'latitude': 50.667417799999996, 'longitude': -4.7416273}, 'orientation': 'LANDSCAPE', 'yearlyEnergyDcKwh':
+# 373.53967, 'segmentIndex': 2}, {'center': {'latitude': 50.667324900000004, 'longitude': -4.7417104}, 'orientation':
+# 'PORTRAIT', 'yearlyEnergyDcKwh': 365.37958, 'segmentIndex': 4}, {'center': {'latitude': 50.6674043, 'longitude':
+# -4.741680800000001}, 'orientation': 'PORTRAIT', 'yearlyEnergyDcKwh': 341.4827, 'segmentIndex': 1}, {'center': {
+# 'latitude': 50.667392299999996, 'longitude': -4.7416919}, 'orientation': 'PORTRAIT', 'yearlyEnergyDcKwh':
+# 336.64502, 'segmentIndex': 1}, {'center': {'latitude': 50.667397, 'longitude': -4.741704599999999}, 'orientation':
+# 'PORTRAIT', 'yearlyEnergyDcKwh': 339.7059, 'segmentIndex': 1}, {'center': {'latitude': 50.6674018, 'longitude':
+# -4.7417174}, 'orientation': 'PORTRAIT', 'yearlyEnergyDcKwh': 336.25195, 'segmentIndex': 1}, {'center': {'latitude':
+# 50.6673875, 'longitude': -4.7416791}, 'orientation': 'PORTRAIT', 'yearlyEnergyDcKwh': 331.08936, 'segmentIndex':
+# 1}, {'center': {'latitude': 50.6674065, 'longitude': -4.7417301}, 'orientation': 'PORTRAIT', 'yearlyEnergyDcKwh':
+# 325.05405, 'segmentIndex': 1}, {'center': {'latitude': 50.6673828, 'longitude': -4.7416664}, 'orientation':
+# 'PORTRAIT', 'yearlyEnergyDcKwh': 321.63647, 'segmentIndex': 1}, {'center': {'latitude': 50.667378, 'longitude':
+# -4.741653599999999}, 'orientation': 'PORTRAIT', 'yearlyEnergyDcKwh': 321.46332, 'segmentIndex': 1}, {'center': {
+# 'latitude': 50.667373299999994, 'longitude': -4.7416409}, 'orientation': 'PORTRAIT', 'yearlyEnergyDcKwh': 339.3016,
+# 'segmentIndex': 1}, {'center': {'latitude': 50.6673853, 'longitude': -4.7416298}, 'orientation': 'PORTRAIT',
+# 'yearlyEnergyDcKwh': 327.26282, 'segmentIndex': 1}, {'center': {'latitude': 50.667399499999995, 'longitude':
+# -4.741668}, 'orientation': 'PORTRAIT', 'yearlyEnergyDcKwh': 314.9878, 'segmentIndex': 1}, {'center': {'latitude':
+# 50.6673948, 'longitude': -4.7416553}, 'orientation': 'PORTRAIT', 'yearlyEnergyDcKwh': 314.16364, 'segmentIndex':
+# 1}, {'center': {'latitude': 50.667390000000005, 'longitude': -4.7416425}, 'orientation': 'PORTRAIT',
+# 'yearlyEnergyDcKwh': 310.3404, 'segmentIndex': 1}, {'center': {'latitude': 50.6674186, 'longitude': -4.7417191},
+# 'orientation': 'PORTRAIT', 'yearlyEnergyDcKwh': 278.3281, 'segmentIndex': 1}]}, 'boundingBox': {'sw': {'latitude':
+# 50.6672904, 'longitude': -4.741778}, 'ne': {'latitude': 50.667431199999996, 'longitude': -4.7415536}},
+# 'imageryQuality': 'MEDIUM', 'imageryProcessedDate': {'year': 2024, 'month': 4, 'day': 18}}
diff --git a/etl/customers/places_for_people/route_march.py b/etl/customers/places_for_people/route_march.py
index c38c71d3..5da1c2f7 100644
--- a/etl/customers/places_for_people/route_march.py
+++ b/etl/customers/places_for_people/route_march.py
@@ -1,4 +1,5 @@
 import os
+import time
 
 import pandas as pd
 from tqdm import tqdm
@@ -33,7 +34,7 @@ def app():
         lst = [
             pfp_property["ADDRESS"],
             pfp_property["ADDRESS.1"],
-            pfp_property["ADDRESS.2"],
+            # pfp_property["ADDRESS.2"],
             pfp_property["POSTCODE"]
         ]
         lst = [str(x).strip() for x in lst if not pd.isnull(x)]
@@ -135,3 +136,165 @@ def app():
     # Store as an excel
     filename = "Places For People EPC data.xlsx"
     asset_list.to_excel(filename, index=False)
+
+
+# TODO: TEMP
+# This script takes in a list of properties
+# Will be postcode and address
+
+import requests
+import numpy as np
+import pandas as pd
+from bs4 import BeautifulSoup
+from tqdm import tqdm
+
+SEARCH_POSTCODE_URL = ("https://find-energy-certificate.service.gov.uk/find-a-certificate/search-by-postcode?postcode"
+                       "={postcode_input}")
+BASE_ENERGY_URL = "https://find-energy-certificate.service.gov.uk"
+
+
+def retrieve_find_my_epc_data(postcode: str, address: str):
+    """
+    For a post code and address, we pull out all the required data from the find my epc website
+    """
+
+    headers = {
+        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
+                      'Chrome/111.0.0.0 Safari/537.36'}
+    postcode_input = postcode.replace(" ", "+")
+    postcode_search = SEARCH_POSTCODE_URL.format(postcode_input=postcode_input)
+    postcode_response = requests.get(postcode_search, headers=headers)
+
+    postcode_res = BeautifulSoup(postcode_response.text)
+    address_links_full = postcode_res.findAll('a', {'class': 'govuk-link', 'rel': 'nofollow'})
+    address_links = {element.text.lstrip().rstrip(): BASE_ENERGY_URL + element['href'] for element in
+                     address_links_full}
+
+    # TODO: to check the logic works for all cases but seems to be good
+    index_of_address = [key.lower().startswith(address) for key in list(address_links.keys())]
+    chosen_epc = address_links[list(address_links.keys())[np.where(index_of_address)[0][0]]]
+
+    epc_certificate = chosen_epc.split('/')[-1]
+
+    address_response = requests.get(chosen_epc, headers=headers)
+    address_res = BeautifulSoup(address_response.text)
+
+    # print("## Energy rating - current and potential")
+    ratings = address_res.find('desc', {'id': 'svg-desc'}).text
+
+    # print('### Current EPC rating')
+    current_rating = ratings.split(".")[0]
+    # print("##### " + current_rating)
+
+    # print('### Potential EPC rating')
+    potential_rating = ratings.split(".")[1]
+    # print("##### " + potential_rating)
+
+    new_property_df = pd.DataFrame(
+        {'address': [address],
+         'epc_certificate': [epc_certificate],
+         'current_epc_rating': [current_rating.split(' ')[-6]],
+         'current_epc_efficiency': [current_rating.split(' ')[-1]],
+         'potential_epc_rating': [potential_rating.split(' ')[-6]],
+         "potential_epc_efficiency": [potential_rating.split(' ')[-1]]}
+    )
+
+    # print("Find assessor")
+    assessor_block = address_res.find('div', {'class': 'epc-contact-assessor'})
+    assessor_fields = assessor_block.find_all('dd', {"class": 'govuk-summary-list__value govuk-!-width-one-half'})
+    assessor_name = assessor_fields[0].text.strip()
+    assessor_number = assessor_fields[1].text.strip()
+    assessor_email = assessor_fields[2].text.strip()
+
+    new_property_df['assessor_name'] = assessor_name
+    new_property_df['assessor_number'] = assessor_number
+    new_property_df['assessor_email'] = assessor_email
+
+    return new_property_df
+
+    # print('### Changes that can be made:')
+    # improvements = address_res.find('div', {"class": "govuk-body printable-area epb-recommended-improvements"})
+
+    # if improvements is None:
+    #     print("No changes suggested")
+    # else:
+    #     changes = improvements.find_all('h3')
+    #     changes_impact = improvements.find_all('dl', {"class": 'govuk-summary-list'})
+
+    #     for element in zip(changes, changes_impact):
+    #         improvement_header = element[0].text
+    #         print("#### " + improvement_header)
+
+    #         improvement_text = element[1].text
+    #         print(improvement_text)
+
+    #         col_name = improvement_header.split(":")[1]
+    #         cost = element[1].find('dd', {"class": "govuk-summary-list__value"}).text.lstrip().rstrip()
+
+    #         impact = element[1].find('text', {"class": "govuk-!-font-weight-bold"}).text.split(" ")
+    #         impact_num = impact[0]
+    #         impact_cat = impact[1]
+    #         print(cost)
+    #         new_property_df[col_name] = True
+    #         # cost_column = col_name + '-cost'
+    #         # new_property_df.assign(cost_column=cost)
+    #         new_property_df[col_name + '-cost'] = cost
+    #         new_property_df[col_name + '-impact_num'] = impact_num
+    #         new_property_df[col_name + '-impact_cat'] = impact_cat
+
+    #     data = pd.concat([data, new_property_df])
+    #     data.to_csv('./portfolio.csv')
+
+
+def main():
+    """
+    Main pipeline function to take in a predefined list of properties and extract names of contractors
+    """
+
+    # Load in list of properties
+    addresses_df = pd.read_excel("/Users/khalimconn-kowlessar/Downloads/Places For People EPC data.xlsx")
+    addresses_df["uprn"] = addresses_df["uprn"].astype("Int64").astype(str)
+    # 1256
+
+    find_my_epc_data_list = []
+    for i, row in tqdm(addresses_df.iterrows(), total=addresses_df.shape[0]):
+
+        if pd.isnull(row['Matched EPC Address']):
+            continue
+        # 10 second break every 50 iterations
+        if (i % 50 == 0) and (i != 0):
+            time.sleep(10)
+        time.sleep(1)
+        if row['Matched EPC Address'] == "6 CHURCHWOOD, CHURCH STREET, CRAMLINGTON":
+            address_data = retrieve_find_my_epc_data(
+                postcode=row['POSTCODE'],
+                address=" ".join([str(row["ADDRESS"]), row["ADDRESS.1"]]).lower()
+            )
+        else:
+            address_data = retrieve_find_my_epc_data(
+                postcode=row['POSTCODE'],
+                address=", ".join(row['Matched EPC Address'].split(", ")[:-1]).lower()
+            )
+
+        address_data.insert(0, "uprn", row["uprn"])
+
+        find_my_epc_data_list.append(address_data)
+
+    find_my_epc_data = pd.concat(find_my_epc_data_list)
+
+    find_my_epc_data.to_csv('find_my_epc_data.csv')
+
+    find_my_epc_data = find_my_epc_data.drop_duplicates("uprn")
+
+    # Match back to addresses
+    addresses_df2 = addresses_df.merge(
+        find_my_epc_data,
+        how="left",
+        on="uprn"
+    )
+
+    addresses_df2.to_excel("Places For People EPC data with surveyor.xlsx", index=False)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index f99c7b1a..aca36584 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -24,9 +24,13 @@ from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes
 from etl.epc.DataProcessor import EPCDataProcessor
 from datetime import datetime
 
+import inspect
+
+src_file_path = inspect.getfile(lambda: None)
+
 EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
-ENV_FILE = Path(__file__).parent / "etl" / "eligibility" / "ha_15_32" / ".env"
-DATA_FOLDER = Path(__file__).parent / "local_data" / "ha_data"
+ENV_FILE = Path(src_file_path).parent / "etl" / "eligibility" / "ha_15_32" / ".env"
+DATA_FOLDER = Path(src_file_path).parent / "local_data" / "ha_data"
 
 logger = setup_logger()
 load_dotenv(ENV_FILE)
@@ -6127,6 +6131,9 @@ def classify_loft(x):
 
 
 def fml_analysis(loader):
+    # In the case of the optimistic scenario, we assume that the at-risk pipeline is still viable, just at a lower rate
+    optimistic_scenario_rate = 1500
+
     assumed_ciga_pass_rate = 0.731
     has_bruh = [
         "HA7", "HA14", "HA25", "HA39", "HA16", "HA28", "HA13",
@@ -6224,6 +6231,7 @@ def fml_analysis(loader):
         if fuck_this.shape[0] != before_merge_shape:
             raise Exception("SOMETHING WENT WRONG")
 
+        # Automated archetype check
         if any(fuck_this["ECO Eligibility"].str.contains("subject to archetype")):
             # We perform the archetype test. If the property is a house, we it needs to be detached, semi-detached
             # or end terrace. If it's a bungalow, it must be attached
@@ -6319,6 +6327,7 @@ def fml_analysis(loader):
             ]
 
         # Characterise no CIGA check needed
+        # !!!!!!!!!!!! AT RISK !!!!!!!!!!!!
         ciga_check_passed = had_survey[had_survey["ECO Eligibility"] == "eco4 - passed ciga"]
         # These should be treated the same as one that have passed their ciga checks, from a detection perspective
         ciga_check_passed_eligible = ciga_check_passed[
@@ -6392,6 +6401,12 @@ def fml_analysis(loader):
             identified_as_gbis_looks_like_eco4
         )
 
+        # This is the work that is at risk
+        eco4_work_at_risk = (
+            passed_ciga_expectation +
+            ciga_check_expectation
+        )
+
         no_ciga_check_needed_actually_gbis = no_ciga_check_needed_eligible_gbis.shape[0]
         gbis_qualified = gbis_qualified.shape[0]
 
@@ -6490,11 +6505,13 @@ def fml_analysis(loader):
                 # "Of which sold": sales_since_nov,
                 "EPC verified ECO4 Eligible - Remaining": int(total_eco4_expectation),
                 "EPC verified GBIS Eligibile - Remaining": int(total_gbis_expectation),
+                # At risk work
+                "Work at risk due to audits": eco4_work_at_risk
             }
         )
 
     results_df = pd.DataFrame(results)
-    results_df.to_csv("analysis - revised.csv")
+    results_df.to_csv("analysis - revised - audit update.csv")
 
     # results_df["Delta vs November"] = 100 * (
     #     results_df["Of which ECO4 Eligible - Remaining"] - results_df["Original ECO4 Estimate - Remaining"]
@@ -6509,7 +6526,7 @@ def create_final_report():
     This function will produce the final output for the HA analysis
     :return:
     """
-    epc_validated_results = pd.read_csv("analysis - revised.csv")
+    epc_validated_results = pd.read_csv("analysis - revised - audit update.csv")
     pipeline_results = pd.read_csv("pipeline_remaining_raw.csv")
 
     ####################################
@@ -6593,12 +6610,14 @@ def create_final_report():
         [
             "HA Name",
             "EPC verified ECO4 Eligible - Remaining",
-            "EPC verified GBIS Eligibile - Remaining"
+            "EPC verified GBIS Eligibile - Remaining",
+            "Work at risk due to audits"
         ]
     ].copy().rename(
         columns={
             "EPC verified ECO4 Eligible - Remaining": "# ECO4 remaining - From EPC Database (post CIGA)",
             "EPC verified GBIS Eligibile - Remaining": "# GBIS remaining - From EPC Database (post CIGA)",
+            "Work at risk due to audits": "ECO4 remaining work at risk due to Audits",
         }
     )
 
@@ -6623,7 +6642,8 @@ def create_final_report():
         '# ECO4 remaining - All HA Summary',
         '# ECO4 remaining - Postcode list (pre CIGA)',
         '# ECO4 remaining - Postcode list (post CIGA)',
-        '# ECO4 remaining - From EPC Database (post CIGA)'
+        '# ECO4 remaining - From EPC Database (post CIGA)',
+        'ECO4 remaining work at risk due to Audits'
     ]:
         revenue[col] = revenue[col] * 1710
 
@@ -6688,8 +6708,8 @@ def create_final_report():
     #     "# GBIS remaining - Postcode list (post CIGA)"]]
 
     # Store final outputs
-    volumes.to_csv("HA Analysis Final - volumes.csv")
-    revenue.to_csv("HA Analysis Final - revenue.csv")
+    volumes.to_csv("HA Analysis - Audit Update - volumes.csv")
+    revenue.to_csv("HA Analysis - Audit Update - revenue.csv")
 
 
 def identify_eco_works(loader):
@@ -7203,84 +7223,96 @@ def app():
     loader.load()
     loader.ha_facts_and_figures()
 
+    # import pickle
+    # with open("ha_analysis_data_temp.pkl", "wb") as f:
+    #     pickle.dump(loader, f)
+    # import pickle
+    # with open("ha_analysis_data_temp.pkl", "rb") as f:
+    #     loader = pickle.load(f)
+
     forecast_remaining_sales(loader)
 
+    # Functions to produce the final output lol...
+    # fml_data_pull(loader)  # If we need to pull EPC data
+    fml_analysis(loader)
+    create_final_report()
+
     # Adhoc - for HA16, get the properties that still need a CIGA check
-    asset_list_ha16 = loader.data["HA16"]["asset_list"].copy()
-    ha_16_need_ciga = asset_list_ha16[
-        asset_list_ha16["ECO Eligibility"].str.contains("subject to ciga")
-    ]
-    completed_cigas = loader.data["HA16"]["ciga_list"].copy()
-    # Store the results
-    ha_16_need_ciga.to_csv("ha16_need_ciga.csv")
-    completed_cigas.to_csv("ha16_completed_cigas.csv")
-
-    # Adhoc - look at the current pipeline and identify how many dormant, CIGA dependent properties there are for
-    # live projects
-
-    # Read excel
-    orderbook_filepath = "local_data/ha_data/Warmfront HA client order book overview_20240129.xlsx"
-    orderbook_workbook = openpyxl.load_workbook(orderbook_filepath)
-    orderbook_sheet = orderbook_workbook["Contractual Info"]
-    orderbook_colnames = [cell.value for cell in orderbook_sheet[1]]
-
-    rows = []
-    for row in orderbook_sheet.iter_rows(min_row=2, values_only=False):
-        row_data = [cell.value for cell in row]  # This will get you the cell values
-        rows.append(row_data)
-
-    orderbook = pd.DataFrame(rows, columns=orderbook_colnames)
-    live_orderbook = orderbook[orderbook["Live, New, or Historic?"] == "LIVE"].copy()
-    live_orderbook['Redacted HA'] = live_orderbook['Redacted HA'].str.replace(" ", "")
-
-    dormant_properties = []
-    missed_has = []
-    for _, customer in live_orderbook.iterrows():
-        if customer['Redacted HA'] not in loader.data.keys():
-            missed_has.append(customer['Redacted HA'])
-            continue
-        asset_list = loader.data[customer['Redacted HA']]["asset_list"].copy()
-        survey_list = loader.data[customer['Redacted HA']]["survey_list"].copy()
-        # Remove sold
-        if not survey_list.empty:
-            survey_list = survey_list[~pd.isnull(survey_list["asset_list_row_id"])]
-            asset_list = asset_list.merge(
-                survey_list[["asset_list_row_id", "installation_status"]],
-                how="left",
-                on="asset_list_row_id"
-            )
-            # Anything that has an installation has gone to installation, and therefore is not remaining
-            asset_list = asset_list[pd.isnull(asset_list["installation_status"])]
-            asset_list = asset_list.drop(columns=["installation_status"])
-
-        # We pull out the properties that need a CIGA check
-        need_ciga = asset_list[asset_list["ECO Eligibility"] == "eco4 (subject to ciga)"]
-        need_archetype = asset_list[asset_list["ECO Eligibility"] == "eco4 (subject to archetype)"]
-        need_ciga_and_archetype = asset_list[
-            asset_list["ECO Eligibility"] == "eco4 (subject to ciga) (subject to archetype)"
-            ]
-
-        dormant_properties.append(
-            {
-                "HA Name": customer['Redacted HA'],
-                "Need CIGA": need_ciga.shape[0],
-                "Need Archetype": need_archetype.shape[0],
-                "Need CIGA and Archetype": need_ciga_and_archetype.shape[0]
-            }
-        )
-
-    dormant_properties = pd.DataFrame(dormant_properties)
-    totals = dormant_properties.sum()
-    totals["HA Name"] = "Total"
-
-    dormant_properties = pd.concat([dormant_properties, totals.to_frame().T])
-    dormant_properties.to_csv("dormant_properties.csv")
-
-    loader.december_figures["ECO4 remaining"].sum()
-    december_figures = loader.december_figures.copy()
-    december_figures["ECO4 remaining"] = np.where(
-        december_figures["ECO4 remaining"] < 0,
-        0,
-        december_figures["ECO4 remaining"]
-    )
-    december_figures["ECO4 remaining"].sum()
+    # asset_list_ha16 = loader.data["HA16"]["asset_list"].copy()
+    # ha_16_need_ciga = asset_list_ha16[
+    #     asset_list_ha16["ECO Eligibility"].str.contains("subject to ciga")
+    # ]
+    # completed_cigas = loader.data["HA16"]["ciga_list"].copy()
+    # # Store the results
+    # ha_16_need_ciga.to_csv("ha16_need_ciga.csv")
+    # completed_cigas.to_csv("ha16_completed_cigas.csv")
+    #
+    # # Adhoc - look at the current pipeline and identify how many dormant, CIGA dependent properties there are for
+    # # live projects
+    #
+    # # Read excel
+    # orderbook_filepath = "local_data/ha_data/Warmfront HA client order book overview_20240129.xlsx"
+    # orderbook_workbook = openpyxl.load_workbook(orderbook_filepath)
+    # orderbook_sheet = orderbook_workbook["Contractual Info"]
+    # orderbook_colnames = [cell.value for cell in orderbook_sheet[1]]
+    #
+    # rows = []
+    # for row in orderbook_sheet.iter_rows(min_row=2, values_only=False):
+    #     row_data = [cell.value for cell in row]  # This will get you the cell values
+    #     rows.append(row_data)
+    #
+    # orderbook = pd.DataFrame(rows, columns=orderbook_colnames)
+    # live_orderbook = orderbook[orderbook["Live, New, or Historic?"] == "LIVE"].copy()
+    # live_orderbook['Redacted HA'] = live_orderbook['Redacted HA'].str.replace(" ", "")
+    #
+    # dormant_properties = []
+    # missed_has = []
+    # for _, customer in live_orderbook.iterrows():
+    #     if customer['Redacted HA'] not in loader.data.keys():
+    #         missed_has.append(customer['Redacted HA'])
+    #         continue
+    #     asset_list = loader.data[customer['Redacted HA']]["asset_list"].copy()
+    #     survey_list = loader.data[customer['Redacted HA']]["survey_list"].copy()
+    #     # Remove sold
+    #     if not survey_list.empty:
+    #         survey_list = survey_list[~pd.isnull(survey_list["asset_list_row_id"])]
+    #         asset_list = asset_list.merge(
+    #             survey_list[["asset_list_row_id", "installation_status"]],
+    #             how="left",
+    #             on="asset_list_row_id"
+    #         )
+    #         # Anything that has an installation has gone to installation, and therefore is not remaining
+    #         asset_list = asset_list[pd.isnull(asset_list["installation_status"])]
+    #         asset_list = asset_list.drop(columns=["installation_status"])
+    #
+    #     # We pull out the properties that need a CIGA check
+    #     need_ciga = asset_list[asset_list["ECO Eligibility"] == "eco4 (subject to ciga)"]
+    #     need_archetype = asset_list[asset_list["ECO Eligibility"] == "eco4 (subject to archetype)"]
+    #     need_ciga_and_archetype = asset_list[
+    #         asset_list["ECO Eligibility"] == "eco4 (subject to ciga) (subject to archetype)"
+    #         ]
+    #
+    #     dormant_properties.append(
+    #         {
+    #             "HA Name": customer['Redacted HA'],
+    #             "Need CIGA": need_ciga.shape[0],
+    #             "Need Archetype": need_archetype.shape[0],
+    #             "Need CIGA and Archetype": need_ciga_and_archetype.shape[0]
+    #         }
+    #     )
+    #
+    # dormant_properties = pd.DataFrame(dormant_properties)
+    # totals = dormant_properties.sum()
+    # totals["HA Name"] = "Total"
+    #
+    # dormant_properties = pd.concat([dormant_properties, totals.to_frame().T])
+    # dormant_properties.to_csv("dormant_properties.csv")
+    #
+    # loader.december_figures["ECO4 remaining"].sum()
+    # december_figures = loader.december_figures.copy()
+    # december_figures["ECO4 remaining"] = np.where(
+    #     december_figures["ECO4 remaining"] < 0,
+    #     0,
+    #     december_figures["ECO4 remaining"]
+    # )
+    # december_figures["ECO4 remaining"].sum()

From 42d9821fff7e3e3a5ff41e749cd10d660d74bf18 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 21 May 2024 10:05:53 +0100
Subject: [PATCH 35/58] set up e.on asset list

---
 backend/app/plan/router.py            |   9 ++
 etl/customers/eon/pilot_asset_list.py | 225 ++++++++++++++++++++++++++
 utils/s3.py                           |   6 +-
 3 files changed, 238 insertions(+), 2 deletions(-)
 create mode 100644 etl/customers/eon/pilot_asset_list.py

diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 06d1aadf..ce5577bb 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -618,3 +618,12 @@ async def trigger_plan(body: PlanTriggerRequest):
         session.close()
 
     return Response(status_code=200)
+
+
+@router.post("/mds")
+async def build_mds(body: PlanTriggerRequest):
+    # TODO: This is a placeholder location for the MDS endpoint while it is being assembled
+
+    logger.info("Connecting to db")
+    session = sessionmaker(bind=db_engine)()
+    created_at = datetime.now().isoformat()
diff --git a/etl/customers/eon/pilot_asset_list.py b/etl/customers/eon/pilot_asset_list.py
new file mode 100644
index 00000000..8401fde5
--- /dev/null
+++ b/etl/customers/eon/pilot_asset_list.py
@@ -0,0 +1,225 @@
+import time
+
+import pandas as pd
+
+from utils.s3 import read_excel_from_s3
+from backend.SearchEpc import SearchEpc
+from dotenv import load_dotenv
+import os
+from tqdm import tqdm
+from utils.s3 import save_csv_to_s3
+
+# Read in the .env file in backend
+load_dotenv(dotenv_path="backend/.env")
+EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
+# Stored in my notes
+ORDNANCE_SURVEY_API_KEY = ""
+
+PORTFOLIO_ID = 80
+USER_ID = 8
+
+
+def extract_mds_measures(config):
+    measures = []
+    if not pd.isnull(config["EWI (Trad Const)"]):
+        measures.append({"external_wall_insulation": "EWI (Trad Const)"})
+
+    if not pd.isnull(config["EWI (Non Trad Const)"]):
+        measures.append({"external_wall_insulation": "EWI (Non Trad Const)"})
+
+    if not pd.isnull(config["CWI"]):
+        measures.append({"cavity_wall_insulation": "CWI"})
+
+    if not pd.isnull(config["LI"]):
+        measures.append({"loft_insulation": "LI"})
+
+    if not pd.isnull(config["Party Wall Insu"]):
+        measures.append({"party_wall_insulation": "Party Wall Insu"})
+
+    if not pd.isnull(config["IWI (POA - Prov Sum Only)"]):
+        measures.append({"internal_wall_insulation": "IWI (POA - Prov Sum Only)"})
+
+    if not pd.isnull(config["U/F Insu (Manual install)"]):
+        measures.append({"suspended_floor_insulation": "U/F Insu (Manual install)"})
+
+    if not pd.isnull(config["U/F insu (Qbot)"]):
+        measures.append({"suspended_floor_insulation": "U/F insu (Qbot)"})
+
+    if not pd.isnull(config["Solid floor insl (Out of scope - Prov sum only)"]):
+        measures.append({"solid_floor_insulation": "Solid floor insl (Out of scope - Prov sum only)"})
+
+    if not pd.isnull(config["ASHP Htg"]):
+        measures.append({"air_source_heat_pump": "ASHP Htg"})
+
+    if not pd.isnull(config["GSHP Htg"]):
+        measures.append({"ground_source_heat_pump": "GSHP Htg"})
+
+    if not pd.isnull(config["Shared ground loops"]):
+        measures.append({"shared_ground_loops": "Shared ground loops"})
+
+    if not pd.isnull(config["Communal heat networks"]):
+        measures.append({"communal_heat_networks": "Communal heat networks"})
+
+    if not pd.isnull(config["District heating networks"]):
+        measures.append({"district_heating_networks": "District heating networks"})
+
+    if not pd.isnull(config["Elec Storage Htrs (Out of scope -Prov sum only)"]):
+        measures.append({"electric_storage_heaters": "Elec Storage Htrs (Out of scope -Prov sum only)"})
+
+    if not pd.isnull(config["Low Energy Bulbs"]):
+        measures.append({"low_energy_lighting": "Low Energy Bulbs"})
+
+    if not pd.isnull(config["Cyl Insulation"]):
+        measures.append({"cylinder_insulation": "Cyl Insulation"})
+
+    if not pd.isnull(config["Smart controls"]):
+        measures.append({"smart_controls": "Smart controls"})
+
+    if not pd.isnull(config["Zone controls"]):
+        measures.append({"zone_controls": "Zone controls"})
+
+    if not pd.isnull(config["Upgrade TRV's"]):
+        measures.append({"trvs": "Upgrade TRV's"})
+
+    if not pd.isnull(config["Solar PV"]):
+        measures.append({"solar_pv": "Solar PV"})
+
+    if not pd.isnull(config["Solar Thermal"]):
+        measures.append({"solar_thermal": "Solar Thermal"})
+
+    if not pd.isnull(config["Double Glazing (POA - Prov sum only)"]):
+        measures.append({"double_glazing": "Double Glazing (POA - Prov sum only)"})
+
+    if not pd.isnull(config["Draught Proofing"]):
+        measures.append({"draught_proofing": "Draught Proofing"})
+
+    if not pd.isnull(config["Ventilation upgrade"]):
+        measures.append({"mechanical_ventilation": "Ventilation upgrade"})
+
+    if not pd.isnull(config["Gas Boiler Replacement"]):
+        measures.append({"gas_boiler": "Gas Boiler Replacement"})
+
+    if not pd.isnull(config["Flat roof (Out of scope - prov sum only)"]):
+        measures.append({"flat_roof_insulation": "Flat roof (Out of scope - prov sum only)"})
+
+    if not pd.isnull(config["RIR (POA - Prov sum only)"]):
+        measures.append({"room_in_roof_insulation": "RIR (POA - Prov sum only)"})
+
+    if not pd.isnull(config["EV Charging"]):
+        measures.append({"ev_charging": "EV Charging"})
+
+    if not pd.isnull(config["Battery"]):
+        measures.append({"battery": "Battery"})
+
+    return measures
+
+
+def app():
+    """
+    Create the initial asset list for the E.ON pilot
+    :return:
+    """
+
+    raw_asset_list = read_excel_from_s3(
+        bucket_name="retrofit-datalake-dev",
+        file_key="customers/E.ON/sample SHDF Information MDS Template Vr3.0.xlsx",
+        header_row=11,
+        drop_all_na=False
+    )
+
+    # Keep just the columns we need
+    raw_asset_list_base = raw_asset_list[
+        [
+            "Address", "Postcode", "No Bedrooms"
+        ]
+    ].copy().rename(
+        columns={
+            "Address": "address",
+            "Postcode": "postcode",
+            "No Bedrooms": "n_bedrooms"
+        }
+    )
+
+    # For each property, retrieve the UPRN from the Ordnance Survey API. To do this, I have created a free
+    # trial with Ordnance Survey with my personal account as a temporary solution.
+    # Let's just pull the full EPC data for this
+    asset_list_with_uprn = []
+    for row, property_meta in tqdm(raw_asset_list_base.iterrows(), total=raw_asset_list_base.shape[0]):
+        if row <= 104:
+            continue
+        time.sleep(1.1)
+        searcher = SearchEpc(
+            address1=property_meta["address"],
+            postcode=property_meta["postcode"],
+            auth_token=EPC_AUTH_TOKEN,
+            os_api_key=ORDNANCE_SURVEY_API_KEY,
+            full_address=", ".join([property_meta["address"], property_meta["postcode"]])
+        )
+
+        # Let's just find the UPRN
+        searcher.ordnance_survey_client.get_places_api()
+
+        uprn = searcher.ordnance_survey_client.most_relevant_result["UPRN"]
+
+        # searcher.find_property(skip_os=False)
+
+        asset_list_with_uprn.append(
+            {
+                **property_meta,
+                "uprn": uprn,
+            }
+        )
+
+    # Store this as a backup
+    # import pandas as pd
+    # asset_list_with_uprn_df = pd.DataFrame(asset_list_with_uprn)
+    # asset_list_with_uprn_df.to_csv("eon_asset_list_with_uprn.csv", index=False)
+
+    # Store the asset list and create the portfolio payload
+    asset_list_with_uprn_df = pd.DataFrame(asset_list_with_uprn)
+    asset_list_with_uprn_df["uprn"] = asset_list_with_uprn_df["uprn"].astype(str).astype(int)
+
+    # We now determine which measures we need for each property
+    finalised_asset_list = []
+    for i, config in raw_asset_list.iterrows():
+        asset_config = asset_list_with_uprn_df[
+            (asset_list_with_uprn_df["address"] == config["Address"]) &
+            (asset_list_with_uprn_df["postcode"] == config["Postcode"])
+            ]
+        if asset_config.shape[0] != 1:
+            raise ValueError("Could not find a unique match for the property")
+
+        measures = extract_mds_measures(config)
+
+        finalised_asset_list.append(
+            {
+                "address": config["Address"],
+                "postcode": config["Postcode"],
+                "uprn": asset_config["uprn"].values[0],
+                "n_bedrooms": config["No Bedrooms"],
+                "measures": measures
+            }
+        )
+    finalised_asset_list = pd.DataFrame(finalised_asset_list)
+
+    # Store the asset list in s3
+    filename = f"{USER_ID}/{PORTFOLIO_ID}/pilot.csv"
+    save_csv_to_s3(
+        dataframe=finalised_asset_list,
+        bucket_name="retrofit-plan-inputs-dev",
+        file_name=filename
+    )
+
+    # EPC C portfolio
+    body = {
+        "portfolio_id": str(PORTFOLIO_ID),
+        "housing_type": "Social",
+        "goal": "Increase EPC",
+        "goal_value": "C",
+        "trigger_file_path": filename,
+        "already_installed_file_path": "",
+        "patches_file_path": "",
+        "non_invasive_recommendations_file_path": "",
+        "budget": None,
+    }
+    print(body)
diff --git a/utils/s3.py b/utils/s3.py
index fd5992ce..05482271 100644
--- a/utils/s3.py
+++ b/utils/s3.py
@@ -198,13 +198,14 @@ def read_pickle_from_s3(bucket_name, s3_file_name):
     return data
 
 
-def read_excel_from_s3(bucket_name, file_key, header_row):
+def read_excel_from_s3(bucket_name, file_key, header_row, drop_all_na=True):
     """
     Read an Excel file from an S3 bucket and return it as a pandas DataFrame.
 
     :param bucket_name: Name of the S3 bucket.
     :param file_key: Key of the file (including directory path within the bucket).
     :param header_row: The row number to use as the header (0-indexed).
+    :param drop_all_na: Whether to drop columns where all values are NaN.
     :return: A pandas DataFrame containing the data from the Excel file.
     """
 
@@ -219,7 +220,8 @@ def read_excel_from_s3(bucket_name, file_key, header_row):
     df = pd.read_excel(excel_buffer, header=header_row)
 
     # Drop columns where all values are NaN
-    df.dropna(axis=1, how='all', inplace=True)
+    if drop_all_na:
+        df.dropna(axis=1, how='all', inplace=True)
 
     # Reset index if the first column is just an index or entirely NaN
     df.reset_index(drop=True, inplace=True)

From 1102d5383ef8be854ea1f578aa93de13caf53c2e Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 21 May 2024 10:16:01 +0100
Subject: [PATCH 36/58] assembling input construction

---
 .idea/Model.iml            |   2 +-
 .idea/misc.xml             |   2 +-
 backend/Property.py        |  11 +++-
 backend/app/plan/router.py | 104 +++++++++++++++++++++++++++++++++++++
 4 files changed, 116 insertions(+), 3 deletions(-)

diff --git a/.idea/Model.iml b/.idea/Model.iml
index b0f9c00d..4413bb06 100644
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@@ -7,7 +7,7 @@
       <sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
       <sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
     </content>
-    <orderEntry type="jdk" jdkName="Python 3.10 (model_data)" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Python 3.10 (backend)" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="PyNamespacePackagesService">
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 1122b380..6f308057 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,7 +3,7 @@
   <component name="Black">
     <option name="sdkName" value="Python 3.10 (backend)" />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (model_data)" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (backend)" project-jdk-type="Python SDK" />
   <component name="PythonCompatibilityInspectionAdvertiser">
     <option name="version" value="3" />
   </component>
diff --git a/backend/Property.py b/backend/Property.py
index 2e6cbbb6..3cb8969a 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -61,7 +61,14 @@ class Property:
     n_bedrooms = None
 
     def __init__(
-        self, id, postcode, address, epc_record, already_installed=None, non_invasive_recommendations=None,
+        self,
+        id,
+        postcode,
+        address,
+        epc_record,
+        already_installed=None,
+        non_invasive_recommendations=None,
+        measures=None,
         **kwargs
     ):
 
@@ -85,6 +92,8 @@ class Property:
             ast.literal_eval(non_invasive_recommendations['recommendations']) if
             non_invasive_recommendations else []
         )
+        # This is a list of measures that have been recommended for the property
+        self.measures = ast.literal_eval(measures) if measures else None
 
         self.uprn = epc_record.get("uprn")
         self.full_sap_epc = epc_record.get("full_sap_epc")
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index ce5577bb..70827de2 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -627,3 +627,107 @@ async def build_mds(body: PlanTriggerRequest):
     logger.info("Connecting to db")
     session = sessionmaker(bind=db_engine)()
     created_at = datetime.now().isoformat()
+
+    try:
+        session.begin()
+        logger.info("Getting the inputs")
+        plan_input = read_csv_from_s3(bucket_name=get_settings().PLAN_TRIGGER_BUCKET, filepath=body.trigger_file_path)
+
+        cleaning_data = read_dataframe_from_s3_parquet(
+            bucket_name=get_settings().DATA_BUCKET, file_key="sap_change_model/cleaning_dataset.parquet",
+        )
+
+        input_properties = []
+        for property_id, config in tqdm(enumerate(plan_input)):
+            # We validate each record in the file. If the record is NOT valid, we need to handle this accordingly
+            uprn = config.get("uprn", None)
+            if uprn:
+                uprn = int(float(uprn))
+
+            epc_searcher = SearchEpc(
+                address1=config["address"],
+                postcode=config["postcode"],
+                uprn=uprn,
+                auth_token=get_settings().EPC_AUTH_TOKEN,
+                os_api_key=get_settings().ORDNANCE_SURVEY_API_KEY,
+            )
+            epc_searcher.ordnance_survey_client.built_form = config.get("built_form", None)
+            epc_searcher.ordnance_survey_client.property_type = config.get("property_type", None)
+            # For the moment, our OS API access is unavailable, so we skip and interpolate
+            epc_searcher.find_property(skip_os=True)
+            # Create a record in db
+            # TODO: If we productionise the creation of this mds report, we will need to store this in the db
+            # property_id, is_new = create_property(
+            #     session, body.portfolio_id, epc_searcher.address_clean, epc_searcher.postcode_clean, epc_searcher.uprn
+            # )
+            # if not is_new:
+            #     continue
+            #
+            # create_property_targets(
+            #     session,
+            #     property_id=property_id,
+            #     portfolio_id=body.portfolio_id,
+            #     epc_target=body.goal_value,
+            #     heat_demand_target=None
+            # )
+
+            epc_records = {
+                'original_epc': epc_searcher.newest_epc.copy(),
+                'full_sap_epc': epc_searcher.full_sap_epc.copy(),
+                'old_data': epc_searcher.older_epcs.copy(),
+            }
+
+            # patch = next((
+            #     x for x in patches if (x["address"] == config["address"]) and (x["postcode"] == config["postcode"])
+            # ), {})
+            # epc_records = patch_epc(patch, epc_records)
+
+            prepared_epc = EPCRecord(
+                epc_records=epc_records,
+                run_mode="newdata",
+                cleaning_data=cleaning_data
+            )
+
+            # property_already_installed = next((
+            #     x for x in already_installed if
+            #     (x["address"] == config["address"]) and (x["postcode"] == config["postcode"])
+            # ), {})
+            #
+            # property_non_invasive_recommendations = next((
+            #     x for x in non_invasive_recommendations if
+            #     (x["address"] == config["address"]) and (x["postcode"] == config["postcode"])
+            # ), {})
+
+            measures = config["measures"] if "measures" in config else None
+
+            input_properties.append(
+                Property(
+                    id=property_id,
+                    address=epc_searcher.address_clean,
+                    postcode=epc_searcher.postcode_clean,
+                    epc_record=prepared_epc,
+                    # already_installed=property_already_installed,
+                    # non_invasive_recommendations=property_non_invasive_recommendations,
+                    measures=measures,
+                    **Property.extract_kwargs(config)
+                )
+            )
+
+    except IntegrityError:
+        logger.error("Database integrity error occurred", exc_info=True)
+        session.rollback()
+        return Response(status_code=500, content="Database integrity error.")
+    except OperationalError:
+        logger.error("Database operational error occurred", exc_info=True)
+        session.rollback()
+        return Response(status_code=500, content="Database operational error.")
+    except ValueError:
+        logger.error("Value error - possibly due to malformed data", exc_info=True)
+        session.rollback()
+        return Response(status_code=400, content="Bad request: malformed data.")
+    except Exception as e:  # General exception handling
+        logger.error(f"An error occurred: {e}")
+        session.rollback()
+        return Response(status_code=500, content="An unexpected error occurred.")
+    finally:
+        session.close()

From 5a65032bfecbe25f4efa75c4ffd390b7ddc084b1 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 21 May 2024 10:31:17 +0100
Subject: [PATCH 37/58] Added property type and built form to asset list

---
 .idea/Model.iml                       |  2 +-
 .idea/misc.xml                        |  2 +-
 backend/app/plan/router.py            |  2 +-
 etl/customers/eon/pilot_asset_list.py | 35 ++++++++++++++++++++++++++-
 4 files changed, 37 insertions(+), 4 deletions(-)

diff --git a/.idea/Model.iml b/.idea/Model.iml
index 4413bb06..b0f9c00d 100644
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@@ -7,7 +7,7 @@
       <sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
       <sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
     </content>
-    <orderEntry type="jdk" jdkName="Python 3.10 (backend)" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Python 3.10 (model_data)" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="PyNamespacePackagesService">
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 6f308057..1122b380 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,7 +3,7 @@
   <component name="Black">
     <option name="sdkName" value="Python 3.10 (backend)" />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (backend)" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (model_data)" project-jdk-type="Python SDK" />
   <component name="PythonCompatibilityInspectionAdvertiser">
     <option name="version" value="3" />
   </component>
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 70827de2..33759010 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -638,7 +638,7 @@ async def build_mds(body: PlanTriggerRequest):
         )
 
         input_properties = []
-        for property_id, config in tqdm(enumerate(plan_input)):
+        for property_id, config in tqdm(enumerate(plan_input), total=len(plan_input)):
             # We validate each record in the file. If the record is NOT valid, we need to handle this accordingly
             uprn = config.get("uprn", None)
             if uprn:
diff --git a/etl/customers/eon/pilot_asset_list.py b/etl/customers/eon/pilot_asset_list.py
index 8401fde5..f46ed21b 100644
--- a/etl/customers/eon/pilot_asset_list.py
+++ b/etl/customers/eon/pilot_asset_list.py
@@ -114,6 +114,33 @@ def extract_mds_measures(config):
     return measures
 
 
+def parse_property_type(config):
+    # This should come from the ordnance survey api eventually
+
+    # array(['Detached', 'Semi-detached', 'Bungalow', 'Mid Terrace',
+    #        'End Terrace', 'Top Flat', 'Mid Flat',
+    #        'Low rise flat (1-2 storey)', nan], dtype=object)
+
+    if config["Address"] == "Flat Central Garage":
+        return {"property_type": "Bungalow", "built_form": "Mid-Terrace"}
+
+    if pd.isnull(config["Property Type"]):
+        return {"property_type": None, "built_form": None}
+
+    lookup = {
+        "Detached": {"property_type": "House", "built_form": "Detached"},
+        "Semi-detached": {"property_type": "House", "built_form": "Semi-detached"},
+        "Bungalow": {"property_type": "Bungalow", "built_form": "Detached"},
+        "Mid Terrace": {"property_type": "House", "built_form": "Mid Terrace"},
+        "End Terrace": {"property_type": "House", "built_form": "End Terrace"},
+        "Top Flat": {"property_type": "Flat", "built_form": None},
+        "Mid Flat": {"property_type": "Flat", "built_form": None},
+        "Low rise flat (1-2 storey)": {"property_type": "Flat", "built_form": None},
+    }
+
+    return lookup[config["Property Type"]]
+
+
 def app():
     """
     Create the initial asset list for the E.ON pilot
@@ -174,6 +201,8 @@ def app():
     # import pandas as pd
     # asset_list_with_uprn_df = pd.DataFrame(asset_list_with_uprn)
     # asset_list_with_uprn_df.to_csv("eon_asset_list_with_uprn.csv", index=False)
+    # Read in
+    # asset_list_with_uprn = pd.read_csv("eon_asset_list_with_uprn.csv").to_dict(orient="records")
 
     # Store the asset list and create the portfolio payload
     asset_list_with_uprn_df = pd.DataFrame(asset_list_with_uprn)
@@ -191,13 +220,17 @@ def app():
 
         measures = extract_mds_measures(config)
 
+        # Get the property type
+        pt = parse_property_type(config)
+
         finalised_asset_list.append(
             {
                 "address": config["Address"],
                 "postcode": config["Postcode"],
                 "uprn": asset_config["uprn"].values[0],
                 "n_bedrooms": config["No Bedrooms"],
-                "measures": measures
+                "measures": measures,
+                **pt
             }
         )
     finalised_asset_list = pd.DataFrame(finalised_asset_list)

From 8672ecc1f936d9e4d3d93d476fbc7734af635b3d Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 21 May 2024 10:46:23 +0100
Subject: [PATCH 38/58] Fixed spelling of built form for asset list

---
 etl/customers/eon/pilot_asset_list.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/etl/customers/eon/pilot_asset_list.py b/etl/customers/eon/pilot_asset_list.py
index f46ed21b..5e6efbba 100644
--- a/etl/customers/eon/pilot_asset_list.py
+++ b/etl/customers/eon/pilot_asset_list.py
@@ -131,8 +131,8 @@ def parse_property_type(config):
         "Detached": {"property_type": "House", "built_form": "Detached"},
         "Semi-detached": {"property_type": "House", "built_form": "Semi-detached"},
         "Bungalow": {"property_type": "Bungalow", "built_form": "Detached"},
-        "Mid Terrace": {"property_type": "House", "built_form": "Mid Terrace"},
-        "End Terrace": {"property_type": "House", "built_form": "End Terrace"},
+        "Mid Terrace": {"property_type": "House", "built_form": "Mid-Terrace"},
+        "End Terrace": {"property_type": "House", "built_form": "End-Terrace"},
         "Top Flat": {"property_type": "Flat", "built_form": None},
         "Mid Flat": {"property_type": "Flat", "built_form": None},
         "Low rise flat (1-2 storey)": {"property_type": "Flat", "built_form": None},

From 332393a4fc50ee77395ae9709aaadf8bc299df65 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 21 May 2024 10:49:10 +0100
Subject: [PATCH 39/58] updated extract_kwargs to handle empty string

---
 .idea/Model.iml     | 2 +-
 .idea/misc.xml      | 2 +-
 backend/Property.py | 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/.idea/Model.iml b/.idea/Model.iml
index b0f9c00d..4413bb06 100644
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@@ -7,7 +7,7 @@
       <sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
       <sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
     </content>
-    <orderEntry type="jdk" jdkName="Python 3.10 (model_data)" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Python 3.10 (backend)" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="PyNamespacePackagesService">
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 1122b380..6f308057 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,7 +3,7 @@
   <component name="Black">
     <option name="sdkName" value="Python 3.10 (backend)" />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (model_data)" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (backend)" project-jdk-type="Python SDK" />
   <component name="PythonCompatibilityInspectionAdvertiser">
     <option name="version" value="3" />
   </component>
diff --git a/backend/Property.py b/backend/Property.py
index 3cb8969a..a5918802 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -172,12 +172,12 @@ class Property:
         :return:
         """
         n_bathrooms = kwargs.get("n_bathrooms", None)
-        if n_bathrooms is not None:
+        if n_bathrooms not in [None, ""]:
             # We add on a small value to ensure that the number of bathrooms is rounded up, in case the value is 0.5
             n_bathrooms = int(round(float(n_bathrooms) + 1e-5))
 
         n_bedrooms = kwargs.get("n_bedrooms", None)
-        if n_bedrooms is not None:
+        if n_bedrooms not in [None, ""]:
             n_bedrooms = int(round(float(n_bedrooms) + 1e-5))
 
         return {

From f91cbec883af758d87850cca465f0c3dbb4cd038 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 21 May 2024 11:09:04 +0100
Subject: [PATCH 40/58] set up recommendation structure for mds

---
 backend/app/plan/router.py |  26 +++++++
 recommendations/Mds.py     | 139 +++++++++++++++++++++++++++++++++++++
 2 files changed, 165 insertions(+)
 create mode 100644 recommendations/Mds.py

diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 33759010..7e0deae8 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -35,6 +35,7 @@ from recommendations.optimiser.CostOptimiser import CostOptimiser
 from recommendations.optimiser.GainOptimiser import GainOptimiser
 from recommendations.optimiser.optimiser_functions import prepare_input_measures
 from recommendations.Recommendations import Recommendations
+from recommendations.Mds import Mds
 from utils.logger import setup_logger
 from utils.s3 import read_dataframe_from_s3_parquet, read_csv_from_s3
 from backend.ml_models.Valuation import PropertyValuation
@@ -713,6 +714,31 @@ async def build_mds(body: PlanTriggerRequest):
                 )
             )
 
+        logger.info("Reading in materials and cleaned datasets")
+        materials = get_materials(session)
+        cleaned = get_cleaned()
+
+        uprn_filenames = read_dataframe_from_s3_parquet(
+            bucket_name=get_settings().DATA_BUCKET, file_key="spatial/filename_meta.parquet"
+        )
+        photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket=get_settings().DATA_BUCKET)
+
+        logger.info("Getting spatial data")
+        for p in input_properties:
+            p.get_spatial_data(uprn_filenames)
+
+        logger.info("Getting components and epc recommendations")
+        recommendations = {}
+        recommendations_scoring_data = []
+        representative_recommendations = {}
+
+        for p in tqdm(input_properties):
+            p.get_components(cleaned, photo_supply_lookup, floor_area_decile_thresholds)
+
+            mds = Mds(property_instance=p, materials=materials)
+            mds.build()
+
+
     except IntegrityError:
         logger.error("Database integrity error occurred", exc_info=True)
         session.rollback()
diff --git a/recommendations/Mds.py b/recommendations/Mds.py
new file mode 100644
index 00000000..78a7a510
--- /dev/null
+++ b/recommendations/Mds.py
@@ -0,0 +1,139 @@
+from backend.Property import Property
+from recommendations.FloorRecommendations import FloorRecommendations
+from recommendations.WallRecommendations import WallRecommendations
+from recommendations.RoofRecommendations import RoofRecommendations
+from recommendations.VentilationRecommendations import VentilationRecommendations
+from recommendations.FireplaceRecommendations import FireplaceRecommendations
+from recommendations.LightingRecommendations import LightingRecommendations
+from recommendations.SolarPvRecommendations import SolarPvRecommendations
+from recommendations.WindowsRecommendations import WindowsRecommendations
+from recommendations.HeatingRecommender import HeatingRecommender
+from recommendations.HotwaterRecommendations import HotwaterRecommendations
+from recommendations.SecondaryHeating import SecondaryHeating
+
+
+class Mds:
+    """
+    Handles the contruction of the MDS report
+    """
+
+    def __init__(self, property_instance: Property, materials):
+        self.property_instance = property_instance
+
+        self.floor_recommender = FloorRecommendations(property_instance=property_instance, materials=materials)
+        self.wall_recommender = WallRecommendations(property_instance=property_instance, materials=materials)
+        self.roof_recommender = RoofRecommendations(property_instance=property_instance, materials=materials)
+        self.ventilation_recomender = VentilationRecommendations(
+            property_instance=property_instance, materials=materials
+        )
+        self.fireplace_recommender = FireplaceRecommendations(property_instance=property_instance)
+        self.lighting_recommender = LightingRecommendations(property_instance=property_instance, materials=materials)
+        self.windows_recommender = WindowsRecommendations(property_instance=property_instance, materials=materials)
+        self.solar_recommender = SolarPvRecommendations(property_instance=property_instance)
+        self.heating_recommender = HeatingRecommender(property_instance=property_instance)
+        self.hotwater_recommender = HotwaterRecommendations(property_instance=property_instance)
+        self.secondary_heating_recommender = SecondaryHeating(property_instance=property_instance)
+
+    def build(self):
+        if self.property_instance.measures is None:
+            raise NotImplementedError("No measures in the property - implement me")
+
+        measures = self.property_instance.measures
+
+        measure_config_list = [list(m.keys())[0] for m in measures]
+
+        not_implemented_measures = [
+            "party_wall_insulation",
+            "ground_source_heat_pump",
+            "shared_ground_loops",
+            "communal_heat_networks",
+            "district_heating_networks",
+            "solar_thermal",
+            "draught_proofing",
+            "ev_charging",
+            "battery",
+        ]
+        # Check if we have a not implemented measure
+        if any([m in not_implemented_measures for m in measure_config_list]):
+            raise NotImplementedError("Not implemented measure in the property - implement me")
+
+        if "external_wall_insulation" in measure_config_list:
+            raise Exception("check me out")
+            self.wall_recommender.recommend(phase=0)
+            # TODO: Get just ewi
+            ewi_recommendations = self.wall_recommender.recommendations
+            # TODO: Insert the recommendation identifier into this recommendation
+
+        if "cavity_wall_insulation" in measure_config_list:
+            raise Exception("check me out 2")
+            # TODO: get cwi
+            self.wall_recommender.recommend(phase=0)
+            cwi_recommendations = self.wall_recommender.recommendations
+
+        if "loft_insulation" in measure_config_list:
+            raise Exception("check me out 3")
+            self.roof_recommender.recommend(phase=0)
+
+        if "internal_wall_insulation" in measure_config_list:
+            raise Exception("check me out 4")
+            self.wall_recommender.recommend(phase=0)
+
+        if "suspended_floor_insulation" in measure_config_list:
+            raise Exception("check me out 5")
+            self.floor_recommender.recommend(phase=0)
+
+        if "solid_floor_insulation" in measure_config_list:
+            raise Exception("check me out 6")
+            self.floor_recommender.recommend(phase=0)
+
+        if "air_source_heat_pump" in measure_config_list:
+            raise Exception("check me out 7")
+            self.heating_recommender.recommend(phase=0)
+
+        if "electric_storage_heaters" in measure_config_list:
+            raise Exception("check me out 8")
+            self.heating_recommender.recommend(phase=0)
+
+        if "low_energy_lighting" in measure_config_list:
+            raise Exception("check me out 9")
+            self.lighting_recommender.recommend(phase=0)
+
+        if "cylinder_insulation" in measure_config_list:
+            raise Exception("check me out 10")
+            self.hotwater_recommender.recommend(phase=0)
+
+        if "smart_controls" in measure_config_list:
+            raise Exception("check me out 11")
+            self.heating_recommender.recommend(phase=0)
+
+        if "zone_controls" in measure_config_list:
+            raise Exception("check me out 12")
+            self.heating_recommender.recommend(phase=0)
+
+        if "trvs" in measure_config_list:
+            raise Exception("check me out 13")
+            self.heating_recommender.recommend(phase=0)
+
+        if "solar_pv" in measure_config_list:
+            raise Exception("check me out 14")
+            self.solar_recommender.recommend(phase=0)
+
+        if "double_glazing" in measure_config_list:
+            raise Exception("check me out 15")
+            self.windows_recommender.recommend(phase=0)
+
+        if "mechanical_ventilation" in measure_config_list:
+            raise Exception("check me out 16")
+            self.ventilation_recomender.recommend(phase=0)
+
+        if "gas_boiler" in measure_config_list:
+            raise Exception("check me out 17")
+            self.heating_recommender.recommend(phase=0)
+
+        if "flat_roof_insulation" in measure_config_list:
+            raise Exception("check me out 18")
+            self.roof_recommender.recommend(phase=0)
+
+        if "room_in_roof_insulation" in measure_config_list:
+            raise Exception("check me out 19")
+            self.roof_recommender.recommend(phase=0)

From 8d6085be0f7f3130538449b5f1a3cc2a953220d9 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 21 May 2024 11:49:17 +0100
Subject: [PATCH 41/58] added mds cavity wall insulation

---
 backend/ml_models/Valuation.py         |  4 ++++
 recommendations/HeatingRecommender.py  |  6 +++++-
 recommendations/Mds.py                 | 30 ++++++++++++++++++++------
 recommendations/WallRecommendations.py | 17 +++++++++++++++
 4 files changed, 50 insertions(+), 7 deletions(-)

diff --git a/backend/ml_models/Valuation.py b/backend/ml_models/Valuation.py
index cfd775e7..dd77fb4b 100644
--- a/backend/ml_models/Valuation.py
+++ b/backend/ml_models/Valuation.py
@@ -150,6 +150,10 @@ class PropertyValuation:
 
     ]
 
+    # Additional sources:
+    # https://superhomes.org.uk/wp-content/uploads/2024/05/The-Impact-of-Retrofit-on-Residential-Property-Market
+    # -Values-7-rotated-1.pdf
+
     EPC_BANDS = ["G", "F", "E", "D", "C", "B", "A"]
 
     @classmethod
diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py
index a51803f2..3163f84f 100644
--- a/recommendations/HeatingRecommender.py
+++ b/recommendations/HeatingRecommender.py
@@ -312,7 +312,7 @@ class HeatingRecommender:
 
         return output
 
-    def recommend_hhr_storage_heaters(self, phase, system_change, heating_controls_only):
+    def recommend_hhr_storage_heaters(self, phase, system_change, heating_controls_only, _return=False):
         """
         We will recommend upgrading to a high heat retention storage system, if the current system is not already
         high heat retention storage
@@ -321,6 +321,8 @@ class HeatingRecommender:
         :param system_change: Indicates if we are recommending a different type of heating system, compared to the
         current system
         :param heating_controls_only: Indicates if we should include a recommendation for just heating controls
+        :param _return: Indicates if we should return the recommendations, rather than appending them to the
+                        recommendations list
         :return:
         """
 
@@ -374,6 +376,8 @@ class HeatingRecommender:
             heating_controls_only=heating_controls_only,
             system_change=system_change
         )
+        if _return:
+            return recommendations
 
         self.heating_recommendations.extend(recommendations)
 
diff --git a/recommendations/Mds.py b/recommendations/Mds.py
index 78a7a510..0eeb19a2 100644
--- a/recommendations/Mds.py
+++ b/recommendations/Mds.py
@@ -10,6 +10,7 @@ from recommendations.WindowsRecommendations import WindowsRecommendations
 from recommendations.HeatingRecommender import HeatingRecommender
 from recommendations.HotwaterRecommendations import HotwaterRecommendations
 from recommendations.SecondaryHeating import SecondaryHeating
+from recommendations.Recommendations import Recommendations
 
 
 class Mds:
@@ -57,6 +58,8 @@ class Mds:
         if any([m in not_implemented_measures for m in measure_config_list]):
             raise NotImplementedError("Not implemented measure in the property - implement me")
 
+        mds_recommendations = []
+
         if "external_wall_insulation" in measure_config_list:
             raise Exception("check me out")
             self.wall_recommender.recommend(phase=0)
@@ -65,10 +68,9 @@ class Mds:
             # TODO: Insert the recommendation identifier into this recommendation
 
         if "cavity_wall_insulation" in measure_config_list:
-            raise Exception("check me out 2")
-            # TODO: get cwi
-            self.wall_recommender.recommend(phase=0)
-            cwi_recommendations = self.wall_recommender.recommendations
+            recs = self.wall_recommender.mds_recommend_cavity_wall_insulation(phase=0)
+            recs = self.insert_recommendation_id(recs, measures, "cavity_wall_insulation")
+            mds_recommendations.append(recs)
 
         if "loft_insulation" in measure_config_list:
             raise Exception("check me out 3")
@@ -91,8 +93,11 @@ class Mds:
             self.heating_recommender.recommend(phase=0)
 
         if "electric_storage_heaters" in measure_config_list:
-            raise Exception("check me out 8")
-            self.heating_recommender.recommend(phase=0)
+            recs = self.heating_recommender.recommend_hhr_storage_heaters(
+                phase=0, system_change=True, heating_controls_only=False, _return=True
+            )
+            recs = self.insert_recommendation_id(recs, measures)
+            mds_recommendations.append(recs)
 
         if "low_energy_lighting" in measure_config_list:
             raise Exception("check me out 9")
@@ -137,3 +142,16 @@ class Mds:
         if "room_in_roof_insulation" in measure_config_list:
             raise Exception("check me out 19")
             self.roof_recommender.recommend(phase=0)
+
+        property_representative_recommendations = Recommendations.create_representative_recommendations(
+            mds_recommendations, non_invasive_recommendations=[]
+        )
+
+    @staticmethod
+    def insert_recommendation_id(recommendations, measures, measure_name):
+        # Insert the recommendation identifier into this recommendation
+        measure_config = [m for m in measures if measure_name in m][0]
+        for r in recommendations:
+            r["recommendation_id"] = list(measure_config.values())[0]
+
+        return recommendations
diff --git a/recommendations/WallRecommendations.py b/recommendations/WallRecommendations.py
index 20fc453c..3e38704e 100644
--- a/recommendations/WallRecommendations.py
+++ b/recommendations/WallRecommendations.py
@@ -103,6 +103,23 @@ class WallRecommendations(Definitions):
 
         return True
 
+    def mds_recommend_cavity_wall_insulation(self, phase=None):
+        # Function specifically for cavity wall insulation, for usage in the mds report
+        self.recommendations = []
+        insulation_thickness = self.property.walls["insulation_thickness"]
+
+        u_value = get_wall_u_value(
+            clean_description=self.property.walls["clean_description"],
+            age_band=self.property.age_band,
+            is_granite_or_whinstone=self.property.walls["is_granite_or_whinstone"],
+            is_sandstone_or_limestone=self.property.walls["is_sandstone_or_limestone"],
+        )
+
+        # Test filling cavity
+        self.find_cavity_insulation(u_value, insulation_thickness, phase)
+
+        return self.recommendations
+
     def recommend(self, phase=0):
         # if building built after 1990 + we're able to identify U-value +
         # U-value less than 0.18 and if in or close to a conversation area,

From 1db6509e3638e6cf37c7bfc55403138ac5dda392 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 21 May 2024 11:55:50 +0100
Subject: [PATCH 42/58] Added ashp to mds

---
 recommendations/HeatingRecommender.py | 4 +++-
 recommendations/Mds.py                | 7 +++++--
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py
index 3163f84f..2041f783 100644
--- a/recommendations/HeatingRecommender.py
+++ b/recommendations/HeatingRecommender.py
@@ -103,7 +103,7 @@ class HeatingRecommender:
 
         return
 
-    def recommend_air_source_heat_pump(self, phase, has_cavity_or_loft_recommendations):
+    def recommend_air_source_heat_pump(self, phase, has_cavity_or_loft_recommendations, _return=False):
         """
         This method will implement the recommendation for an air source heat pump
         This is ultimately an overhaul to the heating system and so is recommended as an alternative to other
@@ -200,6 +200,8 @@ class HeatingRecommender:
             **ashp_costs
         }
 
+        if _return:
+            return [ashp_recommendation]
         self.heating_recommendations.append(ashp_recommendation)
 
     @staticmethod
diff --git a/recommendations/Mds.py b/recommendations/Mds.py
index 0eeb19a2..ca6ee3db 100644
--- a/recommendations/Mds.py
+++ b/recommendations/Mds.py
@@ -89,8 +89,11 @@ class Mds:
             self.floor_recommender.recommend(phase=0)
 
         if "air_source_heat_pump" in measure_config_list:
-            raise Exception("check me out 7")
-            self.heating_recommender.recommend(phase=0)
+            recs = self.heating_recommender.recommend_air_source_heat_pump(
+                phase=0, has_cavity_or_loft_recommendations=False, _return=True
+            )
+            recs = self.insert_recommendation_id(recs, measures, "air_source_heat_pump")
+            mds_recommendations.append(recs)
 
         if "electric_storage_heaters" in measure_config_list:
             recs = self.heating_recommender.recommend_hhr_storage_heaters(

From 69981192089e6256df5f4918c6ec244037be4c05 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 21 May 2024 12:10:38 +0100
Subject: [PATCH 43/58] added solar to mds

---
 backend/apis/GoogleSolarApi.py            |  6 ++--
 recommendations/Mds.py                    |  5 +--
 recommendations/SolarPvRecommendations.py | 40 +++++++++++++++++++++++
 3 files changed, 47 insertions(+), 4 deletions(-)

diff --git a/backend/apis/GoogleSolarApi.py b/backend/apis/GoogleSolarApi.py
index 86324c58..205a3560 100644
--- a/backend/apis/GoogleSolarApi.py
+++ b/backend/apis/GoogleSolarApi.py
@@ -11,12 +11,14 @@ EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
 
 # This is for 6 Laura Close, Tintagel, PL34 0EB (same property that Cotswolrd energy used)
 uprn = 100040099104
+# This is for 353A, Hermitage Lane, ME16 9NT (one of the e.on properties)
+uprn = 200000964454
 
 cleaning_data = read_dataframe_from_s3_parquet(
     bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet",
 )
 
-searcher = SearchEpc(address1="6 Laura Close", postcode="PL34 0EB", uprn=uprn, auth_token=EPC_AUTH_TOKEN, os_api_key="")
+searcher = SearchEpc(address1="", postcode="", uprn=uprn, auth_token=EPC_AUTH_TOKEN, os_api_key="")
 
 searcher.find_property(skip_os=True)
 
@@ -80,7 +82,7 @@ solar_potential["panelWidthMeters"]
 
 solar_potential["wholeRoofStats"]
 
-# Copy of response for testing:
+# Copy of response for testing - 6 Laura Close, Tintagel, PL34 0EB
 # {'name': 'buildings/ChIJ2yC6t4KEa0gRh2TIssogI7k', 'center': {'latitude': 50.667375, 'longitude': -4.7416833},
 # 'imageryDate': {'year': 2021, 'month': 7, 'day': 19}, 'regionCode': 'GB', 'solarPotential': {'maxArrayPanelsCount':
 # 39, 'maxArrayAreaMeters2': 76.578636, 'maxSunshineHoursPerYear': 1172.0627, 'carbonOffsetFactorKgPerMwh':
diff --git a/recommendations/Mds.py b/recommendations/Mds.py
index ca6ee3db..ecc8c852 100644
--- a/recommendations/Mds.py
+++ b/recommendations/Mds.py
@@ -123,8 +123,9 @@ class Mds:
             self.heating_recommender.recommend(phase=0)
 
         if "solar_pv" in measure_config_list:
-            raise Exception("check me out 14")
-            self.solar_recommender.recommend(phase=0)
+            recs = self.solar_recommender.mds_recommend(phase=0, solar_pv_percentage=0.5)
+            recs = self.insert_recommendation_id(recs, measures, "solar_pv")
+            mds_recommendations.append(recs)
 
         if "double_glazing" in measure_config_list:
             raise Exception("check me out 15")
diff --git a/recommendations/SolarPvRecommendations.py b/recommendations/SolarPvRecommendations.py
index 58d4b123..14161da3 100644
--- a/recommendations/SolarPvRecommendations.py
+++ b/recommendations/SolarPvRecommendations.py
@@ -35,6 +35,46 @@ class SolarPvRecommendations:
 
         return trimmed_list
 
+    def mds_recommend(self, phase=None, solar_pv_percentage=0.5):
+        # For specific usage within the mds report
+
+        solar_pv_roof_area = self.property.get_solar_pv_roof_area(solar_pv_percentage)
+
+        number_solar_panels = np.floor(solar_pv_roof_area / self.SOLAR_PANEL_AREA)
+        solar_panel_wattage = number_solar_panels * self.SOLAR_PANEL_WATTAGE
+
+        solar_panel_wattage = np.clip(
+            a=solar_panel_wattage, a_min=self.MIN_SYSTEM_WATTAGE, a_max=self.MAX_SYSTEM_WATTAGE
+        )
+
+        # We now have a property which is potentially suitable for solar PV
+        roof_coverage_percent = round(solar_pv_percentage * 100)
+        # Given the wattage, we estimate the cost of the solar PV system. This is based on the MCS database
+        # of solar PV installations
+        cost_result = self.costs.solar_pv(wattage=solar_panel_wattage, has_battery=False)
+        kw = np.floor(solar_panel_wattage / 100) / 10
+
+        description = (f"Install a {kw} kilowatt-peak (kWp) solar photovoltaic (PV) p"
+                       f"anel system on {round(roof_coverage_percent)}% the roof.")
+
+        return [
+            {
+                "phase": phase,
+                "parts": [],
+                "type": "solar_pv",
+                "description": description,
+                "starting_u_value": None,
+                "new_u_value": None,
+                "sap_points": None,
+                "already_installed": False,
+                **cost_result,
+                # This is required for simulating the SAP impact. solar_pv_percentage is between 0 & 1 so we scale
+                # back up here
+                "photo_supply": roof_coverage_percent,
+                "has_battery": False
+            }
+        ]
+
     def recommend(self, phase):
         """
         We check if a property is potentially suitable for solar PV based on the following criteria:

From e95a2d0113c2613621b9bd8af2debbc5e4782518 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 21 May 2024 12:22:47 +0100
Subject: [PATCH 44/58] Added ewi and loft insulation

---
 recommendations/Mds.py                 | 19 ++++++++++++-------
 recommendations/RoofRecommendations.py | 20 ++++++++++++++++++++
 recommendations/WallRecommendations.py | 22 ++++++++++++++++++++++
 3 files changed, 54 insertions(+), 7 deletions(-)

diff --git a/recommendations/Mds.py b/recommendations/Mds.py
index ecc8c852..d371f2ec 100644
--- a/recommendations/Mds.py
+++ b/recommendations/Mds.py
@@ -60,12 +60,14 @@ class Mds:
 
         mds_recommendations = []
 
+        # TODO: Could use a decarator to reduce the boilerplate code - insert_recommendation_id and then the append
+
         if "external_wall_insulation" in measure_config_list:
-            raise Exception("check me out")
-            self.wall_recommender.recommend(phase=0)
-            # TODO: Get just ewi
-            ewi_recommendations = self.wall_recommender.recommendations
-            # TODO: Insert the recommendation identifier into this recommendation
+            recs = self.wall_recommender.mds_recommend_ewi(phase=0)
+            if not recs:
+                raise Exception("No recommendations for external wall insulation")
+            recs = self.insert_recommendation_id(recs, measures, "external_wall_insulation")
+            mds_recommendations.append(recs)
 
         if "cavity_wall_insulation" in measure_config_list:
             recs = self.wall_recommender.mds_recommend_cavity_wall_insulation(phase=0)
@@ -73,8 +75,11 @@ class Mds:
             mds_recommendations.append(recs)
 
         if "loft_insulation" in measure_config_list:
-            raise Exception("check me out 3")
-            self.roof_recommender.recommend(phase=0)
+            recs = self.roof_recommender.mds_loft_insulation(phase=0)
+            if not recs:
+                raise Exception("No recommendations for loft insulation")
+            recs = self.insert_recommendation_id(recs, measures, "loft_insulation")
+            mds_recommendations.append(recs)
 
         if "internal_wall_insulation" in measure_config_list:
             raise Exception("check me out 4")
diff --git a/recommendations/RoofRecommendations.py b/recommendations/RoofRecommendations.py
index dc5ee7db..5424ab57 100644
--- a/recommendations/RoofRecommendations.py
+++ b/recommendations/RoofRecommendations.py
@@ -54,6 +54,26 @@ class RoofRecommendations:
             ]
         ]
 
+    def mds_loft_insulation(self, phase):
+        """
+        For usages within the mds report
+        :param phase:
+        :return:
+        """
+        self.recommendations = []
+
+        insulation_thickness = convert_thickness_to_numeric(
+            self.property.roof["insulation_thickness"],
+            self.property.roof["is_pitched"],
+            self.property.roof["is_flat"]
+        )
+
+        u_value = get_roof_u_value(**{**self.property.roof, "age_band": self.property.age_band})
+
+        self.recommend_roof_insulation(u_value, insulation_thickness, self.property.roof, phase)
+
+        return self.recommendations
+
     def recommend(self, phase):
 
         if self.property.roof["has_dwelling_above"]:
diff --git a/recommendations/WallRecommendations.py b/recommendations/WallRecommendations.py
index 3e38704e..67fadd8e 100644
--- a/recommendations/WallRecommendations.py
+++ b/recommendations/WallRecommendations.py
@@ -120,6 +120,28 @@ class WallRecommendations(Definitions):
 
         return self.recommendations
 
+    def mds_recommend_ewi(self, phase=None):
+        # Function specifically for external wall insulation, for usage in the mds report
+        self.recommendations = []
+        insulation_thickness = self.property.walls["insulation_thickness"]
+
+        u_value = get_wall_u_value(
+            clean_description=self.property.walls["clean_description"],
+            age_band=self.property.age_band,
+            is_granite_or_whinstone=self.property.walls["is_granite_or_whinstone"],
+            is_sandstone_or_limestone=self.property.walls["is_sandstone_or_limestone"],
+        )
+
+        # EWI
+        ewi_recommendations = self._find_insulation(
+            u_value=u_value,
+            insulation_materials=pd.DataFrame(self.external_wall_insulation_materials),
+            non_insulation_materials=self.external_wall_non_insulation_materials,
+            phase=phase
+        )
+
+        return ewi_recommendations
+
     def recommend(self, phase=0):
         # if building built after 1990 + we're able to identify U-value +
         # U-value less than 0.18 and if in or close to a conversation area,

From 0d1e49f1f336dd31910bdc250b6fc1bb3e4990c9 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 21 May 2024 14:12:03 +0100
Subject: [PATCH 45/58] allow costing for flats ewi

---
 backend/app/plan/router.py             |  5 ++++-
 recommendations/Costs.py               |  4 +---
 recommendations/Mds.py                 | 19 +++++++++++++------
 recommendations/WallRecommendations.py |  1 -
 4 files changed, 18 insertions(+), 11 deletions(-)

diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 7e0deae8..e11dded8 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -736,7 +736,10 @@ async def build_mds(body: PlanTriggerRequest):
             p.get_components(cleaned, photo_supply_lookup, floor_area_decile_thresholds)
 
             mds = Mds(property_instance=p, materials=materials)
-            mds.build()
+            property_representative_recommendations, errors = mds.build()
+
+            if errors:
+                logger.info("Errors occurred during MDS build")
 
 
     except IntegrityError:
diff --git a/recommendations/Costs.py b/recommendations/Costs.py
index fd3c1692..03190727 100644
--- a/recommendations/Costs.py
+++ b/recommendations/Costs.py
@@ -626,12 +626,10 @@ class Costs:
                 preliminaries_rate = self.EWI_SCAFFOLDING_PRELIMINARIES
             else:
                 preliminaries_rate = self.EWI_NO_SCAFFOLDING_PRELIMINARIES
-        elif self.property.data["property-type"] == "Maisonette":
+        elif self.property.data["property-type"] in ["Maisonette", "Flat"]:
             preliminaries_rate = self.EWI_SCAFFOLDING_PRELIMINARIES
         elif self.property.data["property-type"] == "Bungalow":
             preliminaries_rate = self.EWI_NO_SCAFFOLDING_PRELIMINARIES
-        else:
-            raise ValueError("Unsupported property type - haven't handled flats")
 
         demolition_data = [x for x in non_insulation_materials if x["type"] == "ewi_wall_demolition"]
         preparation_data = [x for x in non_insulation_materials if x["type"] == "ewi_wall_preparation"]
diff --git a/recommendations/Mds.py b/recommendations/Mds.py
index d371f2ec..7453e5e9 100644
--- a/recommendations/Mds.py
+++ b/recommendations/Mds.py
@@ -59,6 +59,7 @@ class Mds:
             raise NotImplementedError("Not implemented measure in the property - implement me")
 
         mds_recommendations = []
+        errors = []
 
         # TODO: Could use a decarator to reduce the boilerplate code - insert_recommendation_id and then the append
 
@@ -75,11 +76,15 @@ class Mds:
             mds_recommendations.append(recs)
 
         if "loft_insulation" in measure_config_list:
-            recs = self.roof_recommender.mds_loft_insulation(phase=0)
-            if not recs:
-                raise Exception("No recommendations for loft insulation")
-            recs = self.insert_recommendation_id(recs, measures, "loft_insulation")
-            mds_recommendations.append(recs)
+            # Check if the roof is suitable for loft insulation
+            if self.property_instance.roof['is_roof_room']:
+                errors.append("Roof is a room")
+            else:
+                recs = self.roof_recommender.mds_loft_insulation(phase=0)
+                if not recs:
+                    raise Exception("No recommendations for loft insulation")
+                recs = self.insert_recommendation_id(recs, measures, "loft_insulation")
+                mds_recommendations.append(recs)
 
         if "internal_wall_insulation" in measure_config_list:
             raise Exception("check me out 4")
@@ -104,7 +109,7 @@ class Mds:
             recs = self.heating_recommender.recommend_hhr_storage_heaters(
                 phase=0, system_change=True, heating_controls_only=False, _return=True
             )
-            recs = self.insert_recommendation_id(recs, measures)
+            recs = self.insert_recommendation_id(recs, measures, "electric_storage_heaters")
             mds_recommendations.append(recs)
 
         if "low_energy_lighting" in measure_config_list:
@@ -156,6 +161,8 @@ class Mds:
             mds_recommendations, non_invasive_recommendations=[]
         )
 
+        return property_representative_recommendations, errors
+
     @staticmethod
     def insert_recommendation_id(recommendations, measures, measure_name):
         # Insert the recommendation identifier into this recommendation
diff --git a/recommendations/WallRecommendations.py b/recommendations/WallRecommendations.py
index 67fadd8e..71996e5c 100644
--- a/recommendations/WallRecommendations.py
+++ b/recommendations/WallRecommendations.py
@@ -123,7 +123,6 @@ class WallRecommendations(Definitions):
     def mds_recommend_ewi(self, phase=None):
         # Function specifically for external wall insulation, for usage in the mds report
         self.recommendations = []
-        insulation_thickness = self.property.walls["insulation_thickness"]
 
         u_value = get_wall_u_value(
             clean_description=self.property.walls["clean_description"],

From 340e8118eb9a92819ce48a06fe65c07c40f3ef30 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 21 May 2024 14:40:57 +0100
Subject: [PATCH 46/58] adding in u-value extraction and handling cases of
 setting energy efficiency more elegantly

---
 backend/Property.py                    | 14 ++++++++------
 recommendations/WallRecommendations.py | 15 +++++++++------
 2 files changed, 17 insertions(+), 12 deletions(-)

diff --git a/backend/Property.py b/backend/Property.py
index a5918802..f2e6590c 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -351,7 +351,10 @@ class Property:
                 # Setting the insulation thickness here to above average should be tested further because we
                 # don't see a high volume of instances for this
                 output["walls_insulation_thickness_ending"] = "average"
-                output["walls_energy_eff_ending"] = "Good"
+                # In some edge cases, or when running the MDS report, the energy efficiency may already be
+                # "Good" or "Very Good", so we only set it to "Good" when it is neither of those
+                if output["walls_energy_eff_ending"] not in ["Good", "Very Good"]:
+                    output["walls_energy_eff_ending"] = "Good"
 
                 # Note: often when the wall is insulatied, the internal/external insulation is not noted so we should
                 #       test the impact of using these booleans
@@ -384,11 +387,8 @@ class Property:
                         "Have more than 1 floor insulation part - handle this case"
                     )
 
-                # output["floor_thermal_transmittance_ending"] = recommendation["new_u_value"]
                 # We don't really see above average for this in the training data
                 output["floor_insulation_thickness_ending"] = "average"
-                # This is rarely ever populated in the training data
-                # output["floor_energy_eff_ending"] = "Good"
             else:
                 if output["floor_thermal_transmittance_ending"] is None:
                     raise ValueError("We should not have a None value for the u value")
@@ -439,7 +439,8 @@ class Property:
                     if proposed_depth >= 270:
                         output["roof_energy_eff_ending"] = "Very Good"
                     else:
-                        output["roof_energy_eff_ending"] = "Good"
+                        if output["roof_energy_eff_ending"] not in ["Good", "Very Good"]:
+                            output["roof_energy_eff_ending"] = "Good"
                 else:
                     output["roof_energy_eff_ending"] = "Very Good"
             else:
@@ -459,7 +460,8 @@ class Property:
 
             if recommendation["type"] == "windows_glazing":
                 output["multi_glaze_proportion_ending"] = 100
-                output["windows_energy_eff_ending"] = "Average"
+                if output["windows_energy_eff_ending"] not in ["Average", "Good", "Very Good"]:
+                    output["windows_energy_eff_ending"] = "Average"
 
                 is_secondary_glazing = recommendation["is_secondary_glazing"]
 
diff --git a/recommendations/WallRecommendations.py b/recommendations/WallRecommendations.py
index 71996e5c..6cdfbfdd 100644
--- a/recommendations/WallRecommendations.py
+++ b/recommendations/WallRecommendations.py
@@ -124,12 +124,15 @@ class WallRecommendations(Definitions):
         # Function specifically for external wall insulation, for usage in the mds report
         self.recommendations = []
 
-        u_value = get_wall_u_value(
-            clean_description=self.property.walls["clean_description"],
-            age_band=self.property.age_band,
-            is_granite_or_whinstone=self.property.walls["is_granite_or_whinstone"],
-            is_sandstone_or_limestone=self.property.walls["is_sandstone_or_limestone"],
-        )
+        u_value = self.property.walls["thermal_transmittance"]
+
+        if u_value is None:
+            u_value = get_wall_u_value(
+                clean_description=self.property.walls["clean_description"],
+                age_band=self.property.age_band,
+                is_granite_or_whinstone=self.property.walls["is_granite_or_whinstone"],
+                is_sandstone_or_limestone=self.property.walls["is_sandstone_or_limestone"],
+            )
 
         # EWI
         ewi_recommendations = self._find_insulation(

From f9e9cb59a69841a953563e342fa9990c8e4f7640 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 21 May 2024 19:08:44 +0100
Subject: [PATCH 47/58] adding scoring data

---
 backend/Property.py        | 23 +++++++++++++++++++++++
 backend/app/plan/router.py | 21 +++++++++++++++++----
 2 files changed, 40 insertions(+), 4 deletions(-)

diff --git a/backend/Property.py b/backend/Property.py
index f2e6590c..94861a3f 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -230,6 +230,29 @@ class Property:
 
         # self.base_difference_record.df
 
+    def simulate_all_representative_recommendations(
+        self, property_representative_recommendations,
+    ):
+        """
+        Simulate the impact of all of the representative recommendations on the property at once,
+        for use within the MDS report
+        :return: The scoring dict built by create_recommendation_scoring_data
+        """
+
+        recommendation_record = self.base_difference_record.df.to_dict("records")[
+            0
+        ].copy()
+
+        scoring_dict = self.create_recommendation_scoring_data(
+            property_id=self.id,
+            recommendation_record=recommendation_record,
+            recommendations=property_representative_recommendations,
+            primary_recommendation_id=self.id,
+            non_invasive_recommendations=self.non_invasive_recommendations,
+        )
+
+        return scoring_dict
+
     def adjust_difference_record_with_recommendations(
         self, property_recommendations, property_representative_recommendations
     ):
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index e11dded8..1bbd6a7d 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -641,9 +641,14 @@ async def build_mds(body: PlanTriggerRequest):
         input_properties = []
         for property_id, config in tqdm(enumerate(plan_input), total=len(plan_input)):
             # We validate each record in the file. If the record is NOT valid, we need to handle this accordingly
-            uprn = config.get("uprn", None)
-            if uprn:
-                uprn = int(float(uprn))
+
+            if config["address"] in ["Flat Over 20 Holborough Road", "Flat above 7 Malling Road"]:
+                print("TEMP - not using uprn")
+                uprn = None
+            else:
+                uprn = config.get("uprn", None)
+                if uprn:
+                    uprn = int(float(uprn))
 
             epc_searcher = SearchEpc(
                 address1=config["address"],
@@ -728,7 +733,6 @@ async def build_mds(body: PlanTriggerRequest):
             p.get_spatial_data(uprn_filenames)
 
         logger.info("Getting components and epc recommendations")
-        recommendations = {}
         recommendations_scoring_data = []
         representative_recommendations = {}
 
@@ -741,6 +745,15 @@ async def build_mds(body: PlanTriggerRequest):
             if errors:
                 logger.info("Errors occurred during MDS build")
 
+            representative_recommendations[p.id] = property_representative_recommendations
+
+            # Build the scoring data
+            p.create_base_difference_epc_record(cleaned_lookup=cleaned)
+            p.simulate_all_representative_recommendations(property_representative_recommendations)
+
+            recommendations_scoring_data.extend(p.recommendations_scoring_data)
+
+
 
     except IntegrityError:
         logger.error("Database integrity error occurred", exc_info=True)

From c95c4aeb927ec4289531a9a20ab1647a5b8f22de Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 22 May 2024 19:18:48 +0100
Subject: [PATCH 48/58] placeholder - setting as_built to false when scoring
 wall insulation

---
 backend/Property.py                    |   7 +-
 backend/app/plan/router.py             | 148 +++++++++++++++++++++++--
 backend/ml_models/AnnualBillSavings.py |   9 +-
 backend/ml_models/api.py               |  13 ++-
 etl/customers/eon/pilot_asset_list.py  |  15 ++-
 recommendations/RoofRecommendations.py |   3 +
 6 files changed, 177 insertions(+), 18 deletions(-)

diff --git a/backend/Property.py b/backend/Property.py
index 94861a3f..a1972b5b 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -379,6 +379,9 @@ class Property:
                 if output["walls_energy_eff_ending"] not in ["Good", "Very Good"]:
                     output["walls_energy_eff_ending"] = "Good"
 
+                # TODO(TEMP): should this key carry the "_ending" suffix like the other output keys set here?
+                output["is_as_built"] = False
+
                 # Note: often when the wall is insulatied, the internal/external insulation is not noted so we should
                 #       test the impact of using these booleans
                 if recommendation["type"] == "external_wall_insulation":
@@ -450,14 +453,14 @@ class Property:
                     400,
                 ]
 
-                proposed_depth = int(parts[0]["depth"])
+                proposed_depth = recommendation["new_thickness"]
                 if proposed_depth not in valid_numeric_values:
                     # Take the nearest value for scoring
                     proposed_depth = min(
                         valid_numeric_values, key=lambda x: abs(x - proposed_depth)
                     )
 
-                output["roof_insulation_thickness_ending"] = str(proposed_depth)
+                output["roof_insulation_thickness_ending"] = str(int(proposed_depth))
                 if recommendation["type"] == "loft_insulation":
                     if proposed_depth >= 270:
                         output["roof_energy_eff_ending"] = "Very Good"
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 1bbd6a7d..c6a26fcf 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -641,14 +641,10 @@ async def build_mds(body: PlanTriggerRequest):
         input_properties = []
         for property_id, config in tqdm(enumerate(plan_input), total=len(plan_input)):
             # We validate each record in the file. If the record is NOT valid, we need to handle this accordingly
-
-            if config["address"] in ["Flat Over 20 Holborough Road", "Flat above 7 Malling Road"]:
-                print("TEMP - not using uprn")
-                uprn = None
-            else:
-                uprn = config.get("uprn", None)
-                if uprn:
-                    uprn = int(float(uprn))
+            uprn = config.get("uprn", None)
+            uprn = None if uprn == "" else uprn
+            if uprn:
+                uprn = int(float(uprn))
 
             epc_searcher = SearchEpc(
                 address1=config["address"],
@@ -661,6 +657,12 @@ async def build_mds(body: PlanTriggerRequest):
             epc_searcher.ordnance_survey_client.property_type = config.get("property_type", None)
             # For the moment, our OS API access is unavailable, so we skip and interpolate
             epc_searcher.find_property(skip_os=True)
+
+            if config["address"] == "35b High Street":
+                print("Performing temporary patch")
+                epc_searcher.newest_epc["uprn"] = 10002911892
+                epc_searcher.full_sap_epc["uprn"] = 10002911892
+
             # Create a record in db
             # TODO: If we productionise the creation of this mds report, we will need to store this in the db
             # property_id, is_new = create_property(
@@ -729,7 +731,7 @@ async def build_mds(body: PlanTriggerRequest):
         photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket=get_settings().DATA_BUCKET)
 
         logger.info("Getting spatial data")
-        for p in input_properties:
+        for p in tqdm(input_properties):
             p.get_spatial_data(uprn_filenames)
 
         logger.info("Getting components and epc recommendations")
@@ -749,10 +751,134 @@ async def build_mds(body: PlanTriggerRequest):
 
             # Build the scoring data
             p.create_base_difference_epc_record(cleaned_lookup=cleaned)
-            p.simulate_all_representative_recommendations(property_representative_recommendations)
+            recommendations_scoring_data.append(
+                p.simulate_all_representative_recommendations(property_representative_recommendations)
+            )
 
-            recommendations_scoring_data.extend(p.recommendations_scoring_data)
+        logger.info("Preparing data for scoring in sap change api")
+        recommendations_scoring_data = pd.DataFrame(recommendations_scoring_data)
 
+        recommendations_scoring_data = recommendations_scoring_data.drop(
+            columns=["rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending",
+                     "carbon_ending"]
+        )
+
+        model_api = ModelApi(portfolio_id=body.portfolio_id, timestamp=created_at)
+
+        all_predictions = {
+            "sap_change_predictions": pd.DataFrame(),
+            "heat_demand_predictions": pd.DataFrame(),
+            "carbon_change_predictions": pd.DataFrame()
+        }
+        to_loop_over = range(0, recommendations_scoring_data.shape[0], SCORING_BATCH_SIZE)
+        for chunk in tqdm(to_loop_over, total=len(to_loop_over)):
+            predictions_dict = model_api.predict_all(
+                df=recommendations_scoring_data.iloc[chunk:chunk + SCORING_BATCH_SIZE],
+                bucket=get_settings().DATA_BUCKET,
+                prediction_buckets={
+                    "sap_change_predictions": get_settings().SAP_PREDICTIONS_BUCKET,
+                    "heat_demand_predictions": get_settings().HEAT_PREDICTIONS_BUCKET,
+                    "carbon_change_predictions": get_settings().CARBON_PREDICTIONS_BUCKET
+                }
+            )
+
+            # Append the predictions to the predictions dictionary
+            for key, scored in predictions_dict.items():
+                all_predictions[key] = pd.concat([all_predictions[key], scored])
+
+        # We now produce a table of results for the mds report
+
+        # TODO: TEMP
+        for p in plan_input:
+            if p["uprn"]:
+                p["uprn"] = str(int(float(p["uprn"])))
+
+        results = []
+        for p in input_properties:
+            measures = p.measures
+            property_recommendations = [r['type'] for r in representative_recommendations[p.id]]
+
+            # TODO: Check high heat retention storage heaters - it looks like controls have been excluded!
+
+            sap_prediction = all_predictions["sap_change_predictions"][
+                all_predictions["sap_change_predictions"]["property_id"] == str(p.id)
+                ]
+
+            heat_demand_prediction = all_predictions["heat_demand_predictions"][
+                all_predictions["heat_demand_predictions"]["property_id"] == str(p.id)
+                ]
+
+            carbon_prediction = all_predictions["carbon_change_predictions"][
+                all_predictions["carbon_change_predictions"]["property_id"] == str(p.id)
+                ]
+
+            # Get a before and after for SAP, heat demand, CO2 and also calculate energy bill and energy savings
+            sap_before = int(p.data["current-energy-efficiency"])
+            sap_after = sap_prediction["predictions"].values[0] if measures else sap_before
+
+            epc_before = p.data["current-energy-rating"]
+            epc_after = sap_to_epc(sap_after) if measures else epc_before
+
+            heat_demand_before = p.data["energy-consumption-current"]
+            heat_demand_after = heat_demand_prediction["predictions"].values[0] if measures else heat_demand_before
+
+            carbon_before = p.data["co2-emissions-current"]
+            carbon_after = carbon_prediction["predictions"].values[0] if measures else carbon_before
+
+            # Estimate bill savings
+
+            from backend.ml_models.AnnualBillSavings import AnnualBillSavings
+            current_adjusted_energy = AnnualBillSavings.adjust_energy_to_metered(
+                epc_energy_consumption=heat_demand_before * p.floor_area,
+                current_epc_rating=epc_before,
+            )
+
+            # TODO: This isn't quite right as this is based on EVERY possible measure, not just the ones that are
+            #       actually implemented
+            expected_adjusted_energy = AnnualBillSavings.adjust_energy_to_metered(
+                epc_energy_consumption=heat_demand_after * p.floor_area,
+                current_epc_rating=epc_before,
+            )
+
+            # TODO: We should determine if the home is gas & electricity or just electricity
+            current_energy_bill = AnnualBillSavings.calculate_annual_bill(
+                current_adjusted_energy,
+            )
+            expected_energy_bill = AnnualBillSavings.calculate_annual_bill(
+                expected_adjusted_energy,
+            )
+
+            bill_savings = current_energy_bill - expected_energy_bill
+            energy_savings = current_adjusted_energy - expected_adjusted_energy
+
+            config = [c for c in plan_input if c["uprn"] == str(p.uprn)]
+            if not config:
+                config = {"address": None, "postcode": None}
+            else:
+                config = config[0]
+
+            to_append = {
+                "config_address": config["address"],
+                "config_postcode": config["postcode"],
+                "address": p.address,
+                "postcode": p.postcode,
+                "measures": measures,
+                "property_recommendations": property_recommendations,
+                "year_of_epc": p.data['lodgement-date'],
+                "sap_before": sap_before,
+                "sap_after": sap_after,
+                "epc_before": epc_before,
+                "epc_after": epc_after,
+                "heat_demand_before": heat_demand_before,
+                "heat_demand_after": heat_demand_after,
+                "carbon_before": carbon_before,
+                "carbon_after": carbon_after,
+                "bill_savings": bill_savings,
+                "energy_savings": energy_savings,
+            }
+            results.append(to_append)
+
+        results = pd.DataFrame(results)
 
 
     except IntegrityError:
diff --git a/backend/ml_models/AnnualBillSavings.py b/backend/ml_models/AnnualBillSavings.py
index 99d67126..b92077e4 100644
--- a/backend/ml_models/AnnualBillSavings.py
+++ b/backend/ml_models/AnnualBillSavings.py
@@ -43,15 +43,20 @@ class AnnualBillSavings:
         return cls.ELECTRICITY_PRICE_CAP * kwh
 
     @classmethod
-    def calculate_annual_bill(cls, kwh):
+    def calculate_annual_bill(cls, kwh, mains_gas=True):
         """
         This method will estimate the total annual bill for a property
         It assumed gas & electricity are used
         :param kwh: The total kwh consumption
+        :param mains_gas: Whether the property uses mains gas
         :return: An estimate for annual bill
         """
 
-        return cls.PRICE_FACTOR * kwh + (cls.DAILY_STANDARD_CHARGE_GAS + cls.DAILY_STANDARD_CHARGE_ELECTRICITY * 365)
+        if mains_gas:
+            # Annualise both daily standing charges: sum them first, then multiply by 365
+            return cls.PRICE_FACTOR * kwh + (cls.DAILY_STANDARD_CHARGE_GAS + cls.DAILY_STANDARD_CHARGE_ELECTRICITY) * 365
+
+        return cls.ELECTRICITY_PRICE_CAP * kwh + (cls.DAILY_STANDARD_CHARGE_ELECTRICITY * 365)
 
     @classmethod
     def adjust_energy_to_metered(cls, epc_energy_consumption, current_epc_rating):
diff --git a/backend/ml_models/api.py b/backend/ml_models/api.py
index bdc7c178..a2024dd7 100644
--- a/backend/ml_models/api.py
+++ b/backend/ml_models/api.py
@@ -99,6 +99,13 @@ class ModelApi:
             # depending on how you want to handle errors in your application
             return None
 
+    @staticmethod
+    def extract_phase(recommendation_id):
+        """Return the whole integer that follows "phase=" in recommendation_id, or None if there isn't one"""
+        tail = recommendation_id.partition('phase=')[2]
+        digits = tail[:len(tail) - len(tail.lstrip('0123456789'))]
+        return int(digits) if digits else None
+
     def predict_all(self, df, bucket, prediction_buckets) -> dict:
 
         """
@@ -135,9 +142,11 @@ class ModelApi:
             # To grab the phase, we pull the integer after "phase=" in the recommendation_id. We can do this with a
             # string split on phase= and then grab the second element of the resulting list. We could also use a
             # regular expression to do this but we use the string split method here, for safety.
-            predictions_df['phase'] = predictions_df['recommendation_id'].str.split('phase=').str[1].str[0]
+            # We may not always have a phase to split on, so we need to handle this case. We do this with
+            # extract_phase, which returns the phase that follows "phase=" as an integer, or None when the
+            # recommendation_id does not contain "phase=".
             # Convert back to int
-            predictions_df['phase'] = predictions_df['phase'].astype(int)
+            predictions_df['phase'] = predictions_df['recommendation_id'].apply(self.extract_phase)
 
             predictions[model_prefix] = predictions_df
 
diff --git a/etl/customers/eon/pilot_asset_list.py b/etl/customers/eon/pilot_asset_list.py
index 5e6efbba..4f79e05e 100644
--- a/etl/customers/eon/pilot_asset_list.py
+++ b/etl/customers/eon/pilot_asset_list.py
@@ -211,6 +211,7 @@ def app():
     # We now determine which measures we need for each property
     finalised_asset_list = []
     for i, config in raw_asset_list.iterrows():
+
         asset_config = asset_list_with_uprn_df[
             (asset_list_with_uprn_df["address"] == config["Address"]) &
             (asset_list_with_uprn_df["postcode"] == config["Postcode"])
@@ -223,11 +224,23 @@ def app():
         # Get the property type
         pt = parse_property_type(config)
 
+        if config["Address"] in [
+            "28 Hermitage Lane",
+            "35a High Street",
+            "35b High Street",
+            "Flat Over 20 Holborough Road",
+            "Flat above 7 Malling Road"
+        ]:
+            print(config["Address"])
+            uprn = None
+        else:
+            uprn = asset_config["uprn"].values[0]
+
         finalised_asset_list.append(
             {
                 "address": config["Address"],
                 "postcode": config["Postcode"],
-                "uprn": asset_config["uprn"].values[0],
+                "uprn": uprn,
                 "n_bedrooms": config["No Bedrooms"],
                 "measures": measures,
                 **pt
diff --git a/recommendations/RoofRecommendations.py b/recommendations/RoofRecommendations.py
index 5424ab57..538d90e4 100644
--- a/recommendations/RoofRecommendations.py
+++ b/recommendations/RoofRecommendations.py
@@ -230,6 +230,7 @@ class RoofRecommendations:
                         already_installed = "loft_insulation" in self.property.already_installed
                         if already_installed:
                             cost_result = override_costs(cost_result)
+                        new_thickness = insulation_thickness + material["depth"]
                     elif material["type"] == "flat_roof_insulation":
                         cost_result = self.costs.flat_roof_insulation(
                             floor_area=self.property.insulation_floor_area,
@@ -239,6 +240,7 @@ class RoofRecommendations:
                         already_installed = "flat_roof_insulation" in self.property.already_installed
                         if already_installed:
                             cost_result = override_costs(cost_result)
+                        new_thickness = None
                     else:
                         raise ValueError("Invalid material type")
 
@@ -259,6 +261,7 @@ class RoofRecommendations:
                             "new_u_value": new_u_value,
                             "sap_points": None,
                             "already_installed": already_installed,
+                            "new_thickness": new_thickness,
                             **cost_result
                         }
                     )

From 9417611e853c53665330083fd6907e1788b2c046 Mon Sep 17 00:00:00 2001
From: Michael Duong <michaelduong22@gmail.com>
Date: Sat, 25 May 2024 08:33:00 +0100
Subject: [PATCH 49/58] add change to allow is_assumed and is_assumed_ending to
 be a feature

---
 etl/epc/Dataset.py | 25 +++++++++++++------------
 1 file changed, 13 insertions(+), 12 deletions(-)

diff --git a/etl/epc/Dataset.py b/etl/epc/Dataset.py
index e897da78..7d5c3ef8 100644
--- a/etl/epc/Dataset.py
+++ b/etl/epc/Dataset.py
@@ -203,11 +203,11 @@ class TrainingDataset(BaseDataset):
         common_cols = [[col + "_starting", col + "_ending"] for col in common_cols]
 
         self.df = self.df.loc[
-                  :,
-                  no_suffix_cols
-                  + only_ending_cols
-                  + [col for cols in common_cols for col in cols],
-                  ]
+            :,
+            no_suffix_cols
+            + only_ending_cols
+            + [col for cols in common_cols for col in cols],
+        ]
 
     def _remove_abnormal_change_in_floor_area(self):
         """
@@ -509,7 +509,7 @@ class TrainingDataset(BaseDataset):
                     expanded_df["is_sandstone_or_limestone"]
                     == expanded_df["is_sandstone_or_limestone_ending"]
                 )
-                ]
+            ]
         elif component == "floor":
             expanded_df = expanded_df[
                 (expanded_df["is_suspended"] == expanded_df["is_suspended_ending"])
@@ -526,7 +526,7 @@ class TrainingDataset(BaseDataset):
                     expanded_df["is_to_external_air"]
                     == expanded_df["is_to_external_air_ending"]
                 )
-                ]
+            ]
         elif component == "roof":
             expanded_df = expanded_df[
                 (expanded_df["is_pitched"] == expanded_df["is_pitched_ending"])
@@ -539,7 +539,7 @@ class TrainingDataset(BaseDataset):
                     expanded_df["has_dwelling_above"]
                     == expanded_df["has_dwelling_above_ending"]
                 )
-                ]
+            ]
 
         return expanded_df
 
@@ -567,13 +567,12 @@ class TrainingDataset(BaseDataset):
                 "is_system_built_ending",
                 "is_timber_frame_ending",
                 "is_granite_or_whinstone_ending",
-                "is_as_built_ending",
+                # "is_as_built_ending",
                 "is_cob_ending",
-                "is_assumed_ending",
                 "is_sandstone_or_limestone_ending",
                 # Re remove the is_assumed columns
-                "is_assumed",
-                "is_assumed_ending",
+                # "is_assumed",
+                # "is_assumed_ending",
             ],
             "floor": [
                 "original_description",
@@ -698,6 +697,8 @@ class TrainingDataset(BaseDataset):
             # Rename columns to component specific names, if they have not been dropped
             expanded_df = expanded_df.rename(
                 columns={
+                    "is_assumed": f"{component}_is_assumed",
+                    "is_assumed_ending": f"{component}_is_assumed_ending",
                     "insulation_thickness": f"{component}_insulation_thickness",
                     "insulation_thickness_ending": f"{component}_insulation_thickness_ending",
                     "thermal_transmittance": f"{component}_thermal_transmittance",

From a2586ab4b6a866d3ae7114813fc84f18acc9608d Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 28 May 2024 11:53:41 +0100
Subject: [PATCH 50/58] minor

---
 backend/app/plan/router.py                  |  1 +
 etl/customers/goldman/property_ownership.py | 22 ++++++++++++++++++---
 2 files changed, 20 insertions(+), 3 deletions(-)

diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index c6a26fcf..42efeb42 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -879,6 +879,7 @@ async def build_mds(body: PlanTriggerRequest):
             results.append(to_append)
 
         results = pd.DataFrame(results)
+        results["sap_uplift"] = results["sap_after"] - results["sap_before"]
 
 
     except IntegrityError:
diff --git a/etl/customers/goldman/property_ownership.py b/etl/customers/goldman/property_ownership.py
index 24922f68..45367a63 100644
--- a/etl/customers/goldman/property_ownership.py
+++ b/etl/customers/goldman/property_ownership.py
@@ -363,11 +363,12 @@ def app():
     freehold_matching_lookup = remove_duplicate_matches(freehold_matching_lookup, properties, company_ownership)
     leasehold_matching_lookup = remove_duplicate_matches(leasehold_matching_lookup, properties, company_ownership)
 
-    matched_addresses = freehold_matching_lookup.merge(
-        properties[["UPRN", "ADDRESS"]].rename(columns={"ADDRESS": "epc_address"}),
+    matched_addresses = pd.concat([freehold_matching_lookup, leasehold_matching_lookup]).merge(
+        properties[["UPRN", "ADDRESS", "CURRENT_ENERGY_EFFICIENCY", "CURRENT_ENERGY_RATING"]].rename(
+            columns={"ADDRESS": "epc_address"}),
         how="left", on="UPRN"
     ).merge(
-        company_ownership[["Title Number", "Property Address"]],
+        company_ownership[["Title Number", "Property Address", "Company Registration No. (1)", "Proprietor Name (1)"]],
         how="left", on="Title Number"
     )
 
@@ -377,6 +378,10 @@ def app():
     leasehold_matching_lookup.to_excel("leasehold_matching_lookup.xlsx")
     shared_leasehold_match.to_excel("shared_leasehold_match.xlsx")
     # shared_freehold_match.to_excel("shared_freehold_match.xlsx")
+    # read the files
+    # freehold_matching_lookup = pd.read_excel("freehold_matching_lookup.xlsx")
+    # leasehold_matching_lookup = pd.read_excel("leasehold_matching_lookup.xlsx")
+    # shared_leasehold_match = pd.read_excel("shared_leasehold_match.xlsx")
 
     freehold_aggregate = aggregate_matches(freehold_matching_lookup, company_ownership, properties)
     leasehold_aggregate = aggregate_matches(leasehold_matching_lookup, company_ownership, properties)
@@ -390,6 +395,17 @@ def app():
     investment_20m = combined_aggregate[combined_aggregate["cumulative_value"] <= 20_500_000]
     investment_50m = combined_aggregate[combined_aggregate["cumulative_value"] <= 51_000_000]
 
+    investment_20m_properties = matched_addresses[
+        matched_addresses["Company Registration No. (1)"].isin(investment_20m["Company Registration No. (1)"])
+    ]
+
+    investment_50m_properties = matched_addresses[
+        matched_addresses["Company Registration No. (1)"].isin(investment_50m["Company Registration No. (1)"])
+    ]
+
+    investment_20m_properties.to_excel("investment_20m_properties.xlsx")
+    investment_50m_properties.to_excel("investment_50m_properties.xlsx")
+
     properties["WALLS_DESCRIPTION"].value_counts(normalize=True)
 
 

From 0a3055d70bf362be97d83935dea4963c864e257c Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 28 May 2024 17:22:29 +0100
Subject: [PATCH 51/58] added simulation_config convention to wall
 recommendations

---
 backend/Property.py                         |  54 +---------
 backend/app/plan/router.py                  |   1 +
 etl/customers/goldman/property_ownership.py | 105 ++++++++++++++++----
 recommendations/WallRecommendations.py      |  79 ++++++++++++++-
 recommendations/recommendation_utils.py     |   9 +-
 5 files changed, 176 insertions(+), 72 deletions(-)

diff --git a/backend/Property.py b/backend/Property.py
index a1972b5b..b7753413 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -353,55 +353,6 @@ class Property:
         for recommendation in recommendations:
             # For the list of recommendations we have, we iteratively update the output
 
-            # We update the description to indicate it's insulated
-            if recommendation["type"] in [
-                "internal_wall_insulation",
-                "external_wall_insulation",
-                "cavity_wall_insulation",
-            ]:
-
-                # # If we have a non-incasive recommendation that the cavity wall is partially filled, we skip the
-                # # cavity wall insulation recommendation (since on the EPC, the property will look like how it did
-                # # before any works)
-                # if "cavity_surveyed_as_filled_is_partial" in non_invasive_recommendations:
-                #     continue
-
-                # The upgrade made here is to the u-value of the walls and the description of the
-                # insulation thickness
-                output["walls_thermal_transmittance_ending"] = recommendation[
-                    "new_u_value"
-                ]
-                # Setting the insulation thickness here to above average should be tested further because we
-                # don't see a high volume of instances for this
-                output["walls_insulation_thickness_ending"] = "average"
-                # In some edge cases, or when running the mds report we might see the energy efficiency already
-                # in Good or Very Good
-                if output["walls_energy_eff_ending"] not in ["Good", "Very Good"]:
-                    output["walls_energy_eff_ending"] = "Good"
-
-                # TODO TEMP - should be ending?
-                output["is_as_built"] = False
-
-                # Note: often when the wall is insulatied, the internal/external insulation is not noted so we should
-                #       test the impact of using these booleans
-                if recommendation["type"] == "external_wall_insulation":
-                    output["external_insulation_ending"] = True
-                    output["internal_insulation_ending"] = False
-
-                if recommendation["type"] == "internal_wall_insulation":
-                    output["external_insulation_ending"] = False
-                    output["internal_insulation_ending"] = True
-
-                if recommendation["type"] == "cavity_wall_insulation":
-                    output["is_filled_cavity_ending"] = True
-
-            else:
-                if output["walls_thermal_transmittance_ending"] is None:
-                    raise ValueError("We should not have a None value for the u value")
-
-                if output["walls_insulation_thickness_ending"] is None:
-                    output["walls_insulation_thickness_ending"] = "none"
-
             # Update description to indicate it's insulate
             if recommendation["type"] in [
                 "solid_floor_insulation",
@@ -518,9 +469,12 @@ class Property:
                     )
 
             if recommendation["type"] in [
-                "heating", "hot_water_tank_insulation", "heating_control", "secondary_heating"
+                "heating", "hot_water_tank_insulation", "heating_control", "secondary_heating",
+                "internal_wall_insulation", "external_wall_insulation", "cavity_wall_insulation",
             ]:
                 # We update the data, as defined in the recommendaton
+                if output["walls_insulation_thickness_ending"] is None:
+                    output["walls_insulation_thickness_ending"] = "none"
 
                 simulation_config = recommendation["simulation_config"]
                 # If any entries in simulation_config are None, we will set them to "Unknown" which is the cleaning
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 42efeb42..1e2c1e6f 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -739,6 +739,7 @@ async def build_mds(body: PlanTriggerRequest):
         representative_recommendations = {}
 
         for p in tqdm(input_properties):
+
             p.get_components(cleaned, photo_supply_lookup, floor_area_decile_thresholds)
 
             mds = Mds(property_instance=p, materials=materials)
diff --git a/etl/customers/goldman/property_ownership.py b/etl/customers/goldman/property_ownership.py
index 45367a63..d30205ae 100644
--- a/etl/customers/goldman/property_ownership.py
+++ b/etl/customers/goldman/property_ownership.py
@@ -20,27 +20,39 @@ def aggregate_matches(matching_lookup, company_ownership, properties):
         properties[["UPRN", "LOCAL_AUTHORITY_LABEL"]], how="left", on="UPRN"
     )
     counts = (
-        df.groupby(["Company Registration No. (1)", "Proprietor Name (1)", "LOCAL_AUTHORITY_LABEL"])["UPRN"]
+        df.groupby(["Company Registration No. (1)", "LOCAL_AUTHORITY_LABEL"])["UPRN"]
         .count()
         .reset_index(name="number_of_properties")
     )
     counts = counts.sort_values("number_of_properties", ascending=False)
 
     pivot_counts = counts.pivot_table(
-        index=["Company Registration No. (1)", "Proprietor Name (1)"],  # Rows: companies and proprietors
+        index=["Company Registration No. (1)"],  # Rows: companies and proprietors
         columns="LOCAL_AUTHORITY_LABEL",  # Columns: each local authority
         values="number_of_properties",  # The counts of properties
         fill_value=0  # Fill missing values with 0 (where there are no properties owned)
     ).reset_index()
 
     total_counts = (
-        df.groupby(["Company Registration No. (1)", "Proprietor Name (1)"])["UPRN"]
+        df.groupby(["Company Registration No. (1)"])["UPRN"]
         .count()
         .reset_index(name="total_number_of_properties")
     )
 
+    # We have cases where the same company registration number appears with different company names, so we pick a
+    # single name (the first non-null one) per company registration number
+    best_names = (
+        df.groupby(["Company Registration No. (1)"])["Proprietor Name (1)"]
+        .first()
+        .reset_index()
+    )
+
+    total_counts = best_names.merge(
+        total_counts, how="left", on=["Company Registration No. (1)"]
+    )
+
     pivot_counts = pivot_counts.merge(
-        total_counts, how="left", on=["Company Registration No. (1)", "Proprietor Name (1)"]
+        total_counts, how="left", on=["Company Registration No. (1)"]
     )
 
     pivot_counts = pivot_counts.sort_values("total_number_of_properties", ascending=False)
@@ -187,7 +199,45 @@ def remove_duplicate_matches(matching_lookup, properties, company_ownership):
 
     if not to_drop.empty:
         merged = pd.merge(matching_lookup, to_drop, on=['UPRN', 'Title Number'], how='left', indicator=True)
-        merged[merged['_merge'] == 'left_only'].drop(columns=['_merge'])
+        merged = merged[merged['_merge'] == 'left_only'].drop(columns=['_merge'])
+
+        return merged
+
+    return matching_lookup
+
+
+def remove_duplicate_uprn_matches(matching_lookup, properties, company_ownership):
+    """
+    For every UPRN matched to several titles, keep only the title(s) that levenstein_match returns
+    """
+    dupe_uprns = matching_lookup[matching_lookup["UPRN"].duplicated()]["UPRN"].unique().tolist()
+
+    to_drop = []
+    for dupe_uprn in dupe_uprns:
+        dupe_data = matching_lookup[matching_lookup["UPRN"] == dupe_uprn].copy()
+        matched_addresses = dupe_data.merge(
+            properties[["UPRN", "ADDRESS"]].rename(columns={"ADDRESS": "epc_address"}),
+            how="left", on="UPRN"
+        ).merge(
+            company_ownership[["Title Number", "Property Address"]],
+            how="left", on="Title Number"
+        )
+        # We perform a Levenshtein comparison to get the best match
+        best_match = levenstein_match(
+            matching_string=matched_addresses["Property Address"].values[0],
+            df=matched_addresses,
+            address_col="epc_address"
+        )
+        matches_to_drop = matched_addresses[
+            ~matched_addresses["Title Number"].isin(best_match["Title Number"].values)
+        ]
+        to_drop.append(matches_to_drop[["UPRN", "Title Number"]].copy())
+
+    # pd.concat raises a ValueError on an empty list, which is what we get when no UPRN is duplicated
+    to_drop = pd.concat(to_drop) if to_drop else pd.DataFrame(columns=["UPRN", "Title Number"])
+    if not to_drop.empty:
+        merged = pd.merge(matching_lookup, to_drop, on=['UPRN', 'Title Number'], how='left', indicator=True)
+        merged = merged[merged['_merge'] == 'left_only'].drop(columns=['_merge'])
 
         return merged
 
@@ -254,6 +304,9 @@ def app():
     properties = properties[
         properties["TENURE"].isin(["rental (private)", "Rented (private)", "owner-occupied", "Owner-occupied"])
     ]
+    # Some UPRNs appear more than once
+    # Keep only the record with the newest LODGEMENT_DATE for each UPRN
+    properties = properties.sort_values("LODGEMENT_DATE", ascending=False).drop_duplicates("UPRN")
 
     # Remove entries where the address begins with the term "land adjoining", or other records that don't reference the
     # the property itself
@@ -354,16 +407,29 @@ def app():
     freehold_matching_lookup = pd.DataFrame(freehold_matching_lookup)
     leasehold_matching_lookup = pd.DataFrame(leasehold_matching_lookup)
     shared_leasehold_match = pd.concat(shared_leasehold_match)
+    shared_freehold_match = pd.concat(shared_freehold_match)
+
+    # freehold_matching_lookup.to_excel("freehold_matching_lookup_new.xlsx")
+    # leasehold_matching_lookup.to_excel("leasehold_matching_lookup_new.xlsx")
+    # shared_leasehold_match.to_excel("shared_leasehold_match_new.xlsx")
+    # shared_freehold_match.to_excel("shared_freehold_match_new.xlsx")
 
     # The approximate matches aren't very good
     freehold_matching_lookup = freehold_matching_lookup[freehold_matching_lookup["match_type"] == "exact"]
     leasehold_matching_lookup = leasehold_matching_lookup[leasehold_matching_lookup["match_type"] == "exact"]
 
-    # There are some cases where we have duplicates
-    freehold_matching_lookup = remove_duplicate_matches(freehold_matching_lookup, properties, company_ownership)
-    leasehold_matching_lookup = remove_duplicate_matches(leasehold_matching_lookup, properties, company_ownership)
+    # Combine
+    combined_matching_lookup = pd.concat([freehold_matching_lookup, leasehold_matching_lookup])
+    # Remove duplicates
+    combined_matching_lookup = remove_duplicate_matches(combined_matching_lookup, properties, company_ownership)
+    # We also have duplicates at a UPRN level
+    combined_matching_lookup = remove_duplicate_uprn_matches(combined_matching_lookup, properties, company_ownership)
 
-    matched_addresses = pd.concat([freehold_matching_lookup, leasehold_matching_lookup]).merge(
+    # There are some cases where we have duplicates
+    # freehold_matching_lookup = remove_duplicate_matches(freehold_matching_lookup, properties, company_ownership)
+    # leasehold_matching_lookup = remove_duplicate_matches(leasehold_matching_lookup, properties, company_ownership)
+
+    matched_addresses = combined_matching_lookup.merge(
         properties[["UPRN", "ADDRESS", "CURRENT_ENERGY_EFFICIENCY", "CURRENT_ENERGY_RATING"]].rename(
             columns={"ADDRESS": "epc_address"}),
         how="left", on="UPRN"
@@ -374,9 +440,9 @@ def app():
 
     # shared_freehold_match = pd.DataFrame(shared_freehold_match)
     # Strore these files
-    freehold_matching_lookup.to_excel("freehold_matching_lookup.xlsx")
-    leasehold_matching_lookup.to_excel("leasehold_matching_lookup.xlsx")
-    shared_leasehold_match.to_excel("shared_leasehold_match.xlsx")
+    # freehold_matching_lookup.to_excel("freehold_matching_lookup.xlsx")
+    # leasehold_matching_lookup.to_excel("leasehold_matching_lookup.xlsx")
+    # shared_leasehold_match.to_excel("shared_leasehold_match.xlsx")
     # shared_freehold_match.to_excel("shared_freehold_match.xlsx")
     # read the files
     # freehold_matching_lookup = pd.read_excel("freehold_matching_lookup.xlsx")
@@ -387,11 +453,9 @@ def app():
     leasehold_aggregate = aggregate_matches(leasehold_matching_lookup, company_ownership, properties)
 
     combined_aggregate = aggregate_matches(
-        pd.concat([freehold_matching_lookup, leasehold_matching_lookup]), company_ownership, properties
+        combined_matching_lookup, company_ownership, properties
     )
 
-    df = pd.concat([freehold_matching_lookup, leasehold_matching_lookup])
-
     investment_20m = combined_aggregate[combined_aggregate["cumulative_value"] <= 20_500_000]
     investment_50m = combined_aggregate[combined_aggregate["cumulative_value"] <= 51_000_000]
 
@@ -403,10 +467,15 @@ def app():
         matched_addresses["Company Registration No. (1)"].isin(investment_50m["Company Registration No. (1)"])
     ]
 
-    investment_20m_properties.to_excel("investment_20m_properties.xlsx")
-    investment_50m_properties.to_excel("investment_50m_properties.xlsx")
+    portfolio_epc_data_50m = properties[properties["UPRN"].isin(investment_50m_properties["UPRN"])]
+    portfolio_epc_data_20m = properties[properties["UPRN"].isin(investment_20m_properties["UPRN"])]
 
-    properties["WALLS_DESCRIPTION"].value_counts(normalize=True)
+    investment_20m_properties.to_excel("investment_20m_properties 28th May.xlsx", index=False)
+    investment_50m_properties.to_excel("investment_50m_properties 28th May.xlsx", index=False)
+
+    # Store the EPC data
+    portfolio_epc_data_50m.to_excel("portfolio_epc_data_50m 28th May.xlsx", index=False)
+    portfolio_epc_data_20m.to_excel("portfolio_epc_data_20m 28th May.xlsx", index=False)
 
 
 def company_aggregation():
diff --git a/recommendations/WallRecommendations.py b/recommendations/WallRecommendations.py
index 6cdfbfdd..8d7915cd 100644
--- a/recommendations/WallRecommendations.py
+++ b/recommendations/WallRecommendations.py
@@ -6,9 +6,10 @@ import pandas as pd
 from datatypes.enums import QuantityUnits
 from backend.Property import Property
 from BaseUtility import Definitions
+from etl.epc_clean.epc_attributes.WallAttributes import WallAttributes
 from recommendations.recommendation_utils import (
     r_value_per_mm_to_u_value, calculate_u_value_uplift, is_diminishing_returns, update_lowest_selected_u_value,
-    get_recommended_part, get_wall_u_value, override_costs
+    get_recommended_part, get_wall_u_value, override_costs, check_simulation_difference
 )
 from recommendations.config import PARTIALLY_FILLED_PERCENTAGE_ASSUMPTION
 from recommendations.Costs import Costs
@@ -53,6 +54,24 @@ class WallRecommendations(Definitions):
     # threshold
     NEW_BUILD_INSULATED = 0.75
 
+    # These are the ending descriptions we consider for walls with external insulation
+    EXTERNALLY_INSULATED_WALL_DESCRIPTIONS = {
+        "solid_brick": "Solid brick, with external insulation",
+        "cob": "Cob, with external insulation",
+        "system_built": "System built, with external insulation",
+        "granite_or_whinstone": 'Granite or whinstone, with external insulation',
+        "sandstone_or_limestone": 'Sandstone or limestone, with external insulation',
+    }
+
+    # These are the ending descriptions we consider for walls with internal insulation
+    INTERNALLY_INSULATED_WALL_DESCRIPTIONS = {
+        "solid_brick": "Solid brick, with internal insulation",
+        "cob": "Cob, with internal insulation",
+        "system_built": "System built, with internal insulation",
+        "granite_or_whinstone": 'Granite or whinstone, with internal insulation',
+        "sandstone_or_limestone": 'Sandstone or limestone, with internal insulation',
+    }
+
     def __init__(
         self,
         property_instance: Property,
@@ -279,6 +298,21 @@ class WallRecommendations(Definitions):
                 # updated the new u-value with the best possible our installers have
                 new_u_value = max(0.31, new_u_value)
 
+                wall_ending_config = WallAttributes("Cavity wall, filled cavity").process()
+
+                simulation_config = {}
+                if self.property.data["walls-energy-eff"] not in ["Good", "Very Good"]:
+                    simulation_config = {
+                        "walls_energy_eff_ending": "Good",
+                        "walls_thermal_transmittance_ending": new_u_value
+                    }
+
+                walls_simulation_config = check_simulation_difference(
+                    new_config=wall_ending_config, old_config=self.property.walls, prefix="walls_"
+                )
+
+                simulation_config = {**simulation_config, **walls_simulation_config}
+
                 recommendations.append(
                     {
                         "phase": phase,
@@ -296,12 +330,31 @@ class WallRecommendations(Definitions):
                         "new_u_value": new_u_value,
                         "sap_points": None,
                         "already_installed": already_installed,
+                        "simulation_config": simulation_config,
                         **cost_result
                     }
                 )
 
         self.recommendations = recommendations
 
+    def get_internal_external_wall_description(self, description_map):
+        """
+        Look up the insulated wall description for the first wall type flagged on this property
+        :param description_map: Maps a wall type, e.g. "solid_brick", to its insulated description
+        :return: The entry of description_map for the matching wall type
+        :raises NotImplementedError: If none of the supported wall types is flagged
+        """
+        # The flags are checked in this order and the first one that is set wins
+        wall_types = (
+            "solid_brick", "cob", "system_built",
+            "granite_or_whinstone", "sandstone_or_limestone",
+        )
+        for wall_type in wall_types:
+            if self.property.walls[f"is_{wall_type}"]:
+                return description_map[wall_type]
+
+        raise NotImplementedError("Not implemented yet")
+
     def _find_insulation(self, u_value, insulation_materials, non_insulation_materials, phase):
 
         lowest_selected_u_value = None
@@ -340,6 +393,10 @@ class WallRecommendations(Definitions):
                         if already_installed:
                             cost_result = override_costs(cost_result)
 
+                        new_description = self.get_internal_external_wall_description(
+                            self.INTERNALLY_INSULATED_WALL_DESCRIPTIONS
+                        )
+
                     elif material["type"] == "external_wall_insulation":
                         cost_result = self.costs.external_wall_insulation(
                             wall_area=self.property.insulation_wall_area,
@@ -349,9 +406,28 @@ class WallRecommendations(Definitions):
                         already_installed = "external_wall_insulation" in self.property.already_installed
                         if already_installed:
                             cost_result = override_costs(cost_result)
+
+                        new_description = self.get_internal_external_wall_description(
+                            self.EXTERNALLY_INSULATED_WALL_DESCRIPTIONS
+                        )
                     else:
                         raise ValueError("Invalid material type")
 
+                    wall_ending_config = WallAttributes(new_description).process()
+
+                    simulation_config = {}
+                    if self.property.data["walls-energy-eff"] not in ["Good", "Very Good"]:
+                        simulation_config = {
+                            "walls_thermal_transmittance_ending": new_u_value,
+                            "walls_energy_eff_ending": "Good"
+                        }
+
+                    walls_simulation_config = check_simulation_difference(
+                        new_config=wall_ending_config, old_config=self.property.walls, prefix="walls_"
+                    )
+
+                    simulation_config = {**simulation_config, **walls_simulation_config}
+
                     recommendations.append(
                         {
                             "phase": phase,
@@ -369,6 +445,7 @@ class WallRecommendations(Definitions):
                             "new_u_value": new_u_value,
                             "already_installed": already_installed,
                             "sap_points": None,
+                            "simulation_config": simulation_config,
                             **cost_result
                         }
                     )
diff --git a/recommendations/recommendation_utils.py b/recommendations/recommendation_utils.py
index a3043c31..c78c4f68 100644
--- a/recommendations/recommendation_utils.py
+++ b/recommendations/recommendation_utils.py
@@ -756,15 +756,18 @@ def calculate_cavity_age(newest_epc, older_epcs, cleaned):
     return cavity_age
 
 
-def check_simulation_difference(old_config, new_config):
+def check_simulation_difference(old_config, new_config, prefix=""):
     """
     Given two dictionaries, that describe the heating control configurations, this method will compare the two
     and pick out the differences. These differences will be things that have been added and things that have been
     removed. This will be used to determine how we should be updating the configuration in the simulation
     :return:
     """
-
-    differences = {key + "_ending": new_config[key] for key in new_config if old_config[key] != new_config[key]}
+    differences = {}
+    for key in new_config:
+        if old_config[key] != new_config[key]:
+            new_key = prefix + key + "_ending" if key == "is_assumed" else key + "_ending"
+            differences[new_key] = new_config[key]
 
     return differences
 

From 42f74cf2f5659f3f134105ac231a01bccf6ed54d Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 28 May 2024 17:23:50 +0100
Subject: [PATCH 52/58] fix pytest issue

---
 recommendations/WindowsRecommendations.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/recommendations/WindowsRecommendations.py b/recommendations/WindowsRecommendations.py
index b7c2823a..2e820422 100644
--- a/recommendations/WindowsRecommendations.py
+++ b/recommendations/WindowsRecommendations.py
@@ -4,7 +4,7 @@ import numpy as np
 
 from backend.Property import Property
 from recommendations.Costs import Costs
-from recommendation_utils import override_costs
+from recommendations.recommendation_utils import override_costs
 
 
 class WindowsRecommendations:

From 291c6955e5aff693717a827e851f554c336ce775 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 28 May 2024 18:20:31 +0100
Subject: [PATCH 53/58] Adding welsh translations

---
 .idea/Model.iml                               |  2 +-
 .idea/misc.xml                                |  2 +-
 etl/epc_clean/app.py                          | 21 +++++++++--------
 .../epc_attributes/HotWaterAttributes.py      |  3 ++-
 .../epc_attributes/WindowAttributes.py        |  1 +
 .../epc_attributes/attribute_utils.py         |  2 +-
 recommendations/WallRecommendations.py        | 23 +++++++++++++++----
 recommendations/recommendation_utils.py       |  2 +-
 8 files changed, 37 insertions(+), 19 deletions(-)

diff --git a/.idea/Model.iml b/.idea/Model.iml
index 4413bb06..b0f9c00d 100644
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@@ -7,7 +7,7 @@
       <sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
       <sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
     </content>
-    <orderEntry type="jdk" jdkName="Python 3.10 (backend)" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Python 3.10 (model_data)" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="PyNamespacePackagesService">
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 6f308057..1122b380 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,7 +3,7 @@
   <component name="Black">
     <option name="sdkName" value="Python 3.10 (backend)" />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (backend)" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (model_data)" project-jdk-type="Python SDK" />
   <component name="PythonCompatibilityInspectionAdvertiser">
     <option name="version" value="3" />
   </component>
diff --git a/etl/epc_clean/app.py b/etl/epc_clean/app.py
index 3f1a1a80..59561b3c 100644
--- a/etl/epc_clean/app.py
+++ b/etl/epc_clean/app.py
@@ -2,24 +2,27 @@ from tqdm import tqdm
 import os
 import pandas as pd
 import msgpack
+import inspect
 
 from etl.epc_clean.EpcClean import EpcClean
 from etl.epc.settings import EARLIEST_EPC_DATE
 from pathlib import Path
 from utils.s3 import save_data_to_s3
 
+src_file_path = inspect.getfile(lambda: None)
+
 LAND_REGISTRY_PATHS = [
-    os.path.abspath(os.path.dirname(__file__)) + "/model_data/local_data/pp-monthly-update-new-version.csv",
-    os.path.abspath(os.path.dirname(__file__)) + "/model_data/local_data/pp-2022 (1).csv",
-    os.path.abspath(os.path.dirname(__file__)) + "/model_data/local_data/pp-2021.csv",
-    os.path.abspath(os.path.dirname(__file__)) + "/model_data/local_data/pp-2020.csv",
-    os.path.abspath(os.path.dirname(__file__)) + "/model_data/local_data/pp-2019.csv",
-    os.path.abspath(os.path.dirname(__file__)) + "/model_data/local_data/pp-2018.csv",
-    os.path.abspath(os.path.dirname(__file__)) + "/model_data/local_data/pp-2017-part1.csv",
-    os.path.abspath(os.path.dirname(__file__)) + "/model_data/local_data/pp-2017-part2.csv",
+    os.path.abspath(os.path.dirname(src_file_path)) + "/model_data/local_data/pp-monthly-update-new-version.csv",
+    os.path.abspath(os.path.dirname(src_file_path)) + "/model_data/local_data/pp-2022 (1).csv",
+    os.path.abspath(os.path.dirname(src_file_path)) + "/model_data/local_data/pp-2021.csv",
+    os.path.abspath(os.path.dirname(src_file_path)) + "/model_data/local_data/pp-2020.csv",
+    os.path.abspath(os.path.dirname(src_file_path)) + "/model_data/local_data/pp-2019.csv",
+    os.path.abspath(os.path.dirname(src_file_path)) + "/model_data/local_data/pp-2018.csv",
+    os.path.abspath(os.path.dirname(src_file_path)) + "/model_data/local_data/pp-2017-part1.csv",
+    os.path.abspath(os.path.dirname(src_file_path)) + "/model_data/local_data/pp-2017-part2.csv",
 ]
 
-EPC_DIRECTORY = Path(__file__).parent / "local_data" / "all-domestic-certificates"
+EPC_DIRECTORY = Path(src_file_path).parent / "local_data" / "all-domestic-certificates"
 
 ENVIRONMENT = os.getenv("ENVIRONMENT", "dev")
 
diff --git a/etl/epc_clean/epc_attributes/HotWaterAttributes.py b/etl/epc_clean/epc_attributes/HotWaterAttributes.py
index e8bce0bb..5603e7d5 100644
--- a/etl/epc_clean/epc_attributes/HotWaterAttributes.py
+++ b/etl/epc_clean/epc_attributes/HotWaterAttributes.py
@@ -116,7 +116,8 @@ class HotWaterAttributes(Definitions):
                                                                                                  "instantaneous at "
                                                                                                  "point of use, "
                                                                                                  "waste water heat "
-                                                                                                 "recovery"
+                                                                                                 "recovery",
+        "ogçör brif system, adfer gwres d+¦r gwastraff": "from main system, waste water heat recovery",
     }
 
     def __init__(self, description: str):
diff --git a/etl/epc_clean/epc_attributes/WindowAttributes.py b/etl/epc_clean/epc_attributes/WindowAttributes.py
index ce0b156a..5286fc5a 100644
--- a/etl/epc_clean/epc_attributes/WindowAttributes.py
+++ b/etl/epc_clean/epc_attributes/WindowAttributes.py
@@ -30,6 +30,7 @@ class WindowAttributes(Definitions):
         "gwydrau eilaidd llawn": "full secondary glazing",
         "gwydrau eilaidd mwyaf": "mostly secondary glazing",
         "gwydrau eilaidd rhannol": "partial secondary glazing",
+        "gwydrau lluosog ym mhobman": "multiple glazing throughout",
     }
 
     def __init__(self, description: str):
diff --git a/etl/epc_clean/epc_attributes/attribute_utils.py b/etl/epc_clean/epc_attributes/attribute_utils.py
index b5fc590d..60f4653e 100644
--- a/etl/epc_clean/epc_attributes/attribute_utils.py
+++ b/etl/epc_clean/epc_attributes/attribute_utils.py
@@ -24,7 +24,7 @@ def extract_thermal_transmittance(result: dict, description: str) -> Tuple[
 
     if match:
         result['thermal_transmittance'] = float(match.group(1))
-        result['thermal_transmittance_unit'] = match.group(3)
+        result['thermal_transmittance_unit'] = "w/m-¦k"  # We standardise the unit
         # Remove the match from the description
         description = re.sub(THERMAL_TRANSMITTANCE_STR, "", description)
     else:
diff --git a/recommendations/WallRecommendations.py b/recommendations/WallRecommendations.py
index 8d7915cd..243a5edb 100644
--- a/recommendations/WallRecommendations.py
+++ b/recommendations/WallRecommendations.py
@@ -61,6 +61,7 @@ class WallRecommendations(Definitions):
         "system_built": "System built, with external insulation",
         "granite_or_whinstone": 'Granite or whinstone, with external insulation',
         "sandstone_or_limestone": 'Sandstone or limestone, with external insulation',
+        "timber_frame": "Timber frame, with external insulation"
     }
 
     # These are the ending descriptions we consider for walls with internal insulation
@@ -70,6 +71,7 @@ class WallRecommendations(Definitions):
         "system_built": "System built, with internal insulation",
         "granite_or_whinstone": 'Granite or whinstone, with internal insulation',
         "sandstone_or_limestone": 'Sandstone or limestone, with internal insulation',
+        "timber_frame": "Timber frame, with internal insulation"
     }
 
     def __init__(
@@ -337,7 +339,7 @@ class WallRecommendations(Definitions):
 
         self.recommendations = recommendations
 
-    def get_internal_external_wall_description(self, description_map):
+    def get_internal_external_wall_description(self, description_map, new_u_value):
         if self.property.walls["is_solid_brick"]:
             return description_map["solid_brick"]
 
@@ -353,6 +355,14 @@ class WallRecommendations(Definitions):
         if self.property.walls["is_sandstone_or_limestone"]:
             return description_map["sandstone_or_limestone"]
 
+        if self.property.walls["is_timber_frame"]:
+            return description_map["timber_frame"]
+
+        if "Average thermal transmittance" in self.property.walls["clean_description"]:
+            if new_u_value is None:
+                raise ValueError("New u value is None")
+            return f'Average thermal transmittance {new_u_value} W/m-¦K'
+
         raise NotImplementedError("Not implemented yet")
 
     def _find_insulation(self, u_value, insulation_materials, non_insulation_materials, phase):
@@ -394,7 +404,7 @@ class WallRecommendations(Definitions):
                             cost_result = override_costs(cost_result)
 
                         new_description = self.get_internal_external_wall_description(
-                            self.INTERNALLY_INSULATED_WALL_DESCRIPTIONS
+                            self.INTERNALLY_INSULATED_WALL_DESCRIPTIONS, new_u_value
                         )
 
                     elif material["type"] == "external_wall_insulation":
@@ -408,7 +418,7 @@ class WallRecommendations(Definitions):
                             cost_result = override_costs(cost_result)
 
                         new_description = self.get_internal_external_wall_description(
-                            self.EXTERNALLY_INSULATED_WALL_DESCRIPTIONS
+                            self.EXTERNALLY_INSULATED_WALL_DESCRIPTIONS, new_u_value
                         )
                     else:
                         raise ValueError("Invalid material type")
@@ -418,7 +428,6 @@ class WallRecommendations(Definitions):
                     simulation_config = {}
                     if self.property.data["walls-energy-eff"] not in ["Good", "Very Good"]:
                         simulation_config = {
-                            "walls_thermal_transmittance_ending": new_u_value,
                             "walls_energy_eff_ending": "Good"
                         }
 
@@ -426,7 +435,11 @@ class WallRecommendations(Definitions):
                         new_config=wall_ending_config, old_config=self.property.walls, prefix="walls_"
                     )
 
-                    simulation_config = {**simulation_config, **walls_simulation_config}
+                    simulation_config = {
+                        **walls_simulation_config,
+                        **simulation_config,
+                        "walls_thermal_transmittance_ending": new_u_value
+                    }
 
                     recommendations.append(
                         {
diff --git a/recommendations/recommendation_utils.py b/recommendations/recommendation_utils.py
index c78c4f68..996f5c9c 100644
--- a/recommendations/recommendation_utils.py
+++ b/recommendations/recommendation_utils.py
@@ -766,7 +766,7 @@ def check_simulation_difference(old_config, new_config, prefix=""):
     differences = {}
     for key in new_config:
         if old_config[key] != new_config[key]:
-            new_key = prefix + key + "_ending" if key == "is_assumed" else key + "_ending"
+            new_key = prefix + key + "_ending" if key in ["is_assumed", "thermal_transmittance"] else key + "_ending"
             differences[new_key] = new_config[key]
 
     return differences

From 1010b24a4f8a02e3726bcb69e4479a6d094ff3a5 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 28 May 2024 19:33:46 +0100
Subject: [PATCH 54/58] extending welsh translations

---
 etl/epc_clean/epc_attributes/HotWaterAttributes.py        | 2 ++
 etl/epc_clean/epc_attributes/MainheatControlAttributes.py | 3 ++-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/etl/epc_clean/epc_attributes/HotWaterAttributes.py b/etl/epc_clean/epc_attributes/HotWaterAttributes.py
index 5603e7d5..b94ab092 100644
--- a/etl/epc_clean/epc_attributes/HotWaterAttributes.py
+++ b/etl/epc_clean/epc_attributes/HotWaterAttributes.py
@@ -118,6 +118,8 @@ class HotWaterAttributes(Definitions):
                                                                                                  "waste water heat "
                                                                                                  "recovery",
         "ogçör brif system, adfer gwres d+¦r gwastraff": "from main system, waste water heat recovery",
+        "twymwr tanddwr, tarriff safonol, adfer gwres d+¦r gwastraff": "electric immersion, standard tariff, waste "
+                                                                       "water heat recovery",
     }
 
     def __init__(self, description: str):
diff --git a/etl/epc_clean/epc_attributes/MainheatControlAttributes.py b/etl/epc_clean/epc_attributes/MainheatControlAttributes.py
index 23f39d08..887bdda7 100644
--- a/etl/epc_clean/epc_attributes/MainheatControlAttributes.py
+++ b/etl/epc_clean/epc_attributes/MainheatControlAttributes.py
@@ -111,7 +111,8 @@ class MainheatControlAttributes(Definitions):
         't+-ól un gyfradd, trvs': 'single rate heating, trvs',
         't+ól un gyfradd, rhaglennydd a trvs': 'single rate heating, programmer, trvs',
         't+ól un gyfradd, trvs': 'single rate heating, trvs',
-        'trvs a falf osgoi': 'trvs and bypass'
+        'trvs a falf osgoi': 'trvs and bypass',
+        'rheolaeth celect': 'celect-type control',
     }
 
     def __init__(self, description: str):

From fe9d67e36ba25eea5179fb996121d40e78426939 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 28 May 2024 19:49:04 +0100
Subject: [PATCH 55/58] another welsh translation

---
 etl/epc_clean/epc_attributes/HotWaterAttributes.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/etl/epc_clean/epc_attributes/HotWaterAttributes.py b/etl/epc_clean/epc_attributes/HotWaterAttributes.py
index b94ab092..b292daff 100644
--- a/etl/epc_clean/epc_attributes/HotWaterAttributes.py
+++ b/etl/epc_clean/epc_attributes/HotWaterAttributes.py
@@ -120,6 +120,8 @@ class HotWaterAttributes(Definitions):
         "ogçör brif system, adfer gwres d+¦r gwastraff": "from main system, waste water heat recovery",
         "twymwr tanddwr, tarriff safonol, adfer gwres d+¦r gwastraff": "electric immersion, standard tariff, waste "
                                                                        "water heat recovery",
+        "ogçör brif system, dim thermostat ar y silindr, adfer gwres nwyon ffliw": "from main system, no cylinder "
+                                                                                   "thermostat, flue gas heat recovery",
     }
 
     def __init__(self, description: str):

From 33263c7412af461bc197fd1f09e85decb9756d00 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 28 May 2024 20:07:41 +0100
Subject: [PATCH 56/58] more Welsh translations

---
 etl/epc_clean/epc_attributes/HotWaterAttributes.py | 2 ++
 etl/epc_clean/epc_attributes/MainheatAttributes.py | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/etl/epc_clean/epc_attributes/HotWaterAttributes.py b/etl/epc_clean/epc_attributes/HotWaterAttributes.py
index b292daff..54deaa09 100644
--- a/etl/epc_clean/epc_attributes/HotWaterAttributes.py
+++ b/etl/epc_clean/epc_attributes/HotWaterAttributes.py
@@ -122,6 +122,8 @@ class HotWaterAttributes(Definitions):
                                                                        "water heat recovery",
         "ogçör brif system, dim thermostat ar y silindr, adfer gwres nwyon ffliw": "from main system, no cylinder "
                                                                                    "thermostat, flue gas heat recovery",
+        "ogçör brif system, gydag ynnigçör haul, adfer gwres nwyon ffliw": "from main system, plus solar, flue gas "
+                                                                           "heat recovery",
     }
 
     def __init__(self, description: str):
diff --git a/etl/epc_clean/epc_attributes/MainheatAttributes.py b/etl/epc_clean/epc_attributes/MainheatAttributes.py
index 673b460a..1bd7d991 100644
--- a/etl/epc_clean/epc_attributes/MainheatAttributes.py
+++ b/etl/epc_clean/epc_attributes/MainheatAttributes.py
@@ -56,6 +56,8 @@ class MainHeatAttributes(Definitions):
         "bwyler a gwres dan y llawr, lpg": "boiler and underfloor heating, lpg",
         "bwyler a gwres dan y llawr, trydan": "boiler and underfloor heating, electric",
         "boiler and radiators, nwy prif gyflenwad, mains gas": "boiler and radiators, mains gas",
+        "bwyler a rheiddiaduron, olew, st+¦r wresogyddion trydan": "boiler and radiators, oil, electric storage "
+                                                                   "heaters",
     }
 
     REMAP = {

From c724fffd0503456a5967f93eaff0361120dbf970 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 28 May 2024 20:37:39 +0100
Subject: [PATCH 57/58] debugging another welsh translation

---
 etl/epc_clean/epc_attributes/MainheatAttributes.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/etl/epc_clean/epc_attributes/MainheatAttributes.py b/etl/epc_clean/epc_attributes/MainheatAttributes.py
index 1bd7d991..9f0931a3 100644
--- a/etl/epc_clean/epc_attributes/MainheatAttributes.py
+++ b/etl/epc_clean/epc_attributes/MainheatAttributes.py
@@ -58,6 +58,7 @@ class MainHeatAttributes(Definitions):
         "boiler and radiators, nwy prif gyflenwad, mains gas": "boiler and radiators, mains gas",
         "bwyler a rheiddiaduron, olew, st+¦r wresogyddion trydan": "boiler and radiators, oil, electric storage "
                                                                    "heaters",
+        "pwmp gwres sygçön tarddu yn yr awyr, awyr gynnes, trydan": "air source heat pump, warm air, electric",
     }
 
     REMAP = {

From 1c1b7f9e5cd8a6fbec1c2cd5bb989fcd78eaf948 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 29 May 2024 11:33:21 +0100
Subject: [PATCH 58/58] adding new aws cert identifier to db

---
 infrastructure/terraform/main.tf | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/infrastructure/terraform/main.tf b/infrastructure/terraform/main.tf
index 55266e10..2811f62e 100644
--- a/infrastructure/terraform/main.tf
+++ b/infrastructure/terraform/main.tf
@@ -81,6 +81,8 @@ resource "aws_db_instance" "default" {
   # We will look to change this in the future but as we are pre-MVP at the time of setting this, we don't
   # have major security demand and don't want to set this up now
   publicly_accessible = true
+  # Explicitly set the CA certificate identifier to the default RDS CA certificate
+  ca_cert_identifier = "rds-ca-rsa2048-g1"
 }
 
 # Set up the bucket that recieve the csv uploads of epc to be retrofit
@@ -147,7 +149,7 @@ module "route53" {
   source         = "./modules/route53"
   domain_name    = var.domain_name
   api_url_prefix = var.api_url_prefix
-  providers      = {
+  providers = {
     aws.aws_use1 = aws.aws_use1
   }
 }