From 2d7e9a3cc9bb19bb558d9fbdefc495e4b2826e26 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 10 Oct 2024 18:12:29 +0100
Subject: [PATCH 1/5] setting up code for gla proposal

---
 etl/customers/gla/__init__.py               |  0
 etl/customers/gla/proposal_investigation.py | 76 +++++++++++++++++++++
 etl/ownership/Ownership.py                  | 19 ++++--
 3 files changed, 88 insertions(+), 7 deletions(-)
 create mode 100644 etl/customers/gla/__init__.py
 create mode 100644 etl/customers/gla/proposal_investigation.py

diff --git a/etl/customers/gla/__init__.py b/etl/customers/gla/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/etl/customers/gla/proposal_investigation.py b/etl/customers/gla/proposal_investigation.py
new file mode 100644
index 00000000..e36d82b8
--- /dev/null
+++ b/etl/customers/gla/proposal_investigation.py
@@ -0,0 +1,76 @@
+"""
+This script performs some basic analysis to identify EPC data for postcodes specified in the Warmer Homes Local Grant
+"""
+from nis import match
+
+import pandas as pd
+from etl.ownership.Ownership import Ownership
+
+postcodes = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Downloads/WHLG-eligible-postcodes.xlsx", sheet_name='Eligible postcodes'
+)
+# Take just the first two columns
+postcodes = postcodes[
+    ['List of eligible postcodes via the IMD Income Decile 1-2 pathway', 'Unnamed: 1']
+]
+
+postcodes.columns = ['postcode', 'Local Authority']
+# Drop the first row
+postcodes = postcodes.drop([0, 1])
+# Since there are a large number of potcodes (425k), let's just take a few examples
+# Take postcodes that begin with "BN15"
+postcodes = postcodes[postcodes["postcode"].str.startswith("BN15")]
+
+# The Local Authority is Adur, so let's get the EPC data for this area
+# epc_data = pd.read_csv(
+#     "/Users/khalimconn-kowlessar/Documents/hestia/Model/local_data/all-domestic-certificates/domestic-E07000223-Adur"
+#     "/certificates.csv", low_memory=False
+# )
+# # Filter on these postcodes
+# epc_data = epc_data[epc_data["POSTCODE"].str.lower().isin(postcodes["postcode"].str.lower())]
+# epc_data = epc_data[~pd.isnull(epc_data["UPRN"])]
+# # Take the newest EPC for each UPRN, based on LODGEMENT_DATE
+# epc_data["LODGEMENT_DATE"] = pd.to_datetime(epc_data["LODGEMENT_DATE"])
+# epc_data = epc_data.sort_values("LODGEMENT_DATE", ascending=False).drop_duplicates("UPRN")
+#
+# # Let's look at the breakdown of EPC ratings. We want the count and the % of the total
+# ratings_distribution = epc_data.groupby("CURRENT_ENERGY_RATING").size().reset_index()
+# ratings_distribution.columns = ["Rating", "Count"]
+# ratings_distribution["Percentage"] = ratings_distribution["Count"] / ratings_distribution["Count"].sum() * 100
+
+# Can we identify the owners of these units so we can contact them?
+ownership = Ownership(
+    epc_paths=[
+        "/Users/khalimconn-kowlessar/Documents/hestia/Model/local_data/all-domestic-certificates/domestic-E07000223"
+        "-Adur/certificates.csv"
+    ],
+    domestic_ownership_path="/Users/khalimconn-kowlessar/Downloads/CCOD_FULL_2024_07.csv",
+    overseas_ownership_path="/Users/khalimconn-kowlessar/Downloads/OCOD_FULL_2024_07.csv",
+    land_registry_path="/Users/khalimconn-kowlessar/Downloads/pp-complete.csv",
+    project_name="gla-proposal",
+    bucket="retrofit-data-dev",
+    average_property_value=0,
+    portfolio_value=0,
+    excluded_owners=[],
+    excluded_uprns=[],
+    save=False
+)
+
+# Data will be found at ownership/gla-proposal
+ownership.source_epc_properties(column_filters={})
+
+# Step 2: Get company ownership data
+ownership.load_company_ownership()
+
+# Step 3: Prepare data for matching
+ownership.prepare_for_matching()
+
+# Step 4: Match EPC data to ownership data
+ownership.match()
+
+# We have the matches, which we now need to match to the postcodes
+matches = ownership.matched_addresses.copy()
+# filter matches on the postcodes we're interested in
+matches = matches[matches["epc_postcode"].str.lower().isin(postcodes["postcode"].str.lower())]
+# Remove any social transactions
+matches = matches[~matches["TENURE"].isin(["Rented (social)", "rental (social)"])]
diff --git a/etl/ownership/Ownership.py b/etl/ownership/Ownership.py
index 3bc4b60d..2079391c 100644
--- a/etl/ownership/Ownership.py
+++ b/etl/ownership/Ownership.py
@@ -61,6 +61,7 @@ class Ownership:
         portfolio_value: float,
         excluded_owners: List[str] = None,
         excluded_uprns: List[int] = None,
+        save=True
     ):
         """
 
@@ -115,6 +116,8 @@ class Ownership:
             f"ownership/{self.project_name}/{self.run_timestamp}/portfolio_epc_data.xlsx"
         )
 
+        self.save = save
+
         # Data
         self.epc_data = None
         self.ownership_data = None
@@ -210,12 +213,13 @@ class Ownership:
         if self.excluded_uprns:
             self.epc_data = self.epc_data[~self.epc_data["UPRN"].astype(float).isin(self.excluded_uprns)]
 
-        # We now store the data in s3
-        save_excel_to_s3(
-            df=self.epc_data,
-            bucket_name=self.bucket,
-            file_key=self.epc_data_filepath
-        )
+        if self.save:
+            # We now store the data in s3
+            save_excel_to_s3(
+                df=self.epc_data,
+                bucket_name=self.bucket,
+                file_key=self.epc_data_filepath
+            )
 
     def load_company_ownership(self):
         """
@@ -590,7 +594,8 @@ class Ownership:
                     "CURRENT_ENERGY_RATING",
                     "POSTCODE",
                     "LODGEMENT_DATE",
-                    "TRANSACTION_TYPE"
+                    "TRANSACTION_TYPE",
+                    "TENURE",
                 ]
             ].rename(
                 columns={

From a953a1f0ee215f30cb5a17953f0d8f4b167caa18 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 10 Oct 2024 18:38:08 +0100
Subject: [PATCH 2/5] Improving ownership matching algorithm

---
 etl/customers/gla/proposal_investigation.py | 29 +++++++++++++++++++++
 etl/ownership/Ownership.py                  | 10 +++----
 2 files changed, 34 insertions(+), 5 deletions(-)

diff --git a/etl/customers/gla/proposal_investigation.py b/etl/customers/gla/proposal_investigation.py
index e36d82b8..57df0554 100644
--- a/etl/customers/gla/proposal_investigation.py
+++ b/etl/customers/gla/proposal_investigation.py
@@ -74,3 +74,32 @@ matches = ownership.matched_addresses.copy()
 matches = matches[matches["epc_postcode"].str.lower().isin(postcodes["postcode"].str.lower())]
 # Remove any social transactions
 matches = matches[~matches["TENURE"].isin(["Rented (social)", "rental (social)"])]
+
+matches.head()
+owners_count = matches.groupby(['Proprietor Name (1)', 'Company Registration No. (1)']).size().reset_index()
+owners_count.columns = ['Owner', 'Owner Registration #', 'Count']
+owners_count = owners_count.sort_values('Count', ascending=False)
+owners_count["Percentage"] = owners_count["Count"] / owners_count["Count"].sum() * 100
+
+companies_house_api_key = "1d9c2877-3271-4642-80ed-a6170971653f"
+
+import requests
+import json
+
+company_number = "13197205"
+url = f'https://api.company-information.service.gov.uk/company/{company_number}'
+
+# Make the API request
+response = requests.get(url, auth=(companies_house_api_key, ''))
+
+# Check if the request was successful
+if response.status_code == 200:
+    company_data = response.json()
+    # Pretty-print the fetched data
+    print(json.dumps(company_data, indent=4))
+else:
+    print(f"Failed to fetch data. Status code: {response.status_code}")
+
+psc_url = f'https://api.company-information.service.gov.uk/company/{company_number}/persons-with-significant-control'
+psc_response = requests.get(psc_url, auth=(companies_house_api_key, ''))
+psc_data = psc_response.json()
diff --git a/etl/ownership/Ownership.py b/etl/ownership/Ownership.py
index 2079391c..52181452 100644
--- a/etl/ownership/Ownership.py
+++ b/etl/ownership/Ownership.py
@@ -488,11 +488,11 @@ class Ownership:
                 house_no = house_no.replace(",", "")
 
             if house_no is None:
-                # It's hard for us to get a reliable match
-                # filtered = filtered[filtered["Property Address"].str.contains(address["ADDRESS1"])]
-                # if filtered.shape[0] > 1:
-                #     raise Exception("No valid - maybe we should do levenstein?")
-                continue
+                # If the house number is missing, we usually have a named property, so we look for that name within
+                # the property address (case-insensitive) and only accept it when exactly one candidate remains
+                filtered = filtered[filtered["Property Address"].str.lower().str.contains(address["ADDRESS"].lower())]
+                if filtered.shape[0] != 1:
+                    continue
 
             else:
 

From f53ce8b4302482ce54785e9da807c6b6ad9296b3 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 10 Oct 2024 18:51:12 +0100
Subject: [PATCH 3/5] allow postcode filtering

---
 etl/customers/gla/proposal_investigation.py | 25 ++++++++++++---------
 etl/ownership/Ownership.py                  |  8 ++++++-
 2 files changed, 21 insertions(+), 12 deletions(-)

diff --git a/etl/customers/gla/proposal_investigation.py b/etl/customers/gla/proposal_investigation.py
index 57df0554..776bbc59 100644
--- a/etl/customers/gla/proposal_investigation.py
+++ b/etl/customers/gla/proposal_investigation.py
@@ -1,9 +1,12 @@
 """
 This script performs some basic analysis to identify EPC data for postcodes specified in the Warmer Homes Local Grant
 """
-from nis import match
 
+import inspect
+import requests
+import json
 import pandas as pd
+from pathlib import Path
 from etl.ownership.Ownership import Ownership
 
 postcodes = pd.read_excel(
@@ -19,7 +22,7 @@ postcodes.columns = ['postcode', 'Local Authority']
 postcodes = postcodes.drop([0, 1])
 # Since there are a large number of potcodes (425k), let's just take a few examples
 # Take postcodes that begin with "BN15"
-postcodes = postcodes[postcodes["postcode"].str.startswith("BN15")]
+# postcodes = postcodes[postcodes["postcode"].str.startswith("BN15")]
 
 # The Local Authority is Adur, so let's get the EPC data for this area
 # epc_data = pd.read_csv(
@@ -39,11 +42,14 @@ postcodes = postcodes[postcodes["postcode"].str.startswith("BN15")]
 # ratings_distribution["Percentage"] = ratings_distribution["Count"] / ratings_distribution["Count"].sum() * 100
 
 # Can we identify the owners of these units so we can contact them?
+
+file_src = inspect.getfile(lambda x: None)
+DATA_DIRECTORY = Path(file_src).parent / "local_data" / "all-domestic-certificates"
+epc_paths = [entry for entry in DATA_DIRECTORY.iterdir() if entry.is_dir()]
+epc_paths = [str(entry / "certificates.csv") for entry in epc_paths]
+
 ownership = Ownership(
-    epc_paths=[
-        "/Users/khalimconn-kowlessar/Documents/hestia/Model/local_data/all-domestic-certificates/domestic-E07000223"
-        "-Adur/certificates.csv"
-    ],
+    epc_paths=epc_paths,
     domestic_ownership_path="/Users/khalimconn-kowlessar/Downloads/CCOD_FULL_2024_07.csv",
     overseas_ownership_path="/Users/khalimconn-kowlessar/Downloads/OCOD_FULL_2024_07.csv",
     land_registry_path="/Users/khalimconn-kowlessar/Downloads/pp-complete.csv",
@@ -53,11 +59,11 @@ ownership = Ownership(
     portfolio_value=0,
     excluded_owners=[],
     excluded_uprns=[],
-    save=False
+    save=True
 )
 
 # Data will be found at ownership/gla-proposal
-ownership.source_epc_properties(column_filters={})
+ownership.source_epc_properties(column_filters={}, postcodes=postcodes["postcode"].str.lower().tolist())
 
 # Step 2: Get company ownership data
 ownership.load_company_ownership()
@@ -83,9 +89,6 @@ owners_count["Percentage"] = owners_count["Count"] / owners_count["Count"].sum()
 
 companies_house_api_key = "1d9c2877-3271-4642-80ed-a6170971653f"
 
-import requests
-import json
-
 company_number = "13197205"
 url = f'https://api.company-information.service.gov.uk/company/{company_number}'
 
diff --git a/etl/ownership/Ownership.py b/etl/ownership/Ownership.py
index 52181452..2c04ac8a 100644
--- a/etl/ownership/Ownership.py
+++ b/etl/ownership/Ownership.py
@@ -175,7 +175,7 @@ class Ownership:
         # Prepare the final outputs:
         self.create_final_matches()
 
-    def source_epc_properties(self, column_filters=None):
+    def source_epc_properties(self, column_filters=None, postcodes=None):
         """
         This function will filter the epc data as specified by column filters, searching across all of the EPC tables
         :param column_filters: Dictionary with column names as keys and list of acceptable values as values. This
@@ -183,6 +183,7 @@ class Ownership:
                                 {"column_name": ["value1", "value2", ...]}, where column_name is the name of the column
                                 in the EPC data and ["value1", "value2", ...] is a list of acceptable values for that
                                 column. If a column is not found in the EPC data, an exception is raised.
+        :param postcodes: Optional list of lower-case postcodes to keep (POSTCODE is lower-cased first); None = no filter
         """
 
         column_filters = {} if column_filters is None else column_filters
@@ -206,6 +207,11 @@ class Ownership:
                 else:
                     raise Exception(f"Column {column} not found in data. column_filters is malformed")
 
+            if postcodes is not None:
+                epc_data = epc_data[epc_data["POSTCODE"].str.lower().isin(postcodes)]
+            if epc_data.empty:
+                continue
+
             data.append(epc_data)
 
         self.epc_data = pd.concat(data, ignore_index=True)

From 722a3dba55271454e8482c42494baa66572dec29 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 11 Oct 2024 10:16:48 +0100
Subject: [PATCH 4/5] working on gla proposal

---
 etl/customers/gla/proposal_investigation.py | 50 +++++++++++++++---
 etl/ownership/Ownership.py                  | 56 +++++++++++----------
 2 files changed, 72 insertions(+), 34 deletions(-)

diff --git a/etl/customers/gla/proposal_investigation.py b/etl/customers/gla/proposal_investigation.py
index 776bbc59..05df6be7 100644
--- a/etl/customers/gla/proposal_investigation.py
+++ b/etl/customers/gla/proposal_investigation.py
@@ -10,16 +10,18 @@ from pathlib import Path
 from etl.ownership.Ownership import Ownership
 
 postcodes = pd.read_excel(
-    "/Users/khalimconn-kowlessar/Downloads/WHLG-eligible-postcodes.xlsx", sheet_name='Eligible postcodes'
+    "/Users/khalimconn-kowlessar/Downloads/WHLG-eligible-postcodes_RP edit.xlsx", sheet_name='Eligible postcodes'
 )
-# Take just the first two columns
+# Take just the first three columns
 postcodes = postcodes[
-    ['List of eligible postcodes via the IMD Income Decile 1-2 pathway', 'Unnamed: 1']
+    ['List of eligible postcodes via the IMD Income Decile 1-2 pathway', 'Unnamed: 1', 'Unnamed: 2']
 ]
 
-postcodes.columns = ['postcode', 'Local Authority']
+postcodes.columns = ['postcode', 'Local Authority', 'London Borough?']
 # Drop the first row
 postcodes = postcodes.drop([0, 1])
+# Take just the London Boroughs
+postcodes = postcodes[postcodes["London Borough?"] == "Yes"]
 # Since there are a large number of potcodes (425k), let's just take a few examples
 # Take postcodes that begin with "BN15"
 # postcodes = postcodes[postcodes["postcode"].str.startswith("BN15")]
@@ -74,22 +76,46 @@ ownership.prepare_for_matching()
 # Step 4: Match EPC data to ownership data
 ownership.match()
 
+from utils.s3 import save_excel_to_s3
+
+# Save the data to S3
+save_excel_to_s3(
+    df=ownership.matched_addresses,
+    bucket_name=ownership.bucket,
+    file_key=ownership.matched_addresses_pre_filter_filepath
+)
+
 # We have the matches, which we now need to match to the postcodes
 matches = ownership.matched_addresses.copy()
 # filter matches on the postcodes we're interested in
 matches = matches[matches["epc_postcode"].str.lower().isin(postcodes["postcode"].str.lower())]
 # Remove any social transactions
-matches = matches[~matches["TENURE"].isin(["Rented (social)", "rental (social)"])]
+matches = matches[~matches["TENURE"].isin(
+    ["Rented (social)", "rental (social)",
+     "Not defined - use in the case of a new dwelling for which the intended tenure in not known. It is not to be "
+     "used for an existing dwelling", "NO DATA!"])
+]
+# Look at the EPC ratings
+epc_ratings = matches.groupby(["CURRENT_ENERGY_RATING"]).size().reset_index()
+epc_ratings.columns = ["EPC Rating", "Count"]
+epc_ratings["Percentage"] = epc_ratings["Count"] / epc_ratings["Count"].sum() * 100
+
+# Take properties that are below an EPC C rating, as defined by the guidance and remove any new builds
+matches = matches[matches["CURRENT_ENERGY_RATING"].isin(["D", "E", "F", "G"])]
+# 11,694 properties
 
-matches.head()
 owners_count = matches.groupby(['Proprietor Name (1)', 'Company Registration No. (1)']).size().reset_index()
 owners_count.columns = ['Owner', 'Owner Registration #', 'Count']
 owners_count = owners_count.sort_values('Count', ascending=False)
 owners_count["Percentage"] = owners_count["Count"] / owners_count["Count"].sum() * 100
 
+# Take an example postal region
+matches = matches.sort_values("epc_postcode", ascending=True)
+example = matches[matches["epc_postcode"].str.startswith("BR1 ")].copy()
+
 companies_house_api_key = "1d9c2877-3271-4642-80ed-a6170971653f"
 
-company_number = "13197205"
+company_number = example.head(1)["Company Registration No. (1)"].values[0]
 url = f'https://api.company-information.service.gov.uk/company/{company_number}'
 
 # Make the API request
@@ -102,7 +128,17 @@ if response.status_code == 200:
     print(json.dumps(company_data, indent=4))
 else:
     print(f"Failed to fetch data. Status code: {response.status_code}")
+    # Try prepending a zero to the beginning of the company number
+    company_number = f"0{company_number}"
+    url = f'https://api.company-information.service.gov.uk/company/{company_number}'
+    response = requests.get(url, auth=(companies_house_api_key, ''))
+    company_data = response.json()
+
+from pprint import pprint
+
+pprint(company_data)
 
 psc_url = f'https://api.company-information.service.gov.uk/company/{company_number}/persons-with-significant-control'
 psc_response = requests.get(psc_url, auth=(companies_house_api_key, ''))
 psc_data = psc_response.json()
+pprint(psc_data)
diff --git a/etl/ownership/Ownership.py b/etl/ownership/Ownership.py
index 2c04ac8a..68dee9ed 100644
--- a/etl/ownership/Ownership.py
+++ b/etl/ownership/Ownership.py
@@ -161,16 +161,17 @@ class Ownership:
         # Step 5: Match land registry data to existing matches
         self.match_with_land_registry()
         # We store this data in s3 before we perform any filtering
-        save_excel_to_s3(
-            df=self.matched_addresses,
-            bucket_name=self.bucket,
-            file_key=self.matched_addresses_pre_filter_filepath
-        )
-        save_excel_to_s3(
-            df=self.combined_matching_lookup,
-            bucket_name=self.bucket,
-            file_key=self.combined_matching_lookup_pre_filter_filepath
-        )
+        if self.save:
+            save_excel_to_s3(
+                df=self.matched_addresses,
+                bucket_name=self.bucket,
+                file_key=self.matched_addresses_pre_filter_filepath
+            )
+            save_excel_to_s3(
+                df=self.combined_matching_lookup,
+                bucket_name=self.bucket,
+                file_key=self.combined_matching_lookup_pre_filter_filepath
+            )
 
         # Prepare the final outputs:
         self.create_final_matches()
@@ -1013,25 +1014,26 @@ class Ownership:
         if self.portfolio_properties["UPRN"].nunique() != self.portfolio_epc_data["UPRN"].nunique():
             raise ValueError("Portfolio properties and epc data don't match")
 
-        logger.info("Storing final outpus")
-        # Store data
-        save_excel_to_s3(
-            df=self.portfolio_owners,
-            bucket_name=self.bucket,
-            file_key=self.portfolio_owners_filepath,
-        )
+        if self.save:
+            logger.info("Storing final outpus")
+            # Store data
+            save_excel_to_s3(
+                df=self.portfolio_owners,
+                bucket_name=self.bucket,
+                file_key=self.portfolio_owners_filepath,
+            )
 
-        save_excel_to_s3(
-            df=self.portfolio_properties,
-            bucket_name=self.bucket,
-            file_key=self.portfolio_properties_filepath,
-        )
+            save_excel_to_s3(
+                df=self.portfolio_properties,
+                bucket_name=self.bucket,
+                file_key=self.portfolio_properties_filepath,
+            )
 
-        save_excel_to_s3(
-            df=self.portfolio_epc_data,
-            bucket_name=self.bucket,
-            file_key=self.portfolio_epc_data_filepath,
-        )
+            save_excel_to_s3(
+                df=self.portfolio_epc_data,
+                bucket_name=self.bucket,
+                file_key=self.portfolio_epc_data_filepath,
+            )
 
     def get_asset_list(self):
         """

From dadbb0ef61d4cb402029158ffb0acad3cec2ad22 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 16 Oct 2024 10:19:35 +0100
Subject: [PATCH 5/5] Finished GLA proposal

---
 backend/Property.py                         |  2 +-
 backend/app/plan/router.py                  |  6 +++-
 backend/app/plan/schemas.py                 |  3 ++
 backend/ml_models/Valuation.py              |  2 ++
 etl/customers/gla/example_model_outputs.py  | 38 ++++++++++++++++++++
 etl/customers/gla/proposal_investigation.py | 39 ++++++++++++++++++---
 6 files changed, 83 insertions(+), 7 deletions(-)
 create mode 100644 etl/customers/gla/example_model_outputs.py

diff --git a/backend/Property.py b/backend/Property.py
index ab8930c5..79108dc1 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -1204,7 +1204,7 @@ class Property:
             return False
 
         suitable_house = self.data["property-type"] == "House" and self.data["built-form"] in [
-            "Detached", "Semi-Detached",
+            "Detached", "Semi-Detached", "End-Terrace",
         ]
 
         suitable_bungalow = self.data["property-type"] == "Bungalow" and self.data["built-form"] in [
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index f4924c71..3b91a461 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -543,7 +543,11 @@ async def trigger_plan(body: PlanTriggerRequest):
         representative_recommendations = {}
         for p in tqdm(input_properties):
             recommender = Recommendations(
-                property_instance=p, materials=materials, exclusions=body.exclusions, inclusions=body.inclusions
+                property_instance=p,
+                materials=materials,
+                exclusions=body.exclusions,
+                inclusions=body.inclusions,
+                default_u_values=body.default_u_values
             )
             property_recommendations, property_representative_recommendations = recommender.recommend()
 
diff --git a/backend/app/plan/schemas.py b/backend/app/plan/schemas.py
index 0d58c7e9..4b43db80 100644
--- a/backend/app/plan/schemas.py
+++ b/backend/app/plan/schemas.py
@@ -89,6 +89,9 @@ class PlanTriggerRequest(BaseModel):
     # if False, allows optimisation to be switched off
     optimise: Optional[bool] = True
 
+    # If True, uses default u-values for models
+    default_u_values: Optional[bool] = True
+
     _allowed_goals = {"Increasing EPC"}
 
     _allowed_housing_types = {"Social", "Private"}
diff --git a/backend/ml_models/Valuation.py b/backend/ml_models/Valuation.py
index cbcebb9f..68432577 100644
--- a/backend/ml_models/Valuation.py
+++ b/backend/ml_models/Valuation.py
@@ -103,6 +103,8 @@ class PropertyValuation:
         # Vander Elliot Intrusive surveys
         12103116: 1_537_000,
         12103117: 1_404_000,
+        # GLA Proposal
+        100020606627: 409_000
     }
 
     # We base our valuation uplifts on a number of sources
diff --git a/etl/customers/gla/example_model_outputs.py b/etl/customers/gla/example_model_outputs.py
new file mode 100644
index 00000000..e239c43d
--- /dev/null
+++ b/etl/customers/gla/example_model_outputs.py
@@ -0,0 +1,38 @@
+import pandas as pd
+from utils.s3 import save_csv_to_s3
+
+asset_list = [  # one dict per property, each with an "address" and a "postcode"
+    {
+        "address": "4, King Henrys Drive",
+        "postcode": "CR0 0PA"
+    },
+]
+portfolio_id = 110
+user_id = 8
+
+asset_list = pd.DataFrame(asset_list)  # rebinds the name: the list of dicts becomes a DataFrame
+
+filename = f"{user_id}/{portfolio_id}/asset_list.csv"  # S3 key, namespaced by user id then portfolio id
+save_csv_to_s3(
+    dataframe=asset_list,
+    bucket_name="retrofit-plan-inputs-dev",
+    file_name=filename
+)
+
+body1 = {  # presumably a plan-trigger request body - TODO confirm against PlanTriggerRequest
+    "portfolio_id": str(portfolio_id),
+    "housing_type": "Private",
+    "goal": "Increasing EPC",
+    "goal_value": "A",
+    "trigger_file_path": filename,  # the same key the asset list was uploaded under above
+    "already_installed_file_path": "",
+    "patches_file_path": "",
+    "non_invasive_recommendations_file_path": "",
+    "inclusions": [
+        "cavity_wall_insulation", "loft_insulation", "air_source_heat_pump", "solar_pv"
+    ],
+    "budget": None,
+    "scenario_name": "Whole House",
+    "multi_plan": False,
+}
+print(body1)  # only printed; nothing in this script sends it anywhere
diff --git a/etl/customers/gla/proposal_investigation.py b/etl/customers/gla/proposal_investigation.py
index 05df6be7..f6a87af1 100644
--- a/etl/customers/gla/proposal_investigation.py
+++ b/etl/customers/gla/proposal_investigation.py
@@ -76,13 +76,20 @@ ownership.prepare_for_matching()
 # Step 4: Match EPC data to ownership data
 ownership.match()
 
-from utils.s3 import save_excel_to_s3
+from utils.s3 import save_excel_to_s3, read_excel_from_s3
 
 # Save the data to S3
-save_excel_to_s3(
-    df=ownership.matched_addresses,
+# save_excel_to_s3(
+#     df=ownership.matched_addresses,
+#     bucket_name=ownership.bucket,
+#     file_key=ownership.matched_addresses_pre_filter_filepath
+# )
+
+# Read in matches
+matches = read_excel_from_s3(
     bucket_name=ownership.bucket,
-    file_key=ownership.matched_addresses_pre_filter_filepath
+    file_key="ownership/gla-proposal/2024-10-10 19:02:34.131365/matched_addresses_pre_filter.xlsx",
+    header_row=0
 )
 
 # We have the matches, which we now need to match to the postcodes
@@ -95,6 +102,7 @@ matches = matches[~matches["TENURE"].isin(
      "Not defined - use in the case of a new dwelling for which the intended tenure in not known. It is not to be "
      "used for an existing dwelling", "NO DATA!"])
 ]
+matches["is_prs"] = matches["TENURE"].isin(["rental (private)", "Rented (private)"])
 # Look at the EPC ratings
 epc_ratings = matches.groupby(["CURRENT_ENERGY_RATING"]).size().reset_index()
 epc_ratings.columns = ["EPC Rating", "Count"]
@@ -103,6 +111,8 @@ epc_ratings["Percentage"] = epc_ratings["Count"] / epc_ratings["Count"].sum() *
 # Take properties that are below an EPC C rating, as defined by the guidance and remove any new builds
 matches = matches[matches["CURRENT_ENERGY_RATING"].isin(["D", "E", "F", "G"])]
 # 11,694 properties
+matches["epc_postcode"].nunique()
+# 6899
 
 owners_count = matches.groupby(['Proprietor Name (1)', 'Company Registration No. (1)']).size().reset_index()
 owners_count.columns = ['Owner', 'Owner Registration #', 'Count']
@@ -111,7 +121,26 @@ owners_count["Percentage"] = owners_count["Count"] / owners_count["Count"].sum()
 
 # Take an example postal region
 matches = matches.sort_values("epc_postcode", ascending=True)
-example = matches[matches["epc_postcode"].str.startswith("BR1 ")].copy()
+# BR1, BR5
+example = matches[matches["epc_postcode"].str.startswith("CR0 ")].copy()
+example = example[example["TENURE"].isin(["rental (private)", "Rented (private)"])]
+
+pd.set_option('display.max_rows', 500)
+pd.set_option('display.max_columns', 500)
+pd.set_option('display.width', 1000)
+example[
+    ["epc_address", "epc_postcode", "CURRENT_ENERGY_RATING", "CURRENT_ENERGY_EFFICIENCY", "Proprietor Name (1)",
+     "Company Registration No. (1)"]
+].head(4)
+
+ownership.epc_data["UPRN"] = ownership.epc_data["UPRN"].astype(int)
+example = example.merge(
+    ownership.epc_data[["UPRN", "BUILT_FORM", "PROPERTY_TYPE", "WALLS_DESCRIPTION", "ROOF_DESCRIPTION"]],
+    on="UPRN",
+    how="left"
+)
+z = example[example["CURRENT_ENERGY_RATING"] == "E"]
+z = z[z["TENURE"].isin(["rental (private)", "Rented (private)"])]
 
 companies_house_api_key = "1d9c2877-3271-4642-80ed-a6170971653f"