From 325748524817708da77c30c9f1155470d7020e1f Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 24 Jul 2024 18:46:39 +0100
Subject: [PATCH 001/182] xml extraction wip

---
 etl/xml_survey_extraction/XmlParser.py |  540 ++++++++++++
 etl/xml_survey_extraction/app.py       |   43 +-
 etl/xml_survey_extraction/pcdb.py      | 1129 ++++++++++++++++++++++++
 utils/s3.py                            |   83 ++
 4 files changed, 1794 insertions(+), 1 deletion(-)
 create mode 100644 etl/xml_survey_extraction/XmlParser.py
 create mode 100644 etl/xml_survey_extraction/pcdb.py

diff --git a/etl/xml_survey_extraction/XmlParser.py b/etl/xml_survey_extraction/XmlParser.py
new file mode 100644
index 00000000..de7e35f8
--- /dev/null
+++ b/etl/xml_survey_extraction/XmlParser.py
@@ -0,0 +1,540 @@
+import re
+import usaddress
+from xml.dom.minidom import parseString
+from backend.app.utils import sap_to_epc
+from etl.xml_survey_extraction.pcdb import heating_data
+
+PROPERTY_TYPE_LOOKUP = {
+    "0": "House",
+    "House": "House",
+}
+
+
+def get_house_number(address: str) -> str | None:
+    """
+    Extract the house number from a free-text address, using usaddress with a regex fallback
+    :return: the house number as a string, or None when no number could be found
+    """
+
+    parsed = usaddress.parse(address)
+    parsed_house_number = [x for x in parsed if (x[1] == "AddressNumber")]
+    parsed_house_number = parsed_house_number[0][0] if parsed_house_number else None
+
+    if parsed_house_number is None:
+        # Because usaddress isn't optimal for parsing addresses with some prefixes such as 'Flat',
+        # we also add a custom approach
+
+        # Pattern to look for 'Flat' or 'Apartment' followed by a number, or just a number at the beginning
+        pattern = r'(?i)(?:flat|apartment)\s*(\d+)|^\s*(\d+)'
+
+        match = re.search(pattern, address)
+
+        if match:
+            # Return the first non-None group found
+            return next(g for g in match.groups() if g is not None)
+        else:
+            return None
+
+    # Remove trailing commas (this strips every comma in the token)
+    parsed_house_number = parsed_house_number.replace(",", "")
+
+    return parsed_house_number
+
+
+class XmlParser:
+    uprn = None
+    property_type = None
+    current_energy_efficiency = None
+    current_energy_rating = None
+
+    # heating/emissions information
+    space_heating_kwh = None
+    water_heating_kwh = None
+    co2_emissions_current = None
+    heating_cost_current = None
+    hot_water_cost_current = None
+    lighting_cost_current = None
+    energy_consumption_current = None
+    heating_system = None
+    heating_controls = None
+
+    # Assessor details
+    surveyor_name = None
+
+    # Addresses
+    address1 = None
+    address2 = None
+    address3 = None
+    posttown = None
+    postcode = None
+    address = None
+
+    # Dates
+    survey_date = None
+
+    # Building Fabric
+    # Walls
+    walls_description = None
+    walls_classification = None
+    walls_energy_rating = None
+    # Roof
+    roof_description = None
+    roof_energy_rating = None
+    is_loft = None
+    # Floor
+    floor_description = None
+    floor_energy_rating = None
+    # Windows
+    windows_description = None
+    windows_energy_rating = None
+    # main heating
+    main_heating_description = None
+    main_heating_energy_rating = None
+    # Heating controls
+    main_heating_controls_description = None
+    main_heating_controls_energy_rating = None
+    # Hot water
+    hot_water_description = None
+    hot_water_energy_rating = None
+    # Lighting
+    lighting_description = None
+    lighting_energy_rating = None
+    # Second Heating
+    second_heating_description = None
+    second_heating_energy_rating = None
+
+    number_of_doors = None
+    number_of_insulated_doors = None
+    photo_supply = None
+
+    # Property dimensions
+    number_of_floors = None
+    perimeter = None
+    heat_loss_perimeter = None
+    party_wall_length = None
+    total_floor_area = None
+    ground_floor_area = None
+    is_there_party_wall = None
+    floor_height = None
+    insulation_wall_area = None
+
+    rrn = None
+
+    database_data = None
+
+    # We assume that the insulation wall area is 85% of the total wall area, as a standard estimate
+    INSULATION_WALL_AREA_FACTOR = 0.85
+
+    # The value of the UPRN tells us about the file type that we're parsing
+    UPRN_FILETYPE_MAP = {
+        0: "EPR",
+        -1: "RDSAP_EPR"
+    }
+
+    RATINGS_MAP = {
+        "0": "N/A",
+        "1": "Very Poor",
+        "2": "Poor",
+        "3": "Average",
+        "4": "Good",
+        "5": "Very Good"
+    }
+
+    def __init__(self, file, filekey, uprn=None):
+        file.seek(0)  # Ensure the file pointer is at the beginning
+        xml_string = file.read().decode('utf-8')
+        self.xml = parseString(xml_string)
+        self.filekey = filekey
+
+        # This parser is used for both EPC and EPR xmls, and the different file types contain different
+        # information.
+        # The file type is identified from the UPRN: a UPRN of 0 or -1 maps to an EPR variant through
+        # UPRN_FILETYPE_MAP, and any other value is treated as an EPC.
+        # A UPRN supplied by the caller takes precedence over the <UPRN> tag, so it is passed to get_uprn.
+        self.get_uprn(uprn)
+
+        self.file_type = self.UPRN_FILETYPE_MAP.get(self.uprn, "EPC")
+
+    @staticmethod
+    def get_node(node):
+        """
+        Utility function to get the value of a node's first child from the xml, where data might be optional
+        :return: the first child's nodeValue, or None when the node has no children
+        """
+
+        node_first_child = node.firstChild
+        if node_first_child is None:
+            return None
+
+        return node_first_child.nodeValue
+
+    def run(self):
+        if self.file_type == "RDSAP_EPR":
+            # This file type contains just limited information compared to a regular EPR/EPC, and so we just exit
+            # unless we learn something else that determines that we need information from this file
+            return
+        self.get_property_type()
+        self.get_sap()
+        self.get_property_address()
+        self.get_dates()
+        self.get_assessor_details()
+
+        self.get_heating_and_emissions_data()
+        self.get_detailed_heating_specs()
+
+        # Building fabric
+        self.get_walls()
+        self.get_roof()
+        self.get_floor()
+        self.get_windows()
+        self.get_heating()
+        self.get_hot_water()
+        self.get_lighting()
+        self.get_doors()
+        self.get_photo_supply()
+
+        # Property dimensions
+        self.get_property_dimensions()
+
+    def get_uprn(self, uprn=None):
+        """Set self.uprn from the given value, falling back to the XML <UPRN> tag.
+
+        An empty tag is stored as -1 and an all-zero tag as 0 (see UPRN_FILETYPE_MAP).
+        """
+        if uprn is not None:
+            self.uprn = uprn
+            return
+
+        uprn_tag = self.xml.getElementsByTagName('UPRN')[0].firstChild
+        if uprn_tag is None:
+            self.uprn = -1
+        elif uprn_tag.nodeValue.count("0") == len(uprn_tag.nodeValue):
+            self.uprn = 0  # every character is "0", so no UPRN has been set
+        else:
+            # [-1] also accepts a value that carries no "uprn-" prefix
+            self.uprn = uprn_tag.nodeValue.lower().split("uprn-")[-1]
+
+    def get_property_type(self):
+        if not self.xml:
+            raise ValueError("You need to read the file first")
+
+        property_type = self.xml.getElementsByTagName('Property-Type')
+        if not property_type:
+            property_type = self.xml.getElementsByTagName('PropertyType1')
+
+        self.property_type = PROPERTY_TYPE_LOOKUP[property_type[0].firstChild.nodeValue]
+
+    def get_sap(self):
+        sap_score = self.xml.getElementsByTagName('Energy-Rating-Current')
+        sap_score = int(sap_score[0].firstChild.nodeValue)
+        epc_rating = sap_to_epc(sap_score)
+        self.current_energy_efficiency = str(sap_score)
+        self.current_energy_rating = epc_rating
+
+    def get_heating_and_emissions_data(self):
+        """
+        This method will extract the following pieces of information:
+        1) Space heating requirement
+        2) Water heating requirement
+        3) CO2 emissions
+        4) Heat demand per square meter per year
+        5) Bills
+
+        :return:
+        """
+
+        self.space_heating_kwh = self.xml.getElementsByTagName(
+            'Space-Heating-Existing-Dwelling'
+        )[0].firstChild.nodeValue
+
+        self.water_heating_kwh = self.xml.getElementsByTagName('Water-Heating')[0].firstChild.nodeValue
+
+        self.co2_emissions_current = self.xml.getElementsByTagName('CO2-Emissions-Current')[0].firstChild.nodeValue
+        self.heating_cost_current = self.xml.getElementsByTagName('Heating-Cost-Current')[0].firstChild.nodeValue
+        self.hot_water_cost_current = self.xml.getElementsByTagName('Hot-Water-Cost-Current')[0].firstChild.nodeValue
+        self.lighting_cost_current = self.xml.getElementsByTagName('Lighting-Cost-Current')[0].firstChild.nodeValue
+        self.energy_consumption_current = (
+            self.xml.getElementsByTagName("Energy-Consumption-Current")[0].firstChild.nodeValue
+        )
+
+    def get_detailed_heating_specs(self):
+        """
+        Given the heating data that is found in the <SAP-Heating> tag, we extract the details of the main heating
+        system and its controls, falling back to a label containing the raw code when a code is not in heating_data
+        :return:
+        """
+        sap_main_heating_details = (
+            self.xml.getElementsByTagName('SAP-Heating')[0]
+            .getElementsByTagName("Main-Heating-Details")[0]
+            .getElementsByTagName("Main-Heating")[0]
+        )
+
+        heating_code = sap_main_heating_details.getElementsByTagName("SAP-Main-Heating-Code")[0].firstChild.nodeValue
+
+        # Get the heating system
+        heating_system = heating_data[heating_data["code"] == int(heating_code)]["description"]
+        heating_system = heating_system.values[0] if not heating_system.empty else f"Heating code: {heating_code}"
+
+        # Get the heating controls
+        heating_controls_code = (
+            sap_main_heating_details.getElementsByTagName("Main-Heating-Control")[0].firstChild.nodeValue
+        )
+
+        heating_controls = heating_data[heating_data["code"] == int(heating_controls_code)]["description"]
+        heating_controls = (
+            heating_controls.values[0] if not heating_controls.empty else f"Heating Controls code: {heating_controls_code}"
+        )
+
+        self.heating_system = heating_system
+        self.heating_controls = heating_controls
+
+    def get_walls(self):
+        """Read the wall description and rating from Property-Summary and classify the cavity insulation."""
+        wall_xml_data = self.xml.getElementsByTagName('Property-Summary')[0].getElementsByTagName('Wall')[0]
+
+        self.walls_description = (
+            wall_xml_data
+            .getElementsByTagName("Description")[0]
+            .firstChild.nodeValue
+        )
+
+        self.walls_energy_rating = (
+            wall_xml_data
+            .getElementsByTagName("Energy-Efficiency-Rating")[0]
+            .firstChild.nodeValue
+        )
+
+        is_cavity = "cavity wall" in self.walls_description.lower()
+        is_empty = "no insulation" in self.walls_description.lower()
+        is_partial = "partial insulation" in self.walls_description.lower()
+
+        if not is_cavity:
+            self.walls_classification = "NON CAVITY"
+            return
+
+        if is_empty:
+            self.walls_classification = "EMPTY"
+            return
+
+        if is_partial:
+            self.walls_classification = "PARTIAL"
+            return
+
+        if is_cavity and not is_empty and not is_partial:
+            self.walls_classification = "FULL"
+            return
+        # Unreachable: every combination of the three flags has already returned above
+        raise NotImplementedError("Implement me")
+
+    def get_roof(self):
+        """Read the roof description and rating from Property-Summary and set is_loft to "Yes" or "No"."""
+        room_xml_data = self.xml.getElementsByTagName('Property-Summary')[0].getElementsByTagName('Roof')[0]
+
+        self.roof_description = (
+            room_xml_data
+            .getElementsByTagName("Description")[0]
+            .firstChild.nodeValue
+        )
+
+        self.roof_energy_rating = (
+            room_xml_data
+            .getElementsByTagName("Energy-Efficiency-Rating")[0]
+            .firstChild.nodeValue
+        )
+
+        loft_recommendation_tag = self.xml.getElementsByTagName("Impact-Of-Loft-Insulation")
+        description_contains_loft = "loft" in self.roof_description.lower()
+
+        if not loft_recommendation_tag and not description_contains_loft:
+            self.is_loft = "No"
+            return
+        # Either an Impact-Of-Loft-Insulation tag or "loft" in the description counts as having a loft
+        self.is_loft = "Yes"
+        return
+
+    def get_floor(self):
+
+        floor_xml_data = self.xml.getElementsByTagName('Property-Summary')[0].getElementsByTagName('Floor')[0]
+
+        self.floor_description = (
+            floor_xml_data
+            .getElementsByTagName("Description")[0]
+            .firstChild.nodeValue
+        )
+
+        self.floor_energy_rating = (
+            floor_xml_data
+            .getElementsByTagName("Energy-Efficiency-Rating")[0]
+            .firstChild.nodeValue
+        )
+
+    def get_windows(self):
+
+        windows_xml_data = self.xml.getElementsByTagName('Property-Summary')[0].getElementsByTagName('Window')[0]
+
+        self.windows_description = (
+            windows_xml_data
+            .getElementsByTagName("Description")[0]
+            .firstChild.nodeValue
+        )
+
+        self.windows_energy_rating = (
+            windows_xml_data
+            .getElementsByTagName("Energy-Efficiency-Rating")[0]
+            .firstChild.nodeValue
+        )
+
+    def get_heating(self):
+        """
+        This function will retrieve the main heating and the main heating controls
+        :return:
+        """
+        mainheating_xml_data = self.xml.getElementsByTagName('Main-Heating')[0]
+
+        self.main_heating_description = (
+            mainheating_xml_data.getElementsByTagName('Description')[0].firstChild.nodeValue
+        )
+
+        self.main_heating_energy_rating = (
+            mainheating_xml_data.getElementsByTagName('Energy-Efficiency-Rating')[0].firstChild.nodeValue
+        )
+
+        mainheating_controls_xml_data = self.xml.getElementsByTagName('Main-Heating-Controls')[0]
+
+        self.main_heating_controls_description = (
+            mainheating_controls_xml_data.getElementsByTagName('Description')[0].firstChild.nodeValue
+        )
+
+        self.main_heating_controls_energy_rating = (
+            mainheating_controls_xml_data.getElementsByTagName('Energy-Efficiency-Rating')[0].firstChild.nodeValue
+        )
+
+        second_heating_xml_data = self.xml.getElementsByTagName('Secondary-Heating')[0]
+
+        self.second_heating_description = (
+            second_heating_xml_data.getElementsByTagName('Description')[0].firstChild.nodeValue
+        )
+
+        self.second_heating_energy_rating = (
+            second_heating_xml_data.getElementsByTagName('Energy-Efficiency-Rating')[0].firstChild.nodeValue
+        )
+
+    def get_hot_water(self):
+        hot_water_xml_data = self.xml.getElementsByTagName('Hot-Water')[0]
+
+        self.hot_water_description = (
+            hot_water_xml_data.getElementsByTagName('Description')[0].firstChild.nodeValue
+        )
+
+        self.hot_water_energy_rating = (
+            hot_water_xml_data.getElementsByTagName('Energy-Efficiency-Rating')[0].firstChild.nodeValue
+        )
+
+    def get_lighting(self):
+        lighting_xml_data = self.xml.getElementsByTagName('Lighting')[0]
+
+        self.lighting_description = (
+            lighting_xml_data.getElementsByTagName('Description')[0].firstChild.nodeValue
+        )
+
+        self.lighting_energy_rating = (
+            lighting_xml_data.getElementsByTagName('Energy-Efficiency-Rating')[0].firstChild.nodeValue
+        )
+
+    def get_doors(self):
+
+        # Doors can be found in the SAP-Property-Details tag
+        self.number_of_doors = int(
+            self.xml.getElementsByTagName('SAP-Property-Details')[0]
+            .getElementsByTagName('Door-Count')[0]
+            .firstChild.nodeValue
+        )
+
+        self.number_of_insulated_doors = int(
+            self.xml.getElementsByTagName('SAP-Property-Details')[0]
+            .getElementsByTagName('Insulated-Door-Count')[0]
+            .firstChild.nodeValue
+        )
+
+    def get_photo_supply(self):
+        self.photo_supply = float(
+            self.xml.getElementsByTagName('Photovoltaic-Supply')[0]
+            .getElementsByTagName('Percent-Roof-Area')[0]
+            .firstChild.nodeValue
+        )
+
+    def get_assessor_details(self):
+
+        energy_assessor_tag = self.xml.getElementsByTagName('Energy-Assessor')[0]
+
+        self.surveyor_name = (
+            energy_assessor_tag.getElementsByTagName("Name")[0].firstChild.nodeValue
+        )
+
+    def get_property_address(self):
+
+        property_tag = self.xml.getElementsByTagName("Property")[0]
+
+        self.address1 = self.get_node(property_tag.getElementsByTagName("Address-Line-1")[0])
+        self.address2 = self.get_node(property_tag.getElementsByTagName("Address-Line-2")[0])
+        self.address3 = self.get_node(property_tag.getElementsByTagName("Address-Line-3")[0])
+        self.posttown = self.get_node(property_tag.getElementsByTagName("Post-Town")[0])
+        self.postcode = self.get_node(property_tag.getElementsByTagName("Postcode")[0])
+        self.address = ", ".join(
+            [x for x in [self.address1, self.address2, self.address3, self.posttown, self.postcode] if x is not None]
+        )
+
+    def get_dates(self):
+        self.survey_date = (
+            self.xml.getElementsByTagName("Inspection-Date")[0].firstChild.nodeValue
+        )
+
+    def get_property_dimensions(self):
+        """
+        This function will extract the relevant property dimensions including the floor area,
+        number of floors, perimeter, party wall length and the insulation_wall_area.
+
+        insulation_wall_area is typically simplified down to perimeter * height * 0.85
+        :return:
+        """
+
+        # Each floor has its own SAP-Floor-Dimension tag
+        floor_dimensions = (
+            self.xml.getElementsByTagName("SAP-Floor-Dimensions")[0]
+            .getElementsByTagName("SAP-Floor-Dimension")
+        )
+
+        self.number_of_floors = len(floor_dimensions)
+
+        self.heat_loss_perimeter = float(
+            floor_dimensions[0].getElementsByTagName("Heat-Loss-Perimeter")[0].firstChild.nodeValue
+        )
+
+        self.party_wall_length = float(
+            floor_dimensions[0].getElementsByTagName("Party-Wall-Length")[0].firstChild.nodeValue
+        )
+
+        party_wall_construction_tag = (
+            self.xml.getElementsByTagName("Party-Wall-Construction")[0].firstChild.nodeValue.replace("\n", "").strip()
+        )
+
+        self.is_there_party_wall = (
+            "Yes" if (self.party_wall_length > 0) or (party_wall_construction_tag != "") else "No"
+        )
+
+        # We pull out all of the floor areas
+        floor_areas = [
+            float(x.getElementsByTagName("Total-Floor-Area")[0].firstChild.nodeValue) for x in floor_dimensions
+        ]
+
+        self.total_floor_area = sum(floor_areas)
+        self.ground_floor_area = floor_areas[0]
+
+        self.floor_height = float(
+            floor_dimensions[0]
+            .getElementsByTagName("Room-Height")[0]
+            .firstChild.nodeValue
+        )
+
+        self.insulation_wall_area = self.heat_loss_perimeter * self.floor_height * self.INSULATION_WALL_AREA_FACTOR
+        self.perimeter = self.heat_loss_perimeter + self.party_wall_length
diff --git a/etl/xml_survey_extraction/app.py b/etl/xml_survey_extraction/app.py
index 6f53e4e2..9bcbb168 100644
--- a/etl/xml_survey_extraction/app.py
+++ b/etl/xml_survey_extraction/app.py
@@ -1,3 +1,16 @@
+from utils.s3 import read_from_s3, list_files_and_subfolders_in_s3_folder, list_xmls_in_s3_folder
+from utils.logger import setup_logger
+from etl.xml_survey_extraction.XmlParser import XmlParser
+import os
+from io import BytesIO
+
+logger = setup_logger()
+
+SURVEYORS = "JAFFERSONS ENERGY CONSULTANTS"
+PROJECT_CODE = "VDE001"
+BUCKET = "retrofit-energy-assessments-dev"
+
+
 def main():
     """
     This function executes the main process, which will retrieve data from the specified locations, extract the data
@@ -6,4 +19,32 @@ def main():
     """
 
     # TODO: Build solution to get this data from Onedrive and store what we need in S3
-    #       In s3, we have a bucket called retrofit-energy-assessments-{stage} which
+    #       In s3, we have a bucket called retrofit-energy-assessments-{stage} which contains the data we need
+    #      The data is stored in a folder called {surveyors}/{project_code}/{uprn}
+    #       We'll need to get the uprn from the folder name, which we can do with EpcSearcher class
+
+    #
+    energy_assessments = list_files_and_subfolders_in_s3_folder(
+        bucket_name=BUCKET, folder_name=f"{SURVEYORS}/{PROJECT_CODE}/"
+    )
+
+    logger.info(f"Found {len(energy_assessments)} energy assessments for {SURVEYORS} and {PROJECT_CODE}")
+    assessments_map = {}
+    for assessment in energy_assessments:
+        uploaded_xmls = list_xmls_in_s3_folder(
+            bucket_name=BUCKET, folder_name=os.path.join(assessment, "docs & plans")
+        )
+        uprn = int(assessment.rstrip("/").split("/")[-1])
+        assessments_map[uprn] = uploaded_xmls
+
+    logger.info(f"Exatracted XMLS for the energy assessments")
+
+    # For each property, we download the xmls and extract the data
+    for uprn, xmls in assessments_map.items():
+        extracted_data = {}
+        for xml in xmls:
+            xml_data = read_from_s3(bucket_name=BUCKET, s3_file_name=xml)
+            xml_data_io = BytesIO(xml_data)
+            xml_parser = XmlParser(file=xml_data_io, filekey=xml, uprn=uprn)
+            xml_parser.run()
+            logger.info(f"Extracted data from {xml}")
diff --git a/etl/xml_survey_extraction/pcdb.py b/etl/xml_survey_extraction/pcdb.py
new file mode 100644
index 00000000..64d65708
--- /dev/null
+++ b/etl/xml_survey_extraction/pcdb.py
@@ -0,0 +1,1129 @@
+"""
+This script contains the systems data, contained in the BRE product characteristics database (PCDB).
+
+For SAP 10.2, this can be found in the following document:
+https://files.bregroup.com/SAP/SAP%2010.2%20-%2017-12-2021.pdf
+
+From page 157 onwards
+"""
+import pandas as pd
+
+no_heating_system = [
+    {
+        "category": "No heating system present",
+        "description": "Electric heaters (assumed)",
+        "efficiency": 100,
+        "heating_type": 1,
+        "responsiveness": 1.0,
+        "code": 699
+    }
+]
+
+boiler_systems_with_radiators_or_underfloor_heating = [
+    # Solid fuel boilers
+    {
+        "category": "Boiler systems with radiators or underfloor heating",
+        "description": "Solid fuel boiler - Manual feed independent boiler",
+        "efficiency_A": 65,
+        "efficiency_B": 60,
+        "heating_type": 2,
+        "responsiveness": 0.75,
+        "code": 151
+    },
+    {
+        "category": "Boiler systems with radiators or underfloor heating",
+        "description": "Solid fuel boiler - Auto (gravity) feed independent boiler",
+        "efficiency_A": 70,
+        "efficiency_B": 65,
+        "heating_type": 2,
+        "responsiveness": 0.75,
+        "code": 153
+    },
+    {
+        "category": "Boiler systems with radiators or underfloor heating",
+        "description": "Solid fuel boiler - Wood chip/pellet independent boiler",
+        "efficiency_A": 75,
+        "efficiency_B": 70,
+        "heating_type": 2,
+        "responsiveness": 0.75,
+        "code": 155
+    },
+    {
+        "category": "Boiler systems with radiators or underfloor heating",
+        "description": "Solid fuel boiler - Open fire with back boiler to radiators",
+        "efficiency_A": 63,
+        "efficiency_B": 55,
+        "heating_type": 3,
+        "responsiveness": 0.50,
+        "code": 156
+    },
+    {
+        "category": "Boiler systems with radiators or underfloor heating",
+        "description": "Solid fuel boiler - Closed room heater with boiler to radiators",
+        "efficiency_A": 67,
+        "efficiency_B": 65,
+        "heating_type": 3,
+        "responsiveness": 0.50,
+        "code": 158
+    },
+    {
+        "category": "Boiler systems with radiators or underfloor heating",
+        "description": "Solid fuel boiler - Stove (pellet-fired) with boiler to radiators",
+        "efficiency_A": 75,
+        "efficiency_B": 70,
+        "heating_type": 2,
+        "responsiveness": 0.75,
+        "code": 159
+    },
+    {
+        "category": "Boiler systems with radiators or underfloor heating",
+        "description": "Solid fuel boiler - Range cooker boiler (integral oven and boiler)",
+        "efficiency_A": 50,
+        "efficiency_B": 45,
+        "heating_type": 3,
+        "responsiveness": 0.50,
+        "code": 160
+    },
+    {
+        "category": "Boiler systems with radiators or underfloor heating",
+        "description": "Solid fuel boiler - Range cooker boiler (independent oven and boiler)",
+        "efficiency_A": 60,
+        "efficiency_B": 55,
+        "heating_type": 3,
+        "responsiveness": 0.50,
+        "code": 161
+    },
+    # Electric boilers
+    {
+        "category": "Boiler systems with radiators or underfloor heating",
+        "description": "Electric boiler - Direct acting electric boiler",
+        "efficiency": 100,
+        "heating_type": "From Table 4d",
+        "responsiveness": None,
+        "code": 191
+    },
+    {
+        "category": "Boiler systems with radiators or underfloor heating",
+        "description": "Electric boiler - CPSU in heated space – radiators or underfloor",
+        "efficiency": 100,
+        "heating_type": 1,
+        "responsiveness": 1.0,
+        "code": 192
+    },
+    {
+        "category": "Boiler systems with radiators or underfloor heating",
+        "description": "Electric boiler - Dry core storage boiler in heated space",
+        "efficiency": 100,
+        "heating_type": 2,
+        "responsiveness": 0.75,
+        "code": 193
+    },
+    {
+        "category": "Boiler systems with radiators or underfloor heating",
+        "description": "Electric boiler - Dry core storage boiler in unheated space",
+        "efficiency": 85,
+        "heating_type": 2,
+        "responsiveness": 0.75,
+        "code": 194
+    },
+    {
+        "category": "Boiler systems with radiators or underfloor heating",
+        "description": "Electric boiler - Water storage boiler in heated space",
+        "efficiency": 100,
+        "heating_type": 2,
+        "responsiveness": 0.75,
+        "code": 195
+    },
+    {
+        "category": "Boiler systems with radiators or underfloor heating",
+        "description": "Electric boiler - Water storage boiler in unheated space",
+        "efficiency": 85,
+        "heating_type": 2,
+        "responsiveness": 0.75,
+        "code": 196
+    }
+]
+
+heat_pumps_with_radiators_or_underfloor_heating = [
+    # Electric heat pumps
+    {
+        "category": "Heat pumps with radiators or underfloor heating",
+        "description": "Electric heat pumps - Ground source heat pump with flow temperature <= 35°C",
+        "space": 230,
+        "water": 170,
+        "heating_type": "From Table 4d",  # Replace with specific value as needed
+        "responsiveness": None,  # Not provided, assuming 'None'
+        "code": 211
+    },
+    {
+        "category": "Heat pumps with radiators or underfloor heating",
+        "description": "Electric heat pumps - Water source heat pump with flow temperature <= 35°C",
+        "space": 230,
+        "water": 170,
+        "heating_type": "From Table 4d",  # Replace with specific value as needed
+        "responsiveness": None,  # Not provided, assuming 'None'
+        "code": 213
+    },
+    {
+        "category": "Heat pumps with radiators or underfloor heating",
+        "description": "Electric heat pumps - Air source heat pump with flow temperature <= 35°C",
+        "space": 170,
+        "water": 170,
+        "heating_type": "From Table 4d",  # Replace with specific value as needed
+        "responsiveness": None,  # Not provided, assuming 'None'
+        "code": 214
+    },
+    {
+        "category": "Heat pumps with radiators or underfloor heating",
+        "description": "Electric heat pumps - Ground source heat pump in other cases",
+        "space": 170,
+        "water": 170,
+        "heating_type": "From Table 4d",  # Replace with specific value as needed
+        "responsiveness": None,  # Not provided, assuming 'None'
+        "code": 221
+    },
+    {
+        "category": "Heat pumps with radiators or underfloor heating",
+        "description": "Electric heat pumps - Water source heat pump, in other cases",
+        "space": 170,
+        "water": 170,
+        "heating_type": "From Table 4d",  # Replace with specific value as needed
+        "responsiveness": None,  # Not provided, assuming 'None'
+        "code": 223
+    },
+    {
+        "category": "Heat pumps with radiators or underfloor heating",
+        "description": "Electric heat pumps - Air source heat pump in other cases",
+        "space": 170,
+        "water": 170,
+        "heating_type": "From Table 4d",  # Replace with specific value as needed
+        "responsiveness": None,  # Not provided, assuming 'None'
+        "code": 224
+    },
+    # Gas-fired heat pumps
+    {
+        "category": "Heat pumps with radiators or underfloor heating",
+        "description": "Gas-fired heat pumps - Ground source heat pump with flow temperature <= 35°C",
+        "space": 120,
+        "water": 84,
+        "heating_type": "From Table 4d",  # Replace with specific value as needed
+        "responsiveness": None,  # Not provided, assuming 'None'
+        "code": 215
+    },
+    {
+        "category": "Heat pumps with radiators or underfloor heating",
+        "description": "Gas-fired heat pumps - Water source heat pump with flow temperature <= 35°C",
+        "space": 120,
+        "water": 84,
+        "heating_type": "From Table 4d",  # Replace with specific value as needed
+        "responsiveness": None,  # Not provided, assuming 'None'
+        "code": 216
+    },
+    {
+        "category": "Heat pumps with radiators or underfloor heating",
+        "description": "Gas-fired heat pumps - Air source heat pump with flow temperature <= 35°C",
+        "space": 110,
+        "water": 77,
+        "heating_type": "From Table 4d",  # Replace with specific value as needed
+        "responsiveness": None,  # Not provided, assuming 'None'
+        "code": 217
+    },
+    {
+        "category": "Heat pumps with radiators or underfloor heating",
+        "description": "Gas-fired heat pumps - Ground source heat pump in other cases",
+        "space": 84,
+        "water": 84,
+        "heating_type": "From Table 4d",  # Replace with specific value as needed
+        "responsiveness": None,  # Not provided, assuming 'None'
+        "code": 225
+    },
+    {
+        "category": "Heat pumps with radiators or underfloor heating",
+        "description": "Gas-fired heat pumps - Water source heat pump in other cases",
+        "space": 84,
+        "water": 84,
+        "heating_type": "From Table 4d",  # Replace with specific value as needed
+        "responsiveness": None,  # Not provided, assuming 'None'
+        "code": 226
+    },
+    {
+        "category": "Heat pumps with radiators or underfloor heating",
+        "description": "Gas-fired heat pumps - Air source heat pump in other cases",
+        "space": 77,
+        "water": 77,
+        "heating_type": "From Table 4d",  # Replace with specific value as needed
+        "responsiveness": None,  # Not provided, assuming 'None'
+        "code": 227
+    }
+]
+
+electric_heat_pumps_warm_air_distribution = [  # Lookup rows for electric heat pumps with warm air distribution
+    {
+        "category": "Heat pumps with warm air distribution",
+        "description": "Electric heat pumps - Ground source heat pump",
+        "space": 230,  # presumably space-heating efficiency in % - TODO confirm
+        "water": 170,  # presumably water-heating efficiency in % - TODO confirm
+        "heating_type": 1,
+        "responsiveness": 1.0,
+        "code": 521  # codes in this list are 521, 523 and 524 (there is no 522 row)
+    },
+    {
+        "category": "Heat pumps with warm air distribution",
+        "description": "Electric heat pumps - Water source heat pump",
+        "space": 230,
+        "water": 170,
+        "heating_type": 1,
+        "responsiveness": 1.0,
+        "code": 523
+    },
+    {
+        "category": "Heat pumps with warm air distribution",
+        "description": "Electric heat pumps - Air source heat pump",
+        "space": 170,
+        "water": 170,
+        "heating_type": 1,
+        "responsiveness": 1.0,
+        "code": 524
+    }
+]
+
+gas_fired_heat_pumps_warm_air_distribution = [  # Lookup rows for gas-fired heat pumps with warm air distribution
+    {
+        "category": "Heat pumps with warm air distribution",
+        "description": "Gas-fired heat pumps - Ground source heat pump",
+        "space": 120,  # presumably space-heating efficiency in % - TODO confirm
+        "water": 84,  # presumably water-heating efficiency in % - TODO confirm
+        "heating_type": 1,
+        "responsiveness": 1.0,
+        "code": 525  # codes in this list run 525-527
+    },
+    {
+        "category": "Heat pumps with warm air distribution",
+        "description": "Gas-fired heat pumps - Water source heat pump",
+        "space": 120,
+        "water": 84,
+        "heating_type": 1,
+        "responsiveness": 1.0,
+        "code": 526
+    },
+    {
+        "category": "Heat pumps with warm air distribution",
+        "description": "Gas-fired heat pumps - Air source heat pump",
+        "space": 110,
+        "water": 77,
+        "heating_type": 1,
+        "responsiveness": 1.0,
+        "code": 527
+    }
+]
+
+heat_networks = [  # Heat network rows; each has a single "efficiency" and no "responsiveness" key
+    {
+        "category": "Heat networks",
+        "description": "Boilers (SAP)",
+        "efficiency": 80,
+        "heating_type": "From table 4d",  # Placeholder string, not a numeric type; fill in from Table 4d
+        "code": 2  # NOTE: rows are not in ascending code order (2, 1, 4, 3, 5, 301, 302, 304)
+    },
+    {
+        "category": "Heat networks",
+        "description": "CHP (SAP)",
+        "efficiency": 75,
+        "heating_type": "From table 4d",  # Replace with specific value as needed
+        "code": 1
+    },
+    {
+        "category": "Heat networks",
+        "description": "Waste heat from power station (SAP)",
+        "efficiency": 100,
+        "heating_type": "From table 4d",  # Replace with specific value as needed
+        "code": 4
+    },
+    {
+        "category": "Heat networks",
+        "description": "Heat pump (SAP)",
+        "efficiency": 300,
+        "heating_type": "From table 4d",  # Replace with specific value as needed
+        "code": 3
+    },
+    {
+        "category": "Heat networks",
+        "description": "Geothermal heat source (SAP)",
+        "efficiency": 100,
+        "heating_type": "From table 4d",  # Replace with specific value as needed
+        "code": 5
+    },
+    {
+        "category": "Heat networks",
+        "description": "Boilers only (RdSAP)",
+        "efficiency": 80,
+        "heating_type": "From table 4d",  # Replace with specific value as needed
+        "code": 301  # the "(RdSAP)" rows use codes 301, 302 and 304 (there is no 303 row)
+    },
+    {
+        "category": "Heat networks",
+        "description": "CHP and boilers (RdSAP)",
+        "efficiency": 75,
+        "heating_type": "From table 4d",  # Replace with specific value as needed
+        "code": 302
+    },
+    {
+        "category": "Heat networks",
+        "description": "Heat pump (RdSAP)",
+        "efficiency": 300,
+        "heating_type": "From table 4d",  # Replace with specific value as needed
+        "code": 304
+    }
+]
+
+electric_storage_systems = [  # Electric storage heater rows (codes 401-409)
+    {
+        "category": "Electric Storage Systems",
+        "description": "Old (large volume) storage heaters",
+        "efficiency": 100,  # flat row: efficiency/heating_type/responsiveness sit directly on the row
+        "heating_type": 6,
+        "responsiveness": 0.0,
+        "code": 401
+    },
+    {
+        "category": "Electric Storage Systems",
+        "description": "Slimline storage heaters",
+        "code": 402,
+        "options": [  # tariff-specific variants; such rows have no top-level efficiency/heating_type/responsiveness
+            {"sub_description": "Off-peak tariffs", "efficiency": 100, "heating_type": 5, "responsiveness": 0.2},
+            {"sub_description": "24-hour heating tariff", "efficiency": 100, "heating_type": 4, "responsiveness": 0.4}
+        ]
+    },
+    {
+        "category": "Electric Storage Systems",
+        "description": "Convector storage heaters",
+        "code": 403,
+        "options": [
+            {"sub_description": "Off-peak tariffs", "efficiency": 100, "heating_type": 5, "responsiveness": 0.2},
+            {"sub_description": "24-hour heating tariff", "efficiency": 100, "heating_type": 4, "responsiveness": 0.4}
+        ]
+    },
+    {
+        "category": "Electric Storage Systems",
+        "description": "Fan storage heaters",
+        "code": 404,
+        "options": [  # both tariff options carry identical values for this row
+            {"sub_description": "Off-peak tariffs", "efficiency": 100, "heating_type": 4, "responsiveness": 0.4},
+            {"sub_description": "24-hour heating tariff", "efficiency": 100, "heating_type": 4, "responsiveness": 0.4}
+        ]
+    },
+    {
+        "category": "Electric Storage Systems",
+        "description": "Slimline storage heaters with Celect-type control",
+        "code": 405,
+        "options": [
+            {"sub_description": "Off-peak tariffs", "efficiency": 100, "heating_type": 4, "responsiveness": 0.4},
+            {"sub_description": "24-hour heating tariff", "efficiency": 100, "heating_type": 3, "responsiveness": 0.6}
+        ]
+    },
+    {
+        "category": "Electric Storage Systems",
+        "description": "Convector storage heaters with Celect-type control",
+        "code": 406,
+        "options": [
+            {"sub_description": "Off-peak tariffs", "efficiency": 100, "heating_type": 4, "responsiveness": 0.4},
+            {"sub_description": "24-hour heating tariff", "efficiency": 100, "heating_type": 3, "responsiveness": 0.6}
+        ]
+    },
+    {
+        "category": "Electric Storage Systems",
+        "description": "Fan storage heaters with Celect-type control",
+        "code": 407,
+        "options": [  # both tariff options carry identical values for this row
+            {"sub_description": "Off-peak tariffs", "efficiency": 100, "heating_type": 3, "responsiveness": 0.6},
+            {"sub_description": "24-hour heating tariff", "efficiency": 100, "heating_type": 3, "responsiveness": 0.6}
+        ]
+    },
+    {
+        "category": "Electric Storage Systems",
+        "description": "Integrated storage + direct-acting heater",
+        "efficiency": 100,  # flat row (no "options" list)
+        "heating_type": 3,
+        "responsiveness": 0.6,
+        "code": 408
+    },
+    {
+        "category": "Electric Storage Systems",
+        "description": "High heat retention storage heaters",
+        "code": 409,
+        "options": [  # both tariff options carry identical values for this row
+            {"sub_description": "Off-peak tariffs", "efficiency": 100, "heating_type": 2, "responsiveness": 0.8},
+            {"sub_description": "24-hour heating tariff", "efficiency": 100, "heating_type": 2, "responsiveness": 0.8}
+        ]
+    }
+]
+
+off_peak_tariffs_electric_underfloor_heating = [  # Electric underfloor heating rows on off-peak tariffs (codes 421-423)
+    {
+        "category": "Electric Underfloor Heating",
+        "description": "Off-peak tariffs - In concrete slab (off-peak only)",
+        "efficiency": 100,  # presumably a percentage - TODO confirm
+        "heating_type": 5,
+        "responsiveness": 0.0,
+        "code": 421
+    },
+    {
+        "category": "Electric Underfloor Heating",
+        "description": "Off-peak tariffs - Integrated (storage+direct-acting)",
+        "efficiency": 100,
+        "heating_type": 4,
+        "responsiveness": 0.25,
+        "code": 422
+    },
+    {
+        "category": "Electric Underfloor Heating",
+        "description": "Off-peak tariffs - Integrated (storage+direct-acting) with low (off-peak) tariff control",
+        "efficiency": 100,
+        "heating_type": 3,
+        "responsiveness": 0.50,
+        "code": 423
+    }
+]
+
+standard_or_off_peak_tariff_electric_underfloor_heating = [  # Underfloor rows for standard or off-peak tariff (424-425)
+    {
+        "category": "Electric Underfloor Heating",
+        "description": "Standard or off-peak tariff - In screed above insulation",
+        "efficiency": 100,  # presumably a percentage - TODO confirm
+        "heating_type": 2,
+        "responsiveness": 0.75,
+        "code": 424
+    },
+    {
+        "category": "Electric Underfloor Heating",
+        "description": "Standard or off-peak tariff - In timber floor, or immediately below floor covering",
+        "efficiency": 100,
+        "heating_type": 1,
+        "responsiveness": 1.0,
+        "code": 425
+    }
+]
+
+gas_fired_warm_air_fan_assisted = [  # Gas-fired warm air rows with fan-assisted flue (codes 501-505 and 520)
+    {
+        "category": "Warm Air Systems",
+        "description": "Gas-fired warm air with fan-assisted flue - Ducted, on-off control, pre 1998", "efficiency": 70,
+        "heating_type": 1,  # every row in this list uses heating_type 1 and responsiveness 1.0
+        "responsiveness": 1.0,
+        "code": 501
+    },
+    {
+        "category": "Warm Air Systems",
+        "description": "Gas-fired warm air with fan-assisted flue - Ducted, on-off control, 1998 or later",
+        "efficiency": 76,
+        "heating_type": 1,
+        "responsiveness": 1.0,
+        "code": 502
+    },
+    {
+        "category": "Warm Air Systems",
+        "description": "Gas-fired warm air with fan-assisted flue - Ducted, modulating control, pre 1998",
+        "efficiency": 72,
+        "heating_type": 1,
+        "responsiveness": 1.0,
+        "code": 503
+    },
+    {
+        "category": "Warm Air Systems",
+        "description": "Gas-fired warm air with fan-assisted flue - Ducted, modulating control, 1998 or later",
+        "efficiency": 78,
+        "heating_type": 1,
+        "responsiveness": 1.0,
+        "code": 504
+    },
+    {
+        "category": "Warm Air Systems",
+        "description": "Gas-fired warm air with fan-assisted flue - Room heater with in-floor ducts",
+        "efficiency": 69,
+        "heating_type": 1,
+        "responsiveness": 1.0,
+        "code": 505
+    },
+    {
+        "category": "Warm Air Systems",
+        "description": "Gas-fired warm air with fan-assisted flue - Condensing",
+        "efficiency": 81,
+        "heating_type": 1,
+        "responsiveness": 1.0,
+        "code": 520  # not contiguous with 501-505
+    }
+]
+
+gas_fired_warm_air_balanced_or_open_flue = [  # Gas-fired warm air rows with balanced or open flue (codes 506-511)
+    {"category": "Warm Air Systems",
+     "description": "Gas-fired warm air with balanced or open flue - Ducted or stub-ducted, on-off control, pre 1998",
+     "efficiency": 70, "heating_type": 1, "responsiveness": 1.0, "code": 506},  # same key set as the other warm air lists
+    {"category": "Warm Air Systems",
+     "description": "Gas-fired warm air with balanced or open flue - Ducted or stub-ducted, on-off control, "
+                    "1998 or later",
+     "efficiency": 76, "heating_type": 1, "responsiveness": 1.0, "code": 507},
+    {"category": "Warm Air Systems",
+     "description": "Gas-fired warm air with balanced or open flue - Ducted or stub-ducted, modulating control, "
+                    "pre 1998",
+     "efficiency": 72, "heating_type": 1, "responsiveness": 1.0, "code": 508},
+    {"category": "Warm Air Systems",
+     "description": "Gas-fired warm air with balanced or open flue - Ducted or stub-ducted, modulating control, "
+                    "1998 or later",
+     "efficiency": 78, "heating_type": 1, "responsiveness": 1.0, "code": 509},
+    {"category": "Warm Air Systems",
+     "description": "Gas-fired warm air with balanced or open flue - Ducted or stub-ducted with flue heat recovery",
+     "efficiency": 85, "heating_type": 1, "responsiveness": 1.0, "code": 510},
+    {"category": "Warm Air Systems", "description": "Gas-fired warm air with balanced or open flue - Condensing",
+     "efficiency": 81, "heating_type": 1, "responsiveness": 1.0, "code": 511}
+]
+
+liquid_fired_warm_air = [  # Liquid-fired warm air rows (codes 512-514)
+    {"category": "Warm Air Systems", "description": "Liquid-fired warm air - Ducted output (on/off control)",
+     "efficiency": 70, "heating_type": 1, "responsiveness": 1.0, "code": 512},  # efficiency presumably in % - confirm
+    {"category": "Warm Air Systems", "description": "Liquid-fired warm air - Ducted output (modulating control)",
+     "efficiency": 72, "heating_type": 1, "responsiveness": 1.0, "code": 513},
+    {"category": "Warm Air Systems", "description": "Liquid-fired warm air - Stub duct system", "efficiency": 70,
+     "heating_type": 1, "responsiveness": 1.0, "code": 514}
+]
+
+electric_warm_air_systems = [  # Single-row list: electric warm air (Electricaire), code 515
+    {
+        "category": "Warm Air Systems",
+        "description": "Electric warm air - Electricaire system",
+        "efficiency": 100,  # presumably a percentage - TODO confirm
+        "heating_type": 2,
+        "responsiveness": 0.75,
+        "code": 515
+    }
+]
+
+room_heaters = [  # Room heater rows grouped by fuel; the keys present differ per group (see section comments)
+    # Gas (including LPG and biogas) room heaters: rows carry "flue" plus an "efficiency_A"/"efficiency_B" pair
+    {"category": "Room Heaters",
+     "description": "Gas (including LPG and biogas) room heaters - Gas fire, open flue, pre-1980 (open fronted)",
+     "flue": "OF", "efficiency_A": 50, "efficiency_B": 50, "heating_type": 1, "responsiveness": 1.0, "code": 601},
+    {"category": "Room Heaters",
+     "description": "Gas (including LPG and biogas) room heaters - Gas fire, open flue, pre-1980 (open fronted), "
+                    "with back boiler unit",
+     "flue": "OF*", "efficiency_A": 50, "efficiency_B": 50, "heating_type": 1, "responsiveness": 1.0, "code": 602},
+    {"category": "Room Heaters",
+     "description": "Gas (including LPG and biogas) room heaters - Gas fire, open flue, 1980 or later (open fronted), "
+                    "sitting proud of, and sealed to, fireplace opening",
+     "flue": "OF", "efficiency_A": 63, "efficiency_B": 64, "heating_type": 1, "responsiveness": 1.0, "code": 603},
+    {"category": "Room Heaters",
+     "description": "Gas (including LPG and biogas) room heaters - Gas fire, open flue, 1980 or later (open fronted), "
+                    "sitting proud of, and sealed to, fireplace opening, with back boiler unit",
+     "flue": "OF*", "efficiency_A": 63, "efficiency_B": 64, "heating_type": 1, "responsiveness": 1.0, "code": 604},
+    {"category": "Room Heaters",
+     "description": "Gas (including LPG and biogas) room heaters - Flush fitting Live Fuel Effect gas fire (open "
+                    "fronted), sealed to fireplace opening",
+     "flue": "OF", "efficiency_A": 40, "efficiency_B": 41, "heating_type": 1, "responsiveness": 1.0, "code": 605},
+    {"category": "Room Heaters",
+     "description": "Gas (including LPG and biogas) room heaters - Flush fitting Live Fuel Effect gas fire (open "
+                    "fronted), sealed to fireplace opening, with back boiler unit",
+     "flue": "OF*", "efficiency_A": 40, "efficiency_B": 41, "heating_type": 1, "responsiveness": 1.0, "code": 606},
+    {"category": "Room Heaters",
+     "description": "Gas (including LPG and biogas) room heaters - Flush fitting Live Fuel Effect gas fire (open "
+                    "fronted), fan assisted, sealed to fireplace opening",
+     "flue": "OF", "efficiency_A": 45, "efficiency_B": 46, "heating_type": 1, "responsiveness": 1.0, "code": 607},
+    {"category": "Room Heaters",
+     "description": "Gas (including LPG and biogas) room heaters - Gas fire or wall heater, balanced flue",
+     "flue": "RS", "efficiency_A": 58, "efficiency_B": 60, "heating_type": 1, "responsiveness": 1.0, "code": 609},
+    {"category": "Room Heaters",
+     "description": "Gas (including LPG and biogas) room heaters - Gas fire, closed fronted, fan assisted",
+     "flue": "RS", "efficiency_A": 72, "efficiency_B": 73, "heating_type": 1, "responsiveness": 1.0, "code": 610},
+    {"category": "Room Heaters", "description": "Gas (including LPG and biogas) room heaters - Condensing gas fire",
+     "flue": "RS", "efficiency_A": 85, "efficiency_B": 85, "heating_type": 1, "responsiveness": 1.0, "code": 611},
+    {"category": "Room Heaters",
+     "description": "Gas (including LPG and biogas) room heaters - Decorative Fuel Effect gas fire, open to chimney",
+     "flue": "C", "efficiency_A": 20, "efficiency_B": 20, "heating_type": 1, "responsiveness": 1.0, "code": 612},
+    {"category": "Room Heaters",
+     "description": "Gas (including LPG and biogas) room heaters - Flueless gas fire, secondary heating only",
+     "flue": "none", "efficiency_A": 90, "efficiency_B": 92, "heating_type": 1, "responsiveness": 1.0, "code": 613},
+
+    # Liquid fuel room heaters: rows carry a single "efficiency" value and no "flue" key
+    {"category": "Room Heaters", "description": "Liquid fuel room heaters - Room heater, pre 2000", "efficiency": 55,
+     "heating_type": 1, "responsiveness": 1.0, "code": 621},
+    {"category": "Room Heaters",
+     "description": "Liquid fuel room heaters - Room heater, pre 2000, with boiler (no radiators)", "efficiency": 65,
+     "heating_type": 1, "responsiveness": 1.0, "code": 622},
+    {"category": "Room Heaters", "description": "Liquid fuel room heaters - Room heater, 2000 or later",
+     "efficiency": 60, "heating_type": 1, "responsiveness": 1.0, "code": 623},
+    {"category": "Room Heaters",
+     "description": "Liquid fuel room heaters - Room heater, 2000 or later with boiler (no radiators)",
+     "efficiency": 70, "heating_type": 1, "responsiveness": 1.0, "code": 624},
+    {"category": "Room Heaters", "description": "Liquid fuel room heaters - Bioethanol heater, secondary heating only",
+     "efficiency": 94, "heating_type": 1, "responsiveness": 1.0, "code": 625},
+
+    # Solid fuel room heaters: rows carry an "efficiency_A"/"efficiency_B" pair and no "flue" key
+    {"category": "Room Heaters", "description": "Solid fuel room heaters - Open fire in grate", "efficiency_A": 37,
+     "efficiency_B": 32, "heating_type": 3, "responsiveness": 0.5, "code": 631},
+    {"category": "Room Heaters", "description": "Solid fuel room heaters - Open fire with back boiler (no radiators)",
+     "efficiency_A": 50, "efficiency_B": 50, "heating_type": 3, "responsiveness": 0.5, "code": 632},
+    {"category": "Room Heaters", "description": "Solid fuel room heaters - Closed room heater", "efficiency_A": 65,
+     "efficiency_B": 60, "heating_type": 3, "responsiveness": 0.5, "code": 633},
+    {"category": "Room Heaters",
+     "description": "Solid fuel room heaters - Closed room heater with boiler (no radiators)", "efficiency_A": 67,
+     "efficiency_B": 65, "heating_type": 3, "responsiveness": 0.5, "code": 634},
+    {"category": "Room Heaters", "description": "Solid fuel room heaters - Stove (pellet fired)", "efficiency_A": 70,
+     "efficiency_B": 65, "heating_type": 2, "responsiveness": 0.75, "code": 635},
+    {"category": "Room Heaters",
+     "description": "Solid fuel room heaters - Stove (pellet fired) with boiler (no radiators)", "efficiency_A": 75,
+     "efficiency_B": 70, "heating_type": 2, "responsiveness": 0.75, "code": 636},
+
+    # Electric (direct acting) room heaters: single "efficiency" value; listed in code order 691, 694, 692, 693
+    {"category": "Room Heaters",
+     "description": "Electric (direct acting) room heaters - Panel, convector or radiant heaters", "efficiency": 100,
+     "heating_type": 1, "responsiveness": 1.0, "code": 691},
+    {"category": "Room Heaters",
+     "description": "Electric (direct acting) room heaters - Water- or oil-filled radiators", "efficiency": 100,
+     "heating_type": 1, "responsiveness": 1.0, "code": 694},
+    {"category": "Room Heaters", "description": "Electric (direct acting) room heaters - Fan heaters",
+     "efficiency": 100, "heating_type": 1, "responsiveness": 1.0, "code": 692},
+    {"category": "Room Heaters", "description": "Electric (direct acting) room heaters - Portable electric heaters",
+     "efficiency": 100, "heating_type": 1, "responsiveness": 1.0, "code": 693}
+]
+
+other_space_heating_systems = [  # Single-row list: electric ceiling heating, code 701
+    {
+        "category": "Other Space Heating Systems",
+        "description": "Electric ceiling heating",
+        "efficiency": 100,  # presumably a percentage - TODO confirm
+        "heating_type": 2,
+        "responsiveness": 0.75,
+        "code": 701
+    }
+]
+
+hot_water_systems = [  # Water heating rows (codes 901-952 plus 999); no "heating_type"/"responsiveness" keys
+    {"category": "Hot Water Systems", "description": "No hot water system present - electric immersion assumed",
+     "efficiency": 100, "code": 999},  # listed first although it has the highest code
+    {
+        "category": "Hot Water Systems",
+        "description": "HWP from the primary heating system",
+        "code": 901,
+        "options": [  # variants of code 901; each option carries only "sub_description" and "efficiency"
+            {"sub_description": "Back boiler (hot water only), gas*", "efficiency": 65},
+            {"sub_description": "Circulator built into a gas warm air system, pre 1998", "efficiency": 65},
+            {"sub_description": "Circulator built into a gas warm air system, 1998 or later", "efficiency": 73},
+            {"sub_description": "Heat exchanger in a gas warm air system, condensing unit", "efficiency": 74},
+        ]
+    },
+    {"category": "Hot Water Systems",
+     "description": "From second main system", "efficiency": None,
+     "code": 914},  # efficiency is None here; presumably taken from the referenced system - TODO confirm
+    {"category": "Hot Water Systems", "description": "From secondary system",
+     "efficiency": None, "code": 902},  # efficiency is None here as well
+    {"category": "Hot Water Systems", "description": "Electric immersion", "efficiency": 100, "code": 903},
+    {"category": "Hot Water Systems",
+     "description": "Single-point gas-fired water heater (instantaneous at point of use)", "efficiency": 70,
+     "code": 907},
+    {"category": "Hot Water Systems",
+     "description": "Multi-point gas-fired water heater (instantaneous serving several taps)", "efficiency": 65,
+     "code": 908},
+    {"category": "Hot Water Systems", "description": "Electric instantaneous at point of use", "efficiency": 100,
+     "code": 909},
+    {"category": "Hot Water Systems", "description": "Gas boiler/circulator for water heating only*", "efficiency": 65,
+     "code": 911},
+    {"category": "Hot Water Systems", "description": "Liquid fuel boiler/circulator for water heating only*",
+     "efficiency": 70, "code": 912},
+    {"category": "Hot Water Systems", "description": "Solid fuel boiler/circulator for water heating only",
+     "efficiency": 55, "code": 913},
+    # Range cookers with boiler for water heating only (codes 921-931)
+    {"category": "Hot Water Systems",
+     "description": "Range cooker with boiler for water heating only: Gas, single burner with permanent pilot",
+     "efficiency": 46, "code": 921},
+    {"category": "Hot Water Systems",
+     "description": "Range cooker with boiler for water heating only: Gas, single burner with automatic ignition",
+     "efficiency": 50,
+     "code": 922},
+    {"category": "Hot Water Systems",
+     "description": "Range cooker with boiler for water heating only: Gas, twin burner with permanent pilot pre 1998",
+     "efficiency": 60,
+     "code": 923},
+    {"category": "Hot Water Systems",
+     "description": "Range cooker with boiler for water heating only: Gas, twin burner with automatic ignition pre "
+                    "1998",
+     "efficiency": 65, "code": 924},
+    {"category": "Hot Water Systems",
+     "description": "Range cooker with boiler for water heating only: Gas, twin burner with permanent pilot 1998 or "
+                    "later",
+     "efficiency": 65, "code": 925},
+    {"category": "Hot Water Systems",
+     "description": "Range cooker with boiler for water heating only: Gas, twin burner with automatic ignition 1998 "
+                    "or later",
+     "efficiency": 70, "code": 926},
+    {"category": "Hot Water Systems",
+     "description": "Range cooker with boiler for water heating only: Liquid fuel, single burner", "efficiency": 60,
+     "code": 927},
+    {"category": "Hot Water Systems",
+     "description": "Range cooker with boiler for water heating only: Liquid fuel, twin burner pre 1998",
+     "efficiency": 70,
+     "code": 928},
+    {"category": "Hot Water Systems",
+     "description": "Range cooker with boiler for water heating only: Liquid fuel, twin burner 1998 or later",
+     "efficiency": 75,
+     "code": 929},
+    {"category": "Hot Water Systems",
+     "description": "Range cooker with boiler for water heating only: Solid fuel, integral oven and boiler",
+     "efficiency": 45,
+     "code": 930},
+    {"category": "Hot Water Systems",
+     "description": "Range cooker with boiler for water heating only: Solid fuel, independent oven and boiler",
+     "efficiency": 55,
+     "code": 931},
+    # Electric heat pump for water heating only
+    {"category": "Hot Water Systems", "description": "Electric heat pump for water heating only*", "efficiency": 170,
+     "code": 941},
+    # Hot-water only heat network rows
+    # The SAP variant below is disabled; it uses code 950, the same code as the RdSAP boilers row that follows
+    # {"category": "Hot Water Systems",
+    #  "description": "Hot-water only heat network (SAP)", "efficiency": None,
+    #  "code": 950},
+    {"category": "Hot Water Systems", "description": "Hot-water only heat network (RdSAP) - boilers", "efficiency": 80,
+     "code": 950},
+    {"category": "Hot Water Systems", "description": "Hot-water only heat network (RdSAP) - CHP", "efficiency": 75,
+     "code": 951},
+    {"category": "Hot Water Systems", "description": "Hot-water only heat network (RdSAP) - heat pump",
+     "efficiency": 300, "code": 952}
+]
+
+boilers_seasonal = [
+    {"category": "Boilers - seasonal",
+     "description": "Gas boilers (including mains gas, LPG and biogas) 1998 or later - Regular non-condensing with "
+                    "automatic ignition",
+     "efficiency_winter": 74, "efficiency_summer": 64, "code": 101},
+    {"category": "Boilers - seasonal",
+     "description": "Gas boilers (including mains gas, LPG and biogas) 1998 or later - Regular condensing with "
+                    "automatic ignition",
+     "efficiency_winter": 84, "efficiency_summer": 74, "code": 102},
+    {"category": "Boilers - seasonal",
+     "description": "Gas boilers (including mains gas, LPG and biogas) 1998 or later - Non-condensing combi with "
+                    "automatic ignition",
+     "efficiency_winter": 74, "efficiency_summer": 65, "code": 103},
+    {"category": "Boilers - seasonal",
+     "description": "Gas boilers (including mains gas, LPG and biogas) 1998 or later - Condensing combi with "
+                    "automatic ignition",
+     "efficiency_winter": 84, "efficiency_summer": 75, "code": 104},
+    {"category": "Boilers - seasonal",
+     "description": "Gas boilers (including mains gas, LPG and biogas) 1998 or later - Regular non-condensing with "
+                    "permanent pilot light",
+     "efficiency_winter": 70, "efficiency_summer": 60, "code": 105},
+    {"category": "Boilers - seasonal",
+     "description": "Gas boilers (including mains gas, LPG and biogas) 1998 or later - Regular condensing with "
+                    "permanent pilot light",
+     "efficiency_winter": 80, "efficiency_summer": 70, "code": 106},
+    {"category": "Boilers - seasonal",
+     "description": "Gas boilers (including mains gas, LPG and biogas) 1998 or later - Non-condensing combi with "
+                    "permanent pilot light",
+     "efficiency_winter": 70, "efficiency_summer": 61, "code": 107},
+    {"category": "Boilers - seasonal",
+     "description": "Gas boilers (including mains gas, LPG and biogas) 1998 or later - Condensing combi with "
+                    "permanent pilot light",
+     "efficiency_winter": 80, "efficiency_summer": 71, "code": 108},
+    {"category": "Boilers - seasonal",
+     "description": "Gas boilers (including mains gas, LPG and biogas) 1998 or later - Back boiler to radiators",
+     "efficiency_winter": 66, "efficiency_summer": 56, "code": 109},
+    {"category": "Boilers - seasonal",
+     "description": "Gas boilers (including mains gas, LPG and biogas) pre-1998, with fan-assisted flue - Regular, "
+                    "low thermal capacity",
+     "efficiency_winter": 73, "efficiency_summer": 63, "code": 110},
+    {"category": "Boilers - seasonal",
+     "description": "Gas boilers (including mains gas, LPG and biogas) pre-1998, with fan-assisted flue - Regular, "
+                    "high or unknown thermal capacity",
+     "efficiency_winter": 69, "efficiency_summer": 59, "code": 111},
+    {"category": "Boilers - seasonal",
+     "description": "Gas boilers (including mains gas, LPG and biogas) pre-1998, with fan-assisted flue - Combi",
+     "efficiency_winter": 71, "efficiency_summer": 62, "code": 112},
+    {"category": "Boilers - seasonal",
+     "description": "Gas boilers (including mains gas, LPG and biogas) pre-1998, with fan-assisted flue - Condensing "
+                    "combi",
+     "efficiency_winter": 84, "efficiency_summer": 75, "code": 113},
+    {"category": "Boilers - seasonal",
+     "description": "Gas boilers (including mains gas, LPG and biogas) pre-1998, with fan-assisted flue - Regular, "
+                    "condensing",
+     "efficiency_winter": 84, "efficiency_summer": 74, "code": 114},
+    {"category": "Boilers - seasonal",
+     "description": "Gas boilers (including mains gas, LPG and biogas) pre-1998, with balanced or open flue - "
+                    "Regular, wall mounted",
+     "efficiency_winter": 66, "efficiency_summer": 56, "code": 115},
+    {"category": "Boilers - seasonal",
+     "description": "Gas boilers (including mains gas, LPG and biogas) pre-1998, with balanced or open flue - "
+                    "Regular, floor mounted, pre 1979",
+     "efficiency_winter": 56, "efficiency_summer": 46, "code": 116},
+    {"category": "Boilers - seasonal",
+     "description": "Gas boilers (including mains gas, LPG and biogas) pre-1998, with balanced or open flue - "
+                    "Regular, floor mounted, 1979 to 1997",
+     "efficiency_winter": 66, "efficiency_summer": 56, "code": 117},
+    {"category": "Boilers - seasonal",
+     "description": "Gas boilers (including mains gas, LPG and biogas) pre-1998, with balanced or open flue - Combi",
+     "efficiency_winter": 66, "efficiency_summer": 57, "code": 118},
+    {"category": "Boilers - seasonal",
+     "description": "Gas boilers (including mains gas, LPG and biogas) pre-1998, with balanced or open flue - Back "
+                    "boiler to radiators",
+     "efficiency_winter": 66, "efficiency_summer": 56, "code": 119},
+    {"category": "Boilers - seasonal",
+     "description": "Combined Primary Storage Units (CPSU) (mains gas, LPG and biogas) - With automatic ignition ("
+                    "non-condensing)",
+     "efficiency_winter": 74, "efficiency_summer": 72, "code": 120},
+    {"category": "Boilers - seasonal",
+     "description": "Combined Primary Storage Units (CPSU) (mains gas, LPG and biogas) - With automatic ignition ("
+                    "condensing)",
+     "efficiency_winter": 83, "efficiency_summer": 81, "code": 121},
+    {"category": "Boilers - seasonal",
+     "description": "Combined Primary Storage Units (CPSU) (mains gas, LPG and biogas) - With permanent pilot ("
+                    "non-condensing)",
+     "efficiency_winter": 70, "efficiency_summer": 68, "code": 122},
+    {"category": "Boilers - seasonal",
+     "description": "Combined Primary Storage Units (CPSU) (mains gas, LPG and biogas) - With permanent pilot ("
+                    "condensing)",
+     "efficiency_winter": 79, "efficiency_summer": 77, "code": 123},
+    {"category": "Boilers - seasonal", "description": "Liquid fuel boilers - Standard oil boiler pre-1985",
+     "efficiency_winter": 66, "efficiency_summer": 54, "code": 124},
+    {"category": "Boilers - seasonal", "description": "Liquid fuel boilers - Standard oil boiler 1985 to 1997",
+     "efficiency_winter": 71, "efficiency_summer": 59, "code": 125},
+    {"category": "Boilers - seasonal", "description": "Liquid fuel boilers - Standard oil boiler, 1998 or later",
+     "efficiency_winter": 80, "efficiency_summer": 68, "code": 126},
+    {"category": "Boilers - seasonal", "description": "Liquid fuel boilers - Condensing oil boiler",
+     "efficiency_winter": 84, "efficiency_summer": 72, "code": 127},
+    {"category": "Boilers - seasonal", "description": "Liquid fuel boilers - Combi oil boiler, pre-1998",
+     "efficiency_winter": 71, "efficiency_summer": 62, "code": 128},
+    {"category": "Boilers - seasonal", "description": "Liquid fuel boilers - Combi oil boiler, 1998 or later",
+     "efficiency_winter": 77, "efficiency_summer": 68, "code": 129},
+    {"category": "Boilers - seasonal", "description": "Liquid fuel boilers - Condensing combi oil boiler",
+     "efficiency_winter": 82, "efficiency_summer": 73, "code": 130},
+    {"category": "Boilers - seasonal",
+     "description": "Liquid fuel boilers - Oil room heater with boiler to radiators, pre 2000", "efficiency_winter": 66,
+     "efficiency_summer": 54, "code": 131},
+    {"category": "Boilers - seasonal",
+     "description": "Liquid fuel boilers - Oil room heater with boiler to radiators, 2000 or later",
+     "efficiency_winter": 71, "efficiency_summer": 59, "code": 132},
+    {"category": "Boilers - seasonal",
+     "description": "Range cooker boilers (mains gas, LPG and biogas) - Single burner with permanent pilot",
+     "efficiency_winter": 47, "efficiency_summer": 37, "code": 133},
+    {"category": "Boilers - seasonal",
+     "description": "Range cooker boilers (mains gas, LPG and biogas) - Single burner with automatic ignition",
+     "efficiency_winter": 51, "efficiency_summer": 41, "code": 134},
+    {"category": "Boilers - seasonal",
+     "description": "Range cooker boilers (mains gas, LPG and biogas) - Twin burner with permanent pilot ("
+                    "non-condensing) pre 1998",
+     "efficiency_winter": 61, "efficiency_summer": 51, "code": 135},
+    {"category": "Boilers - seasonal",
+     "description": "Range cooker boilers (mains gas, LPG and biogas) - Twin burner with automatic ignition ("
+                    "non-condensing) pre 1998",
+     "efficiency_winter": 66, "efficiency_summer": 56, "code": 136},
+    {"category": "Boilers - seasonal",
+     "description": "Range cooker boilers (mains gas, LPG and biogas) - Twin burner with permanent pilot ("
+                    "non-condensing) 1998 or later",
+     "efficiency_winter": 66, "efficiency_summer": 56, "code": 137},
+    {"category": "Boilers - seasonal",
+     "description": "Range cooker boilers (mains gas, LPG and biogas) - Twin burner with automatic ignition ("
+                    "non-condensing) 1998 or later",
+     "efficiency_winter": 71, "efficiency_summer": 61, "code": 138},
+    {"category": "Boilers - seasonal", "description": "Range cooker boilers (liquid fuel) - Single burner",
+     "efficiency_winter": 61, "efficiency_summer": 49, "code": 139},
+    {"category": "Boilers - seasonal",
+     "description": "Range cooker boilers (liquid fuel) - Twin burner (non-condensing) pre 1998",
+     "efficiency_winter": 71, "efficiency_summer": 59, "code": 140},
+    {"category": "Boilers - seasonal",
+     "description": "Range cooker boilers (liquid fuel) - Twin burner (non-condensing) 1998 or later",
+     "efficiency_winter": 76, "efficiency_summer": 64, "code": 141},
+]
+
+# Heating controls
+no_heating_system_controls = [
+    {
+        "category": "No heating system present",
+        "description": "None",
+        "control": 2,
+        "temperature_adjustment_c": "+0.3",
+        "code": 2699
+    }
+]
+
+boiler_system_controls = [
+    {"category": "Boiler Systems with Radiators or Underfloor Heating",
+     "description": "Not applicable", "control": None, "temperature_adjustment_c": None,
+     "code": 2100},
+    {"category": "Boiler Systems with Radiators or Underfloor Heating",
+     "description": "No time or thermostatic control of room temperature", "control": 1,
+     "temperature_adjustment_c": "+0.6", "code": 2101},
+    {"category": "Boiler Systems with Radiators or Underfloor Heating", "description": "Programmer, no room thermostat",
+     "control": 1, "temperature_adjustment_c": "+0.6", "code": 2102},
+    {"category": "Boiler Systems with Radiators or Underfloor Heating", "description": "Room thermostat only",
+     "control": 1, "temperature_adjustment_c": "0", "code": 2103},
+    {"category": "Boiler Systems with Radiators or Underfloor Heating", "description": "Programmer and room thermostat",
+     "control": 1, "temperature_adjustment_c": "0", "code": 2104},
+    {"category": "Boiler Systems with Radiators or Underfloor Heating",
+     "description": "Programmer and at least two room thermostats", "control": 2, "temperature_adjustment_c": "0",
+     "code": 2105},
+    {"category": "Boiler Systems with Radiators or Underfloor Heating", "description": "Room thermostat and TRVs",
+     "control": 2, "temperature_adjustment_c": "0", "code": 2113},
+    {"category": "Boiler Systems with Radiators or Underfloor Heating",
+     "description": "Programmer, room thermostat and TRVs", "control": 2, "temperature_adjustment_c": "0",
+     "code": 2106},
+    {"category": "Boiler Systems with Radiators or Underfloor Heating", "description": "TRVs and bypass", "control": 2,
+     "temperature_adjustment_c": "0", "code": 2111},
+    {"category": "Boiler Systems with Radiators or Underfloor Heating", "description": "Programmer, TRVs and bypass",
+     "control": 2, "temperature_adjustment_c": "0", "code": 2107},
+    {"category": "Boiler Systems with Radiators or Underfloor Heating",
+     "description": "Programmer, TRVs and flow switch", "control": 2, "temperature_adjustment_c": "0", "code": 2108},
+    {"category": "Boiler Systems with Radiators or Underfloor Heating",
+     "description": "Programmer, TRVs and boiler energy manager", "control": 2, "temperature_adjustment_c": "0",
+     "code": 2109},
+    {"category": "Boiler Systems with Radiators or Underfloor Heating",
+     "description": "Time and temperature zone control by arrangement of plumbing and electrical services",
+     "control": 3, "temperature_adjustment_c": "0", "code": 2110},
+    {"category": "Boiler Systems with Radiators or Underfloor Heating",
+     "description": "Time and temperature zone control by device in PCDB", "control": 3,
+     "temperature_adjustment_c": "0", "code": 2112},
+]
+
+heat_pump_controls = [
+    # We have a previous 2100 code for not applicable
+    # {"category": "Heat Pumps with Radiators or Underfloor Heating",
+    #  "description": "Not applicable (heat pump provides DHW only)", "control": None, "temperature_adjustment_c": None,
+    #  "code": 2100},
+    {"category": "Heat Pumps with Radiators or Underfloor Heating",
+     "description": "No time or thermostatic control of room temperature", "control": 1,
+     "temperature_adjustment_c": "+0.3", "code": 2201},
+    {"category": "Heat Pumps with Radiators or Underfloor Heating", "description": "Programmer, no room thermostat",
+     "control": 1, "temperature_adjustment_c": "+0.3", "code": 2202},
+    {"category": "Heat Pumps with Radiators or Underfloor Heating", "description": "Room thermostat only", "control": 1,
+     "temperature_adjustment_c": "0", "code": 2203},
+    {"category": "Heat Pumps with Radiators or Underfloor Heating", "description": "Programmer and room thermostat",
+     "control": 1, "temperature_adjustment_c": "0", "code": 2204},
+    {"category": "Heat Pumps with Radiators or Underfloor Heating",
+     "description": "Programmer and at least two room thermostats", "control": 2, "temperature_adjustment_c": "0",
+     "code": 2205},
+    {"category": "Heat Pumps with Radiators or Underfloor Heating", "description": "Room thermostat and TRVs",
+     "control": 2, "temperature_adjustment_c": "0", "code": 2209},
+    {"category": "Heat Pumps with Radiators or Underfloor Heating",
+     "description": "Programmer, room thermostat and TRVs", "control": 2, "temperature_adjustment_c": "0",
+     "code": 2210},
+    {"category": "Heat Pumps with Radiators or Underfloor Heating", "description": "Programmer, TRVs and bypass",
+     "control": 2, "temperature_adjustment_c": "0", "code": 2206},
+    {"category": "Heat Pumps with Radiators or Underfloor Heating",
+     "description": "Time and temperature zone control by arrangement of plumbing and electrical services",
+     "control": 3, "temperature_adjustment_c": "0", "code": 2207},
+    {"category": "Heat Pumps with Radiators or Underfloor Heating",
+     "description": "Time and temperature zone control by device in PCDB", "control": 3,
+     "temperature_adjustment_c": "0", "code": 2208},
+]
+
+heat_network_controls = [
+    {"category": "Heat Networks", "description": "Flat rate charging*, no thermostatic control of room temperature",
+     "control": 1, "temperature_adjustment_c": "+0.3", "code": 2301},
+    {"category": "Heat Networks", "description": "Flat rate charging*, programmer, no room thermostat", "control": 1,
+     "temperature_adjustment_c": "+0.3", "code": 2302},
+    {"category": "Heat Networks", "description": "Flat rate charging*, room thermostat only", "control": 1,
+     "temperature_adjustment_c": "0", "code": 2303},
+    {"category": "Heat Networks", "description": "Flat rate charging*, programmer and room thermostat", "control": 1,
+     "temperature_adjustment_c": "0", "code": 2304},
+    {"category": "Heat Networks", "description": "Flat rate charging*, room thermostat and TRVs", "control": 2,
+     "temperature_adjustment_c": "0", "code": 2313},
+    {"category": "Heat Networks", "description": "Flat rate charging*, TRVs", "control": 2,
+     "temperature_adjustment_c": "0", "code": 2307},
+    {"category": "Heat Networks", "description": "Flat rate charging*, programmer and TRVs", "control": 2,
+     "temperature_adjustment_c": "0", "code": 2305},
+    {"category": "Heat Networks", "description": "Flat rate charging*, programmer and at least two room thermostats",
+     "control": 2, "temperature_adjustment_c": "0", "code": 2311},
+    {"category": "Heat Networks", "description": "Charging system linked to use of heating, room thermostat only",
+     "control": 2, "temperature_adjustment_c": "0", "code": 2308},
+    {"category": "Heat Networks",
+     "description": "Charging system linked to use of heating, programmer and room thermostat", "control": 2,
+     "temperature_adjustment_c": "0", "code": 2309},
+    {"category": "Heat Networks", "description": "Charging system linked to use of heating, room thermostat and TRVs",
+     "control": 3, "temperature_adjustment_c": "0", "code": 2314},
+    {"category": "Heat Networks", "description": "Charging system linked to use of heating, TRVs", "control": 3,
+     "temperature_adjustment_c": "0", "code": 2310},
+    {"category": "Heat Networks", "description": "Charging system linked to use of heating, programmer and TRVs",
+     "control": 3, "temperature_adjustment_c": "0", "code": 2306},
+    {"category": "Heat Networks",
+     "description": "Charging system linked to use of heating, programmer and at least two room thermostats",
+     "control": 3, "temperature_adjustment_c": "0", "code": 2312},
+]
+
+electric_storage_systems_controls = [
+    {"category": "Electric Storage Systems", "description": "Manual charge control", "control": 3,
+     "temperature_adjustment_c": "+0.7", "code": 2401},
+    {"category": "Electric Storage Systems", "description": "Automatic charge control", "control": 3,
+     "temperature_adjustment_c": "+0.4", "code": 2402},
+    {"category": "Electric Storage Systems", "description": "Celect-type controls", "control": 3,
+     "temperature_adjustment_c": "+0.4", "code": 2403},
+    {"category": "Electric Storage Systems", "description": "Controls for high heat retention storage heaters §",
+     "control": 3, "temperature_adjustment_c": "0", "code": 2404},
+]
+
+warm_air_systems_controls = [
+    {"category": "Warm Air Systems", "description": "No time or thermostatic control of room temperature", "control": 1,
+     "temperature_adjustment_c": "+0.3", "code": 2501},
+    {"category": "Warm Air Systems", "description": "Programmer, no room thermostat", "control": 1,
+     "temperature_adjustment_c": "+0.3", "code": 2502},
+    {"category": "Warm Air Systems", "description": "Room thermostat only", "control": 1,
+     "temperature_adjustment_c": "0", "code": 2503},
+    {"category": "Warm Air Systems", "description": "Programmer and room thermostat", "control": 1,
+     "temperature_adjustment_c": "0", "code": 2504},
+    {"category": "Warm Air Systems", "description": "Programmer and at least two room thermostats", "control": 2,
+     "temperature_adjustment_c": "0", "code": 2505},
+    {"category": "Warm Air Systems", "description": "Time and temperature zone control", "control": 3,
+     "temperature_adjustment_c": "0", "code": 2506},
+]
+
+room_heater_systems_controls = [
+    {"category": "Room Heater Systems", "description": "No thermostatic control of room temperature", "control": 2,
+     "temperature_adjustment_c": "+0.3", "code": 2601},
+    {"category": "Room Heater Systems", "description": "Appliance thermostats", "control": 3,
+     "temperature_adjustment_c": "0", "code": 2602},
+    {"category": "Room Heater Systems", "description": "Programmer and appliance thermostats", "control": 3,
+     "temperature_adjustment_c": "0", "code": 2603},
+    {"category": "Room Heater Systems", "description": "Room thermostats only", "control": 3,
+     "temperature_adjustment_c": "0", "code": 2604},
+    {"category": "Room Heater Systems", "description": "Programmer and room thermostats", "control": 3,
+     "temperature_adjustment_c": "0", "code": 2605},
+]
+
+other_systems_controls = [
+    {"category": "Other Systems", "description": "No time or thermostatic control of room temperature", "control": 1,
+     "temperature_adjustment_c": "+0.3", "code": 2701},
+    {"category": "Other Systems", "description": "Programmer, no room thermostat", "control": 1,
+     "temperature_adjustment_c": "+0.3", "code": 2702},
+    {"category": "Other Systems", "description": "Room thermostat only", "control": 1, "temperature_adjustment_c": "0",
+     "code": 2703},
+    {"category": "Other Systems", "description": "Programmer and room thermostat", "control": 1,
+     "temperature_adjustment_c": "0", "code": 2704},
+    {"category": "Other Systems", "description": "Temperature zone control", "control": 2,
+     "temperature_adjustment_c": "0", "code": 2705},
+    {"category": "Other Systems", "description": "Time and temperature zone control", "control": 3,
+     "temperature_adjustment_c": "0", "code": 2706},
+]
+
+heating_data = (
+    no_heating_system +
+    boiler_systems_with_radiators_or_underfloor_heating +
+    heat_pumps_with_radiators_or_underfloor_heating +
+    electric_heat_pumps_warm_air_distribution +
+    gas_fired_heat_pumps_warm_air_distribution +
+    heat_networks +
+    electric_storage_systems +
+    off_peak_tariffs_electric_underfloor_heating +
+    standard_or_off_peak_tariff_electric_underfloor_heating +
+    gas_fired_warm_air_fan_assisted +
+    gas_fired_warm_air_balanced_or_open_flue +
+    liquid_fired_warm_air +
+    electric_warm_air_systems +
+    room_heaters +
+    other_space_heating_systems +
+    hot_water_systems +
+    boilers_seasonal +
+    no_heating_system_controls +
+    boiler_system_controls +
+    heat_pump_controls +
+    heat_network_controls +
+    electric_storage_systems_controls +
+    warm_air_systems_controls +
+    room_heater_systems_controls +
+    other_systems_controls
+)
+
+heating_data = pd.DataFrame(heating_data)
diff --git a/utils/s3.py b/utils/s3.py
index 1b14ca97..b3553824 100644
--- a/utils/s3.py
+++ b/utils/s3.py
@@ -276,3 +276,86 @@ def list_files_in_s3_folder(bucket_name, folder_name):
     except Exception as e:
         logger.error(f'Failed to list files in folder {folder_name} in bucket {bucket_name}: {str(e)}')
         return []
+
+
+def list_files_and_subfolders_in_s3_folder(bucket_name, folder_name):
+    """
+    List all files and immediate subfolders in a given folder in an S3 bucket.
+
+    E.g. if we have a folder structure in S3 like this:
+    - folder1/
+        - file1.csv
+        - file2.csv
+        - subfolder1/
+            - file3.csv
+
+    Then calling list_files_and_subfolders_in_s3_folder(bucket_name='my-bucket', folder_name='folder1/')
+    would return ['folder1/file1.csv', 'folder1/file2.csv', 'folder1/subfolder1/'].
+
+    Namely, the nested files are not included in the list, only the immediate files and subfolders.
+
+    :param bucket_name: The name of the S3 bucket.
+    :param folder_name: The folder name within the S3 bucket.
+    :return: A list of file keys and subfolder prefixes in the specified S3 folder.
+    """
+
+    # For this function, folder_name should end with a forward slash
+    if not folder_name.endswith('/'):
+        folder_name += '/'
+
+    try:
+        s3 = boto3.client('s3')
+        response = s3.list_objects_v2(Bucket=bucket_name, Prefix=folder_name, Delimiter='/')
+
+        items = []
+
+        # Add files to the list
+        if 'Contents' in response:
+            items.extend([content['Key'] for content in response['Contents'] if content['Key'] != folder_name])
+
+        # Add immediate subfolders to the list
+        if 'CommonPrefixes' in response:
+            items.extend([prefix['Prefix'] for prefix in response['CommonPrefixes']])
+
+        return items
+
+    except NoCredentialsError:
+        logger.error("Credentials not available.")
+        return []
+    except PartialCredentialsError:
+        logger.error("Incomplete credentials provided.")
+        return []
+    except Exception as e:
+        logger.error(f'Failed to list files and subfolders in folder {folder_name} in bucket {bucket_name}: {str(e)}')
+        return []
+
+
+def list_xmls_in_s3_folder(bucket_name, folder_name):
+    """
+    List all XML files in a given folder in an S3 bucket.
+
+    :param bucket_name: The name of the S3 bucket.
+    :param folder_name: The folder name within the S3 bucket.
+    :return: A list of XML file keys in the specified S3 folder.
+    """
+    try:
+        s3 = boto3.client('s3')
+        response = s3.list_objects_v2(Bucket=bucket_name, Prefix=folder_name)
+
+        if 'Contents' not in response:
+            logger.info(f"No files found in folder {folder_name} in bucket {bucket_name}.")
+            return []
+
+        # Filter XML files
+        xml_files = [content['Key'] for content in response['Contents'] if content['Key'].endswith('.xml')]
+        return xml_files
+
+    except NoCredentialsError:
+        logger.error("Credentials not available.")
+        return []
+    except PartialCredentialsError:
+        logger.error("Incomplete credentials provided.")
+        return []
+    except Exception as e:
+        logger.error(f'Failed to list XML files in folder {folder_name} in bucket {bucket_name}: {str(e)}')
+        return []

From 791e22146e6354291ebf56b61aeee3423286a609 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 25 Jul 2024 12:18:48 +0100
Subject: [PATCH 002/182] set up fundamental epc extraction

---
 etl/bill_savings/data_collection.py    |   8 +-
 etl/bill_savings/data_combining.py     |   2 +-
 etl/xml_survey_extraction/XmlParser.py | 200 ++++++++++++++++++++++++-
 3 files changed, 198 insertions(+), 12 deletions(-)

diff --git a/etl/bill_savings/data_collection.py b/etl/bill_savings/data_collection.py
index d2283ac4..6095741f 100644
--- a/etl/bill_savings/data_collection.py
+++ b/etl/bill_savings/data_collection.py
@@ -133,8 +133,8 @@ def app():
     energy_consumption_data = []
     for i, directory in tqdm(enumerate(epc_directories), total=len(epc_directories)):
         # Skip the first 50
-        if i < 250:
-            continue
+        # if i < 344:
+        #     continue
 
         data = pd.read_csv(directory / "certificates.csv", low_memory=False)
         # Rename the columns to the same format as the api returns
@@ -146,12 +146,12 @@ def app():
         # Take just the newest EPC per uprn, based on lodgement-date
         data = data.sort_values("lodgement-date", ascending=False).drop_duplicates("uprn")
 
-        data = data.sample(sample_size)
+        data = data.sample(sample_size, replace=False)
         # We use the addreess data to find the related information
 
         collected_data = []
         for _, property_data in data.iterrows():
-            time.sleep(np.random.uniform(0.3, 2))
+            time.sleep(np.random.uniform(0.2, 1.5))
 
             uprn = int(property_data["uprn"])
             address = property_data["address1"]
diff --git a/etl/bill_savings/data_combining.py b/etl/bill_savings/data_combining.py
index 11366360..d3a8d679 100644
--- a/etl/bill_savings/data_combining.py
+++ b/etl/bill_savings/data_combining.py
@@ -94,7 +94,7 @@ def app():
 
     # We also estimate the energy consumption reduction from this data, by band
     df["total_consumption"] = df["heating_kwh"] + df["hot_water_kwh"]
-    consumption_averages = df.groupby("current-energy-rating")["total_consumption"].meam().reset_index()
+    consumption_averages = df.groupby("current-energy-rating")["total_consumption"].mean().reset_index()
 
     # Save the consumption averages back to s3
     save_dataframe_to_s3_parquet(
diff --git a/etl/xml_survey_extraction/XmlParser.py b/etl/xml_survey_extraction/XmlParser.py
index de7e35f8..973ea5e8 100644
--- a/etl/xml_survey_extraction/XmlParser.py
+++ b/etl/xml_survey_extraction/XmlParser.py
@@ -55,6 +55,7 @@ class XmlParser:
     hot_water_cost_current = None
     lighting_cost_current = None
     energy_consumption_current = None
+    energy_consumption_potential = None
     heating_system = None
     heating_controls = None
 
@@ -140,6 +141,30 @@ class XmlParser:
         "5": "Very Good"
     }
 
+    MECHANICAL_VENTILATION_MAP = {
+        "0": "natural"
+    }
+
+    BUILT_FORM_MAP = {
+        "1": "Detached",
+    }
+
+    GLAZED_AREA_MAP = {
+        "4": "Much More Than Typical"
+    }
+
+    FUEL_TYPE_MAP = {
+        "26": "mains gas (not community)"
+    }
+
+    TRANSACTION_TYPE_MAP = {
+        "13": "ECO assessment"
+    }
+
+    TENURE_MAP = {
+        '1': "Owner-occupied"
+    }
+
     def __init__(self, file, filekey, uprn=None):
         file.seek(0)  # Ensure the file pointer is at the beginning
         xml_string = file.read().decode('utf-8')
@@ -151,7 +176,7 @@ class XmlParser:
         # In order to identify the file type, we can look for the presence of the 'UPRN' tag
         # If the UPRN tag is present, we can assume that the file is an EPC
         # If the UPRN tag is not present, we can assume that the file is an EPR
-        self.get_uprn()
+        self.get_uprn(uprn)
 
         self.file_type = self.UPRN_FILETYPE_MAP.get(self.uprn, "EPC")
 
@@ -180,6 +205,7 @@ class XmlParser:
         self.get_assessor_details()
 
         self.get_heating_and_emissions_data()
+
         self.get_detailed_heating_specs()
 
         # Building fabric
@@ -191,11 +217,160 @@ class XmlParser:
         self.get_hot_water()
         self.get_lighting()
         self.get_doors()
-        self.get_photo_supply()
 
         # Property dimensions
         self.get_property_dimensions()
 
+        # Get all of the EPC data
+        self.extract_epc()
+
+    def extract_epc(self):
+        # Property Summary
+        low_energy_fixed_light_count = None
+        construction_age_band = None
+        self.epc = {
+            "low-energy-fixed-light-count": self.get_node_value('Low-Energy-Fixed-Lighting-Outlets-Count'),
+            # TODO: Needs to be done more carefully
+            # "floor-height" = self.get_node_value_from_floor_dimensions('Room-Height'),
+            "construction-age-band": self.get_node_value('Construction-Age-Band'),
+            "mainheat-energy-eff": self.RATINGS_MAP[
+                self.get_property_summary_value('Main-Heating', 'Energy-Efficiency-Rating')
+            ],
+            "windows-env-eff": self.RATINGS_MAP[
+                self.get_property_summary_value('Window', 'Environmental-Efficiency-Rating')
+            ],
+            "lighting-energy-eff": self.RATINGS_MAP[
+                self.get_property_summary_value('Lighting', 'Energy-Efficiency-Rating')
+            ],
+            "environment-impact-potential": self.get_energy_assessment_value('Environmental-Impact-Potential'),
+            # TODO: Needs to be done more carefully since we have multiple windows
+            # "glazed-type": self.get_node_value('Glazing-Type'),
+            "mainheatcont-description":
+                self.get_property_summary_value('Main-Heating-Controls', 'Description'),
+            "sheating-energy-eff": self.RATINGS_MAP[
+                self.get_property_summary_value('Secondary-Heating', 'Energy-Efficiency-Rating'),
+            ],
+            # TODO: Doesn't seem to be included in the xml
+            # "local-authority": self.get_node_value('Local-Authority'),
+            "local-authority-label": self.get_node_value('Local-Authority-Label'),
+            "fixed-lighting-outlets-count": self.get_node_value('Fixed-Lighting-Outlets-Count'),
+            # TODO: Doesn't seem to be included in the xml
+            # "energy-tariff": self.get_node_value('Energy-Tariff'),
+            "mechanical-ventilation": self.MECHANICAL_VENTILATION_MAP[self.get_node_value('Mechanical-Ventilation')],
+            "solar-water-heating-flag": self.get_node_value('Solar-Water-Heating'),
+            "co2-emissions-potential": self.get_energy_assessment_value('CO2-Emissions-Potential'),
+            "number-heated-rooms": self.get_node_value('Heated-Room-Count'),
+            "floor-description": self.get_property_summary_value('Floor', 'Description'),
+            "energy-consumption-potential": self.get_energy_assessment_value('Energy-Consumption-Potential'),
+            "built-form": self.BUILT_FORM_MAP[self.get_node_value('Built-Form')],
+            "number-open-fireplaces": self.get_node_value('Open-Fireplaces-Count'),
+            "windows-description": self.get_property_summary_value('Window', 'Description'),
+            "glazed-area": self.GLAZED_AREA_MAP[self.get_node_value('Glazed-Area')],
+            "inspection-date": self.get_node_value('Inspection-Date'),
+            "mains-gas-flag": self.get_node_value('Mains-Gas'),
+            "co2-emiss-curr-per-floor-area": self.get_energy_assessment_value('CO2-Emissions-Current-Per-Floor-Area'),
+            # TODO: Not included in the xml for houses - need an example of flats
+            # "heat-loss-corridor": self.get_node_value('Heat-Loss-Perimeter'),
+            # TODO: Need an example of flats
+            # "flat-storey-count": self.get_node_value('Flat-Storey-Count'),
+            "roof-energy-eff": self.RATINGS_MAP[
+                self.get_property_summary_value('Roof', 'Energy-Efficiency-Rating')
+            ],
+            "total-floor-area": self.get_node_value('Total-Floor-Area'),
+            "environment-impact-current": self.get_energy_assessment_value('Environmental-Impact-Current'),
+            "roof-description": self.get_property_summary_value('Roof', 'Description'),
+            "floor-energy-eff": self.RATINGS_MAP[
+                self.get_property_summary_value('Floor', 'Energy-Efficiency-Rating')
+            ],
+            "number-habitable-rooms": self.get_node_value('Habitable-Room-Count'),
+            "hot-water-env-eff": self.RATINGS_MAP[
+                self.get_property_summary_value('Hot-Water', 'Environmental-Efficiency-Rating')
+            ],
+            "mainheatc-energy-eff": self.RATINGS_MAP[
+                self.get_property_summary_value('Main-Heating-Controls', 'Energy-Efficiency-Rating')
+            ],
+            "main-fuel": self.FUEL_TYPE_MAP[self.get_node_value('Main-Fuel-Type')],
+            "lighting-env-eff": self.RATINGS_MAP[
+                self.get_property_summary_value('Lighting', 'Environmental-Efficiency-Rating')
+            ],
+            "windows-energy-eff": self.RATINGS_MAP[
+                self.get_property_summary_value('Window', 'Energy-Efficiency-Rating')
+            ],
+            "floor-env-eff": self.RATINGS_MAP[
+                self.get_property_summary_value('Floor', 'Environmental-Efficiency-Rating')
+            ],
+            "sheating-env-eff": self.RATINGS_MAP[
+                self.get_property_summary_value('Secondary-Heating', 'Environmental-Efficiency-Rating')
+            ],
+            "lighting_description": self.get_property_summary_value('Lighting', 'Description'),
+            "roof-env-eff": self.RATINGS_MAP[
+                self.get_property_summary_value('Roof', 'Environmental-Efficiency-Rating')
+            ],
+            "walls-energy-eff": self.RATINGS_MAP[
+                self.get_property_summary_value('Wall', 'Energy-Efficiency-Rating')
+            ],
+            "photo-supply": self.get_photo_supply(),
+            "lighting-cost-potential": self.get_energy_assessment_value('Lighting-Cost-Potential'),
+            "mainheat-env-eff": self.RATINGS_MAP[
+                self.get_property_summary_value('Main-Heating', 'Environmental-Efficiency-Rating')
+            ],
+            "multi-glaze-proportion": self.get_node_value('Multiple-Glazed-Proportion'),
+            "main-heating-controls": self.get_property_summary_value('Main-Heating-Controls', 'Description'),
+            # TODO: NEdd an example of flats
+            # "flat-top-storey": self.get_node_value('Flat-Top-Storey'),
+            "secondheat-description": self.get_property_summary_value('Secondary-Heating', 'Description'),
+            "walls-env-eff": self.RATINGS_MAP[
+                self.get_property_summary_value('Wall', 'Environmental-Efficiency-Rating')
+            ],
+            "transaction-type": self.TRANSACTION_TYPE_MAP[self.get_node_value('Transaction-Type')],
+            "extension-count": self.get_node_value('Extensions-Count'),
+            "mainheatc-env-eff": self.RATINGS_MAP[
+                self.get_property_summary_value('Main-Heating-Controls', 'Environmental-Efficiency-Rating')
+            ],
+            "lmk-key": "",  # Doesn't exist for non-EPC xmls
+            "wind-turbines-count": self.get_node_value('Wind-Turbines-Count'),
+            "tenure": self.TENURE_MAP[self.get_node_value('Tenure')],
+            # TODO: Need an example of flats
+            # "floor-level": self.get_node_value('Floor-Level'),
+            "potential-energy-efficiency": self.get_energy_assessment_value('Energy-Rating-Potential'),
+            "hot-water-energy-eff": self.RATINGS_MAP[
+                self.get_property_summary_value('Hot-Water', 'Energy-Efficiency-Rating')
+            ],
+            "low-energy-lighting": self.get_node_value('Low-Energy-Lighting'),
+            "walls-description": self.get_property_summary_value('Wall', 'Description'),
+            "hotwater-description": self.get_property_summary_value('Hot-Water', 'Description'),
+        }
+
+    def get_node_value(self, tag_name):
+        nodes = self.xml.getElementsByTagName(tag_name)
+        if nodes and nodes[0].firstChild:
+            return nodes[0].firstChild.nodeValue
+        return None
+
+    def get_node_value_from_floor_dimensions(self, tag_name):
+        nodes = self.xml.getElementsByTagName('SAP-Floor-Dimension')
+        if nodes:
+            tag = nodes[0].getElementsByTagName(tag_name)
+            if tag and tag[0].firstChild:
+                return tag[0].firstChild.nodeValue
+        return None
+
+    def get_property_summary_value(self, section, tag_name):
+        nodes = self.xml.getElementsByTagName('Property-Summary')[0].getElementsByTagName(section)
+        if nodes:
+            tag = nodes[0].getElementsByTagName(tag_name)
+            if tag and tag[0].firstChild:
+                return tag[0].firstChild.nodeValue
+        return None
+
+    def get_energy_assessment_value(self, tag_name):
+        nodes = self.xml.getElementsByTagName('Energy-Assessment')[0]
+        if nodes:
+            tag = nodes.getElementsByTagName(tag_name)
+            if tag and tag[0].firstChild:
+                return tag[0].firstChild.nodeValue
+        return None
+
     def get_uprn(self, uprn):
 
         if uprn is not None:
@@ -253,9 +428,14 @@ class XmlParser:
         self.heating_cost_current = self.xml.getElementsByTagName('Heating-Cost-Current')[0].firstChild.nodeValue
         self.hot_water_cost_current = self.xml.getElementsByTagName('Hot-Water-Cost-Current')[0].firstChild.nodeValue
         self.lighting_cost_current = self.xml.getElementsByTagName('Lighting-Cost-Current')[0].firstChild.nodeValue
+
+        # Energy consumption
         self.energy_consumption_current = (
             self.xml.getElementsByTagName("Energy-Consumption-Current")[0].firstChild.nodeValue
         )
+        self.energy_consumption_potential = (
+            self.xml.getElementsByTagName("Energy-Consumption-Potential")[0].firstChild.nodeValue
+        )
 
     def get_detailed_heating_specs(self):
         """
@@ -457,11 +637,17 @@ class XmlParser:
         )
 
     def get_photo_supply(self):
-        self.photo_supply = float(
-            self.xml.getElementsByTagName('Photovoltaic-Supply')[0]
-            .getElementsByTagName('Percent-Roof-Area')[0]
-            .firstChild.nodeValue
-        )
+        photo_supply_tag = self.xml.getElementsByTagName("Photovoltaic-Supply")[0]
+        # Check if the "None-Or-No-Details" tag is present
+        if photo_supply_tag.getElementsByTagName("None-Or-No-Details"):
+            return (
+                photo_supply_tag.
+                getElementsByTagName("None-Or-No-Details")[0].
+                getElementsByTagName("Percent-Roof-Area")[0].
+                firstChild.nodeValue
+            )
+        else:
+            raise NotImplementedError("Implement me")
 
     def get_assessor_details(self):
 

From 9a343db93bcc66aa15142d1e839b33e90a672349 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 25 Jul 2024 12:25:53 +0100
Subject: [PATCH 003/182] handled flats for the moment

---
 etl/xml_survey_extraction/XmlParser.py | 33 ++++++++++++++++----------
 1 file changed, 20 insertions(+), 13 deletions(-)

diff --git a/etl/xml_survey_extraction/XmlParser.py b/etl/xml_survey_extraction/XmlParser.py
index 973ea5e8..dccc0a9f 100644
--- a/etl/xml_survey_extraction/XmlParser.py
+++ b/etl/xml_survey_extraction/XmlParser.py
@@ -198,7 +198,6 @@ class XmlParser:
             # This file type contains just limited information compared to a regular EPR/EPC, and so we just exit
             # unless we learn something else that determines that we need information from this file
             return
-        self.get_property_type()
         self.get_sap()
         self.get_property_address()
         self.get_dates()
@@ -225,9 +224,20 @@ class XmlParser:
         self.extract_epc()
 
     def extract_epc(self):
-        # Property Summary
-        low_energy_fixed_light_count = None
-        construction_age_band = None
+
+        property_type = self.get_property_type()
+
+        if property_type == "Flat":
+            raise NotImplementedError(
+                "Need to handle: heat-loss-corridor, unheated-corridor-length, flat-storey-count, flat-top-storey, "
+                "floor-level"
+            )
+        heat_loss_corridor = "NO DATA!"
+        unheated_corridor_length = ""
+        flat_storey_count = ""
+        flat_top_storey = ""
+        floor_level = "NO DATA!"
+
         self.epc = {
             "low-energy-fixed-light-count": self.get_node_value('Low-Energy-Fixed-Lighting-Outlets-Count'),
             # TODO: Needs to be done more carefully
@@ -269,10 +279,9 @@ class XmlParser:
             "inspection-date": self.get_node_value('Inspection-Date'),
             "mains-gas-flag": self.get_node_value('Mains-Gas'),
             "co2-emiss-curr-per-floor-area": self.get_energy_assessment_value('CO2-Emissions-Current-Per-Floor-Area'),
-            # TODO: Not included in the xml for houses - need an example of flats
-            # "heat-loss-corridor": self.get_node_value('Heat-Loss-Perimeter'),
-            # TODO: Need an example of flats
-            # "flat-storey-count": self.get_node_value('Flat-Storey-Count'),
+            "heat-loss-corridor": heat_loss_corridor,
+            "unheated-corridor-length": unheated_corridor_length,
+            "flat-storey-count": flat_storey_count,
             "roof-energy-eff": self.RATINGS_MAP[
                 self.get_property_summary_value('Roof', 'Energy-Efficiency-Rating')
             ],
@@ -316,8 +325,7 @@ class XmlParser:
             ],
             "multi-glaze-proportion": self.get_node_value('Multiple-Glazed-Proportion'),
             "main-heating-controls": self.get_property_summary_value('Main-Heating-Controls', 'Description'),
-            # TODO: NEdd an example of flats
-            # "flat-top-storey": self.get_node_value('Flat-Top-Storey'),
+            "flat-top-storey": flat_top_storey,
             "secondheat-description": self.get_property_summary_value('Secondary-Heating', 'Description'),
             "walls-env-eff": self.RATINGS_MAP[
                 self.get_property_summary_value('Wall', 'Environmental-Efficiency-Rating')
@@ -330,8 +338,7 @@ class XmlParser:
             "lmk-key": "",  # Doesn't exist for non-EPC xmls
             "wind-turbines-count": self.get_node_value('Wind-Turbines-Count'),
             "tenure": self.TENURE_MAP[self.get_node_value('Tenure')],
-            # TODO: Need an example of flats
-            # "floor-level": self.get_node_value('Floor-Level'),
+            "floor-level": floor_level,
             "potential-energy-efficiency": self.get_energy_assessment_value('Energy-Rating-Potential'),
             "hot-water-energy-eff": self.RATINGS_MAP[
                 self.get_property_summary_value('Hot-Water', 'Energy-Efficiency-Rating')
@@ -397,7 +404,7 @@ class XmlParser:
         if not property_type:
             property_type = self.xml.getElementsByTagName('PropertyType1')
 
-        self.property_type = PROPERTY_TYPE_LOOKUP[property_type[0].firstChild.nodeValue]
+        return PROPERTY_TYPE_LOOKUP[property_type[0].firstChild.nodeValue]
 
     def get_sap(self):
         sap_score = self.xml.getElementsByTagName('Energy-Rating-Current')

From d3e7c60009fabf486999f26fa9b39a7f87479586 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 25 Jul 2024 12:47:41 +0100
Subject: [PATCH 004/182] Added missing variables

---
 etl/xml_survey_extraction/XmlParser.py | 63 +++++++++++++-------------
 1 file changed, 32 insertions(+), 31 deletions(-)

diff --git a/etl/xml_survey_extraction/XmlParser.py b/etl/xml_survey_extraction/XmlParser.py
index dccc0a9f..ef18c6db 100644
--- a/etl/xml_survey_extraction/XmlParser.py
+++ b/etl/xml_survey_extraction/XmlParser.py
@@ -198,9 +198,6 @@ class XmlParser:
             # This file type contains just limited information compared to a regular EPR/EPC, and so we just exit
             # unless we learn something else that determines that we need information from this file
             return
-        self.get_sap()
-        self.get_property_address()
-        self.get_dates()
         self.get_assessor_details()
 
         self.get_heating_and_emissions_data()
@@ -239,6 +236,10 @@ class XmlParser:
         floor_level = "NO DATA!"
 
         self.epc = {
+            "uprn": self.uprn,
+            "property-type": property_type,
+            **self.get_sap(),
+            **self.get_property_address(),
             "low-energy-fixed-light-count": self.get_node_value('Low-Energy-Fixed-Lighting-Outlets-Count'),
             # TODO: Needs to be done more carefully
             # "floor-height" = self.get_node_value_from_floor_dimensions('Room-Height'),
@@ -260,9 +261,8 @@ class XmlParser:
             "sheating-energy-eff": self.RATINGS_MAP[
                 self.get_property_summary_value('Secondary-Heating', 'Energy-Efficiency-Rating'),
             ],
-            # TODO: Doesn't seem to be included in the xml
-            # "local-authority": self.get_node_value('Local-Authority'),
-            "local-authority-label": self.get_node_value('Local-Authority-Label'),
+            "local-authority": "",  # Not included in the xml
+            "local-authority-label": "",
             "fixed-lighting-outlets-count": self.get_node_value('Fixed-Lighting-Outlets-Count'),
             # TODO: Doesn't seem to be included in the xml
             # "energy-tariff": self.get_node_value('Energy-Tariff'),
@@ -346,6 +346,13 @@ class XmlParser:
             "low-energy-lighting": self.get_node_value('Low-Energy-Lighting'),
             "walls-description": self.get_property_summary_value('Wall', 'Description'),
             "hotwater-description": self.get_property_summary_value('Hot-Water', 'Description'),
+            "co2-emissions-current": self.get_node_value('CO2-Emissions-Current'),
+            "heating-cost-current": self.get_node_value('Heating-Cost-Current'),
+            "hot-water-cost-current": self.get_node_value('Hot-Water-Cost-Current'),
+            "lighting-cost-current": self.get_node_value('Lighting-Cost-Current'),
+            "energy-consumption-current": self.get_node_value('Energy-Consumption-Current'),
+            "lodgement-date": self.get_node_value('Inspection-Date'),
+
         }
 
     def get_node_value(self, tag_name):
@@ -410,8 +417,11 @@ class XmlParser:
         sap_score = self.xml.getElementsByTagName('Energy-Rating-Current')
         sap_score = int(sap_score[0].firstChild.nodeValue)
         epc_rating = sap_to_epc(sap_score)
-        self.current_energy_efficiency = str(sap_score)
-        self.current_energy_rating = epc_rating
+
+        return {
+            "current-energy-efficiency": str(sap_score),
+            "current-energy-rating": epc_rating
+        }
 
     def get_heating_and_emissions_data(self):
         """
@@ -431,19 +441,6 @@ class XmlParser:
 
         self.water_heating_kwh = self.xml.getElementsByTagName('Water-Heating')[0].firstChild.nodeValue
 
-        self.co2_emissions_current = self.xml.getElementsByTagName('CO2-Emissions-Current')[0].firstChild.nodeValue
-        self.heating_cost_current = self.xml.getElementsByTagName('Heating-Cost-Current')[0].firstChild.nodeValue
-        self.hot_water_cost_current = self.xml.getElementsByTagName('Hot-Water-Cost-Current')[0].firstChild.nodeValue
-        self.lighting_cost_current = self.xml.getElementsByTagName('Lighting-Cost-Current')[0].firstChild.nodeValue
-
-        # Energy consumption
-        self.energy_consumption_current = (
-            self.xml.getElementsByTagName("Energy-Consumption-Current")[0].firstChild.nodeValue
-        )
-        self.energy_consumption_potential = (
-            self.xml.getElementsByTagName("Energy-Consumption-Potential")[0].firstChild.nodeValue
-        )
-
     def get_detailed_heating_specs(self):
         """
         Given the heating data that is found in the <SAP-Heating> tag, we extract the detailed about the heating
@@ -668,19 +665,23 @@ class XmlParser:
 
         property_tag = self.xml.getElementsByTagName("Property")[0]
 
-        self.address1 = self.get_node(property_tag.getElementsByTagName("Address-Line-1")[0])
-        self.address2 = self.get_node(property_tag.getElementsByTagName("Address-Line-2")[0])
-        self.address3 = self.get_node(property_tag.getElementsByTagName("Address-Line-3")[0])
-        self.posttown = self.get_node(property_tag.getElementsByTagName("Post-Town")[0])
-        self.postcode = self.get_node(property_tag.getElementsByTagName("Postcode")[0])
-        self.address = ", ".join(
+        address1 = self.get_node(property_tag.getElementsByTagName("Address-Line-1")[0])
+        address2 = self.get_node(property_tag.getElementsByTagName("Address-Line-2")[0])
+        address3 = self.get_node(property_tag.getElementsByTagName("Address-Line-3")[0])
+        posttown = self.get_node(property_tag.getElementsByTagName("Post-Town")[0])
+        postcode = self.get_node(property_tag.getElementsByTagName("Postcode")[0])
+        address = ", ".join(
             [x for x in [self.address1, self.address2, self.address3, self.posttown, self.postcode] if x is not None]
         )
 
-    def get_dates(self):
-        self.survey_date = (
-            self.xml.getElementsByTagName("Inspection-Date")[0].firstChild.nodeValue
-        )
+        return {
+            "address1": address1,
+            "address2": address2,
+            "address3": address3,
+            "posttown": posttown,
+            "postcode": postcode,
+            "address": address
+        }
 
     def get_property_dimensions(self):
         """

From a32f479e10634983fe578247bb4f4bbb1e419c9a Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 25 Jul 2024 12:50:13 +0100
Subject: [PATCH 005/182] removed basic attributes that are in the epc

---
 etl/xml_survey_extraction/XmlParser.py | 160 -------------------------
 1 file changed, 160 deletions(-)

diff --git a/etl/xml_survey_extraction/XmlParser.py b/etl/xml_survey_extraction/XmlParser.py
index ef18c6db..4ca1eb50 100644
--- a/etl/xml_survey_extraction/XmlParser.py
+++ b/etl/xml_survey_extraction/XmlParser.py
@@ -205,13 +205,6 @@ class XmlParser:
         self.get_detailed_heating_specs()
 
         # Building fabric
-        self.get_walls()
-        self.get_roof()
-        self.get_floor()
-        self.get_windows()
-        self.get_heating()
-        self.get_hot_water()
-        self.get_lighting()
         self.get_doors()
 
         # Property dimensions
@@ -472,159 +465,6 @@ class XmlParser:
         self.heating_system = heating_system
         self.heating_controls = heating_controls
 
-    def get_walls(self):
-
-        wall_xml_data = self.xml.getElementsByTagName('Property-Summary')[0].getElementsByTagName('Wall')[0]
-
-        self.walls_description = (
-            wall_xml_data
-            .getElementsByTagName("Description")[0]
-            .firstChild.nodeValue
-        )
-
-        self.walls_energy_rating = (
-            wall_xml_data
-            .getElementsByTagName("Energy-Efficiency-Rating")[0]
-            .firstChild.nodeValue
-        )
-
-        is_cavity = "cavity wall" in self.walls_description.lower()
-        is_empty = "no insulation" in self.walls_description.lower()
-        is_partial = "partial insulation" in self.walls_description.lower()
-
-        if not is_cavity:
-            self.walls_classification = "NON CAVITY"
-            return
-
-        if is_empty:
-            self.walls_classification = "EMPTY"
-            return
-
-        if is_partial:
-            self.walls_classification = "PARTIAL"
-            return
-
-        if is_cavity and not is_empty and not is_partial:
-            self.walls_classification = "FULL"
-            return
-
-        raise NotImplementedError("Implement me")
-
-    def get_roof(self):
-
-        room_xml_data = self.xml.getElementsByTagName('Property-Summary')[0].getElementsByTagName('Roof')[0]
-
-        self.roof_description = (
-            room_xml_data
-            .getElementsByTagName("Description")[0]
-            .firstChild.nodeValue
-        )
-
-        self.roof_energy_rating = (
-            room_xml_data
-            .getElementsByTagName("Energy-Efficiency-Rating")[0]
-            .firstChild.nodeValue
-        )
-
-        loft_recommendation_tag = self.xml.getElementsByTagName("Impact-Of-Loft-Insulation")
-        description_contains_loft = "loft" in self.roof_description.lower()
-
-        if not loft_recommendation_tag and not description_contains_loft:
-            self.is_loft = "No"
-            return
-
-        self.is_loft = "Yes"
-        return
-
-    def get_floor(self):
-
-        floor_xml_data = self.xml.getElementsByTagName('Property-Summary')[0].getElementsByTagName('Floor')[0]
-
-        self.floor_description = (
-            floor_xml_data
-            .getElementsByTagName("Description")[0]
-            .firstChild.nodeValue
-        )
-
-        self.floor_energy_rating = (
-            floor_xml_data
-            .getElementsByTagName("Energy-Efficiency-Rating")[0]
-            .firstChild.nodeValue
-        )
-
-    def get_windows(self):
-
-        windows_xml_data = self.xml.getElementsByTagName('Property-Summary')[0].getElementsByTagName('Window')[0]
-
-        self.windows_description = (
-            windows_xml_data
-            .getElementsByTagName("Description")[0]
-            .firstChild.nodeValue
-        )
-
-        self.windows_energy_rating = (
-            windows_xml_data
-            .getElementsByTagName("Energy-Efficiency-Rating")[0]
-            .firstChild.nodeValue
-        )
-
-    def get_heating(self):
-        """
-        This function will retrieve the main heating and the main heating controls
-        :return:
-        """
-        mainheating_xml_data = self.xml.getElementsByTagName('Main-Heating')[0]
-
-        self.main_heating_description = (
-            mainheating_xml_data.getElementsByTagName('Description')[0].firstChild.nodeValue
-        )
-
-        self.main_heating_energy_rating = (
-            mainheating_xml_data.getElementsByTagName('Energy-Efficiency-Rating')[0].firstChild.nodeValue
-        )
-
-        mainheating_controls_xml_data = self.xml.getElementsByTagName('Main-Heating-Controls')[0]
-
-        self.main_heating_controls_description = (
-            mainheating_controls_xml_data.getElementsByTagName('Description')[0].firstChild.nodeValue
-        )
-
-        self.main_heating_controls_energy_rating = (
-            mainheating_controls_xml_data.getElementsByTagName('Energy-Efficiency-Rating')[0].firstChild.nodeValue
-        )
-
-        second_heating_xml_data = self.xml.getElementsByTagName('Secondary-Heating')[0]
-
-        self.second_heating_description = (
-            second_heating_xml_data.getElementsByTagName('Description')[0].firstChild.nodeValue
-        )
-
-        self.second_heating_energy_rating = (
-            second_heating_xml_data.getElementsByTagName('Energy-Efficiency-Rating')[0].firstChild.nodeValue
-        )
-
-    def get_hot_water(self):
-        hot_water_xml_data = self.xml.getElementsByTagName('Hot-Water')[0]
-
-        self.hot_water_description = (
-            hot_water_xml_data.getElementsByTagName('Description')[0].firstChild.nodeValue
-        )
-
-        self.hot_water_energy_rating = (
-            hot_water_xml_data.getElementsByTagName('Energy-Efficiency-Rating')[0].firstChild.nodeValue
-        )
-
-    def get_lighting(self):
-        lighting_xml_data = self.xml.getElementsByTagName('Lighting')[0]
-
-        self.lighting_description = (
-            lighting_xml_data.getElementsByTagName('Description')[0].firstChild.nodeValue
-        )
-
-        self.lighting_energy_rating = (
-            lighting_xml_data.getElementsByTagName('Energy-Efficiency-Rating')[0].firstChild.nodeValue
-        )
-
     def get_doors(self):
 
         # Doors can be found in the SAP-Property-Details tag

From 445b76d50a2277d21d92ef77d9ec657d5b0b7531 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 25 Jul 2024 14:46:11 +0100
Subject: [PATCH 006/182] Added full extraction of floor dimensions

---
 etl/xml_survey_extraction/XmlParser.py | 69 ++++++++++++++++++++++++--
 etl/xml_survey_extraction/app.py       |  3 ++
 2 files changed, 67 insertions(+), 5 deletions(-)

diff --git a/etl/xml_survey_extraction/XmlParser.py b/etl/xml_survey_extraction/XmlParser.py
index 4ca1eb50..7f317f29 100644
--- a/etl/xml_survey_extraction/XmlParser.py
+++ b/etl/xml_survey_extraction/XmlParser.py
@@ -1,5 +1,6 @@
 import re
 import usaddress
+from datetime import datetime
 from xml.dom.minidom import parseString
 from backend.app.utils import sap_to_epc
 from etl.xml_survey_extraction.pcdb import heating_data
@@ -119,6 +120,8 @@ class XmlParser:
     floor_height = None
     insulation_wall_area = None
 
+    floor_dimensions = None
+
     rrn = None
 
     database_data = None
@@ -230,7 +233,9 @@ class XmlParser:
 
         self.epc = {
             "uprn": self.uprn,
+            "uprn-source": "Address Matched",
             "property-type": property_type,
+            "building-reference-number": "",
             **self.get_sap(),
             **self.get_property_address(),
             "low-energy-fixed-light-count": self.get_node_value('Low-Energy-Fixed-Lighting-Outlets-Count'),
@@ -252,7 +257,7 @@ class XmlParser:
             "mainheatcont-description":
                 self.get_property_summary_value('Main-Heating-Controls', 'Description'),
             "sheating-energy-eff": self.RATINGS_MAP[
-                self.get_property_summary_value('Secondary-Heating', 'Energy-Efficiency-Rating'),
+                self.get_property_summary_value('Secondary-Heating', 'Energy-Efficiency-Rating')
             ],
             "local-authority": "",  # Not included in the xml
             "local-authority-label": "",
@@ -304,7 +309,7 @@ class XmlParser:
             "sheating-env-eff": self.RATINGS_MAP[
                 self.get_property_summary_value('Secondary-Heating', 'Environmental-Efficiency-Rating')
             ],
-            "lighting_description": self.get_property_summary_value('Lighting', 'Description'),
+            "lighting-description": self.get_property_summary_value('Lighting', 'Description'),
             "roof-env-eff": self.RATINGS_MAP[
                 self.get_property_summary_value('Roof', 'Environmental-Efficiency-Rating')
             ],
@@ -329,10 +334,11 @@ class XmlParser:
                 self.get_property_summary_value('Main-Heating-Controls', 'Environmental-Efficiency-Rating')
             ],
             "lmk-key": "",  # Doesn't exist for non-EPC xmls
-            "wind-turbines-count": self.get_node_value('Wind-Turbines-Count'),
+            "wind-turbine-count": self.get_node_value('Wind-Turbines-Count'),
             "tenure": self.TENURE_MAP[self.get_node_value('Tenure')],
             "floor-level": floor_level,
             "potential-energy-efficiency": self.get_energy_assessment_value('Energy-Rating-Potential'),
+            "potentual-energy-rating": sap_to_epc(float(self.get_energy_assessment_value('Energy-Rating-Potential'))),
             "hot-water-energy-eff": self.RATINGS_MAP[
                 self.get_property_summary_value('Hot-Water', 'Energy-Efficiency-Rating')
             ],
@@ -341,10 +347,15 @@ class XmlParser:
             "hotwater-description": self.get_property_summary_value('Hot-Water', 'Description'),
             "co2-emissions-current": self.get_node_value('CO2-Emissions-Current'),
             "heating-cost-current": self.get_node_value('Heating-Cost-Current'),
+            "heating-cost-potential": self.get_energy_assessment_value('Heating-Cost-Potential'),
             "hot-water-cost-current": self.get_node_value('Hot-Water-Cost-Current'),
+            "hot-water-cost-potential": self.get_energy_assessment_value('Hot-Water-Cost-Potential'),
             "lighting-cost-current": self.get_node_value('Lighting-Cost-Current'),
             "energy-consumption-current": self.get_node_value('Energy-Consumption-Current'),
             "lodgement-date": self.get_node_value('Inspection-Date'),
+            "lodgement-datetime":
+                datetime.strptime(self.get_node_value('Inspection-Date'), "%Y-%m-%d").isoformat(),
+            "mainheat-description": self.get_property_summary_value('Main-Heating', 'Description'),
 
         }
 
@@ -511,8 +522,15 @@ class XmlParser:
         posttown = self.get_node(property_tag.getElementsByTagName("Post-Town")[0])
         postcode = self.get_node(property_tag.getElementsByTagName("Postcode")[0])
         address = ", ".join(
-            [x for x in [self.address1, self.address2, self.address3, self.posttown, self.postcode] if x is not None]
+            [x for x in [address1, address2, address3] if x is not None]
         )
+        county = property_tag.getElementsByTagName("County")
+        if county:
+            county = county[0].firstChild.nodeValue
+
+        # Seems to be unavailable in the xml
+        constituency = None
+        constituency_label = None
 
         return {
             "address1": address1,
@@ -520,7 +538,10 @@ class XmlParser:
             "address3": address3,
             "posttown": posttown,
             "postcode": postcode,
-            "address": address
+            "address": address,
+            "county": county,
+            "constituency": constituency,
+            "constituency-label": constituency_label
         }
 
     def get_property_dimensions(self):
@@ -572,3 +593,41 @@ class XmlParser:
 
         self.insulation_wall_area = self.heat_loss_perimeter * self.floor_height * self.INSULATION_WALL_AREA_FACTOR
         self.perimeter = self.heat_loss_perimeter + self.party_wall_length
+
+    def get_floor_dimensions(self):
+
+        """
+        Extracts physical measurements of the property such as the floor area, room height, etc.
+        across the main dwelling and any extensions.
+        :return:
+        """
+
+        def get_part_value(node, tag_name):
+            element = node.getElementsByTagName(tag_name)
+            if element and element[0].firstChild:
+                return element[0].firstChild.nodeValue
+            return None
+
+        # Each part will correspond to the main
+        sap_building_parts = self.xml.getElementsByTagName("SAP-Building-Part")
+
+        floor_dimensions = []
+        for building_part in sap_building_parts:
+            building_part_identifier = building_part.getElementsByTagName("Identifier")[0].firstChild.nodeValue
+            sap_floor_dimensions = building_part.getElementsByTagName("SAP-Floor-Dimension")
+
+            data = [
+                {
+                    'building_part_identifier': building_part_identifier,
+                    'floor': get_part_value(floor_dimension, 'Floor'),
+                    'floor_construction': get_part_value(floor_dimension, 'Floor-Construction'),
+                    'floor_insulation': get_part_value(floor_dimension, 'Floor-Insulation'),
+                    'heat_loss-perimeter': get_part_value(floor_dimension, 'Heat-Loss-Perimeter'),
+                    'party_wall-length': get_part_value(floor_dimension, 'Party-Wall-Length'),
+                    'total_floor-area': get_part_value(floor_dimension, 'Total-Floor-Area'),
+                    'room_height': get_part_value(floor_dimension, 'Room-Height')
+                } for floor_dimension in sap_floor_dimensions
+            ]
+            floor_dimensions.extend(data)
+
+        self.floor_dimensions = floor_dimensions
diff --git a/etl/xml_survey_extraction/app.py b/etl/xml_survey_extraction/app.py
index 9bcbb168..c70097d4 100644
--- a/etl/xml_survey_extraction/app.py
+++ b/etl/xml_survey_extraction/app.py
@@ -9,6 +9,7 @@ logger = setup_logger()
 SURVEYORS = "JAFFERSONS ENERGY CONSULTANTS"
 PROJECT_CODE = "VDE001"
 BUCKET = "retrofit-energy-assessments-dev"
+PORTFOLIO_ID = None
 
 
 def main():
@@ -48,3 +49,5 @@ def main():
             xml_parser = XmlParser(file=xml_data_io, filekey=xml, uprn=uprn)
             xml_parser.run()
             logger.info(f"Extracted data from {xml}")
+
+    # TODO: Set a portfolio ID, Target and Automatically upload the asset list and create the event for the portfolio

From eaa1c3bca4e97c88b1908a0ba329043ac9cfc0cd Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 25 Jul 2024 14:47:02 +0100
Subject: [PATCH 007/182] extracting floor dimensions

---
 etl/xml_survey_extraction/XmlParser.py | 52 --------------------------
 1 file changed, 52 deletions(-)

diff --git a/etl/xml_survey_extraction/XmlParser.py b/etl/xml_survey_extraction/XmlParser.py
index 7f317f29..0d9dc512 100644
--- a/etl/xml_survey_extraction/XmlParser.py
+++ b/etl/xml_survey_extraction/XmlParser.py
@@ -44,70 +44,18 @@ def get_house_number(address: str) -> str | None:
 
 class XmlParser:
     uprn = None
-    property_type = None
-    current_energy_efficiency = None
-    current_energy_rating = None
 
     # heating/emissions information
     space_heating_kwh = None
     water_heating_kwh = None
-    co2_emissions_current = None
-    heating_cost_current = None
-    hot_water_cost_current = None
-    lighting_cost_current = None
-    energy_consumption_current = None
-    energy_consumption_potential = None
     heating_system = None
     heating_controls = None
 
     # Assessor details
     surveyor_name = None
 
-    # Addresses
-    address1 = None
-    address2 = None
-    address3 = None
-    posttown = None
-    postcode = None
-    address = None
-
-    # Dates
-    survey_date = None
-
-    # Building Fabric
-    # Walls
-    walls_description = None
-    walls_classification = None
-    walls_energy_rating = None
-    # Roof
-    roof_description = None
-    roof_energy_rating = None
-    is_loft = None
-    # Floor
-    floor_description = None
-    floor_energy_rating = None
-    # Windows
-    windows_description = None
-    windows_energy_rating = None
-    # main heating
-    main_heating_description = None
-    main_heating_energy_rating = None
-    # Heating controls
-    main_heating_controls_description = None
-    main_heating_controls_energy_rating = None
-    # Hot water
-    hot_water_description = None
-    hot_water_energy_rating = None
-    # Lighting
-    lighting_description = None
-    lighting_energy_rating = None
-    # Second Heating
-    second_heating_description = None
-    second_heating_energy_rating = None
-
     number_of_doors = None
     number_of_insulated_doors = None
-    photo_supply = None
 
     # Property dimensions
     number_of_floors = None

From a3c2ff06a8a2cf4317fc8e89285756fa5a49d398 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 25 Jul 2024 15:55:13 +0100
Subject: [PATCH 008/182] retrieved all epc fields

---
 etl/xml_survey_extraction/XmlParser.py | 74 ++++++++++++++++++++++++--
 etl/xml_survey_extraction/app.py       |  8 +++
 2 files changed, 78 insertions(+), 4 deletions(-)

diff --git a/etl/xml_survey_extraction/XmlParser.py b/etl/xml_survey_extraction/XmlParser.py
index 0d9dc512..1533d4c7 100644
--- a/etl/xml_survey_extraction/XmlParser.py
+++ b/etl/xml_survey_extraction/XmlParser.py
@@ -1,4 +1,5 @@
 import re
+import numpy as np
 import usaddress
 from datetime import datetime
 from xml.dom.minidom import parseString
@@ -43,6 +44,7 @@ def get_house_number(address: str) -> str | None:
 
 
 class XmlParser:
+    epc = None
     uprn = None
 
     # heating/emissions information
@@ -56,6 +58,7 @@ class XmlParser:
 
     number_of_doors = None
     number_of_insulated_doors = None
+    windows = None
 
     # Property dimensions
     number_of_floors = None
@@ -153,7 +156,7 @@ class XmlParser:
 
         self.get_heating_and_emissions_data()
 
-        self.get_detailed_heating_specs()
+        # self.get_detailed_heating_specs()
 
         # Building fabric
         self.get_doors()
@@ -161,11 +164,21 @@ class XmlParser:
         # Property dimensions
         self.get_property_dimensions()
 
+        self.get_floor_dimensions()
+
+        self.get_windows()
+
         # Get all of the EPC data
         self.extract_epc()
 
     def extract_epc(self):
 
+        if self.floor_dimensions is None:
+            raise ValueError("Run get_floor_dimensions() first")
+
+        if self.windows is None:
+            raise ValueError("Run get_windows() first")
+
         property_type = self.get_property_type()
 
         if property_type == "Flat":
@@ -178,6 +191,15 @@ class XmlParser:
         flat_storey_count = ""
         flat_top_storey = ""
         floor_level = "NO DATA!"
+        energy_tariff = "NO DATA!"
+
+        floor_height = np.mean([
+            float(x['room_height']) for x in self.floor_dimensions if x['building_part_identifier'] == 'Main Dwelling'
+        ])
+
+        # Take the most prevelant glazing type
+        glazed_type = [w["glazing_type"] for w in self.windows if w['window_location'] == '0']
+        glazed_type = max(glazed_type, key=glazed_type.count)
 
         self.epc = {
             "uprn": self.uprn,
@@ -286,7 +308,7 @@ class XmlParser:
             "tenure": self.TENURE_MAP[self.get_node_value('Tenure')],
             "floor-level": floor_level,
             "potential-energy-efficiency": self.get_energy_assessment_value('Energy-Rating-Potential'),
-            "potentual-energy-rating": sap_to_epc(float(self.get_energy_assessment_value('Energy-Rating-Potential'))),
+            "potential-energy-rating": sap_to_epc(float(self.get_energy_assessment_value('Energy-Rating-Potential'))),
             "hot-water-energy-eff": self.RATINGS_MAP[
                 self.get_property_summary_value('Hot-Water', 'Energy-Efficiency-Rating')
             ],
@@ -304,7 +326,9 @@ class XmlParser:
             "lodgement-datetime":
                 datetime.strptime(self.get_node_value('Inspection-Date'), "%Y-%m-%d").isoformat(),
             "mainheat-description": self.get_property_summary_value('Main-Heating', 'Description'),
-
+            "floor-height": floor_height,
+            "glazed-type": glazed_type,
+            "energy-tariff": energy_tariff,
         }
 
     def get_node_value(self, tag_name):
@@ -405,7 +429,7 @@ class XmlParser:
             .getElementsByTagName("Main-Heating")[0]
         )
 
-        heating_code = sap_main_heating_details.getElementsByTagName("SAP-Main-Heating-Code")[0].firstChild.nodeValue
+        heating_code = sap_main_heating_details.getElementsByTagName("Main-Heating-Number")[0].firstChild.nodeValue
 
         # Get the heating system
         heating_system = heating_data[heating_data["code"] == int(heating_code)]["description"]
@@ -579,3 +603,45 @@ class XmlParser:
             floor_dimensions.extend(data)
 
         self.floor_dimensions = floor_dimensions
+
+    def get_windows(self):
+        """
+        Extracts data about the windows in the property, including the number of windows and the window type.
+        :return:
+        """
+
+        sap_windows = self.xml.getElementsByTagName("SAP-Windows")[0].getElementsByTagName("SAP-Window")
+
+        # This is the data in each sap window:
+        # <Window-Location>2</Window-Location>
+        #           <Window-Area quantity="square metres">1.55</Window-Area>
+        #           <Window-Type>1</Window-Type>
+        #           <Glazing-Type>3</Glazing-Type>
+        #           <PVC-Frame>true</PVC-Frame>
+        #           <Glazing-Gap>16+</Glazing-Gap>
+        #           <Orientation>7</Orientation>
+
+        glazing_type_lookup = {
+            "3": "double glazing, unknown install date"
+        }
+
+        orientation_lookup = {
+            "3": "East",
+            "5": "South",
+            "1": "North",
+            "7": "West",
+        }
+
+        self.windows = [
+            {
+                "window_location": window.getElementsByTagName("Window-Location")[0].firstChild.nodeValue,
+                "window_area": window.getElementsByTagName("Window-Area")[0].firstChild.nodeValue,
+                "window_type": window.getElementsByTagName("Window-Type")[0].firstChild.nodeValue,
+                "glazing_type": glazing_type_lookup[
+                    window.getElementsByTagName("Glazing-Type")[0].firstChild.nodeValue
+                ],
+                "pvc_frame": window.getElementsByTagName("PVC-Frame")[0].firstChild.nodeValue,
+                "glazing_gap": window.getElementsByTagName("Glazing-Gap")[0].firstChild.nodeValue,
+                "orientation": orientation_lookup[window.getElementsByTagName("Orientation")[0].firstChild.nodeValue]
+            } for window in sap_windows
+        ]
diff --git a/etl/xml_survey_extraction/app.py b/etl/xml_survey_extraction/app.py
index c70097d4..c32bd787 100644
--- a/etl/xml_survey_extraction/app.py
+++ b/etl/xml_survey_extraction/app.py
@@ -1,3 +1,5 @@
+import pandas as pd
+
 from utils.s3 import read_from_s3, list_files_and_subfolders_in_s3_folder, list_xmls_in_s3_folder
 from utils.logger import setup_logger
 from etl.xml_survey_extraction.XmlParser import XmlParser
@@ -51,3 +53,9 @@ def main():
             logger.info(f"Extracted data from {xml}")
 
     # TODO: Set a portfolio ID, Target and Automatically upload the asset list and create the event for the portfolio
+
+    # TODO: In order to get the full data associated to the heating system, we need to download and parse the pcdb which
+    #       can be found here: https://www.ncm-pcdb.org.uk/pcdb/pcdb10.dat
+    #                          https://www.ncm-pcdb.org.uk/sap/download
+    #       However retrieving this data is not a priority, so we can leave this for now as parsing the database
+    #       is a non-trivial task

From bc84ed2c2a95dad1926e632bee8e9e6406f7e115 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 25 Jul 2024 18:04:09 +0100
Subject: [PATCH 009/182] cleaning up epc data and adding additional:

---
 etl/xml_survey_extraction/XmlParser.py | 184 ++++++++++++++-----------
 etl/xml_survey_extraction/app.py       |   2 +-
 2 files changed, 101 insertions(+), 85 deletions(-)

diff --git a/etl/xml_survey_extraction/XmlParser.py b/etl/xml_survey_extraction/XmlParser.py
index 1533d4c7..53f7e859 100644
--- a/etl/xml_survey_extraction/XmlParser.py
+++ b/etl/xml_survey_extraction/XmlParser.py
@@ -45,6 +45,7 @@ def get_house_number(address: str) -> str | None:
 
 class XmlParser:
     epc = None
+    additional_data = None
     uprn = None
 
     # heating/emissions information
@@ -66,20 +67,11 @@ class XmlParser:
     heat_loss_perimeter = None
     party_wall_length = None
     total_floor_area = None
-    ground_floor_area = None
-    is_there_party_wall = None
     floor_height = None
     insulation_wall_area = None
 
     floor_dimensions = None
 
-    rrn = None
-
-    database_data = None
-
-    # We assume that the insulation wall area is 85% of the total wall area, as a standard estimate
-    INSULATION_WALL_AREA_FACTOR = 0.85
-
     # The value of the URPN tells us about the file type that we're parsing
     UPRN_FILETYPE_MAP = {
         0: "EPR",
@@ -119,6 +111,10 @@ class XmlParser:
         '1': "Owner-occupied"
     }
 
+    TARIFF_MAP = {
+        "2": "Single"
+    }
+
     def __init__(self, file, filekey, uprn=None):
         file.seek(0)  # Ensure the file pointer is at the beginning
         xml_string = file.read().decode('utf-8')
@@ -161,9 +157,6 @@ class XmlParser:
         # Building fabric
         self.get_doors()
 
-        # Property dimensions
-        self.get_property_dimensions()
-
         self.get_floor_dimensions()
 
         self.get_windows()
@@ -171,6 +164,9 @@ class XmlParser:
         # Get all of the EPC data
         self.extract_epc()
 
+        # Put together all of the additional data we capture
+        self.extract_additional_data()
+
     def extract_epc(self):
 
         if self.floor_dimensions is None:
@@ -191,16 +187,23 @@ class XmlParser:
         flat_storey_count = ""
         flat_top_storey = ""
         floor_level = "NO DATA!"
-        energy_tariff = "NO DATA!"
 
         floor_height = np.mean([
-            float(x['room_height']) for x in self.floor_dimensions if x['building_part_identifier'] == 'Main Dwelling'
+            float(x['room_height']) for x in self.floor_dimensions if
+            x['building_part_identifier'] == 'Main Dwelling' and not x['room_roof']
         ])
 
         # Take the most prevelant glazing type
         glazed_type = [w["glazing_type"] for w in self.windows if w['window_location'] == '0']
         glazed_type = max(glazed_type, key=glazed_type.count)
 
+        energy_tariff = (
+            self.xml.getElementsByTagName("SAP-Energy-Source")[0]
+            .getElementsByTagName("Meter-Type")[0]
+            .firstChild.nodeValue
+        )
+        energy_tariff = self.TARIFF_MAP[energy_tariff]
+
         self.epc = {
             "uprn": self.uprn,
             "uprn-source": "Address Matched",
@@ -209,8 +212,6 @@ class XmlParser:
             **self.get_sap(),
             **self.get_property_address(),
             "low-energy-fixed-light-count": self.get_node_value('Low-Energy-Fixed-Lighting-Outlets-Count'),
-            # TODO: Needs to be done more carefully
-            # "floor-height" = self.get_node_value_from_floor_dimensions('Room-Height'),
             "construction-age-band": self.get_node_value('Construction-Age-Band'),
             "mainheat-energy-eff": self.RATINGS_MAP[
                 self.get_property_summary_value('Main-Heating', 'Energy-Efficiency-Rating')
@@ -222,8 +223,6 @@ class XmlParser:
                 self.get_property_summary_value('Lighting', 'Energy-Efficiency-Rating')
             ],
             "environment-impact-potential": self.get_energy_assessment_value('Environmental-Impact-Potential'),
-            # TODO: Needs to be done more careully since we have multiple windows
-            # "glazed-type": self.get_node_value('Glazing-Type'),
             "mainheatcont-description":
                 self.get_property_summary_value('Main-Heating-Controls', 'Description'),
             "sheating-energy-eff": self.RATINGS_MAP[
@@ -232,8 +231,7 @@ class XmlParser:
             "local-authority": "",  # Not included in the xml
             "local-authority-label": "",
             "fixed-lighting-outlets-count": self.get_node_value('Fixed-Lighting-Outlets-Count'),
-            # TODO: Doesn't seem to be included in the xml
-            # "energy-tariff": self.get_node_value('Energy-Tariff'),
+            "energy-tariff": energy_tariff,
             "mechanical-ventilation": self.MECHANICAL_VENTILATION_MAP[self.get_node_value('Mechanical-Ventilation')],
             "solar-water-heating-flag": self.get_node_value('Solar-Water-Heating'),
             "co2-emissions-potential": self.get_energy_assessment_value('CO2-Emissions-Potential'),
@@ -328,7 +326,47 @@ class XmlParser:
             "mainheat-description": self.get_property_summary_value('Main-Heating', 'Description'),
             "floor-height": floor_height,
             "glazed-type": glazed_type,
-            "energy-tariff": energy_tariff,
+        }
+
+    def get_insulation_wall_area(self):
+        """
+        Extracts the insulation wall area for the main dwelling
+        :return:
+        """
+
+        main_dwelling_floors = [
+            f for f in self.floor_dimensions if f["building_part_identifier"] == "Main Dwelling" and not f["room_roof"]
+        ]
+        main_dwelling_windows = [
+            w for w in self.windows if w["window_location"] == "0"
+        ]
+
+        wall_areas = sum([float(f["heat_loss_perimeter"]) * float(f["room_height"]) for f in main_dwelling_floors])
+        window_areas = sum([float(w["window_area"]) for w in main_dwelling_windows])
+        return wall_areas - window_areas
+
+    def extract_additional_data(self):
+
+        self.insulation_wall_area = self.get_insulation_wall_area()
+
+        self.additional_data = {
+            "file_location": self.filekey,
+            "surveyor_name": self.surveyor_name,
+            "space_heating_kwh": self.space_heating_kwh,
+            "water_heating_kwh": self.water_heating_kwh,
+            # "heating_system": self.heating_system,
+            # "heating_controls": self.heating_controls,
+            "number_of_doors": self.number_of_doors,
+            "number_of_insulated_doors": self.number_of_insulated_doors,
+            "number_of_floors": self.number_of_floors,
+            "insulation_wall_area": self.insulation_wall_area,
+            "heat_loss_perimeter": self.heat_loss_perimeter,
+            "party_wall_length": self.party_wall_length,
+            "perimeter": self.perimeter,
+            "rooms_with_bath_and_or_shower": self.get_node_value('Rooms-With-Bath-And-Or-Shower'),
+            "rooms_with_mixer_shower_no_bath": self.get_node_value('Rooms-With-Mixer-Shower-No-Bath'),
+            "room_with_bath_and_mixer_shower": self.get_node_value('Rooms-With-Bath-And-Mixer-Shower'),
+            "percent_draftproofed": self.get_node_value('Percent-Draughtproofed'),
         }
 
     def get_node_value(self, tag_name):
@@ -516,56 +554,6 @@ class XmlParser:
             "constituency-label": constituency_label
         }
 
-    def get_property_dimensions(self):
-        """
-        This function will extract the relevant property dimensions including the floor area,
-        number of floors, perimeter, party wall length and the insulation_wall_area.
-
-        insulation_wall_area is typically simplified down to perimeter * height * 0.85
-        :return:
-        """
-
-        # Each floor has its own SAP-Floor-Dimension tag
-        floor_dimensions = (
-            self.xml.getElementsByTagName("SAP-Floor-Dimensions")[0]
-            .getElementsByTagName("SAP-Floor-Dimension")
-        )
-
-        self.number_of_floors = len(floor_dimensions)
-
-        self.heat_loss_perimeter = float(
-            floor_dimensions[0].getElementsByTagName("Heat-Loss-Perimeter")[0].firstChild.nodeValue
-        )
-
-        self.party_wall_length = float(
-            floor_dimensions[0].getElementsByTagName("Party-Wall-Length")[0].firstChild.nodeValue
-        )
-
-        party_wall_construction_tag = (
-            self.xml.getElementsByTagName("Party-Wall-Construction")[0].firstChild.nodeValue.replace("\n", "").strip()
-        )
-
-        self.is_there_party_wall = (
-            "Yes" if (self.party_wall_length > 0) or (party_wall_construction_tag != "") else "No"
-        )
-
-        # We pull out all of the floor areas
-        floor_areas = [
-            float(x.getElementsByTagName("Total-Floor-Area")[0].firstChild.nodeValue) for x in floor_dimensions
-        ]
-
-        self.total_floor_area = sum(floor_areas)
-        self.ground_floor_area = floor_areas[0]
-
-        self.floor_height = float(
-            floor_dimensions[0]
-            .getElementsByTagName("Room-Height")[0]
-            .firstChild.nodeValue
-        )
-
-        self.insulation_wall_area = self.heat_loss_perimeter * self.floor_height * self.INSULATION_WALL_AREA_FACTOR
-        self.perimeter = self.heat_loss_perimeter + self.party_wall_length
-
     def get_floor_dimensions(self):
 
         """
@@ -594,16 +582,53 @@ class XmlParser:
                     'floor': get_part_value(floor_dimension, 'Floor'),
                     'floor_construction': get_part_value(floor_dimension, 'Floor-Construction'),
                     'floor_insulation': get_part_value(floor_dimension, 'Floor-Insulation'),
-                    'heat_loss-perimeter': get_part_value(floor_dimension, 'Heat-Loss-Perimeter'),
-                    'party_wall-length': get_part_value(floor_dimension, 'Party-Wall-Length'),
-                    'total_floor-area': get_part_value(floor_dimension, 'Total-Floor-Area'),
-                    'room_height': get_part_value(floor_dimension, 'Room-Height')
+                    'heat_loss_perimeter': get_part_value(floor_dimension, 'Heat-Loss-Perimeter'),
+                    'party_wall_length': get_part_value(floor_dimension, 'Party-Wall-Length'),
+                    'total_floor_area': get_part_value(floor_dimension, 'Total-Floor-Area'),
+                    'room_height': get_part_value(floor_dimension, 'Room-Height'),
+                    "room_roof": False
                 } for floor_dimension in sap_floor_dimensions
             ]
+
+            room_roofs = building_part.getElementsByTagName("SAP-Room-In-Roof")
+            room_roof_data = [
+                {
+                    "building_part_identifier": building_part_identifier,
+                    "floor": str(max([int(d["floor"]) for d in data]) + 1),
+                    "floor_construction": "",
+                    "floor_insulation": rr.getElementsByTagName("Insulation")[0].firstChild.nodeValue,
+                    "heat_loss_perimeter": "",
+                    "party_wall_length": "",
+                    "total_floor_area": rr.getElementsByTagName("Floor-Area")[0].firstChild.nodeValue,
+                    "room_height": "",
+                    "room_roof": True
+                } for rr in room_roofs
+            ]
+
             floor_dimensions.extend(data)
+            floor_dimensions.extend(room_roof_data)
 
         self.floor_dimensions = floor_dimensions
 
+        self.number_of_floors = len(
+            [f for f in self.floor_dimensions if f["building_part_identifier"] == "Main Dwelling"]
+        )
+        self.heat_loss_perimeter = max(
+            [
+                float(f["heat_loss_perimeter"]) for f in self.floor_dimensions
+                if f["building_part_identifier"] == "Main Dwelling" and not f["room_roof"]
+            ]
+        )
+
+        self.party_wall_length = max(
+            [
+                float(f["party_wall_length"]) for f in self.floor_dimensions
+                if f["building_part_identifier"] == "Main Dwelling" and not f["room_roof"]
+            ]
+        )
+
+        self.perimeter = self.heat_loss_perimeter + self.party_wall_length
+
     def get_windows(self):
         """
         Extracts data about the windows in the property, including the number of windows and the window type.
@@ -612,15 +637,6 @@ class XmlParser:
 
         sap_windows = self.xml.getElementsByTagName("SAP-Windows")[0].getElementsByTagName("SAP-Window")
 
-        # This is the data in each sap window:
-        # <Window-Location>2</Window-Location>
-        #           <Window-Area quantity="square metres">1.55</Window-Area>
-        #           <Window-Type>1</Window-Type>
-        #           <Glazing-Type>3</Glazing-Type>
-        #           <PVC-Frame>true</PVC-Frame>
-        #           <Glazing-Gap>16+</Glazing-Gap>
-        #           <Orientation>7</Orientation>
-
         glazing_type_lookup = {
             "3": "double glazing, unknown install date"
         }
diff --git a/etl/xml_survey_extraction/app.py b/etl/xml_survey_extraction/app.py
index c32bd787..b3500e71 100644
--- a/etl/xml_survey_extraction/app.py
+++ b/etl/xml_survey_extraction/app.py
@@ -48,7 +48,7 @@ def main():
         for xml in xmls:
             xml_data = read_from_s3(bucket_name=BUCKET, s3_file_name=xml)
             xml_data_io = BytesIO(xml_data)
-            xml_parser = XmlParser(file=xml_data_io, filekey=xml, uprn=uprn)
+            xml_parser = XmlParser(file=xml_data_io, filekey=os.path.join(f"s3://{BUCKET}", xml), uprn=uprn)
             xml_parser.run()
             logger.info(f"Extracted data from {xml}")
 

From c9d3bb6eec1dde40a136b01ff7efefb1d51f811c Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 25 Jul 2024 18:09:37 +0100
Subject: [PATCH 010/182] completed extraction of data

---
 etl/xml_survey_extraction/XmlParser.py | 19 ++++++++++++++++++-
 etl/xml_survey_extraction/app.py       |  7 ++++++-
 2 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/etl/xml_survey_extraction/XmlParser.py b/etl/xml_survey_extraction/XmlParser.py
index 53f7e859..d14dafc4 100644
--- a/etl/xml_survey_extraction/XmlParser.py
+++ b/etl/xml_survey_extraction/XmlParser.py
@@ -115,11 +115,12 @@ class XmlParser:
         "2": "Single"
     }
 
-    def __init__(self, file, filekey, uprn=None):
+    def __init__(self, file, filekey, surveyor_company, uprn=None):
         file.seek(0)  # Ensure the file pointer is at the beginning
         xml_string = file.read().decode('utf-8')
         self.xml = parseString(xml_string)
         self.filekey = filekey
+        self.surveyor_company = surveyor_company
 
         # The xml parser is use to parse the EPC and EPR xmls and different file types will contain different
         # information
@@ -349,9 +350,21 @@ class XmlParser:
 
         self.insulation_wall_area = self.get_insulation_wall_area()
 
+        boolean_lookup = {
+            "true": True,
+            "false": False,
+            "Y": True,
+            "N": False
+        }
+
+        cylinder_insulation_type = {
+            "1": "Foam",
+        }
+
         self.additional_data = {
             "file_location": self.filekey,
             "surveyor_name": self.surveyor_name,
+            "surveyor_company": self.surveyor_company,
             "space_heating_kwh": self.space_heating_kwh,
             "water_heating_kwh": self.water_heating_kwh,
             # "heating_system": self.heating_system,
@@ -367,6 +380,10 @@ class XmlParser:
             "rooms_with_mixer_shower_no_bath": self.get_node_value('Rooms-With-Mixer-Shower-No-Bath'),
             "room_with_bath_and_mixer_shower": self.get_node_value('Rooms-With-Bath-And-Mixer-Shower'),
             "percent_draftproofed": self.get_node_value('Percent-Draughtproofed'),
+            "has_hot_water_cylinder": boolean_lookup[self.get_node_value('Has-Hot-Water-Cylinder')],
+            "cylinder_insulation_type": cylinder_insulation_type[self.get_node_value('Cylinder-Insulation-Type')],
+            "cylinder_insulation_thickness": self.get_node_value('Cylinder-Insulation-Thickness'),
+            "cylinder_thermostat": boolean_lookup[self.get_node_value('Cylinder-Thermostat')],
         }
 
     def get_node_value(self, tag_name):
diff --git a/etl/xml_survey_extraction/app.py b/etl/xml_survey_extraction/app.py
index b3500e71..92048a68 100644
--- a/etl/xml_survey_extraction/app.py
+++ b/etl/xml_survey_extraction/app.py
@@ -48,7 +48,12 @@ def main():
         for xml in xmls:
             xml_data = read_from_s3(bucket_name=BUCKET, s3_file_name=xml)
             xml_data_io = BytesIO(xml_data)
-            xml_parser = XmlParser(file=xml_data_io, filekey=os.path.join(f"s3://{BUCKET}", xml), uprn=uprn)
+            xml_parser = XmlParser(
+                file=xml_data_io,
+                filekey=os.path.join(f"s3://{BUCKET}", xml),
+                uprn=uprn,
+                surveyor_company=SURVEYORS,
+            )
             xml_parser.run()
             logger.info(f"Extracted data from {xml}")
 

From 7b04e1edc72a2e255fbc359fbbec3c1c72a37206 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 25 Jul 2024 18:13:50 +0100
Subject: [PATCH 011/182] preparing for data extraction

---
 etl/xml_survey_extraction/XmlParser.py | 27 +++++++++-----------------
 1 file changed, 9 insertions(+), 18 deletions(-)

diff --git a/etl/xml_survey_extraction/XmlParser.py b/etl/xml_survey_extraction/XmlParser.py
index d14dafc4..76fa5612 100644
--- a/etl/xml_survey_extraction/XmlParser.py
+++ b/etl/xml_survey_extraction/XmlParser.py
@@ -44,8 +44,8 @@ def get_house_number(address: str) -> str | None:
 
 
 class XmlParser:
-    epc = None
-    additional_data = None
+    epc = {}
+    additional_data = {}
     uprn = None
 
     # heating/emissions information
@@ -72,12 +72,6 @@ class XmlParser:
 
     floor_dimensions = None
 
-    # The value of the URPN tells us about the file type that we're parsing
-    UPRN_FILETYPE_MAP = {
-        0: "EPR",
-        -1: "RDSAP_EPR"
-    }
-
     RATINGS_MAP = {
         "0": "N/A",
         "1": "Very Poor",
@@ -122,14 +116,11 @@ class XmlParser:
         self.filekey = filekey
         self.surveyor_company = surveyor_company
 
-        # The xml parser is use to parse the EPC and EPR xmls and different file types will contain different
-        # information
-        # In order to identify the file type, we can look for the presence of the 'UPRN' tag
-        # If the UPRN tag is present, we can assume that the file is an EPC
-        # If the UPRN tag is not present, we can assume that the file is an EPR
-        self.get_uprn(uprn)
+        # We check if we have a lig xml or rdsap xml
+        # We look for the presence of the Schema-Version-Original tag
+        self.is_lig = len(self.xml.getElementsByTagName("Schema-Version-Original")) > 0
 
-        self.file_type = self.UPRN_FILETYPE_MAP.get(self.uprn, "EPC")
+        self.get_uprn(uprn)
 
     @staticmethod
     def get_node(node):
@@ -145,10 +136,10 @@ class XmlParser:
         return node_first_child.nodeValue
 
     def run(self):
-        if self.file_type == "RDSAP_EPR":
-            # This file type contains just limited information compared to a regular EPR/EPC, and so we just exit
-            # unless we learn something else that determines that we need information from this file
+
+        if not self.is_lig:
             return
+
         self.get_assessor_details()
 
         self.get_heating_and_emissions_data()

From 6702eb65b06419b1459b1226915d73aded06110b Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 25 Jul 2024 18:35:00 +0100
Subject: [PATCH 012/182] energy assessment model

---
 backend/app/db/models/energy_assessments.py | 121 ++++++++++++++++++++
 etl/xml_survey_extraction/XmlParser.py      |  10 +-
 etl/xml_survey_extraction/app.py            |   2 +
 3 files changed, 128 insertions(+), 5 deletions(-)
 create mode 100644 backend/app/db/models/energy_assessments.py

diff --git a/backend/app/db/models/energy_assessments.py b/backend/app/db/models/energy_assessments.py
new file mode 100644
index 00000000..b6f7dd73
--- /dev/null
+++ b/backend/app/db/models/energy_assessments.py
@@ -0,0 +1,121 @@
+from sqlalchemy import Column, Integer, BigInteger, Text, Float, DateTime, Boolean
+from sqlalchemy.ext.declarative import declarative_base
+
+Base = declarative_base()
+
+
+class EnergyAssessment(Base):
+    __tablename__ = 'energy_assessments'
+    id = Column(BigInteger, primary_key=True, autoincrement=True)
+    uprn = Column(BigInteger, nullable=False)
+    uprn_source = Column(Text, nullable=False)
+    property_type = Column(Text, nullable=False)
+    building_reference_number = Column(Text)
+    current_energy_efficiency = Column(Text, nullable=False)
+    current_energy_rating = Column(Text, nullable=False)
+    address1 = Column(Text, nullable=False)
+    address2 = Column(Text, nullable=False)
+    address3 = Column(Text)
+    posttown = Column(Text, nullable=False)
+    postcode = Column(Text, nullable=False)
+    address = Column(Text, nullable=False)
+    county = Column(Text)
+    constituency = Column(Text)
+    constituency_label = Column(Text)
+    low_energy_fixed_light_count = Column(Text, nullable=False)
+    construction_age_band = Column(Text, nullable=False)
+    mainheat_energy_eff = Column(Text, nullable=False)
+    windows_env_eff = Column(Text, nullable=False)
+    lighting_energy_eff = Column(Text, nullable=False)
+    environment_impact_potential = Column(Text, nullable=False)
+    mainheatcont_description = Column(Text, nullable=False)
+    sheating_energy_eff = Column(Text, nullable=False)
+    local_authority = Column(Text, nullable=False)
+    local_authority_label = Column(Text, nullable=False)
+    fixed_lighting_outlets_count = Column(Text, nullable=False)
+    energy_tariff = Column(Text, nullable=False)
+    mechanical_ventilation = Column(Text, nullable=False)
+    solar_water_heating_flag = Column(Text, nullable=False)
+    co2_emissions_potential = Column(Text, nullable=False)
+    number_heated_rooms = Column(Text, nullable=False)
+    floor_description = Column(Text, nullable=False)
+    energy_consumption_potential = Column(Text, nullable=False)
+    built_form = Column(Text, nullable=False)
+    number_open_fireplaces = Column(Text, nullable=False)
+    windows_description = Column(Text, nullable=False)
+    glazed_area = Column(Text, nullable=False)
+    inspection_date = Column(DateTime(timezone=True), nullable=False)
+    mains_gas_flag = Column(Text, nullable=False)
+    co2_emiss_curr_per_floor_area = Column(Text, nullable=False)
+    heat_loss_corridor = Column(Text, nullable=False)
+    unheated_corridor_length = Column(Text)
+    flat_storey_count = Column(Text)
+    roof_energy_eff = Column(Text, nullable=False)
+    total_floor_area = Column(Text, nullable=False)
+    environment_impact_current = Column(Text, nullable=False)
+    roof_description = Column(Text, nullable=False)
+    floor_energy_eff = Column(Text, nullable=False)
+    number_habitable_rooms = Column(Text, nullable=False)
+    hot_water_env_eff = Column(Text, nullable=False)
+    mainheatc_energy_eff = Column(Text, nullable=False)
+    main_fuel = Column(Text, nullable=False)
+    lighting_env_eff = Column(Text, nullable=False)
+    windows_energy_eff = Column(Text, nullable=False)
+    floor_env_eff = Column(Text, nullable=False)
+    sheating_env_eff = Column(Text, nullable=False)
+    lighting_description = Column(Text, nullable=False)
+    roof_env_eff = Column(Text, nullable=False)
+    walls_energy_eff = Column(Text, nullable=False)
+    photo_supply = Column(Text, nullable=False)
+    lighting_cost_potential = Column(Text, nullable=False)
+    mainheat_env_eff = Column(Text, nullable=False)
+    multi_glaze_proportion = Column(Text, nullable=False)
+    main_heating_controls = Column(Text, nullable=False)
+    flat_top_storey = Column(Text)
+    secondheat_description = Column(Text, nullable=False)
+    walls_env_eff = Column(Text, nullable=False)
+    transaction_type = Column(Text, nullable=False)
+    extension_count = Column(Text, nullable=False)
+    mainheatc_env_eff = Column(Text, nullable=False)
+    lmk_key = Column(Text)
+    wind_turbine_count = Column(Text, nullable=False)
+    tenure = Column(Text, nullable=False)
+    floor_level = Column(Text, nullable=False)
+    potential_energy_efficiency = Column(Text, nullable=False)
+    potential_energy_rating = Column(Text, nullable=False)
+    hot_water_energy_eff = Column(Text, nullable=False)
+    low_energy_lighting = Column(Text, nullable=False)
+    walls_description = Column(Text, nullable=False)
+    hotwater_description = Column(Text, nullable=False)
+    co2_emissions_current = Column(Text, nullable=False)
+    heating_cost_current = Column(Text, nullable=False)
+    heating_cost_potential = Column(Text, nullable=False)
+    hot_water_cost_current = Column(Text, nullable=False)
+    hot_water_cost_potential = Column(Text, nullable=False)
+    lighting_cost_current = Column(Text, nullable=False)
+    energy_consumption_current = Column(Text, nullable=False)
+    lodgement_date = Column(DateTime(timezone=True), nullable=False)
+    lodgement_datetime = Column(DateTime(timezone=True), nullable=False)
+    mainheat_description = Column(Text, nullable=False)
+    floor_height = Column(Float, nullable=False)
+    glazed_type = Column(Text, nullable=False)
+    file_location = Column(Text, nullable=False)
+    surveyor_name = Column(Text, nullable=False)
+    surveyor_company = Column(Text, nullable=False)
+    space_heating_kwh = Column(Text, nullable=False)
+    water_heating_kwh = Column(Text, nullable=False)
+    number_of_doors = Column(Integer, nullable=False)
+    number_of_insulated_doors = Column(Integer, nullable=False)
+    number_of_floors = Column(Integer, nullable=False)
+    insulation_wall_area = Column(Float, nullable=False)
+    heat_loss_perimeter = Column(Float, nullable=False)
+    party_wall_length = Column(Float, nullable=False)
+    perimeter = Column(Float, nullable=False)
+    rooms_with_bath_and_or_shower = Column(Integer)
+    rooms_with_mixer_shower_no_bath = Column(Integer)
+    room_with_bath_and_mixer_shower = Column(Integer)
+    percent_draftproofed = Column(Integer)
+    has_hot_water_cylinder = Column(Boolean)
+    cylinder_insulation_type = Column(Text)
+    cylinder_insulation_thickness = Column(Integer)
+    cylinder_thermostat = Column(Boolean)
diff --git a/etl/xml_survey_extraction/XmlParser.py b/etl/xml_survey_extraction/XmlParser.py
index 76fa5612..3f277bad 100644
--- a/etl/xml_survey_extraction/XmlParser.py
+++ b/etl/xml_survey_extraction/XmlParser.py
@@ -367,13 +367,13 @@ class XmlParser:
             "heat_loss_perimeter": self.heat_loss_perimeter,
             "party_wall_length": self.party_wall_length,
             "perimeter": self.perimeter,
-            "rooms_with_bath_and_or_shower": self.get_node_value('Rooms-With-Bath-And-Or-Shower'),
-            "rooms_with_mixer_shower_no_bath": self.get_node_value('Rooms-With-Mixer-Shower-No-Bath'),
-            "room_with_bath_and_mixer_shower": self.get_node_value('Rooms-With-Bath-And-Mixer-Shower'),
-            "percent_draftproofed": self.get_node_value('Percent-Draughtproofed'),
+            "rooms_with_bath_and_or_shower": int(self.get_node_value('Rooms-With-Bath-And-Or-Shower')),
+            "rooms_with_mixer_shower_no_bath": int(self.get_node_value('Rooms-With-Mixer-Shower-No-Bath')),
+            "room_with_bath_and_mixer_shower": int(self.get_node_value('Rooms-With-Bath-And-Mixer-Shower')),
+            "percent_draftproofed": int(self.get_node_value('Percent-Draughtproofed')),
             "has_hot_water_cylinder": boolean_lookup[self.get_node_value('Has-Hot-Water-Cylinder')],
             "cylinder_insulation_type": cylinder_insulation_type[self.get_node_value('Cylinder-Insulation-Type')],
-            "cylinder_insulation_thickness": self.get_node_value('Cylinder-Insulation-Thickness'),
+            "cylinder_insulation_thickness": int(self.get_node_value('Cylinder-Insulation-Thickness')),
             "cylinder_thermostat": boolean_lookup[self.get_node_value('Cylinder-Thermostat')],
         }
 
diff --git a/etl/xml_survey_extraction/app.py b/etl/xml_survey_extraction/app.py
index 92048a68..3e41b5fb 100644
--- a/etl/xml_survey_extraction/app.py
+++ b/etl/xml_survey_extraction/app.py
@@ -56,6 +56,8 @@ def main():
             )
             xml_parser.run()
             logger.info(f"Extracted data from {xml}")
+            extracted_epc = xml_parser.epc
+            extracted_additional_data = xml_parser.additional_data
 
     # TODO: Set a portfolio ID, Target and Automatically upload the asset list and create the event for the portfolio
 

From 4192ee7d690b1a74a5e1a3e361abe5c08a48bc43 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 25 Jul 2024 18:43:14 +0100
Subject: [PATCH 013/182] putting together data upload to db

---
 etl/xml_survey_extraction/app.py | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/etl/xml_survey_extraction/app.py b/etl/xml_survey_extraction/app.py
index 3e41b5fb..c6e16e3b 100644
--- a/etl/xml_survey_extraction/app.py
+++ b/etl/xml_survey_extraction/app.py
@@ -1,5 +1,5 @@
-import pandas as pd
-
+from sqlalchemy.orm import sessionmaker
+from backend.app.db.connection import db_engine
 from utils.s3 import read_from_s3, list_files_and_subfolders_in_s3_folder, list_xmls_in_s3_folder
 from utils.logger import setup_logger
 from etl.xml_survey_extraction.XmlParser import XmlParser
@@ -43,6 +43,7 @@ def main():
     logger.info(f"Exatracted XMLS for the energy assessments")
 
     # For each property, we download the xmls and extract the data
+    database_data = []
     for uprn, xmls in assessments_map.items():
         extracted_data = {}
         for xml in xmls:
@@ -59,6 +60,16 @@ def main():
             extracted_epc = xml_parser.epc
             extracted_additional_data = xml_parser.additional_data
 
+            data_to_update = {
+                **extracted_epc, **extracted_additional_data
+            }
+            extracted_data.update(data_to_update)
+
+        database_data.append(extracted_data)
+
+    logger.info("Uploading data to the database")
+    session = sessionmaker(bind=db_engine)()
+
     # TODO: Set a portfolio ID, Target and Automatically upload the asset list and create the event for the portfolio
 
     # TODO: In order to get the full data associated to the heating system, we need to download and parse the pcdb which

From 78066563cea4b328952707fe3d60aba367ef88db Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 25 Jul 2024 18:47:43 +0100
Subject: [PATCH 014/182] Added missing orientations

---
 etl/xml_survey_extraction/XmlParser.py | 8 ++++++--
 etl/xml_survey_extraction/app.py       | 3 +++
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/etl/xml_survey_extraction/XmlParser.py b/etl/xml_survey_extraction/XmlParser.py
index 3f277bad..c65173dd 100644
--- a/etl/xml_survey_extraction/XmlParser.py
+++ b/etl/xml_survey_extraction/XmlParser.py
@@ -650,10 +650,14 @@ class XmlParser:
         }
 
         orientation_lookup = {
-            "3": "East",
-            "5": "South",
             "1": "North",
+            "2": "North East",
+            "3": "East",
+            "4": "South East",
+            "5": "South",
+            "6": "South West",
             "7": "West",
+            "8": "North West"
         }
 
         self.windows = [
diff --git a/etl/xml_survey_extraction/app.py b/etl/xml_survey_extraction/app.py
index c6e16e3b..6fe02e2d 100644
--- a/etl/xml_survey_extraction/app.py
+++ b/etl/xml_survey_extraction/app.py
@@ -42,6 +42,9 @@ def main():
 
     logger.info(f"Exatracted XMLS for the energy assessments")
 
+    # TODO: If we have many uploads, we can do them in batches so we don't try to upload huge amounts of data to
+    #       the database at once
+
     # For each property, we download the xmls and extract the data
     database_data = []
     for uprn, xmls in assessments_map.items():

From 81a77b26af003bc3fb94b619470f661125ce3329 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 25 Jul 2024 18:49:29 +0100
Subject: [PATCH 015/182] Updated tariff map

---
 etl/xml_survey_extraction/XmlParser.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/etl/xml_survey_extraction/XmlParser.py b/etl/xml_survey_extraction/XmlParser.py
index c65173dd..478891bf 100644
--- a/etl/xml_survey_extraction/XmlParser.py
+++ b/etl/xml_survey_extraction/XmlParser.py
@@ -106,6 +106,7 @@ class XmlParser:
     }
 
     TARIFF_MAP = {
+        "1": "Dual",
         "2": "Single"
     }
 

From b60112d75b2362a0bed394bba215d486c8fe9a9c Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 25 Jul 2024 19:03:57 +0100
Subject: [PATCH 016/182] setting up push to db

---
 .../functions/energy_assessment_functions.py  | 27 +++++++++++++
 etl/xml_survey_extraction/app.py              | 38 +++++++++++++++++--
 2 files changed, 62 insertions(+), 3 deletions(-)
 create mode 100644 backend/app/db/functions/energy_assessment_functions.py

diff --git a/backend/app/db/functions/energy_assessment_functions.py b/backend/app/db/functions/energy_assessment_functions.py
new file mode 100644
index 00000000..8befe903
--- /dev/null
+++ b/backend/app/db/functions/energy_assessment_functions.py
@@ -0,0 +1,27 @@
+from backend.app.db.models.energy_assessments import EnergyAssessment
+from sqlalchemy.orm import Session
+from sqlalchemy.exc import IntegrityError
+
+
+def bulk_insert_energy_assessments(session: Session, data_list):
+    """
+    This function inserts multiple energy assessment records into the database.
+
+    :param session: The database session
+    :param data_list: A list of dictionaries containing energy assessment data.
+    """
+
+    try:
+        # Map dictionaries to EnergyAssessment instances
+        assessments = [EnergyAssessment(**data) for data in data_list]
+
+        # Add all instances to the session
+        session.add_all(assessments)
+        # Commit the transaction
+        session.commit()
+        print("All records inserted successfully.")
+
+    except IntegrityError as e:
+        # Rollback the session in case of error
+        session.rollback()
+        print(f"Error occurred: {e}")
diff --git a/etl/xml_survey_extraction/app.py b/etl/xml_survey_extraction/app.py
index 6fe02e2d..eea030e5 100644
--- a/etl/xml_survey_extraction/app.py
+++ b/etl/xml_survey_extraction/app.py
@@ -1,9 +1,11 @@
+from backend.app.db.functions.energy_assessment_functions import bulk_insert_energy_assessments
 from sqlalchemy.orm import sessionmaker
 from backend.app.db.connection import db_engine
 from utils.s3 import read_from_s3, list_files_and_subfolders_in_s3_folder, list_xmls_in_s3_folder
 from utils.logger import setup_logger
 from etl.xml_survey_extraction.XmlParser import XmlParser
 import os
+import pandas as pd
 from io import BytesIO
 
 logger = setup_logger()
@@ -11,7 +13,8 @@ logger = setup_logger()
 SURVEYORS = "JAFFERSONS ENERGY CONSULTANTS"
 PROJECT_CODE = "VDE001"
 BUCKET = "retrofit-energy-assessments-dev"
-PORTFOLIO_ID = None
+PORTFOLIO_ID = 86
+USER_ID = 8
 
 
 def main():
@@ -59,7 +62,8 @@ def main():
                 surveyor_company=SURVEYORS,
             )
             xml_parser.run()
-            logger.info(f"Extracted data from {xml}")
+            if xml_parser.is_lig:
+                logger.info(f"Extracted data from {xml}")
             extracted_epc = xml_parser.epc
             extracted_additional_data = xml_parser.additional_data
 
@@ -72,8 +76,36 @@ def main():
 
     logger.info("Uploading data to the database")
     session = sessionmaker(bind=db_engine)()
+    bulk_insert_energy_assessments(session, database_data)
+    session.close()
 
-    # TODO: Set a portfolio ID, Target and Automatically upload the asset list and create the event for the portfolio
+    # Create the asset list
+    asset_list = [
+        {"uprn": x["uprn"], "address": x["address1"], "postcode": x["postcode"]} for x in database_data
+    ]
+    asset_list = pd.DataFrame(asset_list)
+
+    # Store the asset list in s3
+    filename = f"{USER_ID}/{PORTFOLIO_ID}/non_intrusives.csv"
+    save_csv_to_s3(
+        dataframe=asset_list,
+        bucket_name="retrofit-plan-inputs-dev",
+        file_name=filename
+    )
+
+    body = {
+        "portfolio_id": str(PORTFOLIO_ID),
+        "housing_type": "Private",
+        "goal": "Increase EPC",
+        "goal_value": "A",
+        "trigger_file_path": filename,
+        "already_installed_file_path": "",
+        "patches_file_path": "",
+        "non_invasive_recommendations_file_path": "",
+        "exclusions": "",
+        "budget": None,
+    }
+    print(body)
 
     # TODO: In order to get the full data associated to the heating system, we need to download and parse the pcdb which
     #       can be found here: https://www.ncm-pcdb.org.uk/pcdb/pcdb10.dat

From c90c6d860b668f4d1960e4380ec170be1b95ddb1 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 25 Jul 2024 23:56:36 +0100
Subject: [PATCH 017/182] starting looking at sfr

---
 .../functions/energy_assessment_functions.py  | 30 ++++++--
 etl/bill_savings/data_collection.py           |  4 +-
 etl/customers/goldman/property_ownership.py   | 75 +++++++++++++++----
 etl/xml_survey_extraction/XmlParser.py        |  2 +
 etl/xml_survey_extraction/app.py              |  7 +-
 5 files changed, 94 insertions(+), 24 deletions(-)

diff --git a/backend/app/db/functions/energy_assessment_functions.py b/backend/app/db/functions/energy_assessment_functions.py
index 8befe903..0970b71f 100644
--- a/backend/app/db/functions/energy_assessment_functions.py
+++ b/backend/app/db/functions/energy_assessment_functions.py
@@ -5,21 +5,35 @@ from sqlalchemy.exc import IntegrityError
 
 def bulk_insert_energy_assessments(session: Session, data_list):
     """
-    This function inserts multiple energy assessment records into the database.
+    This function inserts or updates multiple energy assessment records into the database.
 
-    :param session: The database session
+    :param session: The SQLAlchemy session.
     :param data_list: A list of dictionaries containing energy assessment data.
     """
-
     try:
-        # Map dictionaries to EnergyAssessment instances
-        assessments = [EnergyAssessment(**data) for data in data_list]
+        for data in data_list:
+            uprn = data.get('uprn')
+            lodgement_date = data.get('lodgement_date')
+
+            # Check if a record with the same uprn and lodgement_date exists
+            existing_record = session.query(EnergyAssessment).filter_by(
+                uprn=uprn,
+                lodgement_date=lodgement_date
+            ).first()
+
+            if existing_record:
+                # Update the existing record with new data
+                for key, value in data.items():
+                    setattr(existing_record, key, value)
+                session.add(existing_record)
+            else:
+                # Insert a new record
+                new_assessment = EnergyAssessment(**data)
+                session.add(new_assessment)
 
-        # Add all instances to the session
-        session.add_all(assessments)
         # Commit the transaction
         session.commit()
-        print("All records inserted successfully.")
+        print("All records inserted or updated successfully.")
 
     except IntegrityError as e:
         # Rollback the session in case of error
diff --git a/etl/bill_savings/data_collection.py b/etl/bill_savings/data_collection.py
index 6095741f..e6f6de6f 100644
--- a/etl/bill_savings/data_collection.py
+++ b/etl/bill_savings/data_collection.py
@@ -133,8 +133,8 @@ def app():
     energy_consumption_data = []
     for i, directory in tqdm(enumerate(epc_directories), total=len(epc_directories)):
         # Skip the first 50
-        # if i < 344:
-        #     continue
+        if i < 57:
+            continue
 
         data = pd.read_csv(directory / "certificates.csv", low_memory=False)
         # Rename the columns to the same format as the api returns
diff --git a/etl/customers/goldman/property_ownership.py b/etl/customers/goldman/property_ownership.py
index 500963a1..1b1cf014 100644
--- a/etl/customers/goldman/property_ownership.py
+++ b/etl/customers/goldman/property_ownership.py
@@ -73,7 +73,7 @@ def find_f_g_properties(paths):
         epc_data["UPRN"] = epc_data["UPRN"].astype(int).astype(str)
 
         # Get the newest EPC for each UPRN. We use LODGEMENT_DATE as a proxy for this
-        epc_data["LODGEMENT_DATETIME"] = pd.to_datetime(epc_data["LODGEMENT_DATETIME"], format='mixed')
+        epc_data["LODGEMENT_DATETIME"] = pd.to_datetime(epc_data["LODGEMENT_DATETIME"], format='mixed', errors="coerce")
 
         epc_data = epc_data.sort_values("LODGEMENT_DATETIME", ascending=False).drop_duplicates("UPRN")
 
@@ -84,7 +84,7 @@ def find_f_g_properties(paths):
     data = pd.concat(data)
 
     # Save as an excel
-    data.to_excel("EPC F & G Properties.xlsx", index=False)
+    data.to_excel("EPC F & G Properties - V2.xlsx", index=False)
 
 
 def remove_text_in_brackets(address: str) -> str:
@@ -196,7 +196,7 @@ def remove_duplicate_matches(matching_lookup, properties, company_ownership):
             matches_to_drop[["UPRN", "Title Number"]].copy()
         )
 
-    to_drop = pd.concat(to_drop)
+    to_drop = pd.concat(to_drop) if to_drop else pd.DataFrame()
 
     if not to_drop.empty:
         merged = pd.merge(matching_lookup, to_drop, on=['UPRN', 'Title Number'], how='left', indicator=True)
@@ -245,6 +245,44 @@ def remove_duplicate_uprn_matches(matching_lookup, properties, company_ownership
     return matching_lookup
 
 
+def filter_land_registry(properties):
+    column_names = [
+        "transaction_id",
+        "price",
+        "date_of_transfer",
+        "postcode",
+        "property_type",
+        "old_new",
+        "duration",
+        "paon",
+        "saon",
+        "street",
+        "locality",
+        "town_city",
+        "district",
+        "county",
+        "ppd_category_type",
+        "record_status",
+    ]
+    land_registry = pd.read_csv("/Users/khalimconn-kowlessar/Downloads/pp-complete.csv", header=None)
+    land_registry.columns = column_names
+    land_registry = land_registry[
+        land_registry["postcode"].str.lower().isin(properties["POSTCODE"].str.lower().unique())
+    ]
+    land_registry["date_of_transfer"] = pd.to_datetime(
+        land_registry["date_of_transfer"], format="%Y-%m-%d", errors="coerce"
+    )
+    # Take data from the last 5 years
+    land_registry = land_registry[
+        (land_registry["date_of_transfer"] >= "2019-01-01")
+    ]
+
+    # Save the filtered data
+    land_registry.to_csv(
+        "/Users/khalimconn-kowlessar/Downloads/land_registry_prices_paid_filtered.csv", index=False
+    )
+
+
 def app():
     """
     This script is for scoping property ownership for EPC F & G rated properties in Birmingam, for Goldman Sachs
@@ -293,17 +331,22 @@ def app():
     # paths = list(set(paths))
     # find_f_g_properties(paths)
 
-    properties = pd.read_excel("EPC F & G Properties.xlsx")
-    company_ownership = pd.read_csv("/Users/khalimconn-kowlessar/Downloads/CCOD_FULL_2024_04.csv")
+    properties = pd.read_excel("EPC F & G Properties - V2.xlsx")
+    # filter_land_registry(properties)
+    company_ownership = pd.read_csv("/Users/khalimconn-kowlessar/Downloads/CCOD_FULL_2024_07.csv")
     company_ownership["is_overseas"] = False
-    overseas_company_ownership = pd.read_csv("/Users/khalimconn-kowlessar/Downloads/OCOD_FULL_2024_04 2.csv")
+    overseas_company_ownership = pd.read_csv("/Users/khalimconn-kowlessar/Downloads/OCOD_FULL_2024_07.csv")
     overseas_company_ownership["is_overseas"] = True
 
     company_ownership = pd.concat([company_ownership, overseas_company_ownership])
 
     # FIlter on relevant postcodes
     company_ownership = company_ownership[
-        company_ownership["Postcode"].str.lower().isin(properties["POSTCODE"].str.lower().unique())]
+        company_ownership["Postcode"].str.lower().isin(properties["POSTCODE"].str.lower().unique())
+    ]
+
+    # Read in land registry
+    land_registry = pd.read_csv("/Users/khalimconn-kowlessar/Downloads/land_registry_prices_paid_filtered.csv")
 
     # Now we filter properties the other way around
     properties = properties[properties["POSTCODE"].str.lower().isin(company_ownership["Postcode"].str.lower().unique())]
@@ -316,6 +359,8 @@ def app():
     # Take the newest UPRN
     properties = properties.sort_values("LODGEMENT_DATE", ascending=False).drop_duplicates("UPRN")
 
+    # TODO: Do we want to filter properties based on lodgement dates?
+
     # Remove entries where the address begins with the term "land adjoining", or other records that don't reference the
     # the property itself
     starting_terms = [
@@ -414,8 +459,8 @@ def app():
 
     freehold_matching_lookup = pd.DataFrame(freehold_matching_lookup)
     leasehold_matching_lookup = pd.DataFrame(leasehold_matching_lookup)
-    shared_leasehold_match = pd.concat(shared_leasehold_match)
-    shared_freehold_match = pd.concat(shared_freehold_match)
+    # shared_leasehold_match = pd.concat(shared_leasehold_match)
+    # shared_freehold_match = pd.concat(shared_freehold_match)
 
     # freehold_matching_lookup.to_excel("freehold_matching_lookup_new.xlsx")
     # leasehold_matching_lookup.to_excel("leasehold_matching_lookup_new.xlsx")
@@ -429,7 +474,9 @@ def app():
     # Combine
     combined_matching_lookup = pd.concat([freehold_matching_lookup, leasehold_matching_lookup])
     # Remove duplicates
-    combined_matching_lookup = remove_duplicate_matches(combined_matching_lookup, properties, company_ownership)
+    combined_matching_lookup = remove_duplicate_matches(
+        matching_lookup=combined_matching_lookup, properties=properties, company_ownership=company_ownership
+    )
     # We also have duplicates at a UPRN level
     combined_matching_lookup = remove_duplicate_uprn_matches(combined_matching_lookup, properties, company_ownership)
 
@@ -457,11 +504,13 @@ def app():
     # leasehold_matching_lookup = pd.read_excel("leasehold_matching_lookup.xlsx")
     # shared_leasehold_match = pd.read_excel("shared_leasehold_match.xlsx")
 
-    freehold_aggregate = aggregate_matches(freehold_matching_lookup, company_ownership, properties)
-    leasehold_aggregate = aggregate_matches(leasehold_matching_lookup, company_ownership, properties)
+    # freehold_aggregate = aggregate_matches(freehold_matching_lookup, company_ownership, properties)
+    # leasehold_aggregate = aggregate_matches(leasehold_matching_lookup, company_ownership, properties)
 
     combined_aggregate = aggregate_matches(
-        combined_matching_lookup, company_ownership, properties
+        matching_lookup=combined_matching_lookup,
+        company_ownership=company_ownership,
+        properties=properties
     )
 
     investment_20m = combined_aggregate[combined_aggregate["cumulative_value"] <= 20_500_000]
diff --git a/etl/xml_survey_extraction/XmlParser.py b/etl/xml_survey_extraction/XmlParser.py
index 478891bf..90a51ae6 100644
--- a/etl/xml_survey_extraction/XmlParser.py
+++ b/etl/xml_survey_extraction/XmlParser.py
@@ -546,6 +546,8 @@ class XmlParser:
         county = property_tag.getElementsByTagName("County")
         if county:
             county = county[0].firstChild.nodeValue
+        else:
+            county = ""
 
         # Seems to be unavailable in the xml
         constituency = None
diff --git a/etl/xml_survey_extraction/app.py b/etl/xml_survey_extraction/app.py
index eea030e5..0cb95332 100644
--- a/etl/xml_survey_extraction/app.py
+++ b/etl/xml_survey_extraction/app.py
@@ -1,7 +1,7 @@
 from backend.app.db.functions.energy_assessment_functions import bulk_insert_energy_assessments
 from sqlalchemy.orm import sessionmaker
 from backend.app.db.connection import db_engine
-from utils.s3 import read_from_s3, list_files_and_subfolders_in_s3_folder, list_xmls_in_s3_folder
+from utils.s3 import read_from_s3, list_files_and_subfolders_in_s3_folder, list_xmls_in_s3_folder, save_csv_to_s3
 from utils.logger import setup_logger
 from etl.xml_survey_extraction.XmlParser import XmlParser
 import os
@@ -70,6 +70,11 @@ def main():
             data_to_update = {
                 **extracted_epc, **extracted_additional_data
             }
+
+            # We need to update the keys to match the database schema - i.e. we should replace all hyphens with
+            # underscores
+            data_to_update = {k.replace("-", "_"): v for k, v in data_to_update.items()}
+
             extracted_data.update(data_to_update)
 
         database_data.append(extracted_data)

From b42d2c7750af60a2f869da9134f18fc6302bdf57 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 26 Jul 2024 14:03:24 +0100
Subject: [PATCH 018/182] Added serialization method

---
 .../functions/energy_assessment_functions.py  | 26 ++++++++++++++++---
 backend/app/db/models/energy_assessments.py   |  6 +++++
 backend/app/plan/router.py                    |  6 +++++
 etl/xml_survey_extraction/app.py              |  2 +-
 4 files changed, 36 insertions(+), 4 deletions(-)

diff --git a/backend/app/db/functions/energy_assessment_functions.py b/backend/app/db/functions/energy_assessment_functions.py
index 0970b71f..2d6a611e 100644
--- a/backend/app/db/functions/energy_assessment_functions.py
+++ b/backend/app/db/functions/energy_assessment_functions.py
@@ -1,6 +1,8 @@
 from backend.app.db.models.energy_assessments import EnergyAssessment
 from sqlalchemy.orm import Session
 from sqlalchemy.exc import IntegrityError
+from typing import Optional
+from sqlalchemy import desc
 
 
 def bulk_insert_energy_assessments(session: Session, data_list):
@@ -13,12 +15,12 @@ def bulk_insert_energy_assessments(session: Session, data_list):
     try:
         for data in data_list:
             uprn = data.get('uprn')
-            lodgement_date = data.get('lodgement_date')
+            inspection_date = data.get('inspection_date')
 
-            # Check if a record with the same uprn and lodgement_date exists
+            # Check if a record with the same uprn and inspection_date exists
             existing_record = session.query(EnergyAssessment).filter_by(
                 uprn=uprn,
-                lodgement_date=lodgement_date
+                inspection_date=inspection_date
             ).first()
 
             if existing_record:
@@ -39,3 +41,21 @@ def bulk_insert_energy_assessments(session: Session, data_list):
         # Rollback the session in case of error
         session.rollback()
         print(f"Error occurred: {e}")
+
+
+def get_latest_assessment_by_uprn(session: Session, uprn: int) -> Optional[EnergyAssessment]:
+    """
+    Retrieve the latest energy assessment for a given UPRN based on the inspection date.
+
+    :param session: The database session
+    :param uprn: The unique property reference number
+    :return: The latest EnergyAssessment object or None if not found
+    """
+    try:
+        # Query the EnergyAssessment model, filter by uprn, order by inspection_date in descending order
+        latest_assessment = session.query(EnergyAssessment).filter_by(uprn=uprn).order_by(
+            desc(EnergyAssessment.inspection_date)).first()
+        return latest_assessment.to_dict() if latest_assessment else {}
+    except Exception as e:
+        print(f"An error occurred: {e}")
+        return None
diff --git a/backend/app/db/models/energy_assessments.py b/backend/app/db/models/energy_assessments.py
index b6f7dd73..86230c00 100644
--- a/backend/app/db/models/energy_assessments.py
+++ b/backend/app/db/models/energy_assessments.py
@@ -119,3 +119,9 @@ class EnergyAssessment(Base):
     cylinder_insulation_type = Column(Text)
     cylinder_insulation_thickness = Column(Integer)
     cylinder_thermostat = Column(Boolean)
+
+    def to_dict(self):
+        """
+        Convert the SQLAlchemy object to a dictionary.
+        """
+        return {column.name: getattr(self, column.name) for column in self.__table__.columns}
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 00e73b56..c73aff7e 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -21,6 +21,7 @@ from backend.app.db.functions.property_functions import (
 from backend.app.db.functions.recommendations_functions import (
     create_plan, create_plan_recommendations, upload_recommendations
 )
+from backend.app.db.functions.energy_assessment_functions import get_latest_assessment_by_uprn
 from backend.app.db.models.portfolio import rating_lookup
 from backend.app.dependencies import validate_token
 from backend.app.plan.schemas import PlanTriggerRequest, MdsRequest
@@ -265,6 +266,7 @@ async def trigger_plan(body: PlanTriggerRequest):
 
         input_properties = []
         for config in tqdm(plan_input):
+
             # We validate each record in the file. If the record is NOT valid, we need to handle this accordingly
             uprn = config.get("uprn", None)
             if uprn:
@@ -281,6 +283,10 @@ async def trigger_plan(body: PlanTriggerRequest):
             epc_searcher.ordnance_survey_client.property_type = config.get("property_type", None)
             # For the moment, our OS API access is unavailable, so we skip and interpolate
             epc_searcher.find_property(skip_os=True)
+
+            # We check for an energy assessment we have performed on this property:
+            energy_assessment = get_latest_assessment_by_uprn(session, uprn)
+
             # Create a record in db
             property_id, is_new = create_property(
                 session, body.portfolio_id, epc_searcher.address_clean, epc_searcher.postcode_clean, epc_searcher.uprn
diff --git a/etl/xml_survey_extraction/app.py b/etl/xml_survey_extraction/app.py
index 0cb95332..beb47454 100644
--- a/etl/xml_survey_extraction/app.py
+++ b/etl/xml_survey_extraction/app.py
@@ -107,7 +107,7 @@ def main():
         "already_installed_file_path": "",
         "patches_file_path": "",
         "non_invasive_recommendations_file_path": "",
-        "exclusions": "",
+        # "exclusions": [],
         "budget": None,
     }
     print(body)

From bdd6171626e85689d430180520e84f507b6010e2 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 26 Jul 2024 15:07:23 +0100
Subject: [PATCH 019/182] Added mapping of age band

---
 .../functions/energy_assessment_functions.py  |  2 +-
 backend/app/db/models/energy_assessments.py   | 37 +++++++-
 backend/app/plan/router.py                    | 85 +++++++++++++------
 etl/xml_survey_extraction/XmlParser.py        | 23 ++++-
 4 files changed, 117 insertions(+), 30 deletions(-)

diff --git a/backend/app/db/functions/energy_assessment_functions.py b/backend/app/db/functions/energy_assessment_functions.py
index 2d6a611e..45fb2b8b 100644
--- a/backend/app/db/functions/energy_assessment_functions.py
+++ b/backend/app/db/functions/energy_assessment_functions.py
@@ -55,7 +55,7 @@ def get_latest_assessment_by_uprn(session: Session, uprn: int) -> Optional[Energ
         # Query the EnergyAssessment model, filter by uprn, order by inspection_date in descending order
         latest_assessment = session.query(EnergyAssessment).filter_by(uprn=uprn).order_by(
             desc(EnergyAssessment.inspection_date)).first()
-        return latest_assessment.to_dict() if latest_assessment else {}
+        return latest_assessment.to_dict() if latest_assessment else EnergyAssessment.empty_response()
     except Exception as e:
         print(f"An error occurred: {e}")
         return None
diff --git a/backend/app/db/models/energy_assessments.py b/backend/app/db/models/energy_assessments.py
index 86230c00..efcbc26c 100644
--- a/backend/app/db/models/energy_assessments.py
+++ b/backend/app/db/models/energy_assessments.py
@@ -120,8 +120,43 @@ class EnergyAssessment(Base):
     cylinder_insulation_thickness = Column(Integer)
     cylinder_thermostat = Column(Boolean)
 
+    EPC_KEYS = [
+        'low_energy_fixed_light_count', 'address', 'uprn_source', 'floor_height', 'heating_cost_potential',
+        'unheated_corridor_length', 'hot_water_cost_potential', 'construction_age_band', 'potential_energy_rating',
+        'mainheat_energy_eff', 'windows_env_eff', 'lighting_energy_eff', 'environment_impact_potential', 'glazed_type',
+        'heating_cost_current', 'address3', 'mainheatcont_description', 'sheating_energy_eff', 'property_type',
+        'local_authority_label', 'fixed_lighting_outlets_count', 'energy_tariff', 'mechanical_ventilation',
+        'hot_water_cost_current', 'county', 'postcode', 'solar_water_heating_flag', 'constituency',
+        'co2_emissions_potential', 'number_heated_rooms', 'floor_description', 'energy_consumption_potential',
+        'local_authority', 'built_form', 'number_open_fireplaces', 'windows_description', 'glazed_area',
+        'inspection_date', 'mains_gas_flag', 'co2_emiss_curr_per_floor_area', 'address1', 'heat_loss_corridor',
+        'flat_storey_count', 'constituency_label', 'roof_energy_eff', 'total_floor_area', 'building_reference_number',
+        'environment_impact_current', 'co2_emissions_current', 'roof_description', 'floor_energy_eff',
+        'number_habitable_rooms', 'address2', 'hot_water_env_eff', 'posttown', 'mainheatc_energy_eff', 'main_fuel',
+        'lighting_env_eff', 'windows_energy_eff', 'floor_env_eff', 'sheating_env_eff', 'lighting_description',
+        'roof_env_eff', 'walls_energy_eff', 'photo_supply', 'lighting_cost_potential', 'mainheat_env_eff',
+        'multi_glaze_proportion', 'main_heating_controls', 'lodgement_datetime', 'flat_top_storey',
+        'current_energy_rating', 'secondheat_description', 'walls_env_eff', 'transaction_type', 'uprn',
+        'current_energy_efficiency', 'energy_consumption_current', 'mainheat_description', 'lighting_cost_current',
+        'lodgement_date', 'extension_count', 'mainheatc_env_eff', 'lmk_key', 'wind_turbine_count', 'tenure',
+        'floor_level', 'potential_energy_efficiency', 'hot_water_energy_eff', 'low_energy_lighting',
+        'walls_description', 'hotwater_description'
+    ]
+
     def to_dict(self):
         """
         Convert the SQLAlchemy object to a dictionary.
         """
-        return {column.name: getattr(self, column.name) for column in self.__table__.columns}
+
+        epc = {key.replace("_", "-"): getattr(self, key) for key in self.EPC_KEYS}
+        # Get everything else
+        additional = {
+            column.name: getattr(self, column.name)
+            for column in self.__table__.columns if column.name not in self.EPC_KEYS
+        }
+
+        return {"epc": epc, "additional": additional}
+
+    @staticmethod
+    def empty_response():
+        return {"epc": {}, "additional": {}}
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index c73aff7e..175561e4 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -220,6 +220,60 @@ def extract_portfolio_aggregation_data(
     return aggregation_data
 
 
+def create_epc_records(epc_searcher: SearchEpc, energy_assessment: dict):
+    """
+    This function will set up the epc_records dictionary with the newest EPC, the full SAP EPC and the older EPCs
+    and will factor in an energy assessment that we have performed for a client.
+    :param epc_searcher: An instance of the SearchEpc class
+    :param energy_assessment: The energy assessment we have performed. If we have not performed an energy assessment,
+                              this should be an empty response as defined by the model's
+                              EnergyAssessment.empty_response() method
+    """
+
+    if not energy_assessment["epc"]:
+        return {
+            'original_epc': epc_searcher.newest_epc.copy(),
+            'full_sap_epc': epc_searcher.full_sap_epc.copy(),
+            'old_data': epc_searcher.older_epcs.copy(),
+        }
+
+    epc = energy_assessment["epc"]
+    energy_assessment_date = epc["inspection-date"].strftime("%Y-%m-%d")
+
+    # We check if the energy assessment is newer than the newest EPC
+    if pd.to_datetime(energy_assessment_date) > pd.to_datetime(epc_searcher.newest_epc["inspection-date"]):
+        # In this case, our energy assessment is newer than the EPCs available for this property
+        return {
+            "original_epc": epc,
+            "full_sap_epc": epc_searcher.full_sap_epc.copy(),
+            "old_data": epc_searcher.older_epcs.copy() + [epc_searcher.newest_epc.copy()]
+        }
+
+    # We check if the EPC we have produced is contained in the set of EPCs done for the property
+    # We do this based on inspection-date and SAP
+    epc_in_historicals = [
+        x for x in epc_searcher.older_epcs + [epc_searcher.newest_epc]
+        if x["inspection-date"] == energy_assessment_date and
+                   x["current-energy-efficiency"] == epc["current-energy-efficiency"]
+    ]
+
+    if epc_in_historicals:
+        # Then the EPC we have produced is already in the set of EPCs, and our EPC is older than the newest
+        return {
+            "original_epc": epc_searcher.newest_epc.copy(),
+            "full_sap_epc": epc_searcher.full_sap_epc.copy(),
+            "old_data": epc_searcher.older_epcs.copy()
+        }
+
+    # In this case, our EPC is older than the newest publicly available one, but is not contained in
+    # the historicals, so it can't have been lodged, so we include it in the old data
+    return {
+        'original_epc': epc_searcher.newest_epc.copy(),
+        'full_sap_epc': epc_searcher.full_sap_epc.copy(),
+        'old_data': epc_searcher.older_epcs.copy() + [epc],
+    }
+
+
 router = APIRouter(
     prefix="/plan",
     tags=["plan"],
@@ -285,7 +339,7 @@ async def trigger_plan(body: PlanTriggerRequest):
             epc_searcher.find_property(skip_os=True)
 
             # We check for an energy assessment we have performed on this property:
-            energy_assessment = get_latest_assessment_by_uprn(session, uprn)
+            energy_assessment = get_latest_assessment_by_uprn(session, uprn if uprn is not None else epc_searcher.uprn)
 
             # Create a record in db
             property_id, is_new = create_property(
@@ -302,32 +356,9 @@ async def trigger_plan(body: PlanTriggerRequest):
                 heat_demand_target=None
             )
 
-            epc_records = {
-                'original_epc': epc_searcher.newest_epc.copy(),
-                'full_sap_epc': epc_searcher.full_sap_epc.copy(),
-                'old_data': epc_searcher.older_epcs.copy(),
-            }
-
-            patch = next((
-                x for x in patches if (x["address"] == config["address"]) and (x["postcode"] == config["postcode"])
-            ), {})
-            epc_records = patch_epc(patch, epc_records)
-
-            prepared_epc = EPCRecord(
-                epc_records=epc_records,
-                run_mode="newdata",
-                cleaning_data=cleaning_data
-            )
-
-            property_already_installed = next((
-                x for x in already_installed if
-                (x["address"] == config["address"]) and (x["postcode"] == config["postcode"])
-            ), {})
-
-            property_non_invasive_recommendations = next((
-                x for x in non_invasive_recommendations if
-                (x["address"] == config["address"]) and (x["postcode"] == config["postcode"])
-            ), {})
+            # If we have an energy assessment in place, that is newer than all of the previous EPCs, we use that.
+            # Otherwise, we use the newest EPC
+            epc_records = create_epc_records(epc_searcher, energy_assessment)
 
             input_properties.append(
                 Property(
diff --git a/etl/xml_survey_extraction/XmlParser.py b/etl/xml_survey_extraction/XmlParser.py
index 90a51ae6..522cb899 100644
--- a/etl/xml_survey_extraction/XmlParser.py
+++ b/etl/xml_survey_extraction/XmlParser.py
@@ -72,6 +72,25 @@ class XmlParser:
 
     floor_dimensions = None
 
+    # The age band lookup is based on the country code
+    AGE_BAND_LOOKUP = {
+        # England & Wales
+        "EAW": {
+            "A": "England and Wales: before 1900",
+            "B": "England and Wales: 1900-1929",
+            "C": "England and Wales: 1930-1949",
+            "D": "England and Wales: 1950-1966",
+            "E": "England and Wales: 1967-1975",
+            "F": "England and Wales: 1976-1982",
+            "G": "England and Wales: 1983-1990",
+            "H": "England and Wales: 1991-1995",
+            "I": "England and Wales: 1996-2002",
+            "J": "England and Wales: 2003-2006",
+            "K": "England and Wales: 2007-2011",
+            "L": "England and Wales: 2012 onwards",
+        }
+    }
+
     RATINGS_MAP = {
         "0": "N/A",
         "1": "Very Poor",
@@ -205,7 +224,9 @@ class XmlParser:
             **self.get_sap(),
             **self.get_property_address(),
             "low-energy-fixed-light-count": self.get_node_value('Low-Energy-Fixed-Lighting-Outlets-Count'),
-            "construction-age-band": self.get_node_value('Construction-Age-Band'),
+            "construction-age-band": self.AGE_BAND_LOOKUP[
+                self.get_node_value('Country-Code')
+            ][self.get_node_value('Construction-Age-Band')],
             "mainheat-energy-eff": self.RATINGS_MAP[
                 self.get_property_summary_value('Main-Heating', 'Energy-Efficiency-Rating')
             ],

From 2c931b438367f63997760b56de3b64913727d530 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 26 Jul 2024 15:39:47 +0100
Subject: [PATCH 020/182] Updating logic for extracting heat loss perimeter and
 party walls from xml data

---
 backend/Property.py                         | 21 ++++++++++++---
 backend/app/db/models/energy_assessments.py |  6 ++---
 backend/app/plan/router.py                  | 24 ++++++++++++++++-
 etl/xml_survey_extraction/XmlParser.py      | 30 ++++++++++++---------
 etl/xml_survey_extraction/app.py            | 12 +++++++++
 5 files changed, 74 insertions(+), 19 deletions(-)

diff --git a/backend/Property.py b/backend/Property.py
index 4d5a93a7..4f508b9a 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -76,6 +76,7 @@ class Property:
         already_installed=None,
         non_invasive_recommendations=None,
         measures=None,
+        energy_assessment=None,
         **kwargs
     ):
 
@@ -178,6 +179,11 @@ class Property:
         self.recommendations_scoring_data = []
         self.simulation_epcs = {}
 
+        # This additional condition data should change how we pass kwargs to this. We should no longer need to pass
+        # kwargs to this class, but instead, we should pass the energy assessment condition data
+        self.energy_assessment_condition_data = energy_assessment["condition"]
+
+        # TODO: We keep this but only temporarily until we add bathrooms, bedrooms, building id to the condition data
         self.parse_kwargs(kwargs)
 
     @classmethod
@@ -188,6 +194,10 @@ class Property:
         :param kwargs:
         :return:
         """
+
+        # Note - none of this data is contained in an energy assessment, but we should consider how this is done
+        # as we collect more data from the energy assessment
+
         n_bathrooms = kwargs.get("n_bathrooms", None)
         if n_bathrooms not in [None, ""]:
             # We add on a small value to ensure that the number of bathrooms is rounded up, in case the value is 0.5
@@ -1034,9 +1044,14 @@ class Property:
         # TODO: These functions should work on an EPCRecord object, so that the format is more standardised.
         #       They could also be added as attributes to the EPC Record
 
-        self.perimeter = estimate_perimeter(
-            self.floor_area / self.number_of_floors,
-            self.number_of_rooms / self.number_of_floors,
+        # Many of these pieces of information are now contained in the condition data
+        condition_data = self.energy_assessment_condition_data.copy()
+
+        self.perimeter = float(self.energy_assessment_condition_data["perimeter"]) \
+            if condition_data["perimeter"] is not None \
+            else estimate_perimeter(
+            floor_area=self.floor_area / self.number_of_floors,
+            num_rooms=self.number_of_rooms / self.number_of_floors
         )
 
         self.insulation_wall_area = estimate_external_wall_area(
diff --git a/backend/app/db/models/energy_assessments.py b/backend/app/db/models/energy_assessments.py
index efcbc26c..f89cccb7 100644
--- a/backend/app/db/models/energy_assessments.py
+++ b/backend/app/db/models/energy_assessments.py
@@ -150,13 +150,13 @@ class EnergyAssessment(Base):
 
         epc = {key.replace("_", "-"): getattr(self, key) for key in self.EPC_KEYS}
         # Get everything else
-        additional = {
+        condition = {
             column.name: getattr(self, column.name)
             for column in self.__table__.columns if column.name not in self.EPC_KEYS
         }
 
-        return {"epc": epc, "additional": additional}
+        return {"epc": epc, "condition": condition}
 
     @staticmethod
     def empty_response():
-        return {"epc": {}, "additional": {}}
+        return {"epc": {}, "condition": {}}
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 175561e4..2ed19880 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -360,6 +360,27 @@ async def trigger_plan(body: PlanTriggerRequest):
             # Otherwise, we use the newest EPC
             epc_records = create_epc_records(epc_searcher, energy_assessment)
 
+            patch = next((
+                x for x in patches if (x["address"] == config["address"]) and (x["postcode"] == config["postcode"])
+            ), {})
+            epc_records = patch_epc(patch, epc_records)
+
+            prepared_epc = EPCRecord(
+                epc_records=epc_records,
+                run_mode="newdata",
+                cleaning_data=cleaning_data
+            )
+
+            property_already_installed = next((
+                x for x in already_installed if
+                (x["address"] == config["address"]) and (x["postcode"] == config["postcode"])
+            ), {})
+
+            property_non_invasive_recommendations = next((
+                x for x in non_invasive_recommendations if
+                (x["address"] == config["address"]) and (x["postcode"] == config["postcode"])
+            ), {})
+
             input_properties.append(
                 Property(
                     id=property_id,
@@ -368,7 +389,8 @@ async def trigger_plan(body: PlanTriggerRequest):
                     epc_record=prepared_epc,
                     already_installed=property_already_installed,
                     non_invasive_recommendations=property_non_invasive_recommendations,
-                    **Property.extract_kwargs(config)
+                    energy_assessment=energy_assessment,
+                    **Property.extract_kwargs(config),  # TODO: Deprecate this
                 )
             )
 
diff --git a/etl/xml_survey_extraction/XmlParser.py b/etl/xml_survey_extraction/XmlParser.py
index 522cb899..3301b0be 100644
--- a/etl/xml_survey_extraction/XmlParser.py
+++ b/etl/xml_survey_extraction/XmlParser.py
@@ -645,19 +645,25 @@ class XmlParser:
         self.number_of_floors = len(
             [f for f in self.floor_dimensions if f["building_part_identifier"] == "Main Dwelling"]
         )
-        self.heat_loss_perimeter = max(
-            [
-                float(f["heat_loss_perimeter"]) for f in self.floor_dimensions
-                if f["building_part_identifier"] == "Main Dwelling" and not f["room_roof"]
-            ]
-        )
 
-        self.party_wall_length = max(
-            [
-                float(f["party_wall_length"]) for f in self.floor_dimensions
-                if f["building_part_identifier"] == "Main Dwelling" and not f["room_roof"]
-            ]
-        )
+        # We extract the maximum heat loss perimeter, per building part
+        max_heat_loss_perimeters = {d['building_part_identifier']: max(
+            (float(x['heat_loss_perimeter']) for x in self.floor_dimensions if
+             x['building_part_identifier'] == d['building_part_identifier'] and x['heat_loss_perimeter']),
+            default=float('-inf')
+        ) for d in self.floor_dimensions}
+
+        self.heat_loss_perimeter = sum(max_heat_loss_perimeters.values())
+
+        max_party_walls = {
+            d['building_part_identifier']: max(
+                (float(x['party_wall_length']) for x in self.floor_dimensions if
+                 x['building_part_identifier'] == d['building_part_identifier'] and x['party_wall_length']),
+                default=float('-inf')
+            ) for d in self.floor_dimensions
+        }
+
+        self.party_wall_length = sum(max_party_walls.values())
 
         self.perimeter = self.heat_loss_perimeter + self.party_wall_length
 
diff --git a/etl/xml_survey_extraction/app.py b/etl/xml_survey_extraction/app.py
index beb47454..7f4e679c 100644
--- a/etl/xml_survey_extraction/app.py
+++ b/etl/xml_survey_extraction/app.py
@@ -48,6 +48,9 @@ def main():
     # TODO: IF we have many uploads, we can do them in a batch so we don't try and upload huge amounts of data to
     #       the database at onece
 
+    # TODO: We now have detailed information about primary and secondary walls, so we should use this information
+    #       in our recommendations when we have it
+
     # For each property, we download the xmls and extract the data
     database_data = []
     for uprn, xmls in assessments_map.items():
@@ -117,3 +120,12 @@ def main():
     #                          https://www.ncm-pcdb.org.uk/sap/download
     #       However retrieving this data is not a priority, so we can leave this for now as parsing the database
     #       is a non-trivial task
+
+    # TODO: The condition report contains additional data such as the number of bedrooms and the number of bathrooms
+    #       We can extract this data and store it in the database as well. We can then update our kwargs methodology
+    #       that is passed to the property class, where instead we store this additional data in our database (it could
+    #       be stored in the energy assessment table, or in a separate table) and then when we're passed additional data
+    #       we can query the database for this data and use it to update the property object, instead of storing it
+    #       in the asset list and pulling it out of the asset list
+    #       1) Bathrooms
+    #       2) Bedrooms

From beb09df342081ea358240efb3048935d39930874 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 26 Jul 2024 15:42:36 +0100
Subject: [PATCH 021/182] Making a note on the recommendations

---
 etl/xml_survey_extraction/app.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/etl/xml_survey_extraction/app.py b/etl/xml_survey_extraction/app.py
index 7f4e679c..edebbece 100644
--- a/etl/xml_survey_extraction/app.py
+++ b/etl/xml_survey_extraction/app.py
@@ -50,6 +50,11 @@ def main():
 
     # TODO: We now have detailed information about primary and secondary walls, so we should use this information
     #       in our recommendations when we have it
+    #       For example, for 77 Peryn Road, W3 7LT, the energy assessment has a main dwelling and two extensions, where
+    #       the physical dimensions and the fabric of each building is constructed in a way as if each building is
+    #       separate. We should use this information to make recommendations that are specific to each building
+    #       part, though the problem here is that while the fabric and dimensions are separate, the actual SAP, CO2, etc
+    #       figures span across the entire property.
 
     # For each property, we download the xmls and extract the data
     database_data = []

From 73b6fb2b70727532edec9d1a37e5210a27e23d8d Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 26 Jul 2024 15:54:07 +0100
Subject: [PATCH 022/182] notes on extension recommendations

---
 etl/xml_survey_extraction/XmlParser.py | 5 ++++-
 etl/xml_survey_extraction/app.py       | 3 +++
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/etl/xml_survey_extraction/XmlParser.py b/etl/xml_survey_extraction/XmlParser.py
index 3301b0be..8391314a 100644
--- a/etl/xml_survey_extraction/XmlParser.py
+++ b/etl/xml_survey_extraction/XmlParser.py
@@ -345,7 +345,10 @@ class XmlParser:
     def get_insulation_wall_area(self):
         """
         Extracts the insulation wall area for the main dwelling
-        :return:
+
+        Note that this doesn't include any extensions. We don't have recommendations for extensions right now, so we
+        don't currently calculate the insulation wall area for them, since it's not used in the recommendations.
+
         """
 
         main_dwelling_floors = [
diff --git a/etl/xml_survey_extraction/app.py b/etl/xml_survey_extraction/app.py
index edebbece..9a813216 100644
--- a/etl/xml_survey_extraction/app.py
+++ b/etl/xml_survey_extraction/app.py
@@ -55,6 +55,9 @@ def main():
     #       separate. We should use this information to make recommendations that are specific to each building
     #       part, though the problem here is that while the fabric and dimensions are separate, the actual SAP, CO2, etc
     #       figures span across the entire property.
+    #       Idea: We can collect all of this information by building part and store it separately in the database
+    #             against the uprn. We can have key data for the EPC, but then also additional data for each building
+    #             part. We can then use this data to make recommendations that are specific to each building part
 
     # For each property, we download the xmls and extract the data
     database_data = []

From 24508b2a84cbbcb33bf5f7feff5ba217d69fe3b1 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 26 Jul 2024 16:41:32 +0100
Subject: [PATCH 023/182] added condition data to router

---
 backend/Property.py                         | 56 +++++++++++++++------
 backend/app/db/models/energy_assessments.py |  3 ++
 backend/app/plan/router.py                  |  2 +
 etl/bill_savings/EnergyConsumptionModel.py  |  1 +
 etl/xml_survey_extraction/XmlParser.py      | 13 +++++
 etl/xml_survey_extraction/app.py            |  5 ++
 recommendations/WindowsRecommendations.py   |  4 ++
 7 files changed, 68 insertions(+), 16 deletions(-)

diff --git a/backend/Property.py b/backend/Property.py
index 4f508b9a..6365bb0b 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -166,6 +166,7 @@ class Property:
         )
         self.floor_level = None
         self.number_of_windows = None
+        self.windows_area = None
         self.solar_pv_percentage = None
 
         self.current_adjusted_energy = None
@@ -707,17 +708,20 @@ class Property:
         # Today's costs
         todays_heating_cost = energy_consumption_client.convert_cost_to_today(
             original_cost=float(self.data["heating-cost-current"]),
-            lodgement_date=pd.Timestamp(self.epc_record.prepared_epc["lodgement_date"])
+            lodgement_date=pd.Timestamp(self.epc_record.prepared_epc["lodgement_date"]).tz_localize(None)
         )
         todays_hot_water_cost = energy_consumption_client.convert_cost_to_today(
             original_cost=float(self.data["hot-water-cost-current"]),
-            lodgement_date=pd.Timestamp(self.epc_record.prepared_epc["lodgement_date"])
+            lodgement_date=pd.Timestamp(self.epc_record.prepared_epc["lodgement_date"]).tz_localize(None)
         )
         todays_lighting_cost = energy_consumption_client.convert_cost_to_today(
             original_cost=float(self.data["lighting-cost-current"]),
-            lodgement_date=pd.Timestamp(self.epc_record.prepared_epc["lodgement_date"])
+            lodgement_date=pd.Timestamp(self.epc_record.prepared_epc["lodgement_date"]).tz_localize(None)
         )
 
+        # If we have the kwh figures, we don't need to predict them
+        condition_data = self.energy_assessment_condition_data.copy()
+
         scoring_df = pd.DataFrame([self.epc_record.prepared_epc])
         # Change columns from underscores to hyphens
         scoring_df.columns = [
@@ -727,13 +731,20 @@ class Property:
             scoring_df[col] = None
 
         energy_consumption_client.data = None
-        heating_prediction = energy_consumption_client.score_new_data(
-            new_data=scoring_df, target="heating_kwh"
-        )[0]
 
-        hot_water_prediction = energy_consumption_client.score_new_data(
-            new_data=scoring_df, target="hot_water_kwh"
-        )[0]
+        heating_prediction = (
+            float(condition_data["space_heating_kwh"]) if condition_data["space_heating_kwh"]
+            else energy_consumption_client.score_new_data(
+                new_data=scoring_df, target="heating_kwh"
+            )[0]
+        )
+
+        hot_water_prediction = (
+            float(condition_data["water_heating_kwh"]) if condition_data["water_heating_kwh"]
+            else energy_consumption_client.score_new_data(
+                new_data=scoring_df, target="hot_water_kwh"
+            )[0]
+        )
 
         # We convert the lighting cost into kwh, just using the price cap
         lighting_kwh = float(self.data["lighting-cost-current"]) / AnnualBillSavings.ELECTRICITY_PRICE_CAP
@@ -1040,13 +1051,14 @@ class Property:
         medians across the EPC data
         :return:
         """
-
-        # TODO: These functions should work on an EPCRecord object, so that the format is more standardised.
-        #       They could also be added as attributes to the EPC Record
-
         # Many of these pieces of information are now contained in the condition data
         condition_data = self.energy_assessment_condition_data.copy()
 
+        # We can update the number of floors if we have this information in the condition data
+        self.number_of_floors = int(self.energy_assessment_condition_data["number_of_floors"]) \
+            if condition_data["number_of_floors"] is not None \
+            else self.number_of_floors
+
         self.perimeter = float(self.energy_assessment_condition_data["perimeter"]) \
             if condition_data["perimeter"] is not None \
             else estimate_perimeter(
@@ -1054,14 +1066,18 @@ class Property:
             num_rooms=self.number_of_rooms / self.number_of_floors
         )
 
-        self.insulation_wall_area = estimate_external_wall_area(
+        self.insulation_wall_area = float(self.energy_assessment_condition_data["insulation_wall_area"]) \
+            if condition_data["insulation_wall_area"] is not None \
+            else estimate_external_wall_area(
             num_floors=self.number_of_floors,
             floor_height=self.floor_height,
             perimeter=self.perimeter,
             built_form=self.data["built-form"],
         )
 
-        self.insulation_floor_area = self.floor_area / self.number_of_floors
+        self.insulation_floor_area = float(self.energy_assessment_condition_data["main_dwelling_ground_floor_area"]) \
+            if condition_data["main_dwelling_ground_floor_area"] is not None \
+            else self.floor_area / self.number_of_floors
 
         self.pitched_roof_area = esimtate_pitched_roof_area(
             floor_area=self.insulation_floor_area, floor_height=self.floor_height
@@ -1163,7 +1179,11 @@ class Property:
         :return:
         """
 
-        self.number_of_windows = estimate_windows(
+        condition_data = self.energy_assessment_condition_data.copy()
+
+        self.number_of_windows = int(condition_data["number_of_windows"]) \
+            if condition_data["number_of_windows"] is not None \
+            else estimate_windows(
             property_type=self.data["property-type"],
             built_form=self.data["built-form"],
             construction_age_band=self.construction_age_band,
@@ -1171,6 +1191,10 @@ class Property:
             number_habitable_rooms=self.number_of_rooms,
         )
 
+        self.windows_area = float(condition_data["windows_area"]) \
+            if condition_data["windows_area"] is not None \
+            else None
+
     def set_solar_panel_area(self, photo_supply_lookup, floor_area_decile_thresholds):
         """
         Sets the approximate area of the solar panels
diff --git a/backend/app/db/models/energy_assessments.py b/backend/app/db/models/energy_assessments.py
index f89cccb7..2c3cc144 100644
--- a/backend/app/db/models/energy_assessments.py
+++ b/backend/app/db/models/energy_assessments.py
@@ -119,6 +119,9 @@ class EnergyAssessment(Base):
     cylinder_insulation_type = Column(Text)
     cylinder_insulation_thickness = Column(Integer)
     cylinder_thermostat = Column(Boolean)
+    main_dwelling_ground_floor_area = Column(Float)
+    number_of_windows = Column(Integer)
+    windows_area = Column(Float)
 
     EPC_KEYS = [
         'low_energy_fixed_light_count', 'address', 'uprn_source', 'floor_height', 'heating_cost_potential',
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 2ed19880..e76d4430 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -515,6 +515,8 @@ async def trigger_plan(body: PlanTriggerRequest):
             #     )
             print("Implement me")
 
+            # TODO: We can set the pitched roof area based on the results of the solar api!
+
         logger.info("Getting components and epc recommendations")
         recommendations = {}
         recommendations_scoring_data = []
diff --git a/etl/bill_savings/EnergyConsumptionModel.py b/etl/bill_savings/EnergyConsumptionModel.py
index 9a7d6523..dfb0e574 100644
--- a/etl/bill_savings/EnergyConsumptionModel.py
+++ b/etl/bill_savings/EnergyConsumptionModel.py
@@ -102,6 +102,7 @@ class EnergyConsumptionModel:
             # We also retrieve the newest retail price comparison data which comes from Ofgem:
             # https://www.ofgem.gov.uk/energy-data-and-research/data-portal/retail-market-indicators
             # We use the detail price comparison by company and tariff type data
+            print("Reading retail price comparison - make sure this is up-to-date")
             self.read_retail_price_comparison()
 
     def read_retail_price_comparison(self):
diff --git a/etl/xml_survey_extraction/XmlParser.py b/etl/xml_survey_extraction/XmlParser.py
index 8391314a..0bc3d56b 100644
--- a/etl/xml_survey_extraction/XmlParser.py
+++ b/etl/xml_survey_extraction/XmlParser.py
@@ -366,6 +366,16 @@ class XmlParser:
 
         self.insulation_wall_area = self.get_insulation_wall_area()
 
+        # We pull this out which is used as the insulation floor area
+        main_dwelling_ground_floor_area = [
+            f for f in self.floor_dimensions if f["building_part_identifier"] == "Main Dwelling" and f["floor"] == "0"
+        ][0]["total_floor_area"]
+
+        main_dwelling_windows = [w for w in self.windows if w["window_location"] == "0"]
+
+        number_of_windows = len(main_dwelling_windows)
+        windows_area = sum([float(w["window_area"]) for w in main_dwelling_windows])
+
         boolean_lookup = {
             "true": True,
             "false": False,
@@ -400,6 +410,9 @@ class XmlParser:
             "cylinder_insulation_type": cylinder_insulation_type[self.get_node_value('Cylinder-Insulation-Type')],
             "cylinder_insulation_thickness": int(self.get_node_value('Cylinder-Insulation-Thickness')),
             "cylinder_thermostat": boolean_lookup[self.get_node_value('Cylinder-Thermostat')],
+            "main_dwelling_ground_floor_area": float(main_dwelling_ground_floor_area),
+            "number_of_windows": int(number_of_windows),
+            "windows_area": float(windows_area),
         }
 
     def get_node_value(self, tag_name):
diff --git a/etl/xml_survey_extraction/app.py b/etl/xml_survey_extraction/app.py
index 9a813216..c4f6091f 100644
--- a/etl/xml_survey_extraction/app.py
+++ b/etl/xml_survey_extraction/app.py
@@ -58,6 +58,11 @@ def main():
     #       Idea: We can collect all of this information by building part and store it separately in the database
     #             against the uprn. We can have key data for the EPC, but then also additional data for each building
     #             part. We can then use this data to make recommendations that are specific to each building part
+    #       We should probably re-think this data model, so we break up the data in a more considered fashion and produce
+    #       the underlying EPC data as a summary of the building parts. Not only do we have data against the main
+    #       dwelling and extensions, but we also have multiple windows with individual pieces of information that
+    #       we can use to make recommendations. We should store this data in a way that we can easily access it and
+    #       use it to make recommendations (e.g. we should have a Windows table)
 
     # For each property, we download the xmls and extract the data
     database_data = []
diff --git a/recommendations/WindowsRecommendations.py b/recommendations/WindowsRecommendations.py
index 29c75989..9a30cd2e 100644
--- a/recommendations/WindowsRecommendations.py
+++ b/recommendations/WindowsRecommendations.py
@@ -48,10 +48,14 @@ class WindowsRecommendations:
         is_secondary_glazing = self.property.restricted_measures or (
             self.property.windows["glazing_type"] == "secondary"
         )
+        windows_area = self.property.windows_area
 
         if not number_of_windows:
             raise ValueError("Number of windows not specified")
 
+        if windows_area is not None:
+            raise Exception("We have windows area, we should use this data for our recommendations!!!")
+
         if self.property.windows["has_glazing"] & (
             self.property.windows["glazing_coverage"] == "full"
         ):

From 971a74017e97f0699138b8712aaa3c64cbf160b6 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 26 Jul 2024 17:50:29 +0100
Subject: [PATCH 024/182] working on unit level solar api integration - need to
 make adjustments to the energy consumption

---
 backend/app/plan/router.py | 46 ++++++++++++++++++++++++++++++++------
 1 file changed, 39 insertions(+), 7 deletions(-)

diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index e76d4430..4796cd9f 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -434,9 +434,11 @@ async def trigger_plan(body: PlanTriggerRequest):
                 "longitude": p.spatial["longitude"],
                 "latitude": p.spatial["latitude"],
                 # Energy consumption is adjusted for the property's expected post retrofit state
+                # We set the target rating to EPC C, which is the typical EPC rating we would expect the
+                # property to achieve post retrofit of just the fabric
                 "energy_consumption": energy_consumption_client.estimate_new_consumption(
                     current_rating=p.data["current-energy-rating"],
-                    target_rating=body.goal_value,
+                    target_rating="C",
                     current_consumption=p.current_adjusted_energy
                 ),
                 "property_id": p.id,
@@ -507,12 +509,42 @@ async def trigger_plan(body: PlanTriggerRequest):
                         p.set_solar_panel_configuration(unit_solar_panel_configuration)
 
         else:
-            # # Model the solar potential at the property level
-            # for p in input_properties:
-            #     # TODO: Complete me! - we probably won't do this for individual flats
-            #     solar_performance = solar_api_client.get(
-            #         longitude=p.spatial["longitude"], latitude=p.spatial["latitude"]
-            #     )
+            # Model the solar potential at the property level
+            for p in input_properties:
+                # TODO: Complete me! - we probably won't do this for individual flats - IGNORE FLATS FROM THIS WITHOUT
+                #       BUILDING IDS
+
+                # if the property is already very close to an EPC C, we don't adjust the energy consumption based on
+                # expected movement to EPC C.
+                # To extend this, what we could do is adjust the based on the expected movement from the current SAP
+                # rating to the target SAP rating (ie 69C)
+                # TODO: Update this!
+                energy_consumption = energy_consumption_client.estimate_new_consumption(
+                    current_rating=p.data["current-energy-rating"],
+                    target_rating="C",
+                    current_consumption=p.current_adjusted_energy
+                )
+
+                # TODO: Should energy_consumption to adjusted to just electricity requirement?
+                # We should align our calculation of required energy consumption with expectations around decarbonising
+                # heating and hot water, so worse case we should take just the electrical consumption of the property
+                # if the property is current using gas for heating and hot water, then we should adjust the kwh demand
+                # to reflect the 200-400% efficiency of an ASHP with electrified heating, so that the solar panel can
+                # cover heating generation. While
+                # If the main fuel is electricity (not community) then we don't need to change the kwh demand, if it's
+                # gas we should adjust on the suitability of an ashp!
+
+                solar_performance = solar_api_client.get(
+                    longitude=p.spatial["longitude"],
+                    latitude=p.spatial["latitude"],
+                    energy_consumption=energy_consumption,
+                    is_building=False,
+                    session=session,
+                    uprn=p.uprn
+                )
+
+                # TODO: Insert the pitched roof area into the property class as we store the solar performance
+                #       in the property class
             print("Implement me")
 
             # TODO: We can set the pitched roof area based on the results of the solar api!

From a2a5094b01a93ef73f68e546549303ea320706c6 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sat, 27 Jul 2024 22:37:13 +0100
Subject: [PATCH 025/182] working on land registry matches

---
 etl/customers/goldman/property_ownership.py | 162 +++++++++++++++++++-
 1 file changed, 156 insertions(+), 6 deletions(-)

diff --git a/etl/customers/goldman/property_ownership.py b/etl/customers/goldman/property_ownership.py
index 1b1cf014..7958e93b 100644
--- a/etl/customers/goldman/property_ownership.py
+++ b/etl/customers/goldman/property_ownership.py
@@ -345,9 +345,6 @@ def app():
         company_ownership["Postcode"].str.lower().isin(properties["POSTCODE"].str.lower().unique())
     ]
 
-    # Read in land registry
-    land_registry = pd.read_csv("/Users/khalimconn-kowlessar/Downloads/land_registry_prices_paid_filtered.csv")
-
     # Now we filter properties the other way around
     properties = properties[properties["POSTCODE"].str.lower().isin(company_ownership["Postcode"].str.lower().unique())]
     # We end up with 7.4k entires on a postcode match, however we need to now do a direct address match
@@ -485,14 +482,167 @@ def app():
     # leasehold_matching_lookup = remove_duplicate_matches(leasehold_matching_lookup, properties, company_ownership)
 
     matched_addresses = combined_matching_lookup.merge(
-        properties[["UPRN", "ADDRESS", "CURRENT_ENERGY_EFFICIENCY", "CURRENT_ENERGY_RATING"]].rename(
-            columns={"ADDRESS": "epc_address"}),
+        properties[
+            [
+                "UPRN",
+                "ADDRESS",
+                "ADDRESS1",
+                "CURRENT_ENERGY_EFFICIENCY",
+                "CURRENT_ENERGY_RATING",
+                "POSTCODE"
+            ]
+        ].rename(
+            columns={
+                "ADDRESS": "epc_address",
+                "ADDRESS1": "epc_address1",
+                "POSTCODE": "epc_postcode"
+            }
+        ),
         how="left", on="UPRN"
     ).merge(
-        company_ownership[["Title Number", "Property Address", "Company Registration No. (1)", "Proprietor Name (1)"]],
+        company_ownership[
+            [
+                "Title Number",
+                "Property Address",
+                "Postcode",
+                "Company Registration No. (1)",
+                "Proprietor Name (1)",
+
+            ]
+        ],
         how="left", on="Title Number"
     )
 
+    # Let's try and get the house number
+    matched_addresses["house_number"] = (
+        matched_addresses["epc_address"]
+        .apply(remove_text_in_brackets)
+        .apply(SearchEpc.get_house_number)
+        .str.lower()
+        .str.replace(",", "")
+    )
+
+    # Read in land registry
+    land_registry = pd.read_csv(
+        "/Users/khalimconn-kowlessar/Downloads/land_registry_prices_paid_filtered.csv",
+    )
+
+    # We now perform a match between the land registry data and the matched address, in an attempt to find
+    # out when these properties last sold. The land registry data has been pre filtered on the postcodes in this
+    # data, and for sales within the last 5 years, to ensure the file isn't too large.
+
+    land_registry["postcode"] = land_registry["postcode"].str.lower().str.strip()
+    land_registry["street"] = land_registry["street"].str.lower().str.strip()
+    land_registry["paon"] = land_registry["paon"].str.lower().str.strip()
+    land_registry["date_of_transfer"] = pd.to_datetime(land_registry["date_of_transfer"])
+
+    def is_substring(x, match_string):
+
+        if pd.isnull(x):
+            return False
+
+        return x in match_string.lower()
+
+    def house_number_match(paon, house_number):
+        # Firstly try and convert to numberic
+        try:
+            paon_numeric = int(paon)
+            house_number_numeric = int(house_number)
+            return paon_numeric == house_number_numeric
+        except Exception as e:  # noqa
+            # If we can't convert both to numeric, we do an equality
+
+            return paon == house_number
+
+    def check_equalities(lr_filtered):
+        all_paon_equal = all(lr_filtered["paon"] == lr_filtered["paon"].values[0])
+        if pd.isnull(lr_filtered["saon"].values[0]):
+            all_saon_equal = all(pd.isnull(lr_filtered["saon"]))
+        else:
+            all_saon_equal = all(lr_filtered["saon"] == lr_filtered["saon"].values[0])
+
+        all_street_equal = all(lr_filtered["street"] == lr_filtered["street"].values[0])
+
+        return all_paon_equal, all_saon_equal, all_street_equal
+
+    land_registry_matches = []
+    for _, match in tqdm(matched_addresses.iterrows(), total=len(matched_addresses)):
+
+        # Filter land registry on the postcode
+        lr_filtered = land_registry[
+            (land_registry["postcode"] == match["epc_postcode"].lower().strip())
+        ]
+
+        # Filter further, when the street is in in the address
+        # street should be contained in epc_address
+        lr_filtered = lr_filtered[
+            lr_filtered["street"].apply(lambda x: is_substring(x, match["epc_address"].lower()))
+        ]
+
+        if lr_filtered.empty:
+            continue
+
+        # We now check if paon is in address 1
+        lr_filtered["paon_match"] = lr_filtered["paon"].apply(lambda x: house_number_match(x, match["house_number"]))
+        # We also try the secondary match
+        lr_filtered["saon_match"] = lr_filtered["saon"].apply(
+            lambda x: False if pd.isnull(x) else is_substring(x, match["epc_address1"])
+        )
+
+        # We fileter where we have a primary or secondary match
+        lr_filtered = lr_filtered[
+            lr_filtered["paon_match"] | lr_filtered["saon_match"]
+            ]
+
+        if lr_filtered.empty:
+            continue
+        elif lr_filtered.shape[0] == 1:
+            land_registry_matches.append(
+                {
+                    "transaction_id": lr_filtered['transaction_id'].values[0],
+                    "price": lr_filtered["price"].values[0],
+                    "date_of_transfer": lr_filtered["date_of_transfer"].values[0],
+                }
+            )
+            continue
+        elif lr_filtered.shape[0] > 1:
+            # We make sure all records are the same and take the newest
+            all_paon_equal, all_saon_equal, all_street_equal = check_equalities(lr_filtered)
+            has_paon_match = any(lr_filtered["paon_match"])
+
+            if all_paon_equal and all_street_equal and all_saon_equal:
+                # Take the newest record, append and continue
+                lr_filtered = lr_filtered.sort_values("date_of_transfer", ascending=False)
+                lr_filtered = lr_filtered.head(1)
+                land_registry_matches.append(
+                    {
+                        "transaction_id": lr_filtered['transaction_id'].values[0],
+                        "price": lr_filtered["price"].values[0],
+                        "date_of_transfer": lr_filtered["date_of_transfer"].values[0],
+                    }
+                )
+            elif has_paon_match and all_street_equal:
+                # Peform filter on paon
+                lr_filtered = lr_filtered[lr_filtered["paon_match"]]
+                # Do an addtiioanl equality check
+                all_paon_equal, all_saon_equal, all_street_equal = check_equalities(lr_filtered)
+                if all_paon_equal and all_street_equal and all_saon_equal:
+                    lr_filtered = lr_filtered.sort_values("date_of_transfer", ascending=False)
+                    lr_filtered = lr_filtered.head(1)
+                    land_registry_matches.append(
+                        {
+                            "transaction_id": lr_filtered['transaction_id'].values[0],
+                            "price": lr_filtered["price"].values[0],
+                            "date_of_transfer": lr_filtered["date_of_transfer"].values[0],
+                        }
+                    )
+                else:
+                    raise NotImplementedError("wtf")
+            else:
+                raise NotImplementedError("wtf")
+        else:
+            raise NotImplementedError("What happened here?")
+
     # shared_freehold_match = pd.DataFrame(shared_freehold_match)
     # Strore these files
     # freehold_matching_lookup.to_excel("freehold_matching_lookup.xlsx")

From 2174a85a8bc79bd696e1b814c81b7d609d45b680 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sun, 28 Jul 2024 15:21:05 +0100
Subject: [PATCH 026/182] adding to land registry matching logic

---
 etl/customers/goldman/property_ownership.py | 111 +++++++++++++++++---
 1 file changed, 94 insertions(+), 17 deletions(-)

diff --git a/etl/customers/goldman/property_ownership.py b/etl/customers/goldman/property_ownership.py
index 7958e93b..f1f0de38 100644
--- a/etl/customers/goldman/property_ownership.py
+++ b/etl/customers/goldman/property_ownership.py
@@ -357,6 +357,8 @@ def app():
     properties = properties.sort_values("LODGEMENT_DATE", ascending=False).drop_duplicates("UPRN")
 
     # TODO: Do we want to filter properties based on lodgement dates?
+    #       E.g. we might want to filter properties that have had a sale EPC lodged in the last x months, because
+    #       this could be indicative of a sale happening, and the land registry data may not have caught up yet
 
     # Remove entries where the address begins with the term "land adjoining", or other records that don't reference the
     # the property itself
@@ -456,13 +458,9 @@ def app():
 
     freehold_matching_lookup = pd.DataFrame(freehold_matching_lookup)
     leasehold_matching_lookup = pd.DataFrame(leasehold_matching_lookup)
-    # shared_leasehold_match = pd.concat(shared_leasehold_match)
-    # shared_freehold_match = pd.concat(shared_freehold_match)
 
-    # freehold_matching_lookup.to_excel("freehold_matching_lookup_new.xlsx")
-    # leasehold_matching_lookup.to_excel("leasehold_matching_lookup_new.xlsx")
-    # shared_leasehold_match.to_excel("shared_leasehold_match_new.xlsx")
-    # shared_freehold_match.to_excel("shared_freehold_match_new.xlsx")
+    # freehold_matching_lookup.to_excel("freehold_matching_lookup V2.xlsx")
+    # leasehold_matching_lookup.to_excel("leasehold_matching_lookup V2.xlsx")
 
     # The approximate matches aren't very good
     freehold_matching_lookup = freehold_matching_lookup[freehold_matching_lookup["match_type"] == "exact"]
@@ -477,10 +475,6 @@ def app():
     # We also have duplicates at a UPRN level
     combined_matching_lookup = remove_duplicate_uprn_matches(combined_matching_lookup, properties, company_ownership)
 
-    # There are some cases where we have duplicates
-    # freehold_matching_lookup = remove_duplicate_matches(freehold_matching_lookup, properties, company_ownership)
-    # leasehold_matching_lookup = remove_duplicate_matches(leasehold_matching_lookup, properties, company_ownership)
-
     matched_addresses = combined_matching_lookup.merge(
         properties[
             [
@@ -534,6 +528,7 @@ def app():
     land_registry["postcode"] = land_registry["postcode"].str.lower().str.strip()
     land_registry["street"] = land_registry["street"].str.lower().str.strip()
     land_registry["paon"] = land_registry["paon"].str.lower().str.strip()
+    land_registry["saon"] = land_registry["saon"].str.lower().str.strip()
     land_registry["date_of_transfer"] = pd.to_datetime(land_registry["date_of_transfer"])
 
     def is_substring(x, match_string):
@@ -576,8 +571,9 @@ def app():
         # Filter further, when the street is in in the address
         # street should be contained in epc_address
         lr_filtered = lr_filtered[
-            lr_filtered["street"].apply(lambda x: is_substring(x, match["epc_address"].lower()))
-        ]
+            lr_filtered["street"].apply(lambda x: is_substring(x, match["epc_address"].lower())) |
+            lr_filtered["street"].apply(lambda x: is_substring(x, match["Property Address"].lower()))
+            ]
 
         if lr_filtered.empty:
             continue
@@ -585,10 +581,11 @@ def app():
         # We now check if paon is in address 1
         lr_filtered["paon_match"] = lr_filtered["paon"].apply(lambda x: house_number_match(x, match["house_number"]))
         # We also try the secondary match
-        lr_filtered["saon_match"] = lr_filtered["saon"].apply(
-            lambda x: False if pd.isnull(x) else is_substring(x, match["epc_address1"])
+        lr_filtered["saon_match"] = (
+            lr_filtered["saon"].apply(
+                lambda x: False if pd.isnull(x) else is_substring(x, match["epc_address1"])
+            )
         )
-
         # We fileter where we have a primary or secondary match
         lr_filtered = lr_filtered[
             lr_filtered["paon_match"] | lr_filtered["saon_match"]
@@ -599,6 +596,7 @@ def app():
         elif lr_filtered.shape[0] == 1:
             land_registry_matches.append(
                 {
+                    "uprn": match["UPRN"],
                     "transaction_id": lr_filtered['transaction_id'].values[0],
                     "price": lr_filtered["price"].values[0],
                     "date_of_transfer": lr_filtered["date_of_transfer"].values[0],
@@ -616,11 +614,13 @@ def app():
                 lr_filtered = lr_filtered.head(1)
                 land_registry_matches.append(
                     {
+                        "uprn": match["UPRN"],
                         "transaction_id": lr_filtered['transaction_id'].values[0],
                         "price": lr_filtered["price"].values[0],
                         "date_of_transfer": lr_filtered["date_of_transfer"].values[0],
                     }
                 )
+                continue
             elif has_paon_match and all_street_equal:
                 # Peform filter on paon
                 lr_filtered = lr_filtered[lr_filtered["paon_match"]]
@@ -631,15 +631,92 @@ def app():
                     lr_filtered = lr_filtered.head(1)
                     land_registry_matches.append(
                         {
+                            "uprn": match["UPRN"],
                             "transaction_id": lr_filtered['transaction_id'].values[0],
                             "price": lr_filtered["price"].values[0],
                             "date_of_transfer": lr_filtered["date_of_transfer"].values[0],
                         }
                     )
                 else:
-                    raise NotImplementedError("wtf")
+                    # We do a match on saon
+                    lr_filtered["saon_match2"] = lr_filtered["saon"].apply(
+                        lambda x: False if pd.isnull(x) else is_substring(x, match["epc_address"])
+                    )
+
+                    lr_filtered = lr_filtered[lr_filtered["saon_match2"]]
+
+                    if lr_filtered.empty:
+                        continue
+                    elif lr_filtered.shape[0] == 1:
+                        land_registry_matches.append(
+                            {
+                                "uprn": match["UPRN"],
+                                "transaction_id": lr_filtered['transaction_id'].values[0],
+                                "price": lr_filtered["price"].values[0],
+                                "date_of_transfer": lr_filtered["date_of_transfer"].values[0],
+                            }
+                        )
+                        continue
+                    else:
+                        raise NotImplementedError("wtf")
             else:
-                raise NotImplementedError("wtf")
+                # We have a final check, based on an observed case
+                lr_address_1 = " ".join([x.lower().strip() for x in match["Property Address"].split(",")[0:2]])
+
+                lr_filtered["paon_match2"] = lr_filtered["paon"].apply(
+                    lambda x: False if pd.isnull(x) else is_substring(x, lr_address_1)
+                )
+
+                lr_filtered = lr_filtered[lr_filtered["paon_match2"]]
+
+                if lr_filtered.empty:
+                    continue
+                elif lr_filtered.shape[0] == 1:
+                    land_registry_matches.append(
+                        {
+                            "uprn": match["UPRN"],
+                            "transaction_id": lr_filtered['transaction_id'].values[0],
+                            "price": lr_filtered["price"].values[0],
+                            "date_of_transfer": lr_filtered["date_of_transfer"].values[0],
+                        }
+                    )
+                    continue
+                else:
+                    # Check all the same
+                    all_paon_equal, all_saon_equal, all_street_equal = check_equalities(lr_filtered)
+
+                    # Check saon is house number with exact match
+                    lr_filtered["saon_match2"] = lr_filtered["saon"].apply(
+                        lambda x: False if pd.isnull(x) else house_number_match(x, match["house_number"])
+                    )
+
+                    if all_paon_equal and all_saon_equal and all_street_equal:
+                        # Take the newest record
+                        lr_filtered = lr_filtered.sort_values("date_of_transfer", ascending=False)
+                        lr_filtered = lr_filtered.head(1)
+                        land_registry_matches.append(
+                            {
+                                "uprn": match["UPRN"],
+                                "transaction_id": lr_filtered['transaction_id'].values[0],
+                                "price": lr_filtered["price"].values[0],
+                                "date_of_transfer": lr_filtered["date_of_transfer"].values[0],
+                            }
+                        )
+                        continue
+                    elif any(lr_filtered["saon_match2"]):
+                        lr_filtered = lr_filtered[lr_filtered["saon_match2"]]
+                        if lr_filtered.shape[0] == 1:
+                            land_registry_matches.append(
+                                {
+                                    "uprn": match["UPRN"],
+                                    "transaction_id": lr_filtered['transaction_id'].values[0],
+                                    "price": lr_filtered["price"].values[0],
+                                    "date_of_transfer": lr_filtered["date_of_transfer"].values[0],
+                                }
+                            )
+                            continue
+
+                    raise NotImplementedError("wtf")
         else:
             raise NotImplementedError("What happened here?")
 

From b63ab89e8973b4e97ec7a8d17b37b887f4f0b270 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sun, 28 Jul 2024 15:33:57 +0100
Subject: [PATCH 027/182] adding new logic to land registry match

---
 etl/customers/goldman/property_ownership.py | 26 +++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/etl/customers/goldman/property_ownership.py b/etl/customers/goldman/property_ownership.py
index f1f0de38..71c53a74 100644
--- a/etl/customers/goldman/property_ownership.py
+++ b/etl/customers/goldman/property_ownership.py
@@ -689,6 +689,15 @@ def app():
                     lr_filtered["saon_match2"] = lr_filtered["saon"].apply(
                         lambda x: False if pd.isnull(x) else house_number_match(x, match["house_number"])
                     )
+                    # We check if we have a flat
+                    match_flat_number = re.match("flat (\d+)", match["epc_address1"].lower())
+                    lr_filtered["saon_match3"] = False
+                    if match_flat_number is not None:
+                        # Get out the match
+                        match_flat_number = "flat " + match_flat_number.group(1)
+                        lr_filtered["saon_match3"] = lr_filtered["saon"].apply(
+                            lambda x: False if pd.isnull(x) else x == match_flat_number
+                        )
 
                     if all_paon_equal and all_saon_equal and all_street_equal:
                         # Take the newest record
@@ -705,6 +714,23 @@ def app():
                         continue
                     elif any(lr_filtered["saon_match2"]):
                         lr_filtered = lr_filtered[lr_filtered["saon_match2"]]
+                        all_saon_equal, all_paon_equal, all_street_equal = check_equalities(lr_filtered)
+                        if all_paon_equal and all_saon_equal and all_street_equal:
+                            # Filter on the newest record
+                            lr_filtered = lr_filtered.sort_values("date_of_transfer", ascending=False)
+                            lr_filtered = lr_filtered.head(1)
+                        if lr_filtered.shape[0] == 1:
+                            land_registry_matches.append(
+                                {
+                                    "uprn": match["UPRN"],
+                                    "transaction_id": lr_filtered['transaction_id'].values[0],
+                                    "price": lr_filtered["price"].values[0],
+                                    "date_of_transfer": lr_filtered["date_of_transfer"].values[0],
+                                }
+                            )
+                            continue
+                    elif any(lr_filtered["saon_match3"]):
+                        lr_filtered = lr_filtered[lr_filtered["saon_match3"]]
                         if lr_filtered.shape[0] == 1:
                             land_registry_matches.append(
                                 {

From e85936ae97f61b2fc64a41cbf1fd2435927832d9 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sun, 28 Jul 2024 15:36:48 +0100
Subject: [PATCH 028/182] apartment string match

---
 etl/customers/goldman/property_ownership.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/etl/customers/goldman/property_ownership.py b/etl/customers/goldman/property_ownership.py
index 71c53a74..5d39f139 100644
--- a/etl/customers/goldman/property_ownership.py
+++ b/etl/customers/goldman/property_ownership.py
@@ -691,6 +691,7 @@ def app():
                     )
                     # We check if we have a flat
                     match_flat_number = re.match("flat (\d+)", match["epc_address1"].lower())
+                    match_apartment_number = re.match("apartment (\d+)", match["epc_address1"].lower())
                     lr_filtered["saon_match3"] = False
                     if match_flat_number is not None:
                         # Get out the match
@@ -699,6 +700,13 @@ def app():
                             lambda x: False if pd.isnull(x) else x == match_flat_number
                         )
 
+                    if match_apartment_number is not None:
+                        # Get out the match
+                        match_apartment_number = "apartment " + match_apartment_number.group(1)
+                        lr_filtered["saon_match3"] = lr_filtered["saon"].apply(
+                            lambda x: False if pd.isnull(x) else x == match_apartment_number
+                        )
+
                     if all_paon_equal and all_saon_equal and all_street_equal:
                         # Take the newest record
                         lr_filtered = lr_filtered.sort_values("date_of_transfer", ascending=False)

From d07e54ce8829408722084023281b9b50fc455da3 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sun, 28 Jul 2024 16:01:49 +0100
Subject: [PATCH 029/182] merged

---
 etl/customers/goldman/property_ownership.py | 56 ++++++++++++++++++++-
 1 file changed, 54 insertions(+), 2 deletions(-)

diff --git a/etl/customers/goldman/property_ownership.py b/etl/customers/goldman/property_ownership.py
index 5d39f139..1c828566 100644
--- a/etl/customers/goldman/property_ownership.py
+++ b/etl/customers/goldman/property_ownership.py
@@ -754,6 +754,39 @@ def app():
         else:
             raise NotImplementedError("What happened here?")
 
+    land_registry_matches = pd.DataFrame(land_registry_matches)
+    # land_registry_matches.to_excel("land_registry_matches.xlsx")
+
+    # Check the matches against the addresses
+    # lr_to_addresses = matched_addresses[
+    #     ["UPRN", "epc_address", "epc_postcode", "Property Address", "Postcode"]
+    # ].merge(
+    #     land_registry_matches,
+    #     how="inner",
+    #     left_on="UPRN",
+    #     right_on="uprn"
+    # ).drop(columns=["uprn"]).merge(
+    #     land_registry[["transaction_id", "paon", "saon", "street", "postcode"]],
+    #     how="left", on="transaction_id"
+    # )
+
+    # Merge onto matched addresses
+    matched_addresses = matched_addresses.merge(
+        land_registry_matches,
+        how="left",
+        left_on="UPRN",
+        right_on="uprn"
+    ).drop(columns=["uprn"])
+
+    # Flat anything that sold in the last year
+    # TODO: Decide on what this logic should be!
+    matched_addresses["sold_recently"] = (
+        matched_addresses["date_of_transfer"] >= pd.Timestamp.now() - pd.DateOffset(years=1)
+    )
+
+    # Drop anything that sold recently
+    matched_addresses = matched_addresses[~matched_addresses["sold_recently"]]
+
     # shared_freehold_match = pd.DataFrame(shared_freehold_match)
     # Strore these files
     # freehold_matching_lookup.to_excel("freehold_matching_lookup.xlsx")
@@ -785,11 +818,30 @@ def app():
         matched_addresses["Company Registration No. (1)"].isin(investment_50m["Company Registration No. (1)"])
     ]
 
+    # Merge on the owner
+    al_rayan = investment_50m_properties[
+        investment_50m_properties["Proprietor Name (1)"].str.contains("AL RAYAN BANK PLC")]
+
     portfolio_epc_data_50m = properties[properties["UPRN"].isin(investment_50m_properties["UPRN"])]
     portfolio_epc_data_20m = properties[properties["UPRN"].isin(investment_20m_properties["UPRN"])]
 
-    investment_20m_properties.to_excel("investment_20m_properties 28th May.xlsx", index=False)
-    investment_50m_properties.to_excel("investment_50m_properties 28th May.xlsx", index=False)
+    # investment_20m_properties.to_excel("investment_20m_properties 28th July.xlsx", index=False)
+    # investment_50m_properties.to_excel("investment_50m_properties 28th July.xlsx", index=False)
+
+    z = pd.read_excel("investment_50m_properties 28th May.xlsx")
+    new = investment_50m_properties[~investment_50m_properties["UPRN"].isin(z["UPRN"])]
+    new_al_rayan = new[
+        new["Proprietor Name (1)"].str.contains("AL RAYAN BANK PLC")
+    ]
+    new_al_rayan = new_al_rayan.merge(
+        properties[["UPRN", "LODGEMENT_DATE"]],
+        how="left",
+        on="UPRN"
+    ).merge(
+        company_ownership[["Title Number", "Date Proprietor Added"]],
+        how="left",
+        on="Title Number",
+    )
 
     # Store the EPC data
     portfolio_epc_data_50m.to_excel("portfolio_epc_data_50m 28th May.xlsx", index=False)

From bd610c88813cbbb2e2d4534d7352302f4602f522 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 29 Jul 2024 14:29:07 +0100
Subject: [PATCH 030/182] working on electrical consumption estimates

---
 backend/Property.py                         |  13 ++
 backend/app/plan/router.py                  |  30 +++-
 etl/bill_savings/EnergyConsumptionModel.py  |  21 ++-
 etl/bill_savings/data_combining.py          |   2 +-
 etl/customers/goldman/property_ownership.py | 170 ++++++++++++--------
 recommendations/HeatingRecommender.py       |  15 +-
 6 files changed, 157 insertions(+), 94 deletions(-)

diff --git a/backend/Property.py b/backend/Property.py
index 6365bb0b..2098a2a4 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -1321,3 +1321,16 @@ class Property:
                 self.hot_water_energy_source = self.heating_energy_source
             else:
                 raise Exception("Investiage me")
+
+    def is_ashp_valid(self, exclusions):
+
+        if "air_source_heat_pump" in self.non_invasive_recommendations:
+            return True
+
+        if "air_source_heat_pump" in exclusions:
+            return False
+
+        suitable_property_type = self.data["property-type"] in ["House", "Bungalow"]
+        has_air_source_heat_pump = self.main_heating["has_air_source_heat_pump"]
+
+        return suitable_property_type and not has_air_source_heat_pump
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 4796cd9f..e21226fa 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -520,11 +520,37 @@ async def trigger_plan(body: PlanTriggerRequest):
                 # rating to the target SAP rating (ie 69C)
                 # TODO: Update this!
                 energy_consumption = energy_consumption_client.estimate_new_consumption(
-                    current_rating=p.data["current-energy-rating"],
-                    target_rating="C",
+                    current_energy_efficiency=p.data["current-energy-efficiency"],
+                    target_efficiency="69",
                     current_consumption=p.current_adjusted_energy
                 )
 
+                def convert_to_electric_consumption(self, p, energy_consumption, assumed_ashp_efficiency, exclusions):
+                    if (p.main_fuel["fuel_type"] == "electricity") or (
+                        p.main_fuel["fuel_type"] == "mains gas" and not p.is_ashp_valid(exclusions=exclusions)
+                    ):
+                        # if the primary fuel is already electricity, we don't need to adjust the consumpion
+                        return energy_consumption
+
+                    if p.main_fuel["fuel_type"] == "mains gas" and p.is_ashp_valid(exclusions=exclusions):
+                        # if the primary fuel is gas, we need to adjust the consumption to reflect the expected
+                        # efficiency of an ASHP.
+                        # We should adjust the energy consumption to reflect the 200-400% efficiency of an ASHP with
+                        # electrified heating, so that the solar panel can cover heating generation.
+                        heating_consumption = p.energy_consumption_estimates["adjusted"]["heating"]
+                        hot_water_consumption = p.energy_consumption_estimates["adjusted"]["hot_water"]
+
+                        systems_consumptions = heating_consumption + hot_water_consumption
+
+                        adjusted_consumption = systems_consumptions / (assumed_ashp_efficiency / 100)
+                        electric_consumption = (
+                            adjusted_consumption +
+                            p.energy_consumption_estimates["adjusted"]["lighting"] +
+                            p.energy_consumption_estimates["adjusted"]["appliances"]
+                        )
+
+                        return electric_consumption
+
                 # TODO: Should energy_consumption to adjusted to just electricity requirement?
                 # We should align our calculation of required energy consumption with expectations around decarbonising
                 # heating and hot water, so worse case we should take just the electrical consumption of the property
diff --git a/etl/bill_savings/EnergyConsumptionModel.py b/etl/bill_savings/EnergyConsumptionModel.py
index dfb0e574..ff225073 100644
--- a/etl/bill_savings/EnergyConsumptionModel.py
+++ b/etl/bill_savings/EnergyConsumptionModel.py
@@ -507,31 +507,36 @@ class EnergyConsumptionModel:
         return prediction
 
     @staticmethod
-    def calculate_percentage_decrease(start_rating, end_rating, consumption_averages):
+    def calculate_percentage_decrease(start_efficiency, end_efficiency, consumption_averages):
 
         start_consumption = consumption_averages.loc[
-            consumption_averages["current-energy-rating"] == start_rating, "total_consumption"
+            consumption_averages["current-energy-efficiency"].astype(str) == str(start_efficiency), "total_consumption"
         ].values[0]
+
         end_consumption = consumption_averages.loc[
-            consumption_averages["current-energy-rating"] == end_rating, "total_consumption"
+            consumption_averages["current-energy-efficiency"].astype(str) == str(end_efficiency), "total_consumption"
         ].values[0]
 
         percentage_decrease = ((start_consumption - end_consumption) / start_consumption) * 100
+        # percentage_decrease cannot be negative
+        if percentage_decrease < 0:
+            percentage_decrease = 0
         return percentage_decrease
 
-    def estimate_new_consumption(self, current_rating, target_rating, current_consumption):
+    def estimate_new_consumption(self, current_energy_efficiency, target_efficiency, current_consumption):
         """
         Given then consumption_averages dataset, which is produced as a result of the data_combining.py script,
         for the energy kwh models, this function will estimate the new consumption based on the current consumption,
         based on the expected reduction in consumption from the current rating to the target rating.
-        :param current_rating:
-        :param target_rating:
+        :param current_energy_efficiency:
+        :param target_efficiency:
         :param current_consumption:
-        :param df:
         :return:
         """
         percentage_decrease = self.calculate_percentage_decrease(
-            current_rating, target_rating, self.consumption_averages
+            start_efficiency=current_energy_efficiency,
+            end_efficiency=target_efficiency,
+            consumption_averages=self.consumption_averages
         )
         new_consumption = current_consumption * (1 - percentage_decrease / 100)
         return new_consumption
diff --git a/etl/bill_savings/data_combining.py b/etl/bill_savings/data_combining.py
index d3a8d679..dece3834 100644
--- a/etl/bill_savings/data_combining.py
+++ b/etl/bill_savings/data_combining.py
@@ -94,7 +94,7 @@ def app():
 
     # We also estimate the energy consumption reduction from this data, by band
     df["total_consumption"] = df["heating_kwh"] + df["hot_water_kwh"]
-    consumption_averages = df.groupby("current-energy-rating")["total_consumption"].mean().reset_index()
+    consumption_averages = df.groupby("current-energy-efficiency")["total_consumption"].mean().reset_index()
 
     # Save the consumption averages back to s3
     save_dataframe_to_s3_parquet(
diff --git a/etl/customers/goldman/property_ownership.py b/etl/customers/goldman/property_ownership.py
index 1c828566..ebd72732 100644
--- a/etl/customers/goldman/property_ownership.py
+++ b/etl/customers/goldman/property_ownership.py
@@ -11,7 +11,10 @@ from utils.s3 import read_dataframe_from_s3_parquet
 # The mode EPC rating is D, so we associate the £238k valuation with an EPC D property
 # Therefore value_of_F * 1.15 = value_of_D * 1.03
 # Therefore value_of_F = value_of_D * 1.03/1.15 = 238k * (1.03/1.15) = 213165
-PROPERTY_VALUE_ESTIMATE = 213_165
+PROPERTY_VALUE_ESTIMATE = 200_000
+
+# UPRNs of properties we need
+MANUAL_EXCLUSIONS = []
 
 
 def aggregate_matches(matching_lookup, company_ownership, properties):
@@ -283,6 +286,36 @@ def filter_land_registry(properties):
     )
 
 
+def is_substring(x, match_string):
+    if pd.isnull(x):
+        return False
+    return x in match_string.lower()
+
+
+def house_number_match(paon, house_number):
+    # Firstly try and convert to numeric
+    try:
+        paon_numeric = int(paon)
+        house_number_numeric = int(house_number)
+        return paon_numeric == house_number_numeric
+    except Exception as e:  # noqa
+        # If we can't convert both to numeric, we do an equality
+
+        return paon == house_number
+
+
+def check_equalities(lr_filtered):
+    all_paon_equal = all(lr_filtered["paon"] == lr_filtered["paon"].values[0])
+    if pd.isnull(lr_filtered["saon"].values[0]):
+        all_saon_equal = all(pd.isnull(lr_filtered["saon"]))
+    else:
+        all_saon_equal = all(lr_filtered["saon"] == lr_filtered["saon"].values[0])
+
+    all_street_equal = all(lr_filtered["street"] == lr_filtered["street"].values[0])
+
+    return all_paon_equal, all_saon_equal, all_street_equal
+
+
 def app():
     """
     This script is for scoping property ownership for EPC F & G rated properties in Birmingam, for Goldman Sachs
@@ -292,8 +325,8 @@ def app():
     #       https://epc.opendatacommunities.org/domestic/search?address=&postcode=&local-authority=&constituency
     #       =&uprn=100031179243&from-month=1&from-year=2008&to-month=12&to-year=2024
     #       is actually listed in two local authorities causing us to think it's an EPC F & G property, but it's
-    #       it's actually EPC E. Need to handle this, probably by reading in all of the EPC data, concatenating together
-    #       and performing a singular filter for most recent EPC by UPRN
+    #       it's actually EPC E. Need to handle this, probably by reading in all of the EPC data, concatenating
+    #       together and performing a singular filter for most recent EPC by UPRN
     # paths = [
     #     "local_data/all-domestic-certificates/domestic-E08000025-Birmingham/certificates.csv",
     #     "local_data/all-domestic-certificates/domestic-E08000031-Wolverhampton/certificates.csv",
@@ -356,10 +389,6 @@ def app():
     # Take the newest UPRN
     properties = properties.sort_values("LODGEMENT_DATE", ascending=False).drop_duplicates("UPRN")
 
-    # TODO: Do we want to filter properties based on lodgement dates?
-    #       E.g. we might want to filter properties that have had a sale EPC lodged in the last x months, because
-    #       this could be indicative of a sale happening, and the land registry data may not have caught up yet
-
     # Remove entries where the address begins with the term "land adjoining", or other records that don't reference the
     # the property itself
     starting_terms = [
@@ -461,6 +490,8 @@ def app():
 
     # freehold_matching_lookup.to_excel("freehold_matching_lookup V2.xlsx")
     # leasehold_matching_lookup.to_excel("leasehold_matching_lookup V2.xlsx")
+    # freehold_matching_lookup = pd.read_excel("freehold_matching_lookup V2.xlsx")
+    # leasehold_matching_lookup = pd.read_excel("leasehold_matching_lookup V2.xlsx")
 
     # The approximate matches aren't very good
     freehold_matching_lookup = freehold_matching_lookup[freehold_matching_lookup["match_type"] == "exact"]
@@ -483,7 +514,9 @@ def app():
                 "ADDRESS1",
                 "CURRENT_ENERGY_EFFICIENCY",
                 "CURRENT_ENERGY_RATING",
-                "POSTCODE"
+                "POSTCODE",
+                "LODGEMENT_DATE",
+                "TRANSACTION_TYPE"
             ]
         ].rename(
             columns={
@@ -501,7 +534,7 @@ def app():
                 "Postcode",
                 "Company Registration No. (1)",
                 "Proprietor Name (1)",
-
+                "Date Proprietor Added",
             ]
         ],
         how="left", on="Title Number"
@@ -531,35 +564,6 @@ def app():
     land_registry["saon"] = land_registry["saon"].str.lower().str.strip()
     land_registry["date_of_transfer"] = pd.to_datetime(land_registry["date_of_transfer"])
 
-    def is_substring(x, match_string):
-
-        if pd.isnull(x):
-            return False
-
-        return x in match_string.lower()
-
-    def house_number_match(paon, house_number):
-        # Firstly try and convert to numberic
-        try:
-            paon_numeric = int(paon)
-            house_number_numeric = int(house_number)
-            return paon_numeric == house_number_numeric
-        except Exception as e:  # noqa
-            # If we can't convert both to numeric, we do an equality
-
-            return paon == house_number
-
-    def check_equalities(lr_filtered):
-        all_paon_equal = all(lr_filtered["paon"] == lr_filtered["paon"].values[0])
-        if pd.isnull(lr_filtered["saon"].values[0]):
-            all_saon_equal = all(pd.isnull(lr_filtered["saon"]))
-        else:
-            all_saon_equal = all(lr_filtered["saon"] == lr_filtered["saon"].values[0])
-
-        all_street_equal = all(lr_filtered["street"] == lr_filtered["street"].values[0])
-
-        return all_paon_equal, all_saon_equal, all_street_equal
-
     land_registry_matches = []
     for _, match in tqdm(matched_addresses.iterrows(), total=len(matched_addresses)):
 
@@ -779,13 +783,25 @@ def app():
     ).drop(columns=["uprn"])
 
     # Flat anything that sold in the last year
-    # TODO: Decide on what this logic should be!
     matched_addresses["sold_recently"] = (
         matched_addresses["date_of_transfer"] >= pd.Timestamp.now() - pd.DateOffset(years=1)
     )
 
-    # Drop anything that sold recently
-    matched_addresses = matched_addresses[~matched_addresses["sold_recently"]]
+    matched_addresses["sale_lodged_recently"] = (
+        (pd.to_datetime(matched_addresses["LODGEMENT_DATE"]) >= pd.Timestamp.now() - pd.DateOffset(months=12)) &
+        (matched_addresses["TRANSACTION_TYPE"].isin(["marketed sale", "non marketed sale"]))
+    )
+
+    # Drop rows on the booleans
+    matched_addresses = matched_addresses[
+        ~matched_addresses["sold_recently"] &
+        ~matched_addresses["sale_lodged_recently"]
+        ]
+
+    # Filter combined_matching_lookup accordingly
+    combined_matching_lookup = combined_matching_lookup[
+        combined_matching_lookup["UPRN"].isin(matched_addresses["UPRN"])
+    ]
 
     # shared_freehold_match = pd.DataFrame(shared_freehold_match)
     # Strore these files
@@ -807,45 +823,19 @@ def app():
         properties=properties
     )
 
-    investment_20m = combined_aggregate[combined_aggregate["cumulative_value"] <= 20_500_000]
     investment_50m = combined_aggregate[combined_aggregate["cumulative_value"] <= 51_000_000]
 
-    investment_20m_properties = matched_addresses[
-        matched_addresses["Company Registration No. (1)"].isin(investment_20m["Company Registration No. (1)"])
-    ]
-
     investment_50m_properties = matched_addresses[
         matched_addresses["Company Registration No. (1)"].isin(investment_50m["Company Registration No. (1)"])
     ]
 
-    # Merge on the owner
-    al_rayan = investment_50m_properties[
-        investment_50m_properties["Proprietor Name (1)"].str.contains("AL RAYAN BANK PLC")]
-
     portfolio_epc_data_50m = properties[properties["UPRN"].isin(investment_50m_properties["UPRN"])]
-    portfolio_epc_data_20m = properties[properties["UPRN"].isin(investment_20m_properties["UPRN"])]
 
-    # investment_20m_properties.to_excel("investment_20m_properties 28th July.xlsx", index=False)
+    # Storing data
     # investment_50m_properties.to_excel("investment_50m_properties 28th July.xlsx", index=False)
 
-    z = pd.read_excel("investment_50m_properties 28th May.xlsx")
-    new = investment_50m_properties[~investment_50m_properties["UPRN"].isin(z["UPRN"])]
-    new_al_rayan = new[
-        new["Proprietor Name (1)"].str.contains("AL RAYAN BANK PLC")
-    ]
-    new_al_rayan = new_al_rayan.merge(
-        properties[["UPRN", "LODGEMENT_DATE"]],
-        how="left",
-        on="UPRN"
-    ).merge(
-        company_ownership[["Title Number", "Date Proprietor Added"]],
-        how="left",
-        on="Title Number",
-    )
-
     # Store the EPC data
-    portfolio_epc_data_50m.to_excel("portfolio_epc_data_50m 28th May.xlsx", index=False)
-    portfolio_epc_data_20m.to_excel("portfolio_epc_data_20m 28th May.xlsx", index=False)
+    # portfolio_epc_data_50m.to_excel("portfolio_epc_data_50m 29th July.xlsx", index=False)
 
     # We check if any of these properties are in a conservation area
     valuations = pd.read_excel("property value.xlsx")
@@ -891,6 +881,48 @@ def company_aggregation():
     aggregation.to_excel("Company ownership aggregation.xlsx")
 
 
+def extract_price_info(text):
+    # Use regex to find the relevant price information
+    match = re.search(r'Estimated price\n\nLow£([\d,]+)k\n\n£([\d,]+)k\n\nHigh£([\d,]+)k', text)
+    if match:
+        low_price = int(match.group(1).replace(',', '')) * 1000
+        est_price = int(match.group(2).replace(',', '')) * 1000
+        high_price = int(match.group(3).replace(',', '')) * 1000
+
+        price_info = {
+            'Zoopla Valuation': est_price,
+            'Zoopla Lower Bound': low_price,
+            'Zoopla Upper Bound': high_price
+        }
+
+        return price_info
+
+    return None
+
+
+def get_valuations(portfolio_epc_data_50m):
+    # This gets blocked pretty quickly by Zoopla
+    import requests
+    import time
+    from tqdm import tqdm
+    valuation_data = []
+    for _, property_data in tqdm(portfolio_epc_data_50m.iterrows(), total=len(portfolio_epc_data_50m)):
+        uprn = property_data["UPRN"]
+        response = requests.get(
+            f"https://r.jina.ai/https://www.zoopla.co.uk/property/uprn/{uprn}/"
+        )
+
+        pricing = extract_price_info(response.text)
+        valuation_data.append(
+            {
+                "UPRN": uprn,
+                **pricing
+            }
+        )
+
+        time.sleep(2)
+
+
 def prepare_anonymised_data():
     investment_50m_properties = pd.read_excel("investment_50m_properties 28th May.xlsx", header=0)
     investment_epc_data = pd.read_excel("portfolio_epc_data_50m 28th May.xlsx", header=0)
diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py
index 07bac2cd..1d409be6 100644
--- a/recommendations/HeatingRecommender.py
+++ b/recommendations/HeatingRecommender.py
@@ -116,7 +116,7 @@ class HeatingRecommender:
         # In the future, we'll allow overrides, so that non-intrusive surveys can contradict these conditions
         # and either allow or prevent the recommendation of an air source heat pump
 
-        if self.is_ashp_valid(exclusions=exclusions):
+        if self.property.is_ashp_valid(exclusions=exclusions):
             self.recommend_air_source_heat_pump(
                 phase=phase, has_cavity_or_loft_recommendations=has_cavity_or_loft_recommendations
             )
@@ -186,19 +186,6 @@ class HeatingRecommender:
             description = ("Replace the existing boiler and cylinder without a thermostat with a new electric combi "
                            "boiler")
 
-    def is_ashp_valid(self, exclusions):
-
-        if "air_source_heat_pump" in self.property.non_invasive_recommendations:
-            return True
-
-        if "air_source_heat_pump" in exclusions:
-            return False
-
-        suitable_property_type = self.property.data["property-type"] in ["House", "Bungalow"]
-        has_air_source_heat_pump = self.property.main_heating["has_air_source_heat_pump"]
-
-        return suitable_property_type and not has_air_source_heat_pump
-
     def recommend_air_source_heat_pump(self, phase, has_cavity_or_loft_recommendations, _return=False):
         """
         This method will implement the recommendation for an air source heat pump

From 87de0ce3c9e62d03898fe4374ee3b9ba55b90e3d Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 29 Jul 2024 14:43:43 +0100
Subject: [PATCH 031/182] implemented electric consumption

---
 backend/Property.py        | 46 ++++++++++++++++++++++++++++
 backend/app/plan/router.py | 63 ++++++++++----------------------------
 2 files changed, 62 insertions(+), 47 deletions(-)

diff --git a/backend/Property.py b/backend/Property.py
index 2098a2a4..f5123b96 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -1334,3 +1334,49 @@ class Property:
         has_air_source_heat_pump = self.main_heating["has_air_source_heat_pump"]
 
         return suitable_property_type and not has_air_source_heat_pump
+
+    def estimate_electrical_consumption(self, assumed_ashp_efficiency, exclusions):
+        """
+        Given a property, this method estimates the electrical consumption of the property, based on the energy
+        consumption, the assumed efficiency of an ASHP and the exclusions.
+
+        What we're trying to do here is size up the future electricity demand of the property, assuming that the
+        home is eligible for an ASHP. If the property is not eligible for an ASHP, we don't need to adjust the
+        consumption.
+
+        This figure is used to size up solar panels, so they can cover heat generation, even if the property
+        today doesn't generate its heat from electricity
+
+        :param assumed_ashp_efficiency:
+        :param exclusions:
+        :return:
+        """
+
+        exclusions = [] if exclusions is None else exclusions
+
+        if (self.main_fuel["fuel_type"] == "electricity") or (
+            self.main_fuel["fuel_type"] == "mains gas" and not self.is_ashp_valid(exclusions=exclusions)
+        ):
+            # if the primary fuel is already electricity, we don't need to adjust the consumption
+            return self.current_adjusted_energy
+
+        if self.main_fuel["fuel_type"] == "mains gas" and self.is_ashp_valid(exclusions=exclusions):
+            # if the primary fuel is gas, we need to adjust the consumption to reflect the expected
+            # efficiency of an ASHP.
+            # We should adjust the energy consumption to reflect the 200-400% efficiency of an ASHP with
+            # electrified heating, so that the solar panel can cover heating generation.
+            heating_consumption = self.energy_consumption_estimates["adjusted"]["heating"]
+            hot_water_consumption = self.energy_consumption_estimates["adjusted"]["hot_water"]
+
+            systems_consumptions = heating_consumption + hot_water_consumption
+
+            adjusted_consumption = systems_consumptions / (assumed_ashp_efficiency / 100)
+            electric_consumption = (
+                adjusted_consumption +
+                self.energy_consumption_estimates["adjusted"]["lighting"] +
+                self.energy_consumption_estimates["adjusted"]["appliances"]
+            )
+
+            return electric_consumption
+
+        raise NotImplementedError("Have not implemented estimating electrical consumption for this fuel type")
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index e21226fa..68dcb916 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -437,9 +437,11 @@ async def trigger_plan(body: PlanTriggerRequest):
                 # We set the target rating to EPC C, which is the typical EPC rating we would expect the
                 # property to achieve post retrofit of just the fabric
                 "energy_consumption": energy_consumption_client.estimate_new_consumption(
-                    current_rating=p.data["current-energy-rating"],
-                    target_rating="C",
-                    current_consumption=p.current_adjusted_energy
+                    current_energy_efficiency=p.data["current-energy-efficiency"],
+                    target_efficiency="C",
+                    current_consumption=p.estimate_electrical_consumption(
+                        assumed_ashp_efficiency=300, exclusions=body.exclusions
+                    )
                 ),
                 "property_id": p.id,
                 "uprn": p.uprn
@@ -514,56 +516,23 @@ async def trigger_plan(body: PlanTriggerRequest):
                 # TODO: Complete me! - we probably won't do this for individual flats - IGNORE FLATS FROM THIS WITHOUT
                 #       BUILDING IDS
 
-                # if the property is already very close to an EPC C, we don't adjust the energy consumption based on
-                # expected movement to EPC C.
-                # To extend this, what we could do is adjust the based on the expected movement from the current SAP
-                # rating to the target SAP rating (ie 69C)
-                # TODO: Update this!
-                energy_consumption = energy_consumption_client.estimate_new_consumption(
-                    current_energy_efficiency=p.data["current-energy-efficiency"],
-                    target_efficiency="69",
-                    current_consumption=p.current_adjusted_energy
+                electric_consumption = p.estimate_electrical_consumption(
+                    assumed_ashp_efficiency=300, exclusions=body.exclusions
                 )
 
-                def convert_to_electric_consumption(self, p, energy_consumption, assumed_ashp_efficiency, exclusions):
-                    if (p.main_fuel["fuel_type"] == "electricity") or (
-                        p.main_fuel["fuel_type"] == "mains gas" and not p.is_ashp_valid(exclusions=exclusions)
-                    ):
-                        # if the primary fuel is already electricity, we don't need to adjust the consumpion
-                        return energy_consumption
-
-                    if p.main_fuel["fuel_type"] == "mains gas" and p.is_ashp_valid(exclusions=exclusions):
-                        # if the primary fuel is gas, we need to adjust the consumption to reflect the expected
-                        # efficiency of an ASHP.
-                        # We should adjust the energy consumption to reflect the 200-400% efficiency of an ASHP with
-                        # electrified heating, so that the solar panel can cover heating generation.
-                        heating_consumption = p.energy_consumption_estimates["adjusted"]["heating"]
-                        hot_water_consumption = p.energy_consumption_estimates["adjusted"]["hot_water"]
-
-                        systems_consumptions = heating_consumption + hot_water_consumption
-
-                        adjusted_consumption = systems_consumptions / (assumed_ashp_efficiency / 100)
-                        electric_consumption = (
-                            adjusted_consumption +
-                            p.energy_consumption_estimates["adjusted"]["lighting"] +
-                            p.energy_consumption_estimates["adjusted"]["appliances"]
-                        )
-
-                        return electric_consumption
-
-                # TODO: Should energy_consumption to adjusted to just electricity requirement?
-                # We should align our calculation of required energy consumption with expectations around decarbonising
-                # heating and hot water, so worse case we should take just the electrical consumption of the property
-                # if the property is current using gas for heating and hot water, then we should adjust the kwh demand
-                # to reflect the 200-400% efficiency of an ASHP with electrified heating, so that the solar panel can
-                # cover heating generation. While
-                # If the main fuel is electricity (not community) then we don't need to change the kwh demand, if it's
-                # gas we should adjust on the suitability of an ashp!
+                # We now decrease this, based on the expected energy efficiency of the property post retrofit to a C,
+                # which is the common level we would expect the property to reach when treating the fabric of the
+                # home
+                electric_consumption = energy_consumption_client.estimate_new_consumption(
+                    current_energy_efficiency=p.data["current-energy-efficiency"],
+                    target_efficiency="69",
+                    current_consumption=electric_consumption
+                )
 
                 solar_performance = solar_api_client.get(
                     longitude=p.spatial["longitude"],
                     latitude=p.spatial["latitude"],
-                    energy_consumption=energy_consumption,
+                    energy_consumption=electric_consumption,
                     is_building=False,
                     session=session,
                     uprn=p.uprn

From eec453670ceb3105d1d041f737db2125518bd27c Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 29 Jul 2024 14:56:17 +0100
Subject: [PATCH 032/182] tweaked solar ranking algorithm

---
 backend/apis/GoogleSolarApi.py | 8 +++++++-
 backend/app/assumptions.py     | 3 +++
 backend/app/plan/router.py     | 7 ++++---
 3 files changed, 14 insertions(+), 4 deletions(-)
 create mode 100644 backend/app/assumptions.py

diff --git a/backend/apis/GoogleSolarApi.py b/backend/apis/GoogleSolarApi.py
index 8d08b083..074a9ece 100644
--- a/backend/apis/GoogleSolarApi.py
+++ b/backend/apis/GoogleSolarApi.py
@@ -311,12 +311,19 @@ class GoogleSolarApi:
         )
 
         # Now that we know the lifetime cnsumption of ac kwh, we can estimate the roi
+        # Key things we estimate:
+        # - generation_value: this is the gbp value of the electricity generated
+        # - roi: the return on investment, calculated as generation_value / total_cost
+        # - surplus: this is the amount of additional energy generated, and therefore how much will be exported
+        # - surplus_value: the value of the surplus energy - this feeds into generation_value, when relevant
+        # - expected_payback_years: the number of years it will take to pay back the initial investment
         lifetime_energy_consumption = energy_consumption * self.installation_life_span
         roi_results = []
         for _, panel_config in panel_performance.iterrows():
             lifetime_ac_kwh = panel_config["lifetime_ac_kwh"]
 
             surplus = 0
+            generation_deficit = 0
             if lifetime_ac_kwh < lifetime_energy_consumption:
                 # We estimate the amount of electricity generated, based on the price cap
                 generation_value = lifetime_ac_kwh * AnnualBillSavings.ELECTRICITY_PRICE_CAP
@@ -329,7 +336,6 @@ class GoogleSolarApi:
                 surplus_value = surplus * AnnualBillSavings.ELECTRICITY_EXPORT_PAYMENT
                 generation_value = lifetime_energy_consumption * AnnualBillSavings.ELECTRICITY_PRICE_CAP
                 roi = (generation_value + surplus_value) / panel_config["total_cost"]
-                generation_deficit = surplus_value
 
             # Calculate expected payback years
             if generation_value > 0:
diff --git a/backend/app/assumptions.py b/backend/app/assumptions.py
new file mode 100644
index 00000000..13bd913f
--- /dev/null
+++ b/backend/app/assumptions.py
@@ -0,0 +1,3 @@
+# Assumes that the average efficiency of an air source heat pump is 300%, taking the median of the 200-400% range,
+# which is often quoted as a sensible efficiency range for air source heat pumps.
+AVERAGE_ASHP_EFFICIENCY = 300
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 68dcb916..c1e0b981 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -10,6 +10,7 @@ from sqlalchemy.exc import IntegrityError, OperationalError
 from sqlalchemy.orm import sessionmaker
 from starlette.responses import Response
 
+import backend.app.assumptions as assumptions
 from backend.app.config import get_settings, get_prediction_buckets
 from backend.app.db.connection import db_engine
 from backend.app.db.functions.materials_functions import get_materials
@@ -440,7 +441,7 @@ async def trigger_plan(body: PlanTriggerRequest):
                     current_energy_efficiency=p.data["current-energy-efficiency"],
                     target_efficiency="C",
                     current_consumption=p.estimate_electrical_consumption(
-                        assumed_ashp_efficiency=300, exclusions=body.exclusions
+                        assumed_ashp_efficiency=assumptions.AVERAGE_ASHP_EFFICIENCY, exclusions=body.exclusions
                     )
                 ),
                 "property_id": p.id,
@@ -517,7 +518,7 @@ async def trigger_plan(body: PlanTriggerRequest):
                 #       BUILDING IDS
 
                 electric_consumption = p.estimate_electrical_consumption(
-                    assumed_ashp_efficiency=300, exclusions=body.exclusions
+                    assumed_ashp_efficiency=assumptions.AVERAGE_ASHP_EFFICIENCY, exclusions=body.exclusions
                 )
 
                 # We now decrease this, based on the expected energy efficiency of the property post retrofit to a C,
@@ -529,7 +530,7 @@ async def trigger_plan(body: PlanTriggerRequest):
                     current_consumption=electric_consumption
                 )
 
-                solar_performance = solar_api_client.get(
+                solar_api_client.get(
                     longitude=p.spatial["longitude"],
                     latitude=p.spatial["latitude"],
                     energy_consumption=electric_consumption,

From 754d46073e5715449c2f251db44c54bbc74d034a Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 29 Jul 2024 15:01:39 +0100
Subject: [PATCH 033/182] added db save

---
 backend/app/plan/router.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index c1e0b981..221075f9 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -539,6 +539,16 @@ async def trigger_plan(body: PlanTriggerRequest):
                     uprn=p.uprn
                 )
 
+                # Store the data in the database
+                # TODO: Rather than just doing a straight insert, we should overwrite what's already there if it exists
+                solar_api_client.save_to_db(
+                    session=session,
+                    uprns_to_location=[
+                        {"uprn": p.uprn, "longitude": p.spatial["longitude"], "latitude": p.spatial["latitude"]}
+                    ],
+                    scenario_type="unit"
+                )
+
                 # TODO: Insert the pitched roof area into the property class as we store the solar performance
                 #       in the property class
             print("Implement me")

From b85fde1b21742bb5edc5e0c5c1f678d3502e2602 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 29 Jul 2024 15:16:48 +0100
Subject: [PATCH 034/182] implemented unit level solar api

---
 backend/Property.py                       | 84 ++++++++---------------
 backend/app/plan/router.py                | 74 +++++++++++---------
 recommendations/SolarPvRecommendations.py | 19 +----
 3 files changed, 71 insertions(+), 106 deletions(-)

diff --git a/backend/Property.py b/backend/Property.py
index f5123b96..f15a0d7b 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -159,7 +159,7 @@ class Property:
         self.floor_height = epc_record.prepared_epc.get("floor_height")
         self.insulation_wall_area = None
         self.floor_area = epc_record.prepared_epc.get("total_floor_area")
-        self.pitched_roof_area = None
+        self.roof_area = None
         self.insulation_floor_area = None
         self.number_lighting_outlets = epc_record.prepared_epc.get(
             "fixed_lighting_outlets_count"
@@ -604,18 +604,12 @@ class Property:
     def get_components(
         self,
         cleaned,
-        photo_supply_lookup,
-        floor_area_decile_thresholds,
         energy_consumption_client
     ):
         """
         Given the cleaning that has been performed, we'll use this to identify the property
         components, from roof to walls to windows, heating and hot water
         :param cleaned: This is the dictionary of components found in cleaner.cleaned
-        :param photo_supply_lookup: This is the lookup table for the photo supply, used to estimate the percentage
-                                    of the roof that is suitable for solar panels
-        :param floor_area_decile_thresholds: This is the decile thresholds for the floor area, used in estimating the
-                                             solar pv roof area
         :param energy_consumption_client: Contains the heating and hot water kwh models - used to predict current
                                         energy annual consumption in kWh
         :return:
@@ -680,20 +674,21 @@ class Property:
         self.set_floor_type()
         self.set_floor_level()
         self.set_windows_count()
-        self.set_solar_panel_area(
-            photo_supply_lookup=photo_supply_lookup,
-            floor_area_decile_thresholds=floor_area_decile_thresholds,
-        )
         self.set_energy_source()
         self.find_energy_sources()
         self.set_current_energy_bill(energy_consumption_client)
 
-    def set_solar_panel_configuration(self, solar_panel_configuration):
+    def set_solar_panel_configuration(
+        self, solar_panel_configuration, roof_area
+    ):
         """
         This funtion inserts the solar panel configuration into the property object
         """
         self.solar_panel_configuration = solar_panel_configuration
 
+        # We also set the roof area
+        self.roof_area = roof_area
+
     def set_current_energy_bill(self, energy_consumption_client):
         """
         Given what we know about the property now, estimates the current energy consumption using the UCL paper
@@ -1079,9 +1074,9 @@ class Property:
             if condition_data["main_dwelling_ground_floor_area"] is not None \
             else self.floor_area / self.number_of_floors
 
-        self.pitched_roof_area = esimtate_pitched_roof_area(
-            floor_area=self.insulation_floor_area, floor_height=self.floor_height
-        )
+        # self.pitched_roof_area = esimtate_pitched_roof_area(
+        #     floor_area=self.insulation_floor_area, floor_height=self.floor_height
+        # )
 
     def set_floor_level(self):
         self.floor_level = (
@@ -1195,48 +1190,6 @@ class Property:
             if condition_data["windows_area"] is not None \
             else None
 
-    def set_solar_panel_area(self, photo_supply_lookup, floor_area_decile_thresholds):
-        """
-        Sets the approximate area of the solar panels
-        :return:
-        """
-
-        if (self.insulation_floor_area is None) and (self.pitched_roof_area is None):
-            raise ValueError(
-                "Need to set insulation floor area and pitched roof area before setting solar pv roof area"
-            )
-
-        photo_supply_matched = SolarPhotoSupply.filter_photo_supply_lookup(
-            photo_supply_lookup=photo_supply_lookup,
-            floor_area_decile_thresholds=floor_area_decile_thresholds,
-            tenure=self.data["tenure"],
-            built_form=self.data["built-form"],
-            property_type=self.data["property-type"],
-            construction_age_band=self.construction_age_band,
-            is_flat=self.roof["is_flat"],
-            is_pitched=self.roof["is_pitched"],
-            is_roof_room=self.roof["is_roof_room"],
-            floor_area=self.floor_area,
-        )
-
-        percentage_of_roof = photo_supply_matched["photo_supply_median"].mean()
-        percentage_of_roof = percentage_of_roof / 100
-
-        self.solar_pv_percentage = percentage_of_roof
-
-    def get_solar_pv_roof_area(self, percentage_of_roof):
-        """
-        Given a percentage of the roof, this method will return the estimated area of the solar panels
-        :param percentage_of_roof:
-        :return:
-        """
-
-        return (
-            self.insulation_floor_area * percentage_of_roof
-            if self.roof["is_flat"]
-            else self.pitched_roof_area * percentage_of_roof
-        )
-
     def set_energy_source(self):
         """
         This method sets the energy source of the property, based on the mains gas flag and energy tariff.
@@ -1335,6 +1288,23 @@ class Property:
 
         return suitable_property_type and not has_air_source_heat_pump
 
+    def is_solar_pv_valid(self):
+
+        # If the property is a flat but we are looking at building solar potential, we can include this
+        if (self.building_id is not None) and (self.solar_panel_configuration is not None):
+            return True
+
+        is_valid_property_type = self.data["property-type"] in ["House", "Bungalow", "Maisonette"]
+        is_valid_roof_type = (
+            self.roof["is_flat"] or self.roof["is_pitched"] or self.roof["is_roof_room"]
+        )
+        # If there is no existing solar PV, the photo-supply field will be None or a missing value
+        has_no_existing_solar_pv = self.data["photo-supply"] in [
+            None, 0, self.DATA_ANOMALY_MATCHES
+        ]
+
+        return is_valid_property_type and is_valid_roof_type and has_no_existing_solar_pv
+
     def estimate_electrical_consumption(self, assumed_ashp_efficiency, exclusions):
         """
         Given a property, this method estimates the electrical consumption of the property, based on the energy
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 221075f9..563134ea 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -408,7 +408,6 @@ async def trigger_plan(body: PlanTriggerRequest):
         uprn_filenames = read_dataframe_from_s3_parquet(
             bucket_name=get_settings().DATA_BUCKET, file_key="spatial/filename_meta.parquet"
         )
-        photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket=get_settings().DATA_BUCKET)
         solar_api_client = GoogleSolarApi(api_key=get_settings().GOOGLE_SOLAR_API_KEY)
 
         dataset_version = "2024-07-08"
@@ -425,10 +424,10 @@ async def trigger_plan(body: PlanTriggerRequest):
 
         logger.info("Getting spatial data")
         for p in input_properties:
-            p.get_components(cleaned, photo_supply_lookup, floor_area_decile_thresholds, energy_consumption_client)
+            p.get_components(cleaned=cleaned, energy_consumption_client=energy_consumption_client)
             p.get_spatial_data(uprn_filenames)
 
-        # TODO: Handle the case of modelling some units as buildings and some as properties individually
+        # TODO: Tidy this up
         building_ids = [
             {
                 "building_id": p.building_id,
@@ -439,7 +438,7 @@ async def trigger_plan(body: PlanTriggerRequest):
                 # property to achieve post retrofit of just the fabric
                 "energy_consumption": energy_consumption_client.estimate_new_consumption(
                     current_energy_efficiency=p.data["current-energy-efficiency"],
-                    target_efficiency="C",
+                    target_efficiency="69",
                     current_consumption=p.estimate_electrical_consumption(
                         assumed_ashp_efficiency=assumptions.AVERAGE_ASHP_EFFICIENCY, exclusions=body.exclusions
                     )
@@ -448,6 +447,24 @@ async def trigger_plan(body: PlanTriggerRequest):
                 "uprn": p.uprn
             } for p in input_properties if p.building_id is not None
         ]
+        individual_units = [
+            {
+                "longitude": p.spatial["longitude"],
+                "latitude": p.spatial["latitude"],
+                # Energy consumption is adjusted for the property's expected post retrofit state
+                # We set the target rating to EPC C, which is the typical EPC rating we would expect the
+                # property to achieve post retrofit of just the fabric
+                "energy_consumption": energy_consumption_client.estimate_new_consumption(
+                    current_energy_efficiency=p.data["current-energy-efficiency"],
+                    target_efficiency="69",
+                    current_consumption=p.estimate_electrical_consumption(
+                        assumed_ashp_efficiency=assumptions.AVERAGE_ASHP_EFFICIENCY, exclusions=body.exclusions
+                    ),
+                ),
+                "property_id": p.id,
+                "uprn": p.uprn
+            } for p in input_properties if p.building_id is None
+        ]
         if building_ids:
             # Find the unique longitude and latitude pairs for each building id
             unique_coordinates = {}
@@ -511,32 +528,21 @@ async def trigger_plan(body: PlanTriggerRequest):
                         )
                         p.set_solar_panel_configuration(unit_solar_panel_configuration)
 
-        else:
+        if individual_units:
             # Model the solar potential at the property level
-            for p in input_properties:
-                # TODO: Complete me! - we probably won't do this for individual flats - IGNORE FLATS FROM THIS WITHOUT
-                #       BUILDING IDS
-
-                electric_consumption = p.estimate_electrical_consumption(
-                    assumed_ashp_efficiency=assumptions.AVERAGE_ASHP_EFFICIENCY, exclusions=body.exclusions
-                )
-
-                # We now decrease this, based on the expected energy efficiency of the property post retrofit to a C,
-                # which is the common level we would expect the property to reach when treating the fabric of the
-                # home
-                electric_consumption = energy_consumption_client.estimate_new_consumption(
-                    current_energy_efficiency=p.data["current-energy-efficiency"],
-                    target_efficiency="69",
-                    current_consumption=electric_consumption
-                )
+            for unit in individual_units:
+                property_instance = [p for p in input_properties if p.id == unit["property_id"]][0]
+                # At this level, we check if the property is suitable for solar and, if not, skip it
+                if not property_instance.is_solar_pv_valid():
+                    continue
 
                 solar_api_client.get(
-                    longitude=p.spatial["longitude"],
-                    latitude=p.spatial["latitude"],
-                    energy_consumption=electric_consumption,
+                    longitude=unit["longitude"],
+                    latitude=unit["latitude"],
+                    energy_consumption=unit["energy_consumption"],
                     is_building=False,
                     session=session,
-                    uprn=p.uprn
+                    uprn=unit["uprn"]
                 )
 
                 # Store the data in the database
@@ -544,16 +550,22 @@ async def trigger_plan(body: PlanTriggerRequest):
                 solar_api_client.save_to_db(
                     session=session,
                     uprns_to_location=[
-                        {"uprn": p.uprn, "longitude": p.spatial["longitude"], "latitude": p.spatial["latitude"]}
+                        {
+                            "uprn": property_instance.uprn,
+                            "longitude": property_instance.spatial["longitude"],
+                            "latitude": property_instance.spatial["latitude"]
+                        }
                     ],
                     scenario_type="unit"
                 )
 
-                # TODO: Insert the pitched roof area into the property class as we store the solar performance
-                #       in the property class
-            print("Implement me")
-
-            # TODO: We can set the pitched roof area based on the results of the solar api!
+                property_instance.set_solar_panel_configuration(
+                    solar_panel_configuration={
+                        "insights_data": solar_api_client.insights_data,
+                        "panel_performance": solar_api_client.panel_performance
+                    },
+                    roof_area=solar_api_client.roof_area
+                )
 
         logger.info("Getting components and epc recommendations")
         recommendations = {}
diff --git a/recommendations/SolarPvRecommendations.py b/recommendations/SolarPvRecommendations.py
index 276573ec..4eece985 100644
--- a/recommendations/SolarPvRecommendations.py
+++ b/recommendations/SolarPvRecommendations.py
@@ -78,23 +78,6 @@ class SolarPvRecommendations:
             }
         ]
 
-    def is_solar_pv_valid(self):
-
-        # If the property is a flat but we are looking at building solar potential, we can include this
-        if (self.property.building_id is not None) and (self.property.solar_panel_configuration is not None):
-            return True
-
-        is_valid_property_type = self.property.data["property-type"] in ["House", "Bungalow", "Maisonette"]
-        is_valid_roof_type = (
-            self.property.roof["is_flat"] or self.property.roof["is_pitched"] or self.property.roof["is_roof_room"]
-        )
-        # If there is no existing solar PV, the photo-supply field will be None or a missing value
-        has_no_existing_solar_pv = self.property.data["photo-supply"] in [
-            None, 0, self.property.DATA_ANOMALY_MATCHES
-        ]
-
-        return is_valid_property_type and is_valid_roof_type and has_no_existing_solar_pv
-
     def recommend_building_analysis(self, phase):
         """
         This recommendation approach handles the case of producing solar PV recommendations at the building level,
@@ -159,7 +142,7 @@ class SolarPvRecommendations:
         :return:
         """
 
-        if not self.is_solar_pv_valid():
+        if not self.property.is_solar_pv_valid():
             return
 
         # If we have a buiilding level analysis, we implement separate logic

From 53b2ca05b628f656855ccb119aa0d6079fa8284f Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 29 Jul 2024 15:31:19 +0100
Subject: [PATCH 035/182] inserting county

---
 backend/app/plan/router.py       | 4 ++++
 etl/xml_survey_extraction/app.py | 2 ++
 2 files changed, 6 insertions(+)

diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 563134ea..ced67bfe 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -241,6 +241,10 @@ def create_epc_records(epc_searcher: SearchEpc, energy_assessment: dict):
     epc = energy_assessment["epc"]
     energy_assessment_date = epc["inspection-date"].strftime("%Y-%m-%d")
 
+    # We insert county into the epc, since right now this isn't something that we pull out from the energy
+    # assessment
+    epc["county"] = epc_searcher.newest_epc["county"]
+
     # We check if the energy assessment is newer than the newest EPC
     if pd.to_datetime(energy_assessment_date) > pd.to_datetime(epc_searcher.newest_epc["inspection-date"]):
         # In this case, our energy assessment is newer than the EPCs available for this property
diff --git a/etl/xml_survey_extraction/app.py b/etl/xml_survey_extraction/app.py
index c4f6091f..18f84ba2 100644
--- a/etl/xml_survey_extraction/app.py
+++ b/etl/xml_survey_extraction/app.py
@@ -29,6 +29,8 @@ def main():
     #      The data is stored in a folder called {surveyors}/{project_code}/{uprn}
     #       We'll need to get the uprn from the folder name, which we can do with EpcSearcher class
 
+    # TODO: Pull out county, as in create_epc_records in the router, we pull it from the latest EPC, but we should
+    #       be able to deduce it from just the address
     #
     energy_assessments = list_files_and_subfolders_in_s3_folder(
         bucket_name=BUCKET, folder_name=f"{SURVEYORS}/{PROJECT_CODE}/"

From d45059e40df1b266f26ef2046380ba3128353fd2 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 29 Jul 2024 16:28:18 +0100
Subject: [PATCH 036/182] updating solar recommender

---
 backend/apis/GoogleSolarApi.py            |  36 +++++--
 backend/app/plan/router.py                |   3 +-
 recommendations/SolarPvRecommendations.py | 118 ++++++++--------------
 recommendations/WindowsRecommendations.py |   6 +-
 4 files changed, 73 insertions(+), 90 deletions(-)

diff --git a/backend/apis/GoogleSolarApi.py b/backend/apis/GoogleSolarApi.py
index 074a9ece..c6bb3dde 100644
--- a/backend/apis/GoogleSolarApi.py
+++ b/backend/apis/GoogleSolarApi.py
@@ -8,6 +8,7 @@ import time
 from backend.app.db.functions.solar_functions import get_solar_data, store_batch_data
 from utils.logger import setup_logger
 from sklearn.preprocessing import MinMaxScaler
+from recommendations.Costs import Costs
 
 logger = setup_logger()
 
@@ -107,7 +108,14 @@ class GoogleSolarApi:
 
     @lru_cache(maxsize=128)
     def get(
-        self, longitude, latitude, energy_consumption, required_quality="MEDIUM", is_building=False, session=None,
+        self,
+        longitude,
+        latitude,
+        energy_consumption,
+        property_instance=None,
+        required_quality="MEDIUM",
+        is_building=False,
+        session=None,
         uprn=None
     ):
         """
@@ -116,6 +124,7 @@ class GoogleSolarApi:
         :param longitude: The longitude of the location.
         :param latitude: The latitude of the location.
         :param energy_consumption: The energy consumption of the building/unit associated to the longitude and latitude.
+        :param property_instance: The property instance associated to the longitude and latitude.
         :param required_quality: The required quality of the data (default is "MEDIUM").
         :param is_building: Whether the energy consumption is for a building or a unit.
         :param session: The database session to use for the query (default is None).
@@ -158,7 +167,9 @@ class GoogleSolarApi:
         self.roof_segment_indexes = [segment['segmentIndex'] for segment in self.roof_segments]
 
         # We now start finding the solar panel configurations
-        self.optimise_solar_configuration(energy_consumption=energy_consumption, is_building=is_building)
+        self.optimise_solar_configuration(
+            energy_consumption=energy_consumption, is_building=is_building, property_instance=property_instance
+        )
 
     def save_to_db(self, session, uprns_to_location, scenario_type):
         if self.insights_data is None:
@@ -178,7 +189,7 @@ class GoogleSolarApi:
                 "yearly_dc_energy",
                 "total_cost",
                 "panneled_roof_area",
-                "array_warrage",
+                "array_wattage",
                 "initial_ac_kwh_per_year",
                 "lifetime_ac_kwh",
                 "roi",
@@ -191,7 +202,7 @@ class GoogleSolarApi:
                 "yearly_dc_energy": "yearly_dc_kwh",
                 "total_cost": "cost",
                 "panneled_roof_area": "panelled_roof_area",
-                "array_warrage": "array_kwhp",
+                "array_wattage": "array_kwhp",
                 "initial_ac_kwh_per_year": "yearly_ac_kwh",
             }
         )
@@ -226,12 +237,14 @@ class GoogleSolarApi:
                 installation_life_span)) /
             (1 - efficiency_depreciation_factor))
 
-    def optimise_solar_configuration(self, energy_consumption, is_building=False):
+    def optimise_solar_configuration(self, energy_consumption, is_building=False, property_instance=None):
         """
         Optimise the solar panel configuration for the building.
         :return:
         """
 
+        cost_instance = Costs(property_instance=property_instance) if property_instance is not None else None
+
         # Remove any north facing roof segments
         panel_performance = []
         for config in self.insights_data["solarPotential"]["solarPanelConfigs"]:
@@ -246,7 +259,14 @@ class GoogleSolarApi:
                 wattage = segment["panelsCount"] * self.insights_data["solarPotential"]["panelCapacityWatts"]
                 generated_dc_energy = segment["yearlyEnergyDcKwh"]
                 ratio = generated_dc_energy / wattage
-                cost = MCS_SOLAR_PV_COST_DATA["average_cost_per_kwh"] * (wattage / 1000)
+
+                if cost_instance is None:
+                    cost = MCS_SOLAR_PV_COST_DATA["average_cost_per_kwh"] * (wattage / 1000)
+                else:
+                    cost = cost_instance.solar_pv(
+                        wattage=wattage, has_battery=False
+                    )["total"]
+                    
                 roi_summary.append(
                     {
                         "segmentIndex": segment["segmentIndex"],
@@ -274,7 +294,7 @@ class GoogleSolarApi:
                     "total_cost": total_cost,
                     "weighted_ratio": weighted_ratio,
                     "panneled_roof_area": roi_summary["panneled_roof_area"].sum(),
-                    "array_warrage": roi_summary["n_panels"].sum() * self.panel_wattage
+                    "array_wattage": roi_summary["n_panels"].sum() * self.panel_wattage
                 }
             )
 
@@ -290,7 +310,7 @@ class GoogleSolarApi:
 
         # Remove anything where the total ac energy is less than half of the array wattage
         panel_performance = panel_performance[
-            (panel_performance["initial_ac_kwh_per_year"] / panel_performance["array_warrage"]) >= 0.5
+            (panel_performance["initial_ac_kwh_per_year"] / panel_performance["array_wattage"]) >= 0.5
             ]
 
         # 2) Calculate the liftime solar energy production
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index ced67bfe..f5eba1de 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -546,7 +546,8 @@ async def trigger_plan(body: PlanTriggerRequest):
                     energy_consumption=unit["energy_consumption"],
                     is_building=False,
                     session=session,
-                    uprn=unit["uprn"]
+                    uprn=unit["uprn"],
+                    property_instance=property_instance
                 )
 
                 # Store the data in the database
diff --git a/recommendations/SolarPvRecommendations.py b/recommendations/SolarPvRecommendations.py
index 4eece985..18a170e2 100644
--- a/recommendations/SolarPvRecommendations.py
+++ b/recommendations/SolarPvRecommendations.py
@@ -100,7 +100,7 @@ class SolarPvRecommendations:
             roof_coverage_percent = round(recommendation_config["panneled_roof_area"] / total_roof_area * 100)
             # Spread the cost to the individual units - adding a 20% contingency
             total_cost = recommendation_config["total_cost"] / n_units
-            kw = np.floor(recommendation_config["array_warrage"] / 100) / 10
+            kw = np.floor(recommendation_config["array_wattage"] / 100) / 10
             # Default to a weeks work for a team of 3 people doing 8 hour days
             labour_days = 5
             labour_hours = 3 * 8 * labour_days
@@ -150,84 +150,46 @@ class SolarPvRecommendations:
             self.recommend_building_analysis(phase)
             return
 
-        solar_pv_percentage = self.property.solar_pv_percentage
-        # We round up to the neaest 10%
-        solar_pv_percentage = np.ceil(solar_pv_percentage * 10) / 10
+        panel_performance = self.property.solar_panel_configuration["panel_performance"]
+        roof_area = self.property.roof_area
 
-        # For the solar recommendations, we produce the following scenarios:
-        # 1) Solar panels only, we present a high, medium and low coverage
-        # 2) With and without battery
-        roof_coverage_scenarios = [
-            solar_pv_percentage - 0.1, solar_pv_percentage,
-        ]
-        if solar_pv_percentage <= 0.4:
-            roof_coverage_scenarios.append(solar_pv_percentage + 0.1)
-        # We make sure we haven't gone too low or high - we allow no more than 60% coverage
-        roof_coverage_scenarios = [v for v in roof_coverage_scenarios if 0 <= v <= 0.6]
-        # If we only have two scenarios, we add a coverage scenario 10% less than the smallest
-        if len(roof_coverage_scenarios) == 2:
-            roof_coverage_scenarios.insert(0, roof_coverage_scenarios[0] - 0.1)
-        battery_scenarios = [False, True]
+        solar_configurations = panel_performance.head(3).reset_index(drop=True)
 
-        scenarios_with_wattage = []
-        for roof_coverage in roof_coverage_scenarios:
-            # We now have a property which is potentially suitable for solar PV
-            solar_pv_roof_area = self.property.get_solar_pv_roof_area(roof_coverage)
+        # We combine each of these configurations with estimates with and without a battery
+        for rank, recommendation_config in solar_configurations.iterrows():
+            roof_coverage_percent = round(recommendation_config["panneled_roof_area"] / roof_area * 100)
+            for has_battery in [False, True]:
+                cost_result = self.costs.solar_pv(
+                    wattage=recommendation_config["array_wattage"], has_battery=has_battery
+                )
+                kw = np.floor(recommendation_config["array_wattage"] / 100) / 10
+                if has_battery:
+                    description = (f"Install a {kw} kilowatt-peak (kWp) solar photovoltaic (PV) panel system on "
+                                   f"{round(roof_coverage_percent)}% the roof, with a battery storage system.")
+                else:
+                    description = (f"Install a {kw} kilowatt-peak (kWp) solar photovoltaic (PV) p"
+                                   f"anel system on {round(roof_coverage_percent)}% the roof.")
 
-            number_solar_panels = np.floor(solar_pv_roof_area / self.SOLAR_PANEL_AREA)
-            solar_panel_wattage = number_solar_panels * self.SOLAR_PANEL_WATTAGE
+                already_installed = "solar_pv" in self.property.already_installed
+                if already_installed:
+                    cost_result = override_costs(cost_result)
 
-            if solar_panel_wattage < self.MIN_SYSTEM_WATTAGE:
-                continue
-
-            solar_panel_wattage = np.clip(
-                a=solar_panel_wattage, a_min=self.MIN_SYSTEM_WATTAGE, a_max=self.MAX_SYSTEM_WATTAGE
-            )
-            scenarios_with_wattage.append((roof_coverage, solar_panel_wattage))
-
-        # We trim the scenarios, so that we don't have duplicate wattages
-        scenarios_with_wattage = self.trim_solar_wattage_options(scenarios_with_wattage)
-
-        # Produce the cross product of the scenarios
-        scenarios = [
-            (roof, wattage, battery) for roof, wattage in scenarios_with_wattage for battery in battery_scenarios
-        ]
-        # We deduce the wattage of the solar panels based on the roof coverage
-
-        for roof_coverage, solar_panel_wattage, has_battery in scenarios:
-            # We now have a property which is potentially suitable for solar PV
-            roof_coverage_percent = round(roof_coverage * 100)
-            # Given the wattage, we estimate the cost of the solar PV system. This is based on the MCS database
-            # of solar PV installations
-            cost_result = self.costs.solar_pv(wattage=solar_panel_wattage, has_battery=has_battery)
-            kw = np.floor(solar_panel_wattage / 100) / 10
-
-            if has_battery:
-                description = (f"Install a {kw} kilowatt-peak (kWp) solar photovoltaic (PV) panel system on "
-                               f"{round(roof_coverage_percent)}% the roof, with a battery storage system.")
-            else:
-                description = (f"Install a {kw} kilowatt-peak (kWp) solar photovoltaic (PV) p"
-                               f"anel system on {round(roof_coverage_percent)}% the roof.")
-
-            already_installed = "solar_pv" in self.property.already_installed
-            if already_installed:
-                cost_result = override_costs(cost_result)
-
-            self.recommendation.append(
-                {
-                    "phase": phase,
-                    "parts": [],
-                    "type": "solar_pv",
-                    "description": description,
-                    "starting_u_value": None,
-                    "new_u_value": None,
-                    "sap_points": None,
-                    "already_installed": already_installed,
-                    **cost_result,
-                    # This is required for simulating the SAP impact. solar_pv_percentage is between 0 & 1 so we scale
-                    # back up here
-                    "photo_supply": 100 * roof_coverage,
-                    "has_battery": has_battery,
-                    "description_simulation": {"photo-supply": 100 * roof_coverage},
-                }
-            )
+                self.recommendation.append(
+                    {
+                        "phase": phase,
+                        "parts": [],
+                        "type": "solar_pv",
+                        "description": description,
+                        "starting_u_value": None,
+                        "new_u_value": None,
+                        "sap_points": None,
+                        "already_installed": already_installed,
+                        **cost_result,
+                        # This is required for simulating the SAP impact. roof_coverage_percent is already
+                        # expressed on a 0-100 scale (panelled roof area / roof area * 100), so it is
+                        # passed through without any further scaling
+                        "photo_supply": roof_coverage_percent,
+                        "has_battery": has_battery,
+                        "description_simulation": {"photo-supply": roof_coverage_percent},
+                    }
+                )
diff --git a/recommendations/WindowsRecommendations.py b/recommendations/WindowsRecommendations.py
index 9a30cd2e..3826a470 100644
--- a/recommendations/WindowsRecommendations.py
+++ b/recommendations/WindowsRecommendations.py
@@ -53,14 +53,14 @@ class WindowsRecommendations:
         if not number_of_windows:
             raise ValueError("Number of windows not specified")
 
-        if windows_area is not None:
-            raise Exception("We have windows area, we should use this data for our recommendations!!!")
-
         if self.property.windows["has_glazing"] & (
             self.property.windows["glazing_coverage"] == "full"
         ):
             return
 
+        if windows_area is not None:
+            raise Exception("We have windows area, we should use this data for our recommendations!!!")
+
         # We scale the number of windows based on the proportion of existing glazing
         if self.property.data["multi-glaze-proportion"] != "":
             n_windows_scalar = 1 - (

From cf3b603a367cac9a04e372cba834d5dd8944e5d1 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 29 Jul 2024 18:07:16 +0100
Subject: [PATCH 037/182] filling constituency

---
 backend/Property.py                           | 20 ++++++++-----------
 .../functions/energy_assessment_functions.py  |  3 ++-
 backend/app/db/models/energy_assessments.py   |  6 +++---
 backend/app/plan/router.py                    |  1 +
 etl/xml_survey_extraction/app.py              |  4 +++-
 5 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/backend/Property.py b/backend/Property.py
index f15a0d7b..1586835a 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -728,14 +728,14 @@ class Property:
         energy_consumption_client.data = None
 
         heating_prediction = (
-            float(condition_data["space_heating_kwh"]) if condition_data["space_heating_kwh"]
+            float(condition_data["space_heating_kwh"]) if condition_data.get("space_heating_kwh") is not None
             else energy_consumption_client.score_new_data(
                 new_data=scoring_df, target="heating_kwh"
             )[0]
         )
 
         hot_water_prediction = (
-            float(condition_data["water_heating_kwh"]) if condition_data["water_heating_kwh"]
+            float(condition_data["water_heating_kwh"]) if condition_data.get("water_heating_kwh") is not None
             else energy_consumption_client.score_new_data(
                 new_data=scoring_df, target="hot_water_kwh"
             )[0]
@@ -1051,18 +1051,18 @@ class Property:
 
         # We can update the number of floors if we have this information in the condition data
         self.number_of_floors = int(self.energy_assessment_condition_data["number_of_floors"]) \
-            if condition_data["number_of_floors"] is not None \
+            if condition_data.get("number_of_floors") is not None \
             else self.number_of_floors
 
         self.perimeter = float(self.energy_assessment_condition_data["perimeter"]) \
-            if condition_data["perimeter"] is not None \
+            if condition_data.get("perimeter") is not None \
             else estimate_perimeter(
             floor_area=self.floor_area / self.number_of_floors,
             num_rooms=self.number_of_rooms / self.number_of_floors
         )
 
         self.insulation_wall_area = float(self.energy_assessment_condition_data["insulation_wall_area"]) \
-            if condition_data["insulation_wall_area"] is not None \
+            if condition_data.get("insulation_wall_area") is not None \
             else estimate_external_wall_area(
             num_floors=self.number_of_floors,
             floor_height=self.floor_height,
@@ -1071,13 +1071,9 @@ class Property:
         )
 
         self.insulation_floor_area = float(self.energy_assessment_condition_data["main_dwelling_ground_floor_area"]) \
-            if condition_data["main_dwelling_ground_floor_area"] is not None \
+            if condition_data.get("main_dwelling_ground_floor_area") is not None \
             else self.floor_area / self.number_of_floors
 
-        # self.pitched_roof_area = esimtate_pitched_roof_area(
-        #     floor_area=self.insulation_floor_area, floor_height=self.floor_height
-        # )
-
     def set_floor_level(self):
         self.floor_level = (
             FLOOR_LEVEL_MAP[self.data["floor-level"]]
@@ -1177,7 +1173,7 @@ class Property:
         condition_data = self.energy_assessment_condition_data.copy()
 
         self.number_of_windows = int(condition_data["number_of_windows"]) \
-            if condition_data["number_of_windows"] is not None \
+            if condition_data.get("number_of_windows") is not None \
             else estimate_windows(
             property_type=self.data["property-type"],
             built_form=self.data["built-form"],
@@ -1187,7 +1183,7 @@ class Property:
         )
 
         self.windows_area = float(condition_data["windows_area"]) \
-            if condition_data["windows_area"] is not None \
+            if condition_data.get("windows_area") is not None \
             else None
 
     def set_energy_source(self):
diff --git a/backend/app/db/functions/energy_assessment_functions.py b/backend/app/db/functions/energy_assessment_functions.py
index 45fb2b8b..b223d2f5 100644
--- a/backend/app/db/functions/energy_assessment_functions.py
+++ b/backend/app/db/functions/energy_assessment_functions.py
@@ -55,7 +55,8 @@ def get_latest_assessment_by_uprn(session: Session, uprn: int) -> Optional[Energ
         # Query the EnergyAssessment model, filter by uprn, order by inspection_date in descending order
         latest_assessment = session.query(EnergyAssessment).filter_by(uprn=uprn).order_by(
             desc(EnergyAssessment.inspection_date)).first()
-        return latest_assessment.to_dict() if latest_assessment else latest_assessment.empty_response()
+
+        return latest_assessment.to_dict() if latest_assessment else EnergyAssessment.empty_response()
     except Exception as e:
         print(f"An error occurred: {e}")
         return None
diff --git a/backend/app/db/models/energy_assessments.py b/backend/app/db/models/energy_assessments.py
index 2c3cc144..3928f9fa 100644
--- a/backend/app/db/models/energy_assessments.py
+++ b/backend/app/db/models/energy_assessments.py
@@ -1,4 +1,4 @@
-from sqlalchemy import Column, Integer, BigInteger, Text, Float, DateTime, Boolean
+from sqlalchemy import Column, Integer, BigInteger, Text, Float, DateTime, Boolean, Date
 from sqlalchemy.ext.declarative import declarative_base
 
 Base = declarative_base()
@@ -94,8 +94,8 @@ class EnergyAssessment(Base):
     hot_water_cost_potential = Column(Text, nullable=False)
     lighting_cost_current = Column(Text, nullable=False)
     energy_consumption_current = Column(Text, nullable=False)
-    lodgement_date = Column(DateTime(timezone=True), nullable=False)
-    lodgement_datetime = Column(DateTime(timezone=True), nullable=False)
+    lodgement_date = Column(Date, nullable=False)
+    lodgement_datetime = Column(DateTime(timezone=False), nullable=False)
     mainheat_description = Column(Text, nullable=False)
     floor_height = Column(Float, nullable=False)
     glazed_type = Column(Text, nullable=False)
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index f5eba1de..2d024f21 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -244,6 +244,7 @@ def create_epc_records(epc_searcher: SearchEpc, energy_assessment: dict):
     # We insert county into the epc, since right now this isn't something that we pull out from the energy
     # assessment
     epc["county"] = epc_searcher.newest_epc["county"]
+    epc["constituency"] = epc_searcher.newest_epc["constituency"]
 
     # We check if the energy assessment is newer than the newest EPC
     if pd.to_datetime(energy_assessment_date) > pd.to_datetime(epc_searcher.newest_epc["inspection-date"]):
diff --git a/etl/xml_survey_extraction/app.py b/etl/xml_survey_extraction/app.py
index 18f84ba2..73551d09 100644
--- a/etl/xml_survey_extraction/app.py
+++ b/etl/xml_survey_extraction/app.py
@@ -30,7 +30,9 @@ def main():
     #       We'll need to get the uprn from the folder name, which we can do with EpcSearcher class
 
     # TODO: Pull out county, as in create_epc_records in the router, we pull it from the latest EPC, but we should
-    #       be able to deduce it from just the address
+    #       be able to deduce it from just the address. Same for constituency and constituency_label
+
+    # TODO: Store the project code in the database
     #
     energy_assessments = list_files_and_subfolders_in_s3_folder(
         bucket_name=BUCKET, folder_name=f"{SURVEYORS}/{PROJECT_CODE}/"

From 45d74b0d8c8d047ee1b5cf1872876d02f62e38f0 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 29 Jul 2024 18:13:37 +0100
Subject: [PATCH 038/182] added description simulation to fireplace
 recommendations

---
 recommendations/FireplaceRecommendations.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/recommendations/FireplaceRecommendations.py b/recommendations/FireplaceRecommendations.py
index 601a8eb0..9a9d7f76 100644
--- a/recommendations/FireplaceRecommendations.py
+++ b/recommendations/FireplaceRecommendations.py
@@ -50,5 +50,8 @@ class FireplaceRecommendations(Definitions):
                 # Take a very basic estimate of 6 hours, multipled by the number of open fireplaces to seal
                 "labour_hours": 6 * number_open_fireplaces,
                 "labour_days": 6 * number_open_fireplaces / 8,  # Assume 8 hour day
+                "description_simulation": {
+                    "number-open-fireplaces": 0
+                }
             }
         ]

From b73860c742414b51dd28a31a78456f6739608123 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 29 Jul 2024 18:51:15 +0100
Subject: [PATCH 039/182] debugging unit share of energy

---
 backend/app/plan/router.py                | 3 ++-
 recommendations/SolarPvRecommendations.py | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 2d024f21..811ec4a3 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -568,7 +568,8 @@ async def trigger_plan(body: PlanTriggerRequest):
                 property_instance.set_solar_panel_configuration(
                     solar_panel_configuration={
                         "insights_data": solar_api_client.insights_data,
-                        "panel_performance": solar_api_client.panel_performance
+                        "panel_performance": solar_api_client.panel_performance,
+                        "unit_share_of_energy": 1
                     },
                     roof_area=solar_api_client.roof_area
                 )
diff --git a/recommendations/SolarPvRecommendations.py b/recommendations/SolarPvRecommendations.py
index 18a170e2..63519d02 100644
--- a/recommendations/SolarPvRecommendations.py
+++ b/recommendations/SolarPvRecommendations.py
@@ -190,6 +190,7 @@ class SolarPvRecommendations:
                         # back up here
                         "photo_supply": roof_coverage_percent,
                         "has_battery": has_battery,
+                        "initial_ac_kwh_per_year": recommendation_config["initial_ac_kwh_per_year"],
                         "description_simulation": {"photo-supply": roof_coverage_percent},
                     }
                 )

From 54d2dce05da102c09804c4010d9391f1b9d8e3e2 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 30 Jul 2024 10:23:22 +0100
Subject: [PATCH 040/182] Added check for room roof insulated

---
 recommendations/RoofRecommendations.py  | 27 ++++++++++++++++++++++---
 recommendations/recommendation_utils.py | 13 +++++++++++-
 2 files changed, 36 insertions(+), 4 deletions(-)

diff --git a/recommendations/RoofRecommendations.py b/recommendations/RoofRecommendations.py
index a1f8c67c..615289de 100644
--- a/recommendations/RoofRecommendations.py
+++ b/recommendations/RoofRecommendations.py
@@ -87,6 +87,17 @@ class RoofRecommendations:
 
         return (self.insulation_thickness > self.MINIMUM_LOFT_ISULATION_MM) and self.property.roof["is_pitched"]
 
+    def is_room_roof_insulated(self):
+
+        """
+        Check if the room roof is already insulated
+        """
+
+        return (
+            self.property.roof["is_roof_room"] and
+            self.property.roof["insulation_thickness"] in ["average", "above_average"]
+        )
+
     def recommend(self, phase):
 
         if self.property.roof["has_dwelling_above"]:
@@ -105,8 +116,8 @@ class RoofRecommendations:
         if (self.insulation_thickness >= self.MINIMUM_FLAT_ROOF_ISULATION_MM) and self.property.roof["is_flat"]:
             return
 
-        if self.property.roof["is_roof_room"]:
-            raise ValueError("Update convert_thickness_to_numeric for room roof and implement")
+        if self.is_room_roof_insulated():
+            return
 
         # If we have a u-value already, need to implement this
         if u_value:
@@ -118,7 +129,17 @@ class RoofRecommendations:
                 return
             raise NotImplementedError("Implement me")
 
-        u_value = get_roof_u_value(**{**self.property.roof, "age_band": self.property.age_band})
+        u_value = get_roof_u_value(
+            insulation_thickness=self.property.roof["insulation_thickness"],
+            has_dwelling_above=self.property.roof["has_dwelling_above"],
+            is_loft=self.property.roof["is_loft"],
+            is_roof_room=self.property.roof["is_roof_room"],
+            is_thatched=self.property.roof["is_thatched"],
+            age_band=self.property.age_band,
+            is_flat=self.property.roof["is_flat"],
+            is_pitched=self.property.roof["is_pitched"],
+            is_at_rafters=self.property.roof["is_at_rafters"],
+        )
 
         self.estimated_u_value = u_value
         if (u_value <= self.BUILDING_REGULATIONS_PART_L_MAX_U_VALUE) and (
diff --git a/recommendations/recommendation_utils.py b/recommendations/recommendation_utils.py
index 9b5e22d1..4980f30a 100644
--- a/recommendations/recommendation_utils.py
+++ b/recommendations/recommendation_utils.py
@@ -207,6 +207,17 @@ def get_wall_u_value(
 
 def get_u_value_from_s9(thickness, s9, is_loft, is_roof_room, is_thatched):
     """Get the U-value from table S9 based on the insulation thickness."""
+
+    if is_roof_room:
+        # We re-map the thickness
+        thickness_map = {
+            "below average": 50,
+            "average": 100,
+            "above average": 270,
+            "none": 0,
+        }
+        thickness = thickness_map[thickness]
+
     if thickness in ["below average", "average", "above average", "none", None] or (
         not is_loft and not is_roof_room
     ):
@@ -676,7 +687,7 @@ def estimate_windows(
     property_type, built_form, construction_age_band, floor_area, number_habitable_rooms
 ):
     # If there is an extension, that will boost the number of habitable rooms
-    
+
     # Base window count based on habitable rooms
     window_count = number_habitable_rooms
 

From d4d9b8e518ae81d307d81657533b4edf4d616840 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 30 Jul 2024 11:17:21 +0100
Subject: [PATCH 041/182] VE recommendations pushed to front end

---
 backend/Property.py                     |  6 ++++-
 backend/app/plan/router.py              | 30 ++++++++++++++++---------
 backend/ml_models/Valuation.py          |  3 +++
 etl/xml_survey_extraction/app.py        |  2 +-
 recommendations/recommendation_utils.py |  8 +++----
 5 files changed, 33 insertions(+), 16 deletions(-)

diff --git a/backend/Property.py b/backend/Property.py
index 1586835a..f82c03a7 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -183,6 +183,7 @@ class Property:
         # This additional condition data should change how we pass kwargs to this. We should no longer need to pass
         # kwargs to this class, but instead, we should pass the energy assessment condition data
         self.energy_assessment_condition_data = energy_assessment["condition"]
+        self.energy_assessment_is_newer = energy_assessment["energy_assessment_is_newer"]
 
         # TODO: We keep this but only temporarily until we add bathrooms, bedrooms, building id to the condition data
         self.parse_kwargs(kwargs)
@@ -877,7 +878,10 @@ class Property:
         property_data = {
             "creation_status": "READY",
             "uprn": int(self.data["uprn"]),
-            "building_reference_number": int(self.data["building-reference-number"]),
+            "building_reference_number": (
+                int(self.data["building-reference-number"]) if
+                self.data["building-reference-number"] is not None else None
+            ),
             "has_pre_condition_report": True,
             "has_recommendations": True,
             "property_type": self.data["property-type"],
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 811ec4a3..e28b4d27 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -232,11 +232,12 @@ def create_epc_records(epc_searcher: SearchEpc, energy_assessment: dict):
     """
 
     if not energy_assessment["epc"]:
+        energy_assessment_is_newer = False
         return {
             'original_epc': epc_searcher.newest_epc.copy(),
             'full_sap_epc': epc_searcher.full_sap_epc.copy(),
             'old_data': epc_searcher.older_epcs.copy(),
-        }
+        }, energy_assessment_is_newer
 
     epc = energy_assessment["epc"]
     energy_assessment_date = epc["inspection-date"].strftime("%Y-%m-%d")
@@ -249,11 +250,12 @@ def create_epc_records(epc_searcher: SearchEpc, energy_assessment: dict):
     # We check if the energy assessment is newer than the newest EPC
     if pd.to_datetime(energy_assessment_date) > pd.to_datetime(epc_searcher.newest_epc["inspection-date"]):
         # In this case, our energy assessment is newer than the EPCs available for this property
+        energy_assessment_is_newer = True
         return {
             "original_epc": epc,
             "full_sap_epc": epc_searcher.full_sap_epc.copy(),
             "old_data": epc_searcher.older_epcs.copy() + [epc_searcher.newest_epc.copy()]
-        }
+        }, energy_assessment_is_newer
 
     # We check if the EPC we have produced is contained in the set of EPCs done for the property
     # We do this based on inspection-date and SAP
@@ -262,6 +264,7 @@ def create_epc_records(epc_searcher: SearchEpc, energy_assessment: dict):
         if x["inspection-date"] == energy_assessment_date and
                    x["current-energy-efficiency"] == epc["current-energy-efficiency"]
     ]
+    energy_assessment_is_newer = False
 
     if epc_in_historicals:
         # Then the EPC we have produced is already in the set of EPCs, and our EPC is older than the newest
@@ -269,7 +272,7 @@ def create_epc_records(epc_searcher: SearchEpc, energy_assessment: dict):
             "original_epc": epc_searcher.newest_epc.copy(),
             "full_sap_epc": epc_searcher.full_sap_epc.copy(),
             "old_data": epc_searcher.older_epcs.copy()
-        }
+        }, energy_assessment_is_newer
 
     # In this case, our EPC is older than the newest publically avaible one, but is not contained in
     # the historicals, so it can't have been lodged, so we include it in the old data
@@ -277,7 +280,7 @@ def create_epc_records(epc_searcher: SearchEpc, energy_assessment: dict):
         'original_epc': epc_searcher.newest_epc.copy(),
         'full_sap_epc': epc_searcher.full_sap_epc.copy(),
         'old_data': epc_searcher.older_epcs.copy() + [epc],
-    }
+    }, energy_assessment_is_newer
 
 
 router = APIRouter(
@@ -364,8 +367,11 @@ async def trigger_plan(body: PlanTriggerRequest):
 
             # If we have an energy assessment in place, that is newer than all of the previous EPCs, we use that.
             # Otherwise, we use the newest EPC
-            epc_records = create_epc_records(epc_searcher, energy_assessment)
-
+            # energy_assessment_is_newer will tell us if the energy assessment is newer than the newest EPC that
+            # has been publicly lodged
+            epc_records, energy_assessment["energy_assessment_is_newer"] = create_epc_records(
+                epc_searcher, energy_assessment
+            )
             patch = next((
                 x for x in patches if (x["address"] == config["address"]) and (x["postcode"] == config["postcode"])
             ), {})
@@ -432,6 +438,7 @@ async def trigger_plan(body: PlanTriggerRequest):
             p.get_components(cleaned=cleaned, energy_consumption_client=energy_consumption_client)
             p.get_spatial_data(uprn_filenames)
 
+        logger.info("Performing solar analysis")
         # TODO: Tidy this up
         building_ids = [
             {
@@ -729,10 +736,13 @@ async def trigger_plan(body: PlanTriggerRequest):
                     property_value_increase_ranges[p.id] = valuations
 
                     # Your existing operations
-                    property_details_epc = p.get_property_details_epc(
-                        portfolio_id=body.portfolio_id, rating_lookup=rating_lookup,
-                    )
-                    create_property_details_epc(session, property_details_epc)
+                    # If we have an energy assessment, which is more recent than the EPC, we don't need to store
+                    # the EPC details in the database
+                    if not p.energy_assessment_is_newer:
+                        property_details_epc = p.get_property_details_epc(
+                            portfolio_id=body.portfolio_id, rating_lookup=rating_lookup,
+                        )
+                        create_property_details_epc(session, property_details_epc)
 
                     update_or_create_property_spatial_details(session, p.uprn, p.spatial)
 
diff --git a/backend/ml_models/Valuation.py b/backend/ml_models/Valuation.py
index b87f156b..cbcebb9f 100644
--- a/backend/ml_models/Valuation.py
+++ b/backend/ml_models/Valuation.py
@@ -100,6 +100,9 @@ class PropertyValuation:
         200140647: 481_000,
         200140648: 373_000,
         200140649: 373_000,
+        # Vander Elliot Intrusive surveys
+        12103116: 1_537_000,
+        12103117: 1_404_000,
     }
 
     # We base our valuation uplifts on a number of sources
diff --git a/etl/xml_survey_extraction/app.py b/etl/xml_survey_extraction/app.py
index 73551d09..ab9eae2d 100644
--- a/etl/xml_survey_extraction/app.py
+++ b/etl/xml_survey_extraction/app.py
@@ -127,7 +127,7 @@ def main():
         "already_installed_file_path": "",
         "patches_file_path": "",
         "non_invasive_recommendations_file_path": "",
-        # "exclusions": [],
+        "exclusions": ["floor_insulation", "fireplace"],
         "budget": None,
     }
     print(body)
diff --git a/recommendations/recommendation_utils.py b/recommendations/recommendation_utils.py
index 4980f30a..d14a0d4c 100644
--- a/recommendations/recommendation_utils.py
+++ b/recommendations/recommendation_utils.py
@@ -211,10 +211,10 @@ def get_u_value_from_s9(thickness, s9, is_loft, is_roof_room, is_thatched):
     if is_roof_room:
         # We re-map the thickness
         thickness_map = {
-            "below average": 50,
-            "average": 100,
-            "above average": 270,
-            "none": 0,
+            "below average": "50",
+            "average": "100",
+            "above average": "270",
+            "none": "0",
         }
         thickness = thickness_map[thickness]
 

From c948c240611130313724511318deb29400db2fec Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 30 Jul 2024 11:37:02 +0100
Subject: [PATCH 042/182] Adding back storage of property details epc

---
 backend/app/plan/router.py | 14 +++++---------
 1 file changed, 5 insertions(+), 9 deletions(-)

diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index e28b4d27..099d0827 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -734,15 +734,11 @@ async def trigger_plan(body: PlanTriggerRequest):
 
                     valuations = PropertyValuation.estimate(property_instance=p, target_epc=new_epc)
                     property_value_increase_ranges[p.id] = valuations
-
-                    # Your existing operations
-                    # If we have an energy assessment, which is more recent than the EPC, we don't need to store
-                    # the EPC details in the database
-                    if not p.energy_assessment_is_newer:
-                        property_details_epc = p.get_property_details_epc(
-                            portfolio_id=body.portfolio_id, rating_lookup=rating_lookup,
-                        )
-                        create_property_details_epc(session, property_details_epc)
+                    
+                    property_details_epc = p.get_property_details_epc(
+                        portfolio_id=body.portfolio_id, rating_lookup=rating_lookup,
+                    )
+                    create_property_details_epc(session, property_details_epc)
 
                     update_or_create_property_spatial_details(session, p.uprn, p.spatial)
 

From e50d82ac24d61296302195419005f0021d9af96d Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 30 Jul 2024 11:45:24 +0100
Subject: [PATCH 043/182] rounding photo supply to the nearest 5

---
 recommendations/SolarPvRecommendations.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/recommendations/SolarPvRecommendations.py b/recommendations/SolarPvRecommendations.py
index 63519d02..66d2ac78 100644
--- a/recommendations/SolarPvRecommendations.py
+++ b/recommendations/SolarPvRecommendations.py
@@ -174,6 +174,10 @@ class SolarPvRecommendations:
                 if already_installed:
                     cost_result = override_costs(cost_result)
 
+                # We calculate the photo_supply we're going to simulate the impact with and we round this to the
+                # nearest 5
+                photo_supply = round(roof_coverage_percent / 5) * 5
+
                 self.recommendation.append(
                     {
                         "phase": phase,
@@ -188,9 +192,9 @@ class SolarPvRecommendations:
                         # This is required for simulating the SAP impact. solar_pv_percentage is between 0 & 1 so we
                         # scale
                         # back up here
-                        "photo_supply": roof_coverage_percent,
+                        "photo_supply": photo_supply,
                         "has_battery": has_battery,
                         "initial_ac_kwh_per_year": recommendation_config["initial_ac_kwh_per_year"],
-                        "description_simulation": {"photo-supply": roof_coverage_percent},
+                        "description_simulation": {"photo-supply": photo_supply},
                     }
                 )

From 996c71c1893deac7df89fa50bc76b5cc11c18880 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 30 Jul 2024 15:52:38 +0100
Subject: [PATCH 044/182] handle recommendations for insulated room roof

---
 backend/apis/GoogleSolarApi.py            |  5 +++--
 recommendations/RoofRecommendations.py    | 10 +++++++++-
 recommendations/SolarPvRecommendations.py |  8 ++------
 recommendations/recommendation_utils.py   |  8 +++++---
 4 files changed, 19 insertions(+), 12 deletions(-)

diff --git a/backend/apis/GoogleSolarApi.py b/backend/apis/GoogleSolarApi.py
index c6bb3dde..579e985d 100644
--- a/backend/apis/GoogleSolarApi.py
+++ b/backend/apis/GoogleSolarApi.py
@@ -123,7 +123,8 @@ class GoogleSolarApi:
 
         :param longitude: The longitude of the location.
         :param latitude: The latitude of the location.
-        :param energy_consumption: The energy consumption of the building/unit associated to the longitude and latitude.
+        :param energy_consumption: The energy consumption of the building/unit associated to the longitude and latitude,
+                                    that we wish to size the solar panels up against
         :param property_instance: The property instance associated to the longitude and latitude.
         :param required_quality: The required quality of the data (default is "MEDIUM").
         :param is_building: Whether the energy consumption is for a building or a unit.
@@ -266,7 +267,7 @@ class GoogleSolarApi:
                     cost = cost_instance.solar_pv(
                         wattage=wattage, has_battery=False
                     )["total"]
-                    
+
                 roi_summary.append(
                     {
                         "segmentIndex": segment["segmentIndex"],
diff --git a/recommendations/RoofRecommendations.py b/recommendations/RoofRecommendations.py
index 615289de..56f3721a 100644
--- a/recommendations/RoofRecommendations.py
+++ b/recommendations/RoofRecommendations.py
@@ -93,11 +93,19 @@ class RoofRecommendations:
         Check if the room roof is already insulated
         """
 
-        return (
+        full_insulated_room_roof = (
             self.property.roof["is_roof_room"] and
             self.property.roof["insulation_thickness"] in ["average", "above_average"]
         )
 
+        room_roof_insulated_at_rafters = (
+            self.property.roof["is_pitched"] and
+            self.property.roof["is_at_rafters"] and
+            self.property.roof["insulation_thickness"] in ["average", "above_average"]
+        )
+
+        return full_insulated_room_roof or room_roof_insulated_at_rafters
+
     def recommend(self, phase):
 
         if self.property.roof["has_dwelling_above"]:
diff --git a/recommendations/SolarPvRecommendations.py b/recommendations/SolarPvRecommendations.py
index 66d2ac78..63519d02 100644
--- a/recommendations/SolarPvRecommendations.py
+++ b/recommendations/SolarPvRecommendations.py
@@ -174,10 +174,6 @@ class SolarPvRecommendations:
                 if already_installed:
                     cost_result = override_costs(cost_result)
 
-                # We calculate the photo_supply we're going to simulate the impact with and we round this to the
-                # nearest 5
-                photo_supply = round(roof_coverage_percent / 5) * 5
-
                 self.recommendation.append(
                     {
                         "phase": phase,
@@ -192,9 +188,9 @@ class SolarPvRecommendations:
                         # This is required for simulating the SAP impact. solar_pv_percentage is between 0 & 1 so we
                         # scale
                         # back up here
-                        "photo_supply": photo_supply,
+                        "photo_supply": roof_coverage_percent,
                         "has_battery": has_battery,
                         "initial_ac_kwh_per_year": recommendation_config["initial_ac_kwh_per_year"],
-                        "description_simulation": {"photo-supply": photo_supply},
+                        "description_simulation": {"photo-supply": roof_coverage_percent},
                     }
                 )
diff --git a/recommendations/recommendation_utils.py b/recommendations/recommendation_utils.py
index d14a0d4c..ce32e061 100644
--- a/recommendations/recommendation_utils.py
+++ b/recommendations/recommendation_utils.py
@@ -205,10 +205,11 @@ def get_wall_u_value(
     return float(mapped_value)
 
 
-def get_u_value_from_s9(thickness, s9, is_loft, is_roof_room, is_thatched):
+def get_u_value_from_s9(thickness, s9, is_loft, is_roof_room, is_thatched, is_at_rafters):
     """Get the U-value from table S9 based on the insulation thickness."""
 
-    if is_roof_room:
+    # If the roof is pitched & insulated at the rafters, it's a room roof
+    if is_roof_room or is_at_rafters:
         # We re-map the thickness
         thickness_map = {
             "below average": "50",
@@ -219,7 +220,7 @@ def get_u_value_from_s9(thickness, s9, is_loft, is_roof_room, is_thatched):
         thickness = thickness_map[thickness]
 
     if thickness in ["below average", "average", "above average", "none", None] or (
-        not is_loft and not is_roof_room
+        not is_loft and not is_roof_room and not is_at_rafters
     ):
         return None
     elif thickness.endswith("+"):
@@ -291,6 +292,7 @@ def get_roof_u_value(
         is_loft=is_loft,
         is_roof_room=is_roof_room,
         is_thatched=is_thatched,
+        is_at_rafters=is_at_rafters
     )
 
     if u_value is not None:

From cadbd4f48a038eaa9774ef7efd374ee990376909 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 30 Jul 2024 17:42:36 +0100
Subject: [PATCH 045/182] Adding scenario structure to backend

---
 backend/Property.py                           |   2 +
 .../app/db/functions/portfolio_functions.py   |   2 +-
 .../db/functions/recommendations_functions.py |  26 +-
 backend/app/db/models/recommendations.py      |  19 ++
 backend/app/plan/router.py                    |  66 +++--
 backend/app/plan/schemas.py                   |   4 +
 etl/xml_survey_extraction/app.py              | 261 ++++++++++++------
 7 files changed, 268 insertions(+), 112 deletions(-)

diff --git a/backend/Property.py b/backend/Property.py
index f82c03a7..a1bfe265 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -77,12 +77,14 @@ class Property:
         non_invasive_recommendations=None,
         measures=None,
         energy_assessment=None,
+        is_new=True,
         **kwargs
     ):
 
         self.epc_record = epc_record
 
         self.id = id
+        self.is_new = is_new
 
         self.address = address
         self.postcode = postcode
diff --git a/backend/app/db/functions/portfolio_functions.py b/backend/app/db/functions/portfolio_functions.py
index 402675e8..008c4b8b 100644
--- a/backend/app/db/functions/portfolio_functions.py
+++ b/backend/app/db/functions/portfolio_functions.py
@@ -30,7 +30,7 @@ def aggregate_portfolio_recommendations(
         **aggregated_data
     }
 
-    # Get the portfolio and update the fields
+    # Get the portfolio and update the fields. This data needs to be stored against the plan, not the portfolio
     portfolio = session.query(Portfolio).filter_by(id=portfolio_id).one()
     # Update the data
     for key, value in aggregates_dict.items():
diff --git a/backend/app/db/functions/recommendations_functions.py b/backend/app/db/functions/recommendations_functions.py
index 365829e4..cfb3d570 100644
--- a/backend/app/db/functions/recommendations_functions.py
+++ b/backend/app/db/functions/recommendations_functions.py
@@ -1,8 +1,11 @@
 from sqlalchemy import insert, delete
 from sqlalchemy.orm import Session
-from backend.app.db.models.recommendations import Plan, Recommendation, RecommendationMaterials, PlanRecommendations
-from backend.app.db.models.portfolio import PropertyModel, PropertyTargetsModel, PropertyDetailsMeter, \
-    PropertyDetailsEpcModel
+from backend.app.db.models.recommendations import (
+    Plan, Recommendation, RecommendationMaterials, PlanRecommendations, Scenario
+)
+from backend.app.db.models.portfolio import (
+    PropertyModel, PropertyTargetsModel, PropertyDetailsMeter, PropertyDetailsEpcModel
+)
 
 
 def create_plan(session: Session, plan):
@@ -19,6 +22,20 @@ def create_plan(session: Session, plan):
     return new_plan.id
 
 
+def create_scenario(session: Session, scenario):
+    """
+    This function will insert a new record for the scenario in the database (no existence check is performed).
+    :param session: The database session
+    :param scenario: dictionary of data representing a scenario to be created
+    """
+
+    new_scenario = Scenario(**scenario)
+    session.add(new_scenario)
+    session.flush()
+
+    return new_scenario.id
+
+
 def create_recommendation(session: Session, recommendation):
     """
     This function will create a record for the recommendation in the database if it does not exist.
@@ -148,6 +165,9 @@ def clear_portfolio(session: Session, portfolio_id: int):
     # Delete all Plans associated with the portfolio
     session.execute(delete(Plan).where(Plan.portfolio_id == portfolio_id))
 
+    # Delete all Scenarios associated with the portfolio
+    session.execute(delete(Scenario).where(Scenario.portfolio_id == portfolio_id))
+
     # Delete all Recommendations associated with the properties
     session.execute(delete(Recommendation).where(Recommendation.property_id.in_(property_ids)))
 
diff --git a/backend/app/db/models/recommendations.py b/backend/app/db/models/recommendations.py
index 8ab7908f..6eddae1f 100644
--- a/backend/app/db/models/recommendations.py
+++ b/backend/app/db/models/recommendations.py
@@ -50,6 +50,7 @@ class Plan(Base):
     __tablename__ = 'plan'
 
     id = Column(BigInteger, primary_key=True, autoincrement=True)
+    name = Column(String, nullable=True, default="")
     portfolio_id = Column(BigInteger, ForeignKey(Portfolio.id), nullable=False)
     property_id = Column(BigInteger, ForeignKey(PropertyModel.id), nullable=False)
     created_at = Column(TIMESTAMP, nullable=False, server_default=func.now())
@@ -65,3 +66,21 @@ class PlanRecommendations(Base):
     id = Column(BigInteger, primary_key=True, autoincrement=True)
     plan_id = Column(BigInteger, ForeignKey('plan.id'), nullable=False)
     recommendation_id = Column(BigInteger, ForeignKey('recommendation.id'), nullable=False)
+
+
+class Scenario(Base):  # Stores the parameters of one trigger_plan run against a portfolio
+    __tablename__ = 'scenario'
+
+    id = Column(BigInteger, primary_key=True, autoincrement=True)
+    name = Column(String, nullable=False)  # populated from PlanTriggerRequest.scenario_name
+    created_at = Column(TIMESTAMP, nullable=False, server_default=func.now())  # trigger_plan passes its own value
+    budget = Column(Float)
+    portfolio_id = Column(BigInteger, ForeignKey(Portfolio.id), nullable=False)
+    housing_type = Column(String, nullable=False)
+    goal = Column(String, nullable=False)
+    trigger_file_path = Column(String, nullable=False)
+    already_installed_file_path = Column(String)
+    patches_file_path = Column(String)
+    non_invasive_recommendations_file_path = Column(String)
+    exclusions = Column(String)  # NOTE(review): the request sends a list of strings - confirm how it is serialised
+    multi_plan = Column(Boolean, default=False)  # mirrors PlanTriggerRequest.multi_plan
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 099d0827..1340bae3 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -20,7 +20,7 @@ from backend.app.db.functions.property_functions import (
     update_or_create_property_spatial_details
 )
 from backend.app.db.functions.recommendations_functions import (
-    create_plan, create_plan_recommendations, upload_recommendations
+    create_plan, create_plan_recommendations, upload_recommendations, create_scenario
 )
 from backend.app.db.functions.energy_assessment_functions import get_latest_assessment_by_uprn
 from backend.app.db.models.portfolio import rating_lookup
@@ -354,16 +354,17 @@ async def trigger_plan(body: PlanTriggerRequest):
             property_id, is_new = create_property(
                 session, body.portfolio_id, epc_searcher.address_clean, epc_searcher.postcode_clean, epc_searcher.uprn
             )
-            if not is_new:
+            if not is_new and not body.multi_plan:
                 continue
 
-            create_property_targets(
-                session,
-                property_id=property_id,
-                portfolio_id=body.portfolio_id,
-                epc_target=body.goal_value,
-                heat_demand_target=None
-            )
+            if is_new:
+                create_property_targets(
+                    session,
+                    property_id=property_id,
+                    portfolio_id=body.portfolio_id,
+                    epc_target=body.goal_value,
+                    heat_demand_target=None
+                )
 
             # If we have an energy assessment in place, that is newer than all of the previous EPCs, we use that.
             # Otherwise, we use the newest EPC
@@ -396,6 +397,7 @@ async def trigger_plan(body: PlanTriggerRequest):
             input_properties.append(
                 Property(
                     id=property_id,
+                    is_new=is_new,
                     address=epc_searcher.address_clean,
                     postcode=epc_searcher.postcode_clean,
                     epc_record=prepared_epc,
@@ -409,6 +411,25 @@ async def trigger_plan(body: PlanTriggerRequest):
         if not input_properties:
             return Response(status_code=204)
 
+        # If we have any work to do, we create a new scenario
+        scenario = create_scenario(
+            session=session,
+            scenario={
+                "name": body.scenario_name,
+                "created_at": created_at,
+                "budget": body.budget,
+                "portfolio_id": body.portfolio_id,
+                "housing_type": body.housing_type,
+                "goal": body.goal,
+                "trigger_file_path": body.trigger_file_path,
+                "already_installed_file_path": body.already_installed_file_path,
+                "patches_file_path": body.patches_file_path,
+                "non_invasive_recommendations_file_path": body.non_invasive_recommendations_file_path,
+                "exclusions": body.exclusions,
+                "multi_plan": body.multi_plan
+            }
+        )
+
         # The materials data could be cached or local so we don't need to make
         # consistent requests to the backend for
         # the same data
@@ -734,18 +755,19 @@ async def trigger_plan(body: PlanTriggerRequest):
 
                     valuations = PropertyValuation.estimate(property_instance=p, target_epc=new_epc)
                     property_value_increase_ranges[p.id] = valuations
-                    
-                    property_details_epc = p.get_property_details_epc(
-                        portfolio_id=body.portfolio_id, rating_lookup=rating_lookup,
-                    )
-                    create_property_details_epc(session, property_details_epc)
 
-                    update_or_create_property_spatial_details(session, p.uprn, p.spatial)
+                    if p.is_new:
+                        property_details_epc = p.get_property_details_epc(
+                            portfolio_id=body.portfolio_id, rating_lookup=rating_lookup,
+                        )
+                        create_property_details_epc(session, property_details_epc)
 
-                    property_data = p.get_full_property_data(current_valuation=valuations["current_value"])
-                    update_property_data(
-                        session, property_id=p.id, portfolio_id=body.portfolio_id, property_data=property_data
-                    )
+                        update_or_create_property_spatial_details(session, p.uprn, p.spatial)
+
+                        property_data = p.get_full_property_data(current_valuation=valuations["current_value"])
+                        update_property_data(
+                            session, property_id=p.id, portfolio_id=body.portfolio_id, property_data=property_data
+                        )
 
                     if not recommendations_to_upload:
                         continue
@@ -753,7 +775,8 @@ async def trigger_plan(body: PlanTriggerRequest):
                     new_plan_id = create_plan(session, {
                         "portfolio_id": body.portfolio_id,
                         "property_id": p.id,
-                        "is_default": True,
+                        "is_default": True if p.is_new else False,
+                        "name": body.scenario_name,
                         "valuation_increase_lower_bound": (
                             valuations["lower_bound_increased_value"] - valuations["current_value"]
                         ),
@@ -807,6 +830,8 @@ async def trigger_plan(body: PlanTriggerRequest):
         aggregate_portfolio_recommendations(
             session,
             portfolio_id=body.portfolio_id,
+            multi_plan=body.multi_plan,
+
             total_valuation_increase=total_valuation_increase,
             labour_days=labour_days,
             aggregated_data=aggregated_data
@@ -941,6 +966,7 @@ async def build_mds(body: MdsRequest):
                     # already_installed=property_already_installed,
                     # non_invasive_recommendations=property_non_invasive_recommendations,
                     measures=measures,
+                    is_new=is_new,
                     **Property.extract_kwargs(config)
                 )
             )
diff --git a/backend/app/plan/schemas.py b/backend/app/plan/schemas.py
index 77ac4217..263115af 100644
--- a/backend/app/plan/schemas.py
+++ b/backend/app/plan/schemas.py
@@ -13,6 +13,10 @@ class PlanTriggerRequest(BaseModel):
     patches_file_path: Optional[str] = None
     non_invasive_recommendations_file_path: Optional[str] = None
     exclusions: Optional[conlist(str, min_items=1)] = None
+    scenario_name: Optional[str] = ""
+    # If true, a property that already exists in the portfolio still gets a new plan, so a portfolio can hold
+    # multiple plans per property. If false, properties that already exist in the portfolio are skipped
+    multi_plan: Optional[bool] = False
 
     # Pre-defined list of possibilities for exclusions
     _allowed_exclusions = {
diff --git a/etl/xml_survey_extraction/app.py b/etl/xml_survey_extraction/app.py
index ab9eae2d..aeaf8abe 100644
--- a/etl/xml_survey_extraction/app.py
+++ b/etl/xml_survey_extraction/app.py
@@ -10,11 +10,97 @@ from io import BytesIO
 
 logger = setup_logger()
 
-SURVEYORS = "JAFFERSONS ENERGY CONSULTANTS"
-PROJECT_CODE = "VDE001"
 BUCKET = "retrofit-energy-assessments-dev"
-PORTFOLIO_ID = 86
 USER_ID = 8
+SCENARIOS = {
+    86: {
+        "project_code": "VDE001",
+        "surveyor": "JAFFERSONS ENERGY CONSULTANTS",
+        "bodies": [
+            # Scenario A: Cavity wall insulation
+            {
+                "portfolio_id": str(86),
+                "housing_type": "Private",
+                "goal": "Increase EPC",
+                "goal_value": "A",
+                "trigger_file_path": "",
+                "already_installed_file_path": "",
+                "patches_file_path": "",
+                "non_invasive_recommendations_file_path": "",
+                "exclusions": ["floor_insulation", "fireplace", "solar_pv", "heating"],
+                "budget": None,
+                "scenario_name": "Low Hanging Fruit",  # key must match the PlanTriggerRequest field name
+                "multi_plan": True,
+            },
+            # Scenario B: CWI, Solar PV, AHSP
+            {
+                "portfolio_id": str(86),
+                "housing_type": "Private",
+                "goal": "Increase EPC",
+                "goal_value": "A",
+                "trigger_file_path": "",
+                "already_installed_file_path": "",
+                "patches_file_path": "",
+                "non_invasive_recommendations_file_path": "",
+                "exclusions": ["floor_insulation", "fireplace"],
+                "budget": None,
+                "scenario_name": "Deep Retrofit",
+                "multi_plan": True,
+            },
+            # Scenario C: CWI, floor insulation, PV, AHSP
+            {
+                "portfolio_id": str(86),
+                "housing_type": "Private",
+                "goal": "Increase EPC",
+                "goal_value": "A",
+                "trigger_file_path": "",
+                "already_installed_file_path": "",
+                "patches_file_path": "",
+                "non_invasive_recommendations_file_path": "",
+                "exclusions": ["fireplace"],
+                "budget": None,
+                "scenario_name": "Whole House Retrofit",
+                "multi_plan": True,
+            }
+        ]
+    },
+    87: {
+        "project_code": "VDE002",
+        "surveyor": "JAFFERSONS ENERGY CONSULTANTS",
+        "bodies": [
+            # Scenario A: Solar PV, AHSP
+            {
+                "portfolio_id": str(87),
+                "housing_type": "Private",
+                "goal": "Increase EPC",
+                "goal_value": "A",
+                "trigger_file_path": "",
+                "already_installed_file_path": "",
+                "patches_file_path": "",
+                "non_invasive_recommendations_file_path": "",
+                "exclusions": ["floor_insulation", "fireplace"],
+                "budget": None,
+                "scenario_name": "Deep Retrofit",
+                "multi_plan": True,
+            },
+            # Scenario B: floor insulation, PV, AHSP
+            {
+                "portfolio_id": str(87),
+                "housing_type": "Private",
+                "goal": "Increase EPC",
+                "goal_value": "A",
+                "trigger_file_path": "",
+                "already_installed_file_path": "",
+                "patches_file_path": "",
+                "non_invasive_recommendations_file_path": "",
+                "exclusions": ["fireplace"],
+                "budget": None,
+                "scenario_name": "Whole House Retrofit",
+                "multi_plan": True,
+            }
+        ]
+    }
+}
 
 
 def main():
@@ -34,103 +120,102 @@ def main():
 
     # TODO: Store the project code in the database
     #
-    energy_assessments = list_files_and_subfolders_in_s3_folder(
-        bucket_name=BUCKET, folder_name=f"{SURVEYORS}/{PROJECT_CODE}/"
-    )
 
-    logger.info(f"Found {len(energy_assessments)} energy assessments for {SURVEYORS} and {PROJECT_CODE}")
-    assessments_map = {}
-    for assessment in energy_assessments:
-        uploaded_xmls = list_xmls_in_s3_folder(
-            bucket_name=BUCKET, folder_name=os.path.join(assessment, "docs & plans")
+    for scenario_config in SCENARIOS.values():
+        energy_assessments = list_files_and_subfolders_in_s3_folder(
+            bucket_name=BUCKET, folder_name=f"{scenario_config['surveyor']}/{scenario_config['project_code']}/"
         )
-        uprn = int(assessment.rstrip("/").split("/")[-1])
-        assessments_map[uprn] = uploaded_xmls
 
-    logger.info(f"Exatracted XMLS for the energy assessments")
-
-    # TODO: IF we have many uploads, we can do them in a batch so we don't try and upload huge amounts of data to
-    #       the database at onece
-
-    # TODO: We now have detailed information about primary and secondary walls, so we should use this information
-    #       in our recommendations when we have it
-    #       For example, for 77 Peryn Road, W3 7LT, the energy assessment has a main dwelling and two extensions, where
-    #       the physical dimensions and the fabric of each building is constructed in a way as if each building is
-    #       separate. We should use this information to make recommendations that are specific to each building
-    #       part, though the problem here is that while the fabric and dimensions are separate, the actual SAP, CO2, etc
-    #       figures span across the entire property.
-    #       Idea: We can collect all of this information by building part and store it separately in the database
-    #             against the uprn. We can have key data for the EPC, but then also additional data for each building
-    #             part. We can then use this data to make recommendations that are specific to each building part
-    #       We should probably re-think this data model, so we break up the data in a more considered fasion and produce
-    #       the underlying EPC data as a summary of the building parts. Not only do we have data against the main
-    #       dwelling and extensions, but we also have multiple windows with individiaul pieces of information that
-    #       we can use to make recommendations. We should store this data in a way that we can easily access it and
-    #       use it to make recommendations (e.g. we should have a Windows table)
-
-    # For each property, we download the xmls and extract the data
-    database_data = []
-    for uprn, xmls in assessments_map.items():
-        extracted_data = {}
-        for xml in xmls:
-            xml_data = read_from_s3(bucket_name=BUCKET, s3_file_name=xml)
-            xml_data_io = BytesIO(xml_data)
-            xml_parser = XmlParser(
-                file=xml_data_io,
-                filekey=os.path.join(f"s3://{BUCKET}", xml),
-                uprn=uprn,
-                surveyor_company=SURVEYORS,
+        logger.info(
+            f"Found {len(energy_assessments)} energy assessments for {scenario_config['surveyor']} and "
+            f"{scenario_config['project_code']}"
+        )
+        assessments_map = {}
+        for assessment in energy_assessments:
+            uploaded_xmls = list_xmls_in_s3_folder(
+                bucket_name=BUCKET, folder_name=os.path.join(assessment, "docs & plans")
             )
-            xml_parser.run()
-            if xml_parser.is_lig:
-                logger.info(f"Extracted data from {xml}")
-            extracted_epc = xml_parser.epc
-            extracted_additional_data = xml_parser.additional_data
+            uprn = int(assessment.rstrip("/").split("/")[-1])
+            assessments_map[uprn] = uploaded_xmls
 
-            data_to_update = {
-                **extracted_epc, **extracted_additional_data
-            }
+        logger.info(f"Exatracted XMLS for the energy assessments")
 
-            # We need to update the keys to match the database schema - i.e. we should replace all hyphens with
-            # underscores
-            data_to_update = {k.replace("-", "_"): v for k, v in data_to_update.items()}
+        # TODO: If we have many uploads, we can do them in batches so we don't try to upload huge amounts of data to
+        #       the database at once
 
-            extracted_data.update(data_to_update)
+        # TODO: We now have detailed information about primary and secondary walls, so we should use this information
+        #       in our recommendations when we have it
+        #       For example, for 77 Peryn Road, W3 7LT, the energy assessment has a main dwelling and two extensions,
+        #       where
+        #       the physical dimensions and the fabric of each building is constructed in a way as if each building is
+        #       separate. We should use this information to make recommendations that are specific to each building
+        #       part, though the problem here is that while the fabric and dimensions are separate, the actual SAP,
+        #       CO2, etc
+        #       figures span across the entire property.
+        #       Idea: We can collect all of this information by building part and store it separately in the database
+        #             against the uprn. We can have key data for the EPC, but then also additional data for each
+        #             building
+        #             part. We can then use this data to make recommendations that are specific to each building part
+        #       We should probably re-think this data model, so we break up the data in a more considered fashion and
+        #       produce
+        #       the underlying EPC data as a summary of the building parts. Not only do we have data against the main
+        #       dwelling and extensions, but we also have multiple windows with individual pieces of information that
+        #       we can use to make recommendations. We should store this data in a way that we can easily access it and
+        #       use it to make recommendations (e.g. we should have a Windows table)
 
-        database_data.append(extracted_data)
+        # For each property, we download the xmls and extract the data
+        database_data = []
+        for uprn, xmls in assessments_map.items():
+            extracted_data = {}
+            for xml in xmls:
+                xml_data = read_from_s3(bucket_name=BUCKET, s3_file_name=xml)
+                xml_data_io = BytesIO(xml_data)
+                xml_parser = XmlParser(
+                    file=xml_data_io,
+                    filekey=os.path.join(f"s3://{BUCKET}", xml),
+                    uprn=uprn,
+                    surveyor_company=scenario_config["surveyor"],
+                )
+                xml_parser.run()
+                if xml_parser.is_lig:
+                    logger.info(f"Extracted data from {xml}")
+                extracted_epc = xml_parser.epc
+                extracted_additional_data = xml_parser.additional_data
 
-    logger.info("Uploading data to the database")
-    session = sessionmaker(bind=db_engine)()
-    bulk_insert_energy_assessments(session, database_data)
-    session.close()
+                data_to_update = {
+                    **extracted_epc, **extracted_additional_data
+                }
 
-    # Create the asset list
-    asset_list = [
-        {"uprn": x["uprn"], "address": x["address1"], "postcode": x["postcode"]} for x in database_data
-    ]
-    asset_list = pd.DataFrame(asset_list)
+                # We need to update the keys to match the database schema - i.e. we should replace all hyphens with
+                # underscores
+                data_to_update = {k.replace("-", "_"): v for k, v in data_to_update.items()}
 
-    # Store the asset list in s3
-    filename = f"{USER_ID}/{PORTFOLIO_ID}/non_intrusives.csv"
-    save_csv_to_s3(
-        dataframe=asset_list,
-        bucket_name="retrofit-plan-inputs-dev",
-        file_name=filename
-    )
+                extracted_data.update(data_to_update)
 
-    body = {
-        "portfolio_id": str(PORTFOLIO_ID),
-        "housing_type": "Private",
-        "goal": "Increase EPC",
-        "goal_value": "A",
-        "trigger_file_path": filename,
-        "already_installed_file_path": "",
-        "patches_file_path": "",
-        "non_invasive_recommendations_file_path": "",
-        "exclusions": ["floor_insulation", "fireplace"],
-        "budget": None,
-    }
-    print(body)
+            database_data.append(extracted_data)
+
+        logger.info("Uploading data to the database")
+        session = sessionmaker(bind=db_engine)()
+        bulk_insert_energy_assessments(session, database_data)
+        session.close()
+
+        # Create the asset list
+        asset_list = [
+            {"uprn": x["uprn"], "address": x["address1"], "postcode": x["postcode"]} for x in database_data
+        ]
+        asset_list = pd.DataFrame(asset_list)
+
+        # Store the asset list in s3
+        filename = f"{USER_ID}/{scenario_config['bodies'][0]['portfolio_id']}/non_intrusives.csv"
+        save_csv_to_s3(
+            dataframe=asset_list,
+            bucket_name="retrofit-plan-inputs-dev",
+            file_name=filename
+        )
+
+        for body in scenario_config["bodies"]:
+            body["trigger_file_path"] = filename
+            print(body)
 
     # TODO: In order to get the full data associated to the heating system, we need to download and parse the pcdb which
     #       can be found here: https://www.ncm-pcdb.org.uk/pcdb/pcdb10.dat

From ccacdaac65865bdff15a0225a05f845ade8130a1 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 30 Jul 2024 17:50:55 +0100
Subject: [PATCH 046/182] adding try except for some db functions

---
 .../db/functions/recommendations_functions.py | 46 +++++++++++--------
 backend/app/plan/router.py                    |  5 +-
 backend/app/plan/schemas.py                   |  1 +
 etl/xml_survey_extraction/app.py              | 10 ++--
 .../optimiser/optimiser_functions.py          |  2 +-
 5 files changed, 36 insertions(+), 28 deletions(-)

diff --git a/backend/app/db/functions/recommendations_functions.py b/backend/app/db/functions/recommendations_functions.py
index cfb3d570..c7765039 100644
--- a/backend/app/db/functions/recommendations_functions.py
+++ b/backend/app/db/functions/recommendations_functions.py
@@ -1,5 +1,6 @@
 from sqlalchemy import insert, delete
 from sqlalchemy.orm import Session
+from sqlalchemy.exc import SQLAlchemyError
 from backend.app.db.models.recommendations import (
     Plan, Recommendation, RecommendationMaterials, PlanRecommendations, Scenario
 )
@@ -14,12 +15,15 @@ def create_plan(session: Session, plan):
     :param session: The database session
     :param plan: dictionary of data representing a plan to be created
     """
-
-    new_plan = Plan(**plan)
-    session.add(new_plan)
-    session.flush()
-
-    return new_plan.id
+    try:
+        new_plan = Plan(**plan)
+        session.add(new_plan)
+        session.flush()
+        session.commit()
+        return new_plan.id
+    except SQLAlchemyError as e:
+        session.rollback()
+        raise e
 
 
 def create_scenario(session: Session, scenario):
@@ -28,12 +32,15 @@ def create_scenario(session: Session, scenario):
     :param session: The database session
     :param scenario: dictionary of data representing a scenario to be created
     """
-
-    new_scenario = Scenario(**scenario)
-    session.add(new_scenario)
-    session.flush()
-
-    return new_scenario.id
+    try:
+        new_scenario = Scenario(**scenario)
+        session.add(new_scenario)
+        session.flush()
+        session.commit()
+        return new_scenario
+    except SQLAlchemyError as e:
+        session.rollback()
+        raise e
 
 
 def create_recommendation(session: Session, recommendation):
@@ -42,12 +49,15 @@ def create_recommendation(session: Session, recommendation):
     :param session: The database session
     :param recommendation: dictionary of data representing a recommendation to be created
     """
-
-    new_recommendation = Recommendation(**recommendation)
-    session.add(new_recommendation)
-    session.flush()
-
-    return new_recommendation.id
+    try:
+        new_recommendation = Recommendation(**recommendation)
+        session.add(new_recommendation)
+        session.flush()
+        session.commit()
+        return new_recommendation.id
+    except SQLAlchemyError as e:
+        session.rollback()
+        raise e
 
 
 def create_recommendation_material(session: Session, recommendation_id, material_id, depth):
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 1340bae3..4d73778e 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -297,9 +297,6 @@ async def trigger_plan(body: PlanTriggerRequest):
     session = sessionmaker(bind=db_engine)()
     created_at = datetime.now().isoformat()
 
-    # TODO: We should store the trigger file path in the database with the plan so we can track the file that
-    #       triggered the plan
-
     # TODO: if the measure is already installed, it should actually be the very first phase
 
     try:
@@ -412,7 +409,7 @@ async def trigger_plan(body: PlanTriggerRequest):
             return Response(status_code=204)
 
         # If we have any work to do, we create a new scenario
-        scenario = create_scenario(
+        engine_scenario = create_scenario(
             session=session,
             scenario={
                 "name": body.scenario_name,
diff --git a/backend/app/plan/schemas.py b/backend/app/plan/schemas.py
index 263115af..b1e3a43a 100644
--- a/backend/app/plan/schemas.py
+++ b/backend/app/plan/schemas.py
@@ -4,6 +4,7 @@ from typing import Optional
 
 class PlanTriggerRequest(BaseModel):
     budget: Optional[float] = None
+    # This can only have a fixed set of values
     goal: str
     housing_type: str
     goal_value: str
diff --git a/etl/xml_survey_extraction/app.py b/etl/xml_survey_extraction/app.py
index aeaf8abe..ed2d20b6 100644
--- a/etl/xml_survey_extraction/app.py
+++ b/etl/xml_survey_extraction/app.py
@@ -21,7 +21,7 @@ SCENARIOS = {
             {
                 "portfolio_id": str(86),
                 "housing_type": "Private",
-                "goal": "Increase EPC",
+                "goal": "Increasing EPC",
                 "goal_value": "A",
                 "trigger_file_path": "",
                 "already_installed_file_path": "",
@@ -36,7 +36,7 @@ SCENARIOS = {
             {
                 "portfolio_id": str(86),
                 "housing_type": "Private",
-                "goal": "Increase EPC",
+                "goal": "Increasing EPC",
                 "goal_value": "A",
                 "trigger_file_path": "",
                 "already_installed_file_path": "",
@@ -51,7 +51,7 @@ SCENARIOS = {
             {
                 "portfolio_id": str(86),
                 "housing_type": "Private",
-                "goal": "Increase EPC",
+                "goal": "Increasing EPC",
                 "goal_value": "A",
                 "trigger_file_path": "",
                 "already_installed_file_path": "",
@@ -72,7 +72,7 @@ SCENARIOS = {
             {
                 "portfolio_id": str(87),
                 "housing_type": "Private",
-                "goal": "Increase EPC",
+                "goal": "Increasing EPC",
                 "goal_value": "A",
                 "trigger_file_path": "",
                 "already_installed_file_path": "",
@@ -87,7 +87,7 @@ SCENARIOS = {
             {
                 "portfolio_id": str(87),
                 "housing_type": "Private",
-                "goal": "Increase EPC",
+                "goal": "Increasing EPC",
                 "goal_value": "A",
                 "trigger_file_path": "",
                 "already_installed_file_path": "",
diff --git a/recommendations/optimiser/optimiser_functions.py b/recommendations/optimiser/optimiser_functions.py
index 083a7c25..c1123e3d 100644
--- a/recommendations/optimiser/optimiser_functions.py
+++ b/recommendations/optimiser/optimiser_functions.py
@@ -9,7 +9,7 @@ def prepare_input_measures(property_recommendations, goal):
     """
 
     goal_map = {
-        "Increase EPC": "sap_points"
+        "Increasing EPC": "sap_points"
     }
 
     goal_key = goal_map[goal]

From b1f4f154ddb9371faa7cd49e9fbc52f02963bcbc Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 30 Jul 2024 20:00:32 +0100
Subject: [PATCH 047/182] Refactored recommendation uploading to return ids
 explicitly on upload

---
 .../app/db/functions/portfolio_functions.py   |  30 +++--
 .../db/functions/recommendations_functions.py | 104 +++++++++---------
 backend/app/db/models/recommendations.py      |   1 +
 backend/app/plan/router.py                    |  10 +-
 backend/app/plan/schemas.py                   |   3 +-
 etl/xml_survey_extraction/app.py              |   8 +-
 6 files changed, 84 insertions(+), 72 deletions(-)

diff --git a/backend/app/db/functions/portfolio_functions.py b/backend/app/db/functions/portfolio_functions.py
index 008c4b8b..ffdabfb6 100644
--- a/backend/app/db/functions/portfolio_functions.py
+++ b/backend/app/db/functions/portfolio_functions.py
@@ -1,10 +1,14 @@
 from sqlalchemy import func
-from backend.app.db.models.recommendations import Plan, PlanRecommendations, Recommendation
-from backend.app.db.models.portfolio import Portfolio
+from backend.app.db.models.recommendations import Plan, PlanRecommendations, Recommendation, Scenario
 
 
 def aggregate_portfolio_recommendations(
-    session, portfolio_id: int, total_valuation_increase: float, labour_days: float, aggregated_data: dict
+    session,
+    portfolio_id: int,
+    scenario_id: int,
+    total_valuation_increase: float,
+    labour_days: float,
+    aggregated_data: dict
 ):
     # Aggregate multiple fields
     aggregates = (
@@ -17,7 +21,11 @@ def aggregate_portfolio_recommendations(
         )
         .join(PlanRecommendations, PlanRecommendations.recommendation_id == Recommendation.id)
         .join(Plan, Plan.id == PlanRecommendations.plan_id)
-        .filter(Plan.portfolio_id == portfolio_id, Plan.is_default == True, Recommendation.default == True)
+        .filter(
+            Plan.portfolio_id == portfolio_id,
+            Plan.scenario_id == scenario_id,
+            Recommendation.default == True
+        )
         .one()
     )
 
@@ -30,16 +38,16 @@ def aggregate_portfolio_recommendations(
         **aggregated_data
     }
 
-    # Get the portfolio and update the fields. This data needs to be stored against the plan, not the portfolio
-    portfolio = session.query(Portfolio).filter_by(id=portfolio_id).one()
+    # Get the scenario and update the fields. This data needs to be stored against the scenario, not the portfolio
+    portfolio_scenario = session.query(Scenario).filter_by(id=scenario_id).one()
     # Update the data
     for key, value in aggregates_dict.items():
-        setattr(portfolio, key, value)
+        setattr(portfolio_scenario, key, value)
 
     # Insert total valuation increase and labour days
-    portfolio.property_valuation_increase = total_valuation_increase
-    portfolio.labour_days = labour_days
+    portfolio_scenario.property_valuation_increase = total_valuation_increase
+    portfolio_scenario.labour_days = labour_days
 
-    # Merge the updated portfolio back into the session
-    session.merge(portfolio)
+    # Merge the updated portfolio plan back into the session
+    session.merge(portfolio_scenario)
     session.flush()
diff --git a/backend/app/db/functions/recommendations_functions.py b/backend/app/db/functions/recommendations_functions.py
index c7765039..7ff09f22 100644
--- a/backend/app/db/functions/recommendations_functions.py
+++ b/backend/app/db/functions/recommendations_functions.py
@@ -95,62 +95,68 @@ def create_plan_recommendations(session: Session, plan_id, recommendation_ids):
     session.execute(insert(PlanRecommendations).values(data))
 
 
-def upload_recommendations(session: Session, recommendations_to_upload, property_id):
-    # Prepare data for bulk insert for Recommendation
-    recommendations_data = [
-        {
-            "property_id": property_id,
-            "type": rec["type"],
-            "description": rec["description"],
-            "estimated_cost": rec["total"],
-            "default": rec["default"],
-            "starting_u_value": rec.get("starting_u_value"),
-            "new_u_value": rec.get("new_u_value"),
-            "sap_points": rec["sap_points"],
-            "energy_savings": rec["heat_demand"],
-            "kwh_savings": rec["kwh_savings"],
-            "co2_equivalent_savings": rec["co2_equivalent_savings"],
-            "total_work_hours": rec["labour_hours"],
-            "energy_cost_savings": rec["energy_cost_savings"],
-            "labour_days": rec["labour_days"],
-            "already_installed": rec["already_installed"],
-        }
-        for rec in recommendations_to_upload
-    ]
+def upload_recommendations(session: Session, recommendations_to_upload, property_id, new_plan_id):
+    try:
+        # Prepare data for bulk insert for Recommendation
+        recommendations_data = [
+            {
+                "property_id": property_id,
+                "type": rec["type"],
+                "description": rec["description"],
+                "estimated_cost": rec["total"],
+                "default": rec["default"],
+                "starting_u_value": rec.get("starting_u_value"),
+                "new_u_value": rec.get("new_u_value"),
+                "sap_points": rec["sap_points"],
+                "energy_savings": rec["heat_demand"],
+                "kwh_savings": rec["kwh_savings"],
+                "co2_equivalent_savings": rec["co2_equivalent_savings"],
+                "total_work_hours": rec["labour_hours"],
+                "energy_cost_savings": rec["energy_cost_savings"],
+                "labour_days": rec["labour_days"],
+                "already_installed": rec["already_installed"],
+            }
+            for rec in recommendations_to_upload
+        ]
 
-    session.bulk_insert_mappings(Recommendation, recommendations_data)
+        # Insert the recommendations, get back the IDs
+        stmt = insert(Recommendation).returning(Recommendation.id).values(recommendations_data)
+        result = session.execute(stmt)
+        uploaded_recommendation_ids = [row[0] for row in result]
 
-    # To get the IDs of the newly inserted recommendations, we need to flush the session
-    session.flush()
+        # Prepare data for bulk insert for RecommendationMaterials
+        recommendation_materials_data = [
+            {
+                "recommendation_id": recommendation_id,
+                "material_id": part["id"],
+                "depth": int(part["depth"]) if part["depth"] else None,
+                "quantity": part["quantity"],
+                "quantity_unit": part["quantity_unit"],
+                "estimated_cost": part["total"],
+            }
+            for rec, recommendation_id in zip(recommendations_to_upload, uploaded_recommendation_ids)
+            for part in rec["parts"]
+        ]
 
-    # Map the uploaded_recommendation_ids with the original data for reference
-    uploaded_recommendation_ids = [rec.id for rec in session.query(Recommendation).filter(
-        Recommendation.property_id == property_id,
-        Recommendation.description.in_([rec["description"] for rec in recommendations_to_upload])
-    )]
+        session.bulk_insert_mappings(RecommendationMaterials, recommendation_materials_data)
 
-    # Prepare data for bulk insert for RecommendationMaterials
-    # We can have multiple materials per recommendation. The aggregation of the materials will total the
-    # recommendation figures
-    recommendation_materials_data = [
-        {
-            "recommendation_id": recommendation_id,
-            "material_id": part["id"],
-            "depth": int(part["depth"]) if part["depth"] else None,
-            "quantity": part["quantity"],
-            "quantity_unit": part["quantity_unit"],
-            "estimated_cost": part["total"],
-        }
-        for rec, recommendation_id in zip(recommendations_to_upload, uploaded_recommendation_ids)
-        for part in rec["parts"]
-    ]
+        # flush the changes to get the newly created IDs
+        session.flush()
 
-    session.bulk_insert_mappings(RecommendationMaterials, recommendation_materials_data)
+        create_plan_recommendations(
+            session, plan_id=new_plan_id, recommendation_ids=uploaded_recommendation_ids
+        )
 
-    # flush the changes to get the newly created IDs
-    session.flush()
+        # Commit the transaction
+        session.commit()
 
-    return uploaded_recommendation_ids
+        return True
+
+    except SQLAlchemyError as e:
+        # Rollback the transaction in case of an error
+        session.rollback()
+        print(f"An error occurred: {e}")
+        return False
 
 
 def clear_portfolio(session: Session, portfolio_id: int):
diff --git a/backend/app/db/models/recommendations.py b/backend/app/db/models/recommendations.py
index 6eddae1f..6ccfe7f7 100644
--- a/backend/app/db/models/recommendations.py
+++ b/backend/app/db/models/recommendations.py
@@ -53,6 +53,7 @@ class Plan(Base):
     name = Column(String, nullable=True, default="")
     portfolio_id = Column(BigInteger, ForeignKey(Portfolio.id), nullable=False)
     property_id = Column(BigInteger, ForeignKey(PropertyModel.id), nullable=False)
+    scenario_id = Column(BigInteger, ForeignKey('scenario.id'))  # Doesn't have to be linked to a scenario
     created_at = Column(TIMESTAMP, nullable=False, server_default=func.now())
     is_default = Column(Boolean, nullable=False)
     valuation_increase_lower_bound = Column(Float)
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 4d73778e..a0d4e585 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -772,6 +772,7 @@ async def trigger_plan(body: PlanTriggerRequest):
                     new_plan_id = create_plan(session, {
                         "portfolio_id": body.portfolio_id,
                         "property_id": p.id,
+                        "scenario_id": engine_scenario.id,
                         "is_default": True if p.is_new else False,
                         "name": body.scenario_name,
                         "valuation_increase_lower_bound": (
@@ -785,10 +786,8 @@ async def trigger_plan(body: PlanTriggerRequest):
                         ),
                     })
 
-                    uploaded_recommendation_ids = upload_recommendations(session, recommendations_to_upload, p.id)
-
-                    create_plan_recommendations(
-                        session, plan_id=new_plan_id, recommendation_ids=uploaded_recommendation_ids
+                    upload_recommendations(
+                        session, recommendations_to_upload, p.id, new_plan_id
                     )
 
                     property_valuation_increases.append(
@@ -827,8 +826,7 @@ async def trigger_plan(body: PlanTriggerRequest):
         aggregate_portfolio_recommendations(
             session,
             portfolio_id=body.portfolio_id,
-            multi_plan=body.multi_plan,
-
+            scenario_id=engine_scenario.id,
             total_valuation_increase=total_valuation_increase,
             labour_days=labour_days,
             aggregated_data=aggregated_data
diff --git a/backend/app/plan/schemas.py b/backend/app/plan/schemas.py
index b1e3a43a..108eb1ae 100644
--- a/backend/app/plan/schemas.py
+++ b/backend/app/plan/schemas.py
@@ -4,7 +4,6 @@ from typing import Optional
 
 class PlanTriggerRequest(BaseModel):
     budget: Optional[float] = None
-    # This can only have a fixed set of values
     goal: str
     housing_type: str
     goal_value: str
@@ -36,7 +35,7 @@ class PlanTriggerRequest(BaseModel):
         "air_source_heat_pump",
     }
 
-    _allowed_goals = {"Increase EPC"}
+    _allowed_goals = {"Increasing EPC"}
 
     _allowed_housing_types = {"Social", "Private"}
 
diff --git a/etl/xml_survey_extraction/app.py b/etl/xml_survey_extraction/app.py
index ed2d20b6..a8bffc73 100644
--- a/etl/xml_survey_extraction/app.py
+++ b/etl/xml_survey_extraction/app.py
@@ -44,7 +44,7 @@ SCENARIOS = {
                 "non_invasive_recommendations_file_path": "",
                 "exclusions": ["floor_insulation", "fireplace"],
                 "budget": None,
-                "Scenario Name": "Deep Retrofit",
+                "scenario_name": "Deep Retrofit",
                 "multi_plan": True,
             },
             # Scenario C, CWI, floor insulation, PV, AHSP
@@ -59,7 +59,7 @@ SCENARIOS = {
                 "non_invasive_recommendations_file_path": "",
                 "exclusions": ["fireplace"],
                 "budget": None,
-                "Scenario Name": "Whole House Retrofit",
+                "scenario_name": "Whole House Retrofit",
                 "multi_plan": True,
             }
         ]
@@ -80,7 +80,7 @@ SCENARIOS = {
                 "non_invasive_recommendations_file_path": "",
                 "exclusions": ["floor_insulation", "fireplace"],
                 "budget": None,
-                "Scenario Name": "Deep Retrofit",
+                "scenario_name": "Deep Retrofit",
                 "multi_plan": True,
             },
             # Scenario B, floor insulation, PV, AHSP
@@ -95,7 +95,7 @@ SCENARIOS = {
                 "non_invasive_recommendations_file_path": "",
                 "exclusions": ["fireplace"],
                 "budget": None,
-                "Scenario Name": "Whole House Retrofit",
+                "scenario_name": "Whole House Retrofit",
                 "multi_plan": True,
             }
         ]

From 096915bf336b54c547cc80200cbe400abee31acc Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 30 Jul 2024 21:11:04 +0100
Subject: [PATCH 048/182] added missing fields to scenarios model

---
 .../app/db/functions/portfolio_functions.py   |  1 +
 backend/app/db/models/recommendations.py      | 22 +++++++++++++++++++
 2 files changed, 23 insertions(+)

diff --git a/backend/app/db/functions/portfolio_functions.py b/backend/app/db/functions/portfolio_functions.py
index ffdabfb6..ac340ab5 100644
--- a/backend/app/db/functions/portfolio_functions.py
+++ b/backend/app/db/functions/portfolio_functions.py
@@ -40,6 +40,7 @@ def aggregate_portfolio_recommendations(
 
     # Get the scenario and update the fields. This data needs to be stored against the scenario, not the portfolio
     portfolio_scenario = session.query(Scenario).filter_by(id=scenario_id).one()
+
     # Update the data
     for key, value in aggregates_dict.items():
         setattr(portfolio_scenario, key, value)
diff --git a/backend/app/db/models/recommendations.py b/backend/app/db/models/recommendations.py
index 6ccfe7f7..ed3f326e 100644
--- a/backend/app/db/models/recommendations.py
+++ b/backend/app/db/models/recommendations.py
@@ -85,3 +85,25 @@ class Scenario(Base):
     non_invasive_recommendations_file_path = Column(String)
     exclusions = Column(String)
     multi_plan = Column(Boolean, default=False)
+
+    # Add in the fields we need, which were previously sitting at the portfolio level
+    cost = Column(Float)
+    total_work_hours = Column(Float)
+    energy_savings = Column(Float)
+    co2_equivalent_savings = Column(Float)
+    energy_cost_savings = Column(Float)
+    epc_breakdown_pre_retrofit = Column(String)
+    epc_breakdown_post_retrofit = Column(String)
+    number_of_properties = Column(BigInteger)
+    n_units_to_retrofit = Column(BigInteger)
+    co2_per_unit_pre_retrofit = Column(String)
+    co2_per_unit_post_retrofit = Column(String)
+    energy_bill_per_unit_pre_retrofit = Column(String)
+    energy_bill_per_unit_post_retrofit = Column(String)
+    energy_consumption_per_unit_pre_retrofit = Column(String)
+    energy_consumption_per_unit_post_retrofit = Column(String)
+    valuation_improvement_per_unit = Column(String)
+    cost_per_unit = Column(String)
+    cost_per_co2_saved = Column(String)
+    cost_per_sap_point = Column(String)
+    valuation_return_on_investment = Column(String)

From 8596878fc010991c647695b13fa3de1abddb9ff3 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 31 Jul 2024 11:59:29 +0100
Subject: [PATCH 049/182] recommendations process working

---
 backend/app/db/functions/recommendations_functions.py | 6 ++++++
 backend/app/db/models/recommendations.py              | 3 +++
 2 files changed, 9 insertions(+)

diff --git a/backend/app/db/functions/recommendations_functions.py b/backend/app/db/functions/recommendations_functions.py
index 7ff09f22..b03909ee 100644
--- a/backend/app/db/functions/recommendations_functions.py
+++ b/backend/app/db/functions/recommendations_functions.py
@@ -33,6 +33,12 @@ def create_scenario(session: Session, scenario):
     :param scenario: dictionary of data representing a scenario to be created
     """
     try:
+
+        # Before creating a new scenario, we check if there is a scenario for this portfolio id already
+        # If there is, it means that any new scnario created will NOT be the default scenario
+        existing_scenario = session.query(Scenario).filter_by(portfolio_id=scenario["portfolio_id"]).first()
+        scenario["is_default"] = True if not existing_scenario else False
+
         new_scenario = Scenario(**scenario)
         session.add(new_scenario)
         session.flush()
diff --git a/backend/app/db/models/recommendations.py b/backend/app/db/models/recommendations.py
index ed3f326e..a1743436 100644
--- a/backend/app/db/models/recommendations.py
+++ b/backend/app/db/models/recommendations.py
@@ -85,6 +85,7 @@ class Scenario(Base):
     non_invasive_recommendations_file_path = Column(String)
     exclusions = Column(String)
     multi_plan = Column(Boolean, default=False)
+    is_default = Column(Boolean, default=False, nullable=False)
 
     # Add in the fields we need, which were previously sitting at the portfolio level
     cost = Column(Float)
@@ -107,3 +108,5 @@ class Scenario(Base):
     cost_per_co2_saved = Column(String)
     cost_per_sap_point = Column(String)
     valuation_return_on_investment = Column(String)
+    property_valuation_increase = Column(Float)
+    labour_days = Column(Float)

From 39bc6c53b867c66601042c313649f912adaae8d7 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 1 Aug 2024 12:21:10 +0100
Subject: [PATCH 050/182] sfr investigation in progress

---
 etl/customers/goldman/property_ownership.py | 254 +++++++++++++++++++-
 1 file changed, 248 insertions(+), 6 deletions(-)

diff --git a/etl/customers/goldman/property_ownership.py b/etl/customers/goldman/property_ownership.py
index ebd72732..c1f37d4c 100644
--- a/etl/customers/goldman/property_ownership.py
+++ b/etl/customers/goldman/property_ownership.py
@@ -75,10 +75,15 @@ def find_f_g_properties(paths):
         epc_data = epc_data[~pd.isnull(epc_data["UPRN"])]
         epc_data["UPRN"] = epc_data["UPRN"].astype(int).astype(str)
 
-        # Get the newest EPC for each UPRN. We use LODGEMENT_DATE as a proxy for this
-        epc_data["LODGEMENT_DATETIME"] = pd.to_datetime(epc_data["LODGEMENT_DATETIME"], format='mixed', errors="coerce")
+        if pd.isnull(pd.to_datetime(epc_data["LODGEMENT_DATETIME"], errors="coerce")).sum():
+            raise Exception("wtf")
 
-        epc_data = epc_data.sort_values("LODGEMENT_DATETIME", ascending=False).drop_duplicates("UPRN")
+        # Get the newest EPC for each UPRN. We use LODGEMENT_DATE as a proxy for this
+        epc_data["LODGEMENT_DATETIME"] = pd.to_datetime(epc_data["LODGEMENT_DATETIME"], errors="coerce")
+
+        epc_data = epc_data.sort_values(
+            ["LODGEMENT_DATE", "LODGEMENT_DATETIME"], ascending=False
+        ).drop_duplicates("UPRN")
 
         # Get G & F properties
         epc_data = epc_data[epc_data["CURRENT_ENERGY_RATING"].isin(["G", "F"])]
@@ -401,6 +406,8 @@ def app():
             ~company_ownership["Property Address"].str.lower().str.startswith(starting_term)
         ]
 
+    # address = properties[properties["UPRN"] == 100030253055].squeeze()
+
     freehold_matching_lookup = []  # 634
     leasehold_matching_lookup = []  # 86
     shared_leasehold_match = []
@@ -493,12 +500,18 @@ def app():
     # freehold_matching_lookup = pd.read_excel("freehold_matching_lookup V2.xlsx")
     # leasehold_matching_lookup = pd.read_excel("leasehold_matching_lookup V2.xlsx")
 
+    # freehold_matching_lookup.shape
+    # (1537, 4)
+    # leasehold_matching_lookup.shape
+    # (390, 4)
+
     # The approximate matches aren't very good
     freehold_matching_lookup = freehold_matching_lookup[freehold_matching_lookup["match_type"] == "exact"]
     leasehold_matching_lookup = leasehold_matching_lookup[leasehold_matching_lookup["match_type"] == "exact"]
 
     # Combine
     combined_matching_lookup = pd.concat([freehold_matching_lookup, leasehold_matching_lookup])
+
     # Remove duplicates
     combined_matching_lookup = remove_duplicate_matches(
         matching_lookup=combined_matching_lookup, properties=properties, company_ownership=company_ownership
@@ -566,7 +579,6 @@ def app():
 
     land_registry_matches = []
     for _, match in tqdm(matched_addresses.iterrows(), total=len(matched_addresses)):
-
         # Filter land registry on the postcode
         lr_filtered = land_registry[
             (land_registry["postcode"] == match["epc_postcode"].lower().strip())
@@ -782,7 +794,7 @@ def app():
         right_on="uprn"
     ).drop(columns=["uprn"])
 
-    # Flat anything that sold in the last year
+    # Flag anything that sold in the last year
     matched_addresses["sold_recently"] = (
         matched_addresses["date_of_transfer"] >= pd.Timestamp.now() - pd.DateOffset(years=1)
     )
@@ -792,6 +804,9 @@ def app():
         (matched_addresses["TRANSACTION_TYPE"].isin(["marketed sale", "non marketed sale"]))
     )
 
+    # Save this
+    # matched_addresses.to_excel("combined_aggregate - pre filter 28th July.xlsx", index=False)
+
     # Drop rows on the booleans
     matched_addresses = matched_addresses[
         ~matched_addresses["sold_recently"] &
@@ -835,7 +850,7 @@ def app():
     # investment_50m_properties.to_excel("investment_50m_properties 28th July.xlsx", index=False)
 
     # Store the EPC data
-    # portfolio_epc_data_50m.to_excel("portfolio_epc_data_50m 29th July.xlsx", index=False)
+    # portfolio_epc_data_50m.to_excel("portfolio_epc_data_50m 28th July.xlsx", index=False)
 
     # We check if any of these properties are in a conservation area
     valuations = pd.read_excel("property value.xlsx")
@@ -997,3 +1012,230 @@ def prepare_anonymised_data():
     )
 
     df.to_excel("Property List - 50% redacted.xlsx", index=False)
+
+
+def adhoc_change_of_portfolio_analysis_july_2024():
+    """
+    This is just some adhoc analysis, which answers some questions which arose upon refreshing the SFR portfolio
+    in late July 2024
+    :return:
+    """
+
+    # Question 1: Which properties in the previous portfolio were in conservation areas or had listed/heritage status?
+    def answer_q1():
+        # Data was just stored here:
+        geospatial_data = pd.read_excel("geospatial_data.xlsx")
+
+        special_buildings = geospatial_data[
+            (geospatial_data["conservation_status"] == 1) |
+            geospatial_data["is_listed_building"] |
+            geospatial_data["is_heritage_building"]
+            ]
+
+        print(
+            f"There were {special_buildings.shape[0]} properties in the previous portfolio which were in conservation "
+            f"areas or had listed/heritage status"
+        )
+        print(f"{(special_buildings['conservation_status'] == 1).sum()} were in a conservation area")
+        print(f"{special_buildings['is_listed_building'].sum()} were listed buildings")
+        print(f"{special_buildings['is_heritage_building'].sum()} were heritage buildings")
+
+    answer_q1()
+
+    # Question 2: For each property in the old portfolio, why was it lost?
+    def answer_q2():
+        # We read in the previous 50m portfolio
+        previous_portfolio = pd.read_excel("investment_50m_properties 28th May.xlsx")  # 39 owners
+
+        new_matched_addresses = pd.read_excel("combined_aggregate - pre filter 28th July.xlsx")
+        new_portfolio = pd.read_excel("investment_50m_properties 28th July.xlsx")  # 69 owners
+
+        # dropped units
+        dropped_units = previous_portfolio[
+            ~previous_portfolio["UPRN"].isin(new_portfolio["UPRN"].values)
+        ]
+        # Lots of properties are missed out - why
+        # 1) What was dropped, but was in the matched addresses and therefore was maybe filtered out
+        dropped_units_matched = dropped_units[
+            dropped_units["UPRN"].isin(new_matched_addresses["UPRN"])
+        ].copy()
+
+        dropped_units_matched = dropped_units_matched.merge(
+            new_matched_addresses[
+                ["UPRN", 'transaction_id', 'price', 'date_of_transfer', 'sold_recently', 'sale_lodged_recently']
+            ],
+            how="left", on="UPRN"
+        )
+
+        # 97 units here - how mant were sold
+        of_which_sold = dropped_units_matched[
+            dropped_units_matched["sold_recently"]
+        ]
+        n_sold = of_which_sold.shape[0]
+        print(f"{n_sold} sold recently ({n_sold / previous_portfolio.shape[0] * 100})%")
+
+        of_which_have_sale_epc_but_not_sold = dropped_units_matched[
+            ~dropped_units_matched["sold_recently"] & dropped_units_matched["sale_lodged_recently"]
+            ]
+        n_with_sale_epc_but_not_yet_sold = of_which_have_sale_epc_but_not_sold.shape[0]
+        print(
+            f"{n_with_sale_epc_but_not_yet_sold} have a sale EPC but have not sold yet ("
+            f"{n_with_sale_epc_but_not_yet_sold / previous_portfolio.shape[0] * 100})%"
+        )
+
+        # What about things that haven't sold or don't look likely to sell
+        not_sold = dropped_units_matched[
+            ~dropped_units_matched["sold_recently"] & ~dropped_units_matched["sale_lodged_recently"]
+            ]
+
+        new_owner_sizes = new_portfolio.groupby(
+            ["Company Registration No. (1)"]
+        ).size().reset_index().rename(columns={0: "Number of Properties"})
+        new_owner_sizes = new_owner_sizes.sort_values("Number of Properties", ascending=False)
+
+        previous_owner_sizes = previous_portfolio.groupby(
+            ["Company Registration No. (1)"]
+        ).size().reset_index().rename(columns={0: "Number of Properties"})
+        previous_owner_sizes = previous_owner_sizes.sort_values("Number of Properties", ascending=False)
+
+        # Let's just confirm that we took in a bigger owner, as we see this unit was still matched
+        owner_too_small = []
+        owner_big_enough = []
+        for _, property in not_sold.iterrows():
+            owner_reg_id = property["Company Registration No. (1)"]
+            old_portfolio_owner_size = previous_owner_sizes[
+                previous_owner_sizes["Company Registration No. (1)"] == owner_reg_id
+                ]
+            # We make sure that the number of properties is smaller than the new smallest number
+            if (
+                old_portfolio_owner_size["Number of Properties"].values[0] >
+                new_owner_sizes["Number of Properties"].min()
+            ):
+                owner_big_enough.append(property.to_dict())
+                continue
+
+            owner_too_small.append(property.to_dict())
+
+        n_owner_too_small = len(owner_too_small)
+        owner_big_enough = pd.DataFrame(owner_big_enough)
+
+        summary = []
+        for _, record in owner_big_enough.iterrows():
+            # Do we have this new owner?
+            new_owner = new_portfolio[
+                new_portfolio["Company Registration No. (1)"] == record["Company Registration No. (1)"]
+                ]
+            if new_owner.empty:
+                # Why don't we have this new owner
+                new_owner_data = new_matched_addresses[
+                    new_matched_addresses["Company Registration No. (1)"] == record["Company Registration No. (1)"]
+                    ]
+
+                new_owner_data_filtered = new_owner_data[
+                    ~new_owner_data["sold_recently"] & ~new_owner_data["sale_lodged_recently"]
+                    ]
+
+                summary.append(
+                    {
+                        "Owner Name": record["Proprietor Name (1)"],
+                        "Owner reg id": record["Company Registration No. (1)"],
+                        "N properties in new portfolio before filtering": new_owner_data.shape[0],
+                        "N properties in new portfolio after filtering": new_owner_data_filtered.shape[0],
+                    }
+
+                )
+                continue
+            raise Exception("something went wrong")
+
+        summary = pd.DataFrame(summary)
+
+        not_accounted_for = summary[
+            (
+                summary["N properties in new portfolio before filtering"] <
+                previous_owner_sizes["Number of Properties"].min()
+            )
+        ]
+
+        # We have two owners not accounted for:
+        # ALLMID LIMITED, 01959058
+        # CORAL RACING LIMITED, 541600
+        # What happened to these owners?
+        new_epc = pd.read_excel("EPC F & G Properties - V2.xlsx")
+        allmid = previous_portfolio[previous_portfolio["Company Registration No. (1)"] == "01959058"].copy()
+        # Check if any of the properties are not in the new EPC data
+        allmid["not_in_new_epc"] = ~allmid["UPRN"].isin(new_epc["UPRN"])
+        allmid["not_in_matched_pre_filtered"] = ~allmid["UPRN"].isin(new_matched_addresses["UPRN"])
+        # In the previous portfolio, Allmid had 4 properties and in the re-build, it has just 2. Why?
+        # Firstly, one of their properties was re-surveyed not at an F/G
+        # Secondly, one of their properties is no longer owned by them: 
+        # https://www.zoopla.co.uk/property/uprn/100070553074/
+        # So as an owner, they fell out of the ranking
+        coral_racing = previous_portfolio[previous_portfolio["Company Registration No. (1)"] == "541600"].copy()
+        coral_racing["not_in_new_epc"] = ~coral_racing["UPRN"].isin(new_epc["UPRN"])
+        coral_racing["not_in_matched_pre_filtered"] = ~coral_racing["UPRN"].isin(new_matched_addresses["UPRN"])
+        # Coral goes down from 4 -> 1 on refresh, so what happened?
+        # 1) 2 properties had new EPCs and re-scored higher
+        # 2) 1 property, 85A Market Street, Church Gresley, Swadlincote, DE11 9PN is no longer matched to the ownership
+        #    data, which is correct
+
+        # Why were these units lost?
+        # There's just 1 owner, who is BARHAM PROPERTY LTD
+        owner_too_big_ids = owner_big_enough["Company Registration No. (1)"].unique()
+        owner_too_big_names = owner_big_enough["Proprietor Name (1)"].unique()
+        previous_owner_size = previous_owner_sizes[
+            previous_owner_sizes["Company Registration No. (1)"].isin(owner_too_big_ids)
+        ]
+        new_owner_size = new_matched_addresses[
+            new_matched_addresses["Company Registration No. (1)"].isin(owner_too_big_ids) |
+            new_matched_addresses["Proprietor Name (1)"].isin(owner_too_big_names)
+            ]
+
+        n_unsold = new_owner_size[~new_owner_size["sold_recently"] & ~new_owner_size["sale_lodged_recently"]].shape
+
+        # Happy with the justification to this point
+        assert (
+            (n_sold + n_with_sale_epc_but_not_yet_sold + n_owner_too_small + len(owner_big_enough)) ==
+            dropped_units_matched.shape[0]
+        )
+
+        # We now have a list of properties that were lost from the previous iteration to the next that were not matched
+        dropped_units_unmatched = dropped_units[
+            ~dropped_units["UPRN"].isin(new_matched_addresses["UPRN"])
+        ].copy()
+
+        # A few possibilities: They aren't in the EPC data?
+        new_epc = pd.read_excel("EPC F & G Properties - V2.xlsx")
+        unmatched_not_in_epc = dropped_units_unmatched[
+            ~dropped_units_unmatched["UPRN"].isin(new_epc["UPRN"])
+        ]
+        # There are 17 units that have had new EPCs above a G
+        # Who were the owners? - various, nothing particularly remarkable
+        (
+            previous_portfolio[
+                previous_portfolio["UPRN"].isin(unmatched_not_in_epc["UPRN"])
+            ]["Proprietor Name (1)"].value_counts()
+        )
+
+        # 22 final units to be accounted for...!
+        unmatched_in_epc = dropped_units_unmatched[
+            dropped_units_unmatched["UPRN"].isin(new_epc["UPRN"])
+        ]
+
+        # Some of them will be due to ownership
+        # TODO: Read in freehold/leasehold data and see how many of these were non-exact matches!
+        leasehold_matching_lookup = pd.read_excel("leasehold_matching_lookup V2.xlsx")
+        freehold_matching_lookup = pd.read_excel("freehold_matching_lookup V2.xlsx")
+        combined_matching_lookup = pd.concat([leasehold_matching_lookup, freehold_matching_lookup])
+        # This is 13 matches, all of them approximate
+        weak_matches = unmatched_in_epc.merge(combined_matching_lookup, how="inner", on="UPRN")
+
+        # These have been lost due to ownership updates. This has been checked manually for every unit and there has
+        # been sale activity for each one, justifying the change in ownership data
+        remaining_matches = unmatched_in_epc[
+            ~unmatched_in_epc["UPRN"].isin(weak_matches["UPRN"])
+        ]
+
+        assert dropped_units.shape[0] == (
+            (n_sold + n_with_sale_epc_but_not_yet_sold + n_owner_too_small + len(owner_big_enough)) + len(
+            weak_matches) + unmatched_not_in_epc.shape[0]
+        )

From 027aa79a0b1a05562b9584a73cff7fae068f4f30 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 1 Aug 2024 12:44:07 +0100
Subject: [PATCH 051/182] adding bypass heating control recommendation

---
 backend/app/plan/router.py                   | 38 ++++++++---------
 etl/bill_savings/data_collection.py          |  5 ++-
 recommendations/Costs.py                     | 30 +++++++++++++
 recommendations/HeatingControlRecommender.py | 44 ++++++++++++++++++++
 4 files changed, 96 insertions(+), 21 deletions(-)

diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index a0d4e585..b6175153 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -408,25 +408,6 @@ async def trigger_plan(body: PlanTriggerRequest):
         if not input_properties:
             return Response(status_code=204)
 
-        # If we have any work to do, we create a new scenario
-        engine_scenario = create_scenario(
-            session=session,
-            scenario={
-                "name": body.scenario_name,
-                "created_at": created_at,
-                "budget": body.budget,
-                "portfolio_id": body.portfolio_id,
-                "housing_type": body.housing_type,
-                "goal": body.goal,
-                "trigger_file_path": body.trigger_file_path,
-                "already_installed_file_path": body.already_installed_file_path,
-                "patches_file_path": body.patches_file_path,
-                "non_invasive_recommendations_file_path": body.non_invasive_recommendations_file_path,
-                "exclusions": body.exclusions,
-                "multi_plan": body.multi_plan
-            }
-        )
-
         # The materials data could be cached or local so we don't need to make
         # consistent requests to the backend for
         # the same data
@@ -733,6 +714,25 @@ async def trigger_plan(body: PlanTriggerRequest):
         # 3) the recommendations
 
         logger.info("Uploading recommendations to the database")
+        # If we have any work to do, we create a new scenario
+        engine_scenario = create_scenario(
+            session=session,
+            scenario={
+                "name": body.scenario_name,
+                "created_at": created_at,
+                "budget": body.budget,
+                "portfolio_id": body.portfolio_id,
+                "housing_type": body.housing_type,
+                "goal": body.goal,
+                "trigger_file_path": body.trigger_file_path,
+                "already_installed_file_path": body.already_installed_file_path,
+                "patches_file_path": body.patches_file_path,
+                "non_invasive_recommendations_file_path": body.non_invasive_recommendations_file_path,
+                "exclusions": body.exclusions,
+                "multi_plan": body.multi_plan
+            }
+        )
+
         property_valuation_increases = []
         session.commit()
         new_epc_bands = {}
diff --git a/etl/bill_savings/data_collection.py b/etl/bill_savings/data_collection.py
index e6f6de6f..6cc2d581 100644
--- a/etl/bill_savings/data_collection.py
+++ b/etl/bill_savings/data_collection.py
@@ -133,12 +133,13 @@ def app():
     energy_consumption_data = []
     for i, directory in tqdm(enumerate(epc_directories), total=len(epc_directories)):
         # Skip the first 50
-        if i < 57:
-            continue
+        # if i < 57:
+        #     continue
 
         data = pd.read_csv(directory / "certificates.csv", low_memory=False)
         # Rename the columns to the same format as the api returns
         data.columns = [c.replace("_", "-").lower() for c in data.columns]
+
         # Take just date before the date threshold
         data = data[data["lodgement-date"] >= EARLIEST_EPC_DATE]
 
diff --git a/recommendations/Costs.py b/recommendations/Costs.py
index ce459528..738e9b07 100644
--- a/recommendations/Costs.py
+++ b/recommendations/Costs.py
@@ -64,6 +64,8 @@ SMART_APPLIANCE_THERMOSTAT_COST = 400
 PROGRAMMER_COST = 120
 ROOM_THERMOSTAT_COST = 150
 TRVS_COST = 35
+BYPASS_COST = 350  # Based on desktop research for a complex installation
+# https://www.checkatrade.com/blog/cost-guides/cost-install-water-shut-off-valve/
 
 # Cost for TTZC
 # Smart thermostat based on checkatrade https://www.checkatrade.com/blog/cost-guides/cost-smart-thermostat/
@@ -1254,6 +1256,34 @@ class Costs:
             "labour_days": labour_days,
         }
 
+    def programmer_trvs_bypass(self, number_heated_rooms, has_programmer=False, has_trvs=False, has_bypass=False):
+        """
+        Estimates the cost of fitting a programmer, TRVs and a bypass valve, skipping any part already present.
+        The has_* flags default to False, so a caller passing only number_heated_rooms is costed for all three.
+        """
+        total_cost = 0
+        labour_hours = 0
+        if not has_programmer:
+            total_cost += PROGRAMMER_COST
+            labour_hours += 1
+        if not has_trvs:
+            total_cost += TRVS_COST * number_heated_rooms
+            labour_hours += 0.25 * number_heated_rooms
+        if not has_bypass:
+            total_cost += BYPASS_COST
+            labour_hours += 0.5
+
+        # The component costs are treated as VAT-inclusive, so the pre-VAT subtotal is backed out of the total
+        subtotal_before_vat = total_cost / (1 + self.VAT_RATE)
+        vat = total_cost - subtotal_before_vat
+        return {
+            "total": total_cost,
+            "subtotal": subtotal_before_vat,
+            "vat": vat,
+            "labour_hours": labour_hours,
+            "labour_days": 1,
+        }
+
     def heater_removal(self, n_rooms):
         """
         Estimates the costs of removal of heaters, including the redecoration costs of the space behind the heater
diff --git a/recommendations/HeatingControlRecommender.py b/recommendations/HeatingControlRecommender.py
index fe3e577d..80615b30 100644
--- a/recommendations/HeatingControlRecommender.py
+++ b/recommendations/HeatingControlRecommender.py
@@ -40,7 +40,10 @@ class HeatingControlRecommender:
             return
 
         if heating_description in ["Air source heat pump, radiators, electric"]:
+            # For an ASHP, we can recommend time and temperature zone controls, as well as programmer, trvs and a bypass
+            # which are common configurations for ASHPs
             self.recommend_time_temperature_zone_controls()
+            self.recommend_programmer_trvs_bypass()
 
     def recommend_room_heaters_electric_controls(self):
         """
@@ -279,3 +282,44 @@ class HeatingControlRecommender:
                 "description_simulation": description_simulation
             }
         )
+
+    def recommend_programmer_trvs_bypass(self):
+        """Append a "Programmer, TRVs and bypass" heating_control recommendation to self.recommendation."""
+        # We don't perform any checks here - this is likely to be used in conjunction with an ASHP recommendation
+        new_controls_description = "Programmer, TRVs and bypass"
+        ending_config = MainheatControlAttributes(new_controls_description).process()
+        simulation_config = check_simulation_difference(
+            new_config=ending_config, old_config=self.property.main_heating_controls
+        )
+        simulation_config["mainheatc_energy_eff_ending"] = "Average"
+
+        description_simulation = {
+            "mainheatcont-description": new_controls_description,
+            "mainheatc-energy-eff": simulation_config["mainheatc_energy_eff_ending"]
+        }
+
+        cost_result = self.costs.programmer_trvs_bypass(
+            number_heated_rooms=int(self.property.data["number-heated-rooms"])
+        )
+
+        description = "Install a Bypass valve, TRVs and a Programmer"
+
+        already_installed = "heating_control" in self.property.already_installed
+        if already_installed:
+            cost_result = override_costs(cost_result)
+            description = "Heating controls have already been upgraded, no further action needed."
+
+        self.recommendation.append(
+            {
+                "type": "heating_control",
+                "parts": [],
+                "description": description,
+                **cost_result,
+                "starting_u_value": None,
+                "new_u_value": None,
+                "sap_points": None,
+                "already_installed": already_installed,
+                "simulation_config": simulation_config,
+                "description_simulation": description_simulation
+            }
+        )

From 59ae6647b3121f5ca3886f2055ebcce7d8d06355 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 1 Aug 2024 13:16:57 +0100
Subject: [PATCH 052/182] Allowing ashp recs to produce multiple
 recommendations for multiple heating controls

---
 recommendations/HeatingControlRecommender.py |   9 +-
 recommendations/HeatingRecommender.py        | 171 +++++++++++--------
 2 files changed, 106 insertions(+), 74 deletions(-)

diff --git a/recommendations/HeatingControlRecommender.py b/recommendations/HeatingControlRecommender.py
index 80615b30..ef0df011 100644
--- a/recommendations/HeatingControlRecommender.py
+++ b/recommendations/HeatingControlRecommender.py
@@ -298,8 +298,15 @@ class HeatingControlRecommender:
             "mainheatc-energy-eff": simulation_config["mainheatc_energy_eff_ending"]
         }
 
+        has_programmer = self.property.main_heating_controls["switch_system"] == "programmer"
+        has_trvs = self.property.main_heating_controls["trvs"] is not None
+        has_bypass = self.property.main_heating_controls["auxiliary_systems"] == "bypass"
+
         cost_result = self.costs.programmer_trvs_bypass(
-            number_heated_rooms=int(self.property.data["number-heated-rooms"])
+            number_heated_rooms=int(self.property.data["number-heated-rooms"]),
+            has_trvs=has_trvs,
+            has_programmer=has_programmer,
+            has_bypass=has_bypass
         )
 
         description = "Install a Bypass valve, TRVs and a Programmer"
diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py
index 1d409be6..fd2dfe38 100644
--- a/recommendations/HeatingRecommender.py
+++ b/recommendations/HeatingRecommender.py
@@ -204,15 +204,41 @@ class HeatingRecommender:
                 ashp_costs[key] += controls_recommender.recommendation[0][key]
 
         already_installed = "air_source_heat_pump" in self.property.already_installed
+
+        controls_recommendations = controls_recommender.recommendation
+        if already_installed or not controls_recommendations:
+            # Use a single None placeholder so the loop below still produces exactly one recommendation
+            controls_recommendations = [None]
+
         if already_installed:
             ashp_costs = override_costs(ashp_costs)
-            description = "The property already has an air source heat pump, no further action needed."
-        else:
-            if controls_recommender.recommendation:
-                description = ("Install an air source heat pump, and upgrade heating controls to Smart Thermostats, "
-                               "room sensors and smart radiator valves (time & temperature zone control).")
-            else:
+
+        # This is a map from the heating controls description to the description of the air source heat pump set up
+        ashp_descriptions = {
+            "Time and temperature zone control": (
+                "Install an air source heat pump, and upgrade heating controls to Smart Thermostats, "
+                "room sensors and smart radiator valves (time & temperature zone control)."
+            ),
+            "Programmer, TRVs and bypass": (
+                "Install an air source heat pump, with programmer, TRVs and a Bypass valve."
+            ),
+        }
+
+        new_heating_description = "Air source heat pump, radiators, electric"
+        new_hot_water_description = "From main system"
+        ashp_recommendations = []
+        for controls_rec in controls_recommendations:
+
+            if controls_rec:
+                for key in ashp_costs:
+                    ashp_costs[key] += controls_rec[key]
+
+            if controls_rec is None:
                 description = "Install an air source heat pump."
+            elif already_installed:
+                description = "The property already has an air source heat pump, no further action needed."
+            else:
+                description = ashp_descriptions[controls_rec["description_simulation"]["mainheatcont-description"]]
 
             # If the property does not have existing cavity and loft insulation, we include a note that the cost
             # includes the boiler upgrade scheme and that the cavity and loft need to be treated, to ensure access
@@ -226,85 +252,84 @@ class HeatingRecommender:
                 description = description + (f" The cost includes the £"
                                              f"{BOILER_UPGRADE_SCHEME_ASHP_VALUE} boiler upgrade scheme grant")
 
-        new_heating_description = "Air source heat pump, radiators, electric"
-        new_hot_water_description = "From main system"
-        simulation_config = {
-            "mainheat_energy_eff_ending": "Good",
-            "hot_water_energy_eff_ending": "Good"
-        }
-        description_simulation = {
-            "mainheat-description": new_heating_description,
-            "mainheat-energy-eff": simulation_config["mainheat_energy_eff_ending"],
-            "hot-water-energy-eff": simulation_config["hot_water_energy_eff_ending"],
-            "hotwater-description": new_hot_water_description,
-        }
-        # Installation of a boiler improves the hot water system so we need to reflect this in
-        # the outcome of the recommendation
-        heating_ending_config = MainHeatAttributes(new_heating_description).process()
-        hotwater_ending_config = HotWaterAttributes(new_hot_water_description).process()
-
-        # If the property does not currently have electric main fuel, we'll simulate the change
-        fuel_ending_config = {}
-        if self.property.main_fuel["fuel_type"] != "electricity":
-            new_fuel_description = "electricity (not community)"
-            fuel_ending_config = MainFuelAttributes(new_fuel_description).process()
-            description_simulation = {
-                **description_simulation,
-                "main-fuel": new_fuel_description
+            simulation_config = {
+                "mainheat_energy_eff_ending": "Good",
+                "hot_water_energy_eff_ending": "Good"
             }
+            description_simulation = {
+                "mainheat-description": new_heating_description,
+                "mainheat-energy-eff": simulation_config["mainheat_energy_eff_ending"],
+                "hot-water-energy-eff": simulation_config["hot_water_energy_eff_ending"],
+                "hotwater-description": new_hot_water_description,
+            }
+            # Installation of a boiler improves the hot water system so we need to reflect this in
+            # the outcome of the recommendation
+            heating_ending_config = MainHeatAttributes(new_heating_description).process()
+            hotwater_ending_config = HotWaterAttributes(new_hot_water_description).process()
 
-        # Check the simulation differences
-        heating_simulation_config = check_simulation_difference(
-            new_config=heating_ending_config, old_config=self.property.main_heating
-        )
-        hotwater_simulation_config = check_simulation_difference(
-            new_config=hotwater_ending_config, old_config=self.property.hotwater
-        )
-        fuel_simulation_config = check_simulation_difference(
-            new_config=fuel_ending_config, old_config=self.property.main_fuel
-        )
+            # If the property does not currently have electric main fuel, we'll simulate the change
+            fuel_ending_config = {}
+            if self.property.main_fuel["fuel_type"] != "electricity":
+                new_fuel_description = "electricity (not community)"
+                fuel_ending_config = MainFuelAttributes(new_fuel_description).process()
+                description_simulation = {
+                    **description_simulation,
+                    "main-fuel": new_fuel_description
+                }
 
-        simulation_config = {
-            **simulation_config,
-            **heating_simulation_config,
-            **hotwater_simulation_config,
-            **fuel_simulation_config,
-        }
+            # Check the simulation differences
+            heating_simulation_config = check_simulation_difference(
+                new_config=heating_ending_config, old_config=self.property.main_heating
+            )
+            hotwater_simulation_config = check_simulation_difference(
+                new_config=hotwater_ending_config, old_config=self.property.hotwater
+            )
+            fuel_simulation_config = check_simulation_difference(
+                new_config=fuel_ending_config, old_config=self.property.main_fuel
+            )
 
-        if controls_recommender.recommendation:
-            # We should have just the single recommendation for heat controls, which is time
-            # and temperature zone controls
-            if len(controls_recommender.recommendation) != 1:
-                raise NotImplementedError("More than one heat controls recommendation for air source heat pump")
             simulation_config = {
                 **simulation_config,
-                **controls_recommender.recommendation[0]["simulation_config"]
+                **heating_simulation_config,
+                **hotwater_simulation_config,
+                **fuel_simulation_config,
             }
 
-            description_simulation = {
-                **description_simulation,
-                **controls_recommender.recommendation[0]["description_simulation"]
+            if controls_rec is not None:
+                # Merge this heating controls recommendation's simulation settings and descriptions into
+                # the air source heat pump recommendation
+                simulation_config = {
+                    **simulation_config,
+                    **controls_rec["simulation_config"]
+                }
+
+                description_simulation = {
+                    **description_simulation,
+                    **controls_rec["description_simulation"]
+                }
+
+            ashp_recommendation = {
+                "phase": phase,
+                "parts": [
+                    # TODO
+                ],
+                "type": "heating",
+                "description": description,
+                "starting_u_value": None,
+                "new_u_value": None,
+                "sap_points": None,
+                "already_installed": already_installed,
+                "simulation_config": simulation_config,
+                "description_simulation": description_simulation,
+                **ashp_costs
             }
 
-        ashp_recommendation = {
-            "phase": phase,
-            "parts": [
-                # TODO
-            ],
-            "type": "heating",
-            "description": description,
-            "starting_u_value": None,
-            "new_u_value": None,
-            "sap_points": None,
-            "already_installed": already_installed,
-            "simulation_config": simulation_config,
-            "description_simulation": description_simulation,
-            **ashp_costs
-        }
+            ashp_recommendations.append(ashp_recommendation)
 
         if _return:
-            return [ashp_recommendation]
-        self.heating_recommendations.append(ashp_recommendation)
+            return [ashp_recommendations]
+
+        self.heating_recommendations.extend(ashp_recommendations)
 
     @staticmethod
     def check_simulation_difference(old_config, new_config):

From 40b89f7cb80aaa0a8cdfacad32bb4ad5ae2eb85a Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 1 Aug 2024 13:21:12 +0100
Subject: [PATCH 053/182] debugging

---
 recommendations/HeatingRecommender.py | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py
index fd2dfe38..ab377369 100644
--- a/recommendations/HeatingRecommender.py
+++ b/recommendations/HeatingRecommender.py
@@ -229,9 +229,11 @@ class HeatingRecommender:
         ashp_recommendations = []
         for controls_rec in controls_recommendations:
 
+            ashp_costs_with_controls = ashp_costs.copy()
+
             if controls_rec:
-                for key in ashp_costs:
-                    ashp_costs[key] += controls_rec[key]
+                for key in ashp_costs_with_controls:
+                    ashp_costs_with_controls[key] += controls_rec[key]
 
             if controls_rec is None:
                 description = "Install an air source heat pump."
@@ -244,13 +246,16 @@ class HeatingRecommender:
             # includes the boiler upgrade scheme and that the cavity and loft need to be treated, to ensure access
             # to the funding
             if has_cavity_or_loft_recommendations:
-                description = description + (f" The cost includes the £"
-                                             f"{BOILER_UPGRADE_SCHEME_ASHP_VALUE} boiler upgrade scheme grant. "
-                                             f"You must ensure that the property has an insulated cavity and "
-                                             f"270mm+ loft insulation to qualify for the grant")
+                description = description + (
+                    f" The cost includes the £"
+                    f"{BOILER_UPGRADE_SCHEME_ASHP_VALUE} boiler upgrade scheme grant. "
+                    f"You must ensure that the property has an insulated cavity and "
+                    f"270mm+ loft insulation to qualify for the grant"
+                )
             else:
-                description = description + (f" The cost includes the £"
-                                             f"{BOILER_UPGRADE_SCHEME_ASHP_VALUE} boiler upgrade scheme grant")
+                description = description + (
+                    f" The cost includes the £{BOILER_UPGRADE_SCHEME_ASHP_VALUE} boiler upgrade scheme grant"
+                )
 
             simulation_config = {
                 "mainheat_energy_eff_ending": "Good",
@@ -321,7 +326,7 @@ class HeatingRecommender:
                 "already_installed": already_installed,
                 "simulation_config": simulation_config,
                 "description_simulation": description_simulation,
-                **ashp_costs
+                **ashp_costs_with_controls
             }
 
             ashp_recommendations.append(ashp_recommendation)

From 1aee76dac1318b5159e79f83821ddb37351622a2 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 1 Aug 2024 13:32:52 +0100
Subject: [PATCH 054/182] pruning rep recs so we don't have heating and heating
 controls as representatives separately

---
 backend/Property.py                |  7 +++++--
 recommendations/Recommendations.py | 10 ++++++++++
 2 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/backend/Property.py b/backend/Property.py
index a1bfe265..618dfd67 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -377,7 +377,9 @@ class Property:
                     x["type"] != "internal_wall_insulation"
                 ]
             else:
-                epc_transformations = [x["description_simulation"] for x in represenative_recs_to_this_phase]
+                epc_transformations = [
+                    x["description_simulation"] for x in represenative_recs_to_this_phase
+                ]
 
             # It is possible that we could have two simulations applied to the same descriptions
             # We extract these out
@@ -407,7 +409,8 @@ class Property:
                             continue
 
                         raise NotImplementedError(
-                            "Already have this key in the phase_epc_transformation - implement me")
+                            "Already have this key in the phase_epc_transformation - implement me"
+                        )
                     phase_epc_transformation[k] = v
 
             simulation_epc = self.epc_record.prepared_epc.copy()
diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py
index 0469f501..81c26e15 100644
--- a/recommendations/Recommendations.py
+++ b/recommendations/Recommendations.py
@@ -230,6 +230,16 @@ class Recommendations:
             # When check if these recommendations have two different types, such as solid wall insulation
             # If we have multiple types, we group by type and then select the best recommendation for each type
 
+            # If we have a heating and heating control recommendation, we use JUST the heating recommendation
+            has_both_heating_types = all(
+                x in [rec["type"] for rec in recommendations_by_type] for x in ["heating", "heating_control"]
+            )
+            if has_both_heating_types:
+                # Take just heating
+                recommendations_by_type = [
+                    rec for rec in recommendations_by_type if rec["type"] == "heating"
+                ]
+
             recommendations_by_type = sorted(recommendations_by_type, key=lambda x: x["type"])
             representative_recommendations = []
             for _type, recommendations in groupby(recommendations_by_type, key=lambda x: x["type"]):

From e43842d9803cb6d0d42927ce19a2473daeb5bfc1 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 1 Aug 2024 22:07:19 +0100
Subject: [PATCH 055/182] handling the odd case of a double property

---
 backend/apis/GoogleSolarApi.py                | 123 +++++++++++++++++-
 backend/app/plan/router.py                    |  13 ++
 etl/bill_savings/data_collection.py           |   2 +
 etl/energy_efficiency/app.py                  |  90 +++++++++++++
 etl/xml_survey_extraction/app.py              |   6 +-
 recommendations/HeatingControlRecommender.py  |   6 +-
 recommendations/HeatingRecommender.py         |   1 +
 recommendations/WallRecommendations.py        |  20 ++-
 .../wall_energy_efficiency_values.py          |  56 ++++++++
 9 files changed, 307 insertions(+), 10 deletions(-)
 create mode 100644 etl/energy_efficiency/app.py
 create mode 100644 recommendations/wall_energy_efficiency_values.py

diff --git a/backend/apis/GoogleSolarApi.py b/backend/apis/GoogleSolarApi.py
index 579e985d..c5167e32 100644
--- a/backend/apis/GoogleSolarApi.py
+++ b/backend/apis/GoogleSolarApi.py
@@ -9,6 +9,7 @@ from backend.app.db.functions.solar_functions import get_solar_data, store_batch
 from utils.logger import setup_logger
 from sklearn.preprocessing import MinMaxScaler
 from recommendations.Costs import Costs
+from math import sin, cos, sqrt, atan2, radians
 
 logger = setup_logger()
 
@@ -70,6 +71,9 @@ class GoogleSolarApi:
         # Indicates if we need to store the data to the db
         self.need_to_store = False
 
+        # Indicates if we think we have both units attached to a semi-detached property
+        self.double_property = False
+
     def get_building_insights(self, longitude, latitude, required_quality="MEDIUM", max_retries=None):
         """
         Make an API request to retrieve building insights based on the given longitude and latitude, with retry
@@ -116,7 +120,7 @@ class GoogleSolarApi:
         required_quality="MEDIUM",
         is_building=False,
         session=None,
-        uprn=None
+        uprn=None,
     ):
         """
         Wrapper function that calls get_building_insights and extracts roof segments, with caching.
@@ -147,6 +151,12 @@ class GoogleSolarApi:
 
         # Extract key data from the insights response
         self.roof_segments = self.insights_data["solarPotential"].get('roofSegmentStats', [])
+        # Automatically exclude north-facing segments
+        self.exclude_north_facing_segments()
+        # If a property is semi-detached, it's possible for us to include segments from an attached unit
+        if property_instance.data["built-form"] == "Semi-Detached":
+            self.exclude_likely_duplicate_surfaces()
+
         self.roof_area = self.insights_data["solarPotential"]["wholeRoofStats"]['areaMeters2']
         self.floor_area = self.insights_data["solarPotential"]["wholeRoofStats"]['groundAreaMeters2']
         self.panel_area = (
@@ -162,9 +172,6 @@ class GoogleSolarApi:
             # It should be straightforward, but I'd rather see an actual instance of this happening
             raise NotImplementedError("Panel wattage is not 400W - implement me")
 
-        # Automatically exclude north-facing segments
-        self.exclude_north_facing_segments()
-
         self.roof_segment_indexes = [segment['segmentIndex'] for segment in self.roof_segments]
 
         # We now start finding the solar panel configurations
@@ -172,6 +179,11 @@ class GoogleSolarApi:
             energy_consumption=energy_consumption, is_building=is_building, property_instance=property_instance
         )
 
+        # Finally, if we have a double property, we halve the stored roof and floor areas
+        if self.double_property:
+            self.roof_area = self.roof_area / 2
+            self.floor_area = self.floor_area / 2
+
     def save_to_db(self, session, uprns_to_location, scenario_type):
         if self.insights_data is None:
             raise ValueError("No api data to store")
@@ -338,7 +350,13 @@ class GoogleSolarApi:
         # - surplus: this is the amount of additional energy generated, and therefore how much will be exported
         # - surplus_value: the value of the surplus energy - this feeds into generation_value, when relevant
         # - expected_payback_years: the number of years it will take to pay back the initial investment
-        lifetime_energy_consumption = energy_consumption * self.installation_life_span
+
+        # If we have a double property (i.e. the solar api has returned data for two units) we size up the solar panels
+        # for double the consumption, as if for two units.
+        if self.double_property:
+            lifetime_energy_consumption = energy_consumption * 2 * self.installation_life_span
+        else:
+            lifetime_energy_consumption = energy_consumption * self.installation_life_span
         roi_results = []
         for _, panel_config in panel_performance.iterrows():
             lifetime_ac_kwh = panel_config["lifetime_ac_kwh"]
@@ -408,6 +426,31 @@ class GoogleSolarApi:
 
         panel_performance["expected_payback_years"] = np.ceil(panel_performance["expected_payback_years"]).astype(int)
 
+        if self.double_property:
+            # Now that we've optimised for an energy consumption that is double the original, we need to halve the
+            # results
+            panel_performance["n_panels_halved"] = panel_performance["n_panels"] / 2
+            n_panels_required = {int(x) for x in np.floor(panel_performance["n_panels"] / 2)}
+            # We filter the data on this number of panels
+            panel_performance = panel_performance[panel_performance["n_panels_halved"].isin(n_panels_required)]
+            # We half the generation values
+            for col in [
+                "yearly_dc_energy",
+                "total_cost",
+                "panneled_roof_area",
+                "array_wattage",
+                "initial_ac_kwh_per_year",
+                "lifetime_ac_kwh",
+                "lifetime_dc_kwh",
+                "generation_value",
+                "generation_deficit",
+                "surplus"
+            ]:
+                panel_performance[col] = panel_performance[col] / 2
+
+            panel_performance["n_panels"] = panel_performance["n_panels_halved"]
+            panel_performance = panel_performance.drop(columns=["n_panels_halved"])
+
         self.panel_performance = panel_performance
 
     def exclude_north_facing_segments(self):
@@ -427,3 +470,73 @@ class GoogleSolarApi:
             filtered_segments.append(segment)
 
         self.roof_segments = filtered_segments
+
+    @staticmethod
+    def haversine(lat1, lon1, lat2, lon2):
+        """
+        Calculate the great-circle distance between two points on the Earth
+        given their latitude and longitude in decimal degrees. Using haversine formula.
+        """
+        R = 6373.0  # approximate radius of earth in km
+
+        lat1 = radians(lat1)
+        lon1 = radians(lon1)
+        lat2 = radians(lat2)
+        lon2 = radians(lon2)
+
+        dlon = lon2 - lon1
+        dlat = lat2 - lat1
+
+        a = sin(dlat / 2) ** 2 + cos(lat1) * cos(lat2) * sin(dlon / 2) ** 2
+        c = 2 * atan2(sqrt(a), sqrt(1 - a))
+
+        distance = R * c
+        return distance
+
+    def exclude_likely_duplicate_surfaces(self):
+        """
+        By checking the azimuth of the segments, we can exclude any segments that are likely to be duplicates
+        :return:
+        """
+
+        def is_similar(segment1, segment2, azimuth_tol=20):
+            azimuth_diff = abs(segment1['azimuthDegrees'] - segment2['azimuthDegrees'])
+            return azimuth_diff <= azimuth_tol
+
+        property_center = self.insights_data["center"]
+
+        deduped_segments = []
+        for segment in self.roof_segments:
+            if not deduped_segments:
+                deduped_segments.append(segment)
+                continue
+
+            similar_segments = [s for s in deduped_segments if is_similar(segment, s)]
+            if not similar_segments:
+                deduped_segments.append(segment)
+            else:
+                # Compare distances to the property center and keep the closer segment
+                for similar_segment in similar_segments:
+                    current_dist = self.haversine(
+                        property_center['latitude'], property_center['longitude'],
+                        segment['center']['latitude'], segment['center']['longitude']
+                    )
+                    similar_dist = self.haversine(
+                        property_center['latitude'], property_center['longitude'],
+                        similar_segment['center']['latitude'], similar_segment['center']['longitude']
+                    )
+
+                    if current_dist < similar_dist:
+                        deduped_segments.remove(similar_segment)
+                        deduped_segments.append(segment)
+
+        # If we have a semi-detached property that has duplicated segments, we should expect to half the number of
+        # segments
+        if len(deduped_segments) < len(self.roof_segments):
+            if len(deduped_segments) != len(self.roof_segments) / 2:
+                raise ValueError("We don't have half the number of segments that we started with")
+
+            # Because the segments are duplicated, but the sizes aren't necessarily split perfectly in half, what
+            # we need to do is perform the solar analysis and then half the results. We set an indicator which
+            # implies we should do this
+            self.double_property = True
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index b6175153..a108176b 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -439,6 +439,8 @@ async def trigger_plan(body: PlanTriggerRequest):
 
         logger.info("Performing solar analysis")
         # TODO: Tidy this up
+        # TODO: If a property is semi-detached, we might get roof surfaces for the main building + the neighbour
+        #
         building_ids = [
             {
                 "building_id": p.building_id,
@@ -709,6 +711,17 @@ async def trigger_plan(body: PlanTriggerRequest):
             ]
             recommendations[property_id] = final_recommendations
 
+        # df = []
+        # for rec in recommendations[list(recommendations.keys())[0]]:
+        #     df.append(
+        #         {
+        #             "id": rec["recommendation_id"],
+        #             "description": rec["description"],
+        #             "sap": rec["sap_points"],
+        #         }
+        #     )
+        # df = pd.DataFrame(df)
+
         # 1) the property data
         # 2) the property details (epc)
         # 3) the recommendations
diff --git a/etl/bill_savings/data_collection.py b/etl/bill_savings/data_collection.py
index 6cc2d581..15a52663 100644
--- a/etl/bill_savings/data_collection.py
+++ b/etl/bill_savings/data_collection.py
@@ -131,7 +131,9 @@ def app():
     sample_size = 500
 
     energy_consumption_data = []
+    cavity_walls_data = []
     for i, directory in tqdm(enumerate(epc_directories), total=len(epc_directories)):
+
         # Skip the first 50
         # if i < 57:
         #     continue
diff --git a/etl/energy_efficiency/app.py b/etl/energy_efficiency/app.py
new file mode 100644
index 00000000..23f9d33f
--- /dev/null
+++ b/etl/energy_efficiency/app.py
@@ -0,0 +1,90 @@
+import inspect
+import pandas as pd
+from tqdm import tqdm
+from pathlib import Path
+
+src_file_path = inspect.getfile(lambda: None)
+
+EPC_DIRECTORY = Path(src_file_path).parent / "local_data" / "all-domestic-certificates"
+
+
+def app():
+    # For EPCs lodged from 2020 onwards, this collects data on the energy efficiency categories for wall insulation
+    # so that when we simulate, we know what the resulting energy efficiency category will be
+
+    epc_directories = [entry for entry in EPC_DIRECTORY.iterdir() if entry.is_dir()]
+
+    date_cutoff = "2020-01-01"
+    walls_data = []
+    ashp_data = []
+    for i, directory in tqdm(enumerate(epc_directories), total=len(epc_directories)):
+        data = pd.read_csv(directory / "certificates.csv", low_memory=False)
+        # Rename the columns to the same format as the api returns
+        data.columns = [c.replace("_", "-").lower() for c in data.columns]
+
+        insulated_walls = data[
+            data["walls-description"].isin(
+                [
+                    "Cavity wall, filled cavity",
+                    "Solid brick, with internal insulation",
+                    "Solid brick, with external insulation",
+                ]
+            )
+        ]
+        insulated_walls = insulated_walls[~pd.isnull(insulated_walls["uprn"])]
+        insulated_walls = insulated_walls[
+            pd.to_datetime(insulated_walls["lodgement-date"]) >= date_cutoff
+            ]
+
+        ashp = data[
+            data["mainheat-description"] == "Air source heat pump, radiators, electric"
+            ]
+        ashp = ashp[~pd.isnull(ashp["uprn"])]
+        ashp = ashp[
+            pd.to_datetime(ashp["lodgement-date"]) >= date_cutoff
+            ]
+
+        walls_data.append(insulated_walls)
+        ashp_data.append(ashp)
+
+    walls_df = pd.concat(walls_data)
+    ashp_df = pd.concat(ashp_data)
+
+    ashp_agg = (
+        ashp_df.
+        groupby(
+            ["construction-age-band", "mainheat-description", "mainheatcont-description", "mainheat-energy-eff",
+             "mainheatc-energy-eff"]
+        )
+        .size()
+        .reset_index()
+    )
+    ashp_agg = ashp_agg[
+        ashp_agg["mainheatcont-description"].isin(
+            ["Programmer, TRVs and bypass", "Time and temperature zone control"]
+        )
+    ]
+
+    aggregations = {}
+    for description in [
+        "Cavity wall, filled cavity", "Solid brick, with internal insulation", "Solid brick, with external insulation"
+    ]:
+        aggregation = walls_df[
+            walls_df["walls-description"] == description
+            ].groupby(
+            ["construction-age-band", "walls-energy-eff"]
+        ).size().reset_index().rename(columns={0: "count"})
+
+        # For each grouping of age band, we use the most populous energy efficiency category
+        aggregation_deduped = aggregation.sort_values("count", ascending=False).drop_duplicates("construction-age-band")
+        aggregations[description] = aggregation_deduped
+
+    # Since these tables are small, we just convert them to python dictionaries
+    # This data is just held in the wall_energy_efficiency_values script, rather than s3
+    df1 = aggregations["Cavity wall, filled cavity"]
+    df2 = aggregations["Solid brick, with internal insulation"]
+    df3 = aggregations["Solid brick, with external insulation"]
+
+    df1.to_dict("records")
+    df2.to_dict("records")
+    df3.to_dict("records")
diff --git a/etl/xml_survey_extraction/app.py b/etl/xml_survey_extraction/app.py
index a8bffc73..92451d76 100644
--- a/etl/xml_survey_extraction/app.py
+++ b/etl/xml_survey_extraction/app.py
@@ -27,7 +27,7 @@ SCENARIOS = {
                 "already_installed_file_path": "",
                 "patches_file_path": "",
                 "non_invasive_recommendations_file_path": "",
-                "exclusions": ["floor_insulation", "fireplace", "solar_pv", "heating"],
+                "exclusions": ["floor_insulation", "fireplace", "solar_pv", "heating", 'lighting'],
                 "budget": None,
                 "scenario_name": "Low Hanging Fruit",
                 "multi_plan": True,
@@ -42,7 +42,7 @@ SCENARIOS = {
                 "already_installed_file_path": "",
                 "patches_file_path": "",
                 "non_invasive_recommendations_file_path": "",
-                "exclusions": ["floor_insulation", "fireplace"],
+                "exclusions": ["floor_insulation", "fireplace", 'lighting'],
                 "budget": None,
                 "scenario_name": "Deep Retrofit",
                 "multi_plan": True,
@@ -57,7 +57,7 @@ SCENARIOS = {
                 "already_installed_file_path": "",
                 "patches_file_path": "",
                 "non_invasive_recommendations_file_path": "",
-                "exclusions": ["fireplace"],
+                "exclusions": ["fireplace", 'lighting'],
                 "budget": None,
                 "scenario_name": "Whole House Retrofit",
                 "multi_plan": True,
diff --git a/recommendations/HeatingControlRecommender.py b/recommendations/HeatingControlRecommender.py
index ef0df011..6e827084 100644
--- a/recommendations/HeatingControlRecommender.py
+++ b/recommendations/HeatingControlRecommender.py
@@ -291,7 +291,11 @@ class HeatingControlRecommender:
         simulation_config = check_simulation_difference(
             new_config=ending_config, old_config=self.property.main_heating_controls
         )
-        simulation_config["mainheatc_energy_eff_ending"] = "Average"
+        # Only adjust if the current system is below good
+        if self.property.data["mainheatc-energy-eff"] in ["Poor", "Very Poor"]:
+            simulation_config["mainheatc_energy_eff_ending"] = "Average"
+        else:
+            simulation_config["mainheatc_energy_eff_ending"] = self.property.data["mainheatc-energy-eff"]
 
         description_simulation = {
             "mainheatcont-description": new_controls_description,
diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py
index ab377369..4d91f21b 100644
--- a/recommendations/HeatingRecommender.py
+++ b/recommendations/HeatingRecommender.py
@@ -257,6 +257,7 @@ class HeatingRecommender:
                     f" The cost includes the £{BOILER_UPGRADE_SCHEME_ASHP_VALUE} boiler upgrade scheme grant"
                 )
 
+            print("TEMP UPDATED FOR 77 Perryn!!!!!")
             simulation_config = {
                 "mainheat_energy_eff_ending": "Good",
                 "hot_water_energy_eff_ending": "Good"
diff --git a/recommendations/WallRecommendations.py b/recommendations/WallRecommendations.py
index 448b34e8..4ef747f7 100644
--- a/recommendations/WallRecommendations.py
+++ b/recommendations/WallRecommendations.py
@@ -13,6 +13,7 @@ from recommendations.recommendation_utils import (
 )
 from recommendations.config import PARTIALLY_FILLED_PERCENTAGE_ASSUMPTION
 from recommendations.Costs import Costs
+from recommendations.wall_energy_efficiency_values import cavity_wall_energy_eff, iwi_energy_eff, ewi_energy_eff
 from utils.logger import setup_logger
 
 logger = setup_logger()
@@ -404,11 +405,28 @@ class WallRecommendations(Definitions):
 
         simulation_config = {}
         if self.property.data["walls-energy-eff"] not in ["Good", "Very Good"]:
+            if wall_ending_config["is_cavity_wall"]:
+                efficiency_data = [
+                    x for x in cavity_wall_energy_eff if
+                    x["construction-age-band"] == self.property.construction_age_band
+                ][0]
+            elif wall_ending_config["internal_insulation"]:
+                efficiency_data = [
+                    x for x in iwi_energy_eff if
+                    x["construction-age-band"] == self.property.construction_age_band
+                ][0]
+            else:
+                efficiency_data = [
+                    x for x in ewi_energy_eff if
+                    x["construction-age-band"] == self.property.construction_age_band
+                ][0]
+
             simulation_config = {
-                "walls_energy_eff_ending": "Good"
+                "walls_energy_eff_ending": efficiency_data["walls-energy-eff"]
             }
 
         # We check if we have double insulation in any instances
+        # TODO: We should pull the energy efficiency categories on double insulation instances, though it's quite rare
         double_insulation = (
             (wall_ending_config["is_filled_cavity"] and wall_ending_config["external_insulation"]) or
             (wall_ending_config["is_filled_cavity"] and wall_ending_config["internal_insulation"]) or
diff --git a/recommendations/wall_energy_efficiency_values.py b/recommendations/wall_energy_efficiency_values.py
new file mode 100644
index 00000000..bfd43eb2
--- /dev/null
+++ b/recommendations/wall_energy_efficiency_values.py
@@ -0,0 +1,56 @@
+cavity_wall_energy_eff = [
+    {'construction-age-band': 'England and Wales: 1950-1966', 'walls-energy-eff': 'Average', 'count': 605820},
+    {'construction-age-band': 'England and Wales: 1967-1975', 'walls-energy-eff': 'Average', 'count': 410998},
+    {'construction-age-band': 'England and Wales: 1930-1949', 'walls-energy-eff': 'Average', 'count': 263575},
+    {'construction-age-band': 'England and Wales: 1976-1982', 'walls-energy-eff': 'Good', 'count': 206654},
+    {'construction-age-band': 'England and Wales: 1983-1990', 'walls-energy-eff': 'Good', 'count': 106489},
+    {'construction-age-band': 'England and Wales: 1900-1929', 'walls-energy-eff': 'Average', 'count': 58399},
+    {'construction-age-band': 'England and Wales: 1991-1995', 'walls-energy-eff': 'Good', 'count': 58252},
+    {'construction-age-band': 'England and Wales: 1996-2002', 'walls-energy-eff': 'Good', 'count': 35141},
+    {'construction-age-band': 'England and Wales: 2003-2006', 'walls-energy-eff': 'Good', 'count': 7194},
+    {'construction-age-band': 'England and Wales: 2007-2011', 'walls-energy-eff': 'Good', 'count': 2639},
+    {'construction-age-band': 'England and Wales: before 1900', 'walls-energy-eff': 'Average', 'count': 2495},
+    {'construction-age-band': 'England and Wales: 2012 onwards', 'walls-energy-eff': 'Very Good', 'count': 1158},
+    {'construction-age-band': 'England and Wales: 2007 onwards', 'walls-energy-eff': 'Good', 'count': 357},
+    {'construction-age-band': 'INVALID!', 'walls-energy-eff': 'Very Good', 'count': 88}
+]
+
+iwi_energy_eff = [
+    {'construction-age-band': 'England and Wales: 1900-1929', 'walls-energy-eff': 'Good', 'count': 22415},
+    {'construction-age-band': 'England and Wales: before 1900', 'walls-energy-eff': 'Good',
+     'count': 13422},
+    {'construction-age-band': 'England and Wales: 1930-1949', 'walls-energy-eff': 'Good', 'count': 6640},
+    {'construction-age-band': 'England and Wales: 1950-1966', 'walls-energy-eff': 'Good', 'count': 1391},
+    {'construction-age-band': 'England and Wales: 1967-1975', 'walls-energy-eff': 'Good', 'count': 663},
+    {'construction-age-band': 'England and Wales: 2003-2006', 'walls-energy-eff': 'Very Good',
+     'count': 516},
+    {'construction-age-band': 'England and Wales: 2007-2011', 'walls-energy-eff': 'Very Good',
+     'count': 463},
+    {'construction-age-band': 'England and Wales: 2012 onwards', 'walls-energy-eff': 'Very Good',
+     'count': 353},
+    {'construction-age-band': 'England and Wales: 1996-2002', 'walls-energy-eff': 'Good', 'count': 218},
+    {'construction-age-band': 'England and Wales: 1983-1990', 'walls-energy-eff': 'Very Good',
+     'count': 166},
+    {'construction-age-band': 'England and Wales: 1976-1982', 'walls-energy-eff': 'Very Good',
+     'count': 121},
+    {'construction-age-band': 'England and Wales: 1991-1995', 'walls-energy-eff': 'Good', 'count': 104},
+    {'construction-age-band': 'England and Wales: 2007 onwards', 'walls-energy-eff': 'Very Good',
+     'count': 74}, {'construction-age-band': 'INVALID!', 'walls-energy-eff': 'Very Good', 'count': 26}
+]
+
+ewi_energy_eff = [
+    {'construction-age-band': 'England and Wales: 1900-1929', 'walls-energy-eff': 'Good', 'count': 18427},
+    {'construction-age-band': 'England and Wales: 1930-1949', 'walls-energy-eff': 'Good', 'count': 17803},
+    {'construction-age-band': 'England and Wales: 1950-1966', 'walls-energy-eff': 'Good', 'count': 4306},
+    {'construction-age-band': 'England and Wales: before 1900', 'walls-energy-eff': 'Good', 'count': 2955},
+    {'construction-age-band': 'England and Wales: 1967-1975', 'walls-energy-eff': 'Good', 'count': 647},
+    {'construction-age-band': 'England and Wales: 1976-1982', 'walls-energy-eff': 'Very Good', 'count': 188},
+    {'construction-age-band': 'England and Wales: 2007-2011', 'walls-energy-eff': 'Very Good', 'count': 73},
+    {'construction-age-band': 'England and Wales: 2003-2006', 'walls-energy-eff': 'Very Good', 'count': 49},
+    {'construction-age-band': 'England and Wales: 2012 onwards', 'walls-energy-eff': 'Very Good', 'count': 37},
+    {'construction-age-band': 'England and Wales: 1983-1990', 'walls-energy-eff': 'Good', 'count': 31},
+    {'construction-age-band': 'England and Wales: 1996-2002', 'walls-energy-eff': 'Very Good', 'count': 21},
+    {'construction-age-band': 'England and Wales: 1991-1995', 'walls-energy-eff': 'Good', 'count': 14},
+    {'construction-age-band': 'England and Wales: 2007 onwards', 'walls-energy-eff': 'Very Good', 'count': 8},
+    {'construction-age-band': 'INVALID!', 'walls-energy-eff': 'Very Good', 'count': 4}
+]

From 8aa81a2bf7908ec64c05a04c9727de7fa0f8685c Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 1 Aug 2024 23:08:37 +0100
Subject: [PATCH 056/182] tweaking solar api

---
 backend/apis/GoogleSolarApi.py | 11 +++++++++--
 backend/app/plan/router.py     |  5 ++++-
 2 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/backend/apis/GoogleSolarApi.py b/backend/apis/GoogleSolarApi.py
index c5167e32..1354bbff 100644
--- a/backend/apis/GoogleSolarApi.py
+++ b/backend/apis/GoogleSolarApi.py
@@ -154,7 +154,9 @@ class GoogleSolarApi:
         # Automatically exclude north-facing segments
         self.exclude_north_facing_segments()
         # If a property is semi-detached, it's possible for us to include segments from an attached unit
-        if property_instance.data["built-form"] == "Semi-Detached":
+        if (property_instance.data["built-form"] == "Semi-Detached") and (
+            property_instance.data["extension-count"] == 0
+        ):
             self.exclude_likely_duplicate_surfaces()
 
         self.roof_area = self.insights_data["solarPotential"]["wholeRoofStats"]['areaMeters2']
@@ -506,6 +508,7 @@ class GoogleSolarApi:
         property_center = self.insights_data["center"]
 
         deduped_segments = []
+        dropped_segments = []
         for segment in self.roof_segments:
             if not deduped_segments:
                 deduped_segments.append(segment)
@@ -529,12 +532,16 @@ class GoogleSolarApi:
                     if current_dist < similar_dist:
                         deduped_segments.remove(similar_segment)
                         deduped_segments.append(segment)
+                        dropped_segments.append(similar_segment)
+                    else:
+                        dropped_segments.append(segment)
 
         # If we have a semi-detached property that has duplicated segments, we should expect to half the number of
         # segments
         if len(deduped_segments) < len(self.roof_segments):
             if len(deduped_segments) != len(self.roof_segments) / 2:
-                raise ValueError("We don't have half the number of segments that we started with")
+                # We don't perform any dropping in this case
+                return
 
             # Because the segments are duplicated, but the sizes aren't necessarily split perfectly in half, what
             # we need to do is perform the solar analysis and then half the results. We set an indicator which
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index a108176b..7e14b61f 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -440,7 +440,10 @@ async def trigger_plan(body: PlanTriggerRequest):
         logger.info("Performing solar analysis")
         # TODO: Tidy this up
         # TODO: If a property is semi-detached, we might get roof surfaces for the main building + the neighbour
-        #
+        # TODO: If we can't get high image quality, should we use the solar API? Maybe just for semi-detached units with
+        #       extensions, since it doesn't seem to do a great job
+        # TODO: For simple properties, we should do a comparison/check between the solar API's roof area and the
+        #       basic estimate of roof area
         building_ids = [
             {
                 "building_id": p.building_id,

From 92fcd080a8287a7a6ae0f04bc83ff03ac0c25b5c Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 5 Aug 2024 15:35:16 +0100
Subject: [PATCH 057/182] integrating the non-intrusive recommendations for
 ashp and solarpv into backend engine

---
 backend/Property.py                        |  48 +++-
 backend/app/plan/router.py                 | 213 ++++++++++--------
 backend/app/plan/schemas.py                |   2 +
 etl/bill_savings/EnergyConsumptionModel.py |  52 +++++
 etl/customers/newhaven/__init__.py         |   0
 etl/customers/newhaven/newhaven_study.py   | 249 +++++++++++++++++++++
 etl/testing_data/bills_model_testing.py    |  60 +++++
 recommendations/Costs.py                   |  10 +-
 recommendations/HeatingRecommender.py      |  67 ++++--
 recommendations/Recommendations.py         |   2 +-
 recommendations/SolarPvRecommendations.py  |  32 ++-
 recommendations/WallRecommendations.py     |  24 +-
 12 files changed, 613 insertions(+), 146 deletions(-)
 create mode 100644 etl/customers/newhaven/__init__.py
 create mode 100644 etl/customers/newhaven/newhaven_study.py
 create mode 100644 etl/testing_data/bills_model_testing.py

diff --git a/backend/Property.py b/backend/Property.py
index 618dfd67..309fb149 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -2,6 +2,7 @@ import os
 import ast
 from itertools import groupby
 import pandas as pd
+import numpy as np
 from datetime import datetime, timedelta
 
 from etl.epc.Dataset import TrainingDataset
@@ -211,9 +212,24 @@ class Property:
         if n_bedrooms not in [None, ""]:
             n_bedrooms = int(round(float(n_bedrooms) + 1e-5))
 
+        number_of_floors = kwargs.get("number_of_floors", None)
+        if number_of_floors not in [None, ""]:
+            number_of_floors = int(round(float(number_of_floors) + 1e-5))
+
+        insulation_floor_area = kwargs.get("insulation_floor_area", None)
+        if insulation_floor_area not in [None, ""]:
+            insulation_floor_area = float(insulation_floor_area)
+
+        insulation_wall_area = kwargs.get("insulation_wall_area", None)
+        if insulation_wall_area not in [None, ""]:
+            insulation_wall_area = float(insulation_wall_area)
+
         return {
             "n_bathrooms": n_bathrooms,
             "n_bedrooms": n_bedrooms,
+            "number_of_floors": number_of_floors,
+            "insulation_floor_area": insulation_floor_area,
+            "insulation_wall_area": insulation_wall_area,
             "building_id": kwargs.get("building_id", None),
         }
 
@@ -222,6 +238,9 @@ class Property:
         self.n_bathrooms = kwargs.get("n_bathrooms", None)
         self.n_bedrooms = kwargs.get("n_bedrooms", None)
         self.building_id = kwargs.get("building_id", None)
+        self.number_of_floors = kwargs.get("number_of_floors", None)
+        self.insulation_floor_area = kwargs.get("insulation_floor_area", None)
+        self.insulation_wall_area = kwargs.get("insulation_wall_area", None)
 
     def create_base_difference_epc_record(self, cleaned_lookup: dict):
         """
@@ -1060,18 +1079,22 @@ class Property:
 
         # We can update the number of floors if we have this information in the condition data
         self.number_of_floors = int(self.energy_assessment_condition_data["number_of_floors"]) \
-            if condition_data.get("number_of_floors") is not None \
+            if (condition_data.get("number_of_floors") is not None) and (self.number_of_floors is not None) \
             else self.number_of_floors
 
-        self.perimeter = float(self.energy_assessment_condition_data["perimeter"]) \
-            if condition_data.get("perimeter") is not None \
-            else estimate_perimeter(
-            floor_area=self.floor_area / self.number_of_floors,
-            num_rooms=self.number_of_rooms / self.number_of_floors
-        )
+        # If we already have this, we re-engineer the perimeter
+        if self.insulation_floor_area is not None:
+            self.perimeter = np.sqrt(self.insulation_floor_area) * 4
+        else:
+            self.perimeter = float(self.energy_assessment_condition_data["perimeter"]) \
+                if condition_data.get("perimeter") is not None \
+                else estimate_perimeter(
+                floor_area=self.floor_area / self.number_of_floors,
+                num_rooms=self.number_of_rooms / self.number_of_floors
+            )
 
         self.insulation_wall_area = float(self.energy_assessment_condition_data["insulation_wall_area"]) \
-            if condition_data.get("insulation_wall_area") is not None \
+            if (condition_data.get("insulation_wall_area") is not None) and (self.insulation_wall_area is not None) \
             else estimate_external_wall_area(
             num_floors=self.number_of_floors,
             floor_height=self.floor_height,
@@ -1079,9 +1102,12 @@ class Property:
             built_form=self.data["built-form"],
         )
 
-        self.insulation_floor_area = float(self.energy_assessment_condition_data["main_dwelling_ground_floor_area"]) \
-            if condition_data.get("main_dwelling_ground_floor_area") is not None \
-            else self.floor_area / self.number_of_floors
+        if self.insulation_floor_area is not None:
+            self.insulation_floor_area = float(
+                self.energy_assessment_condition_data["main_dwelling_ground_floor_area"]
+            ) if (condition_data.get("main_dwelling_ground_floor_area") is not None) else (
+                self.floor_area / self.number_of_floors
+            )
 
     def set_floor_level(self):
         self.floor_level = (
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 7e14b61f..db0ff552 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -388,7 +388,7 @@ async def trigger_plan(body: PlanTriggerRequest):
 
             property_non_invasive_recommendations = next((
                 x for x in non_invasive_recommendations if
-                (x["address"] == config["address"]) and (x["postcode"] == config["postcode"])
+                (x["uprn"] == config["uprn"])
             ), {})
 
             input_properties.append(
@@ -432,6 +432,25 @@ async def trigger_plan(body: PlanTriggerRequest):
             environment=get_settings().ENVIRONMENT
         )
 
+        epcs_for_scoring = pd.DataFrame([energy_consumption_client.prepare_new_data(p) for p in input_properties])
+        # What do we need?
+        # We need an estimate of each property's energy consumption now, as well as the cost of heating and hot water.
+        # The newest EPC may have been done quite some time ago, and so we should take this into consideration when
+        # producing the estimate for cost. With that said, we already have a methodology which will re-map the cost
+        # when the EPC was produced to a cost for today; however, could we use the ML models instead?
+        # In theory, we could just score the kwh models via the API, pass the results into the get_components function
+        # and insert the kwh figures into the property and we're done
+        # TODO: Need to check if we need to re-map when scoring new data or not
+
+        # We need to prepare the EPC so it's in the same format as the training data
+        # TODO: DELETE ME
+        # from utils.s3 import read_dataframe_from_s3_parquet
+        # train = read_dataframe_from_s3_parquet(
+        #     bucket_name="retrofit-data-dev",
+        #     file_key="energy_consumption/2024-07-08/energy_consumption_dataset.parquet"
+        # )
+        # We need to prepare the EPC so it's in the same format as the training data
+
         logger.info("Getting spatial data")
         for p in input_properties:
             p.get_components(cleaned=cleaned, energy_consumption_client=energy_consumption_client)
@@ -444,6 +463,7 @@ async def trigger_plan(body: PlanTriggerRequest):
         #       extensions, since it doesn't seem to do a great job
         # TODO: For simple properties, we should do a comparison/check between the solar API's roof area and the
         #       basic estimate of roof area
+        # TODO: TEMP SWITCHED OFF
         building_ids = [
             {
                 "building_id": p.building_id,
@@ -481,109 +501,112 @@ async def trigger_plan(body: PlanTriggerRequest):
                 "uprn": p.uprn
             } for p in input_properties if p.building_id is None
         ]
-        if building_ids:
-            # Find the unique longitude and latitude pairs for each building id
-            unique_coordinates = {}
-            building_uprns = {}
-            for entry in building_ids:
-                building_id = entry['building_id']
-                coordinate_pair = {'longitude': entry['longitude'], 'latitude': entry['latitude']}
+        if False:
+            if building_ids:
+                # Find the unique longitude and latitude pairs for each building id
+                unique_coordinates = {}
+                building_uprns = {}
+                for entry in building_ids:
+                    building_id = entry['building_id']
+                    coordinate_pair = {'longitude': entry['longitude'], 'latitude': entry['latitude']}
 
-                if building_id not in unique_coordinates:
-                    unique_coordinates[building_id] = []
+                    if building_id not in unique_coordinates:
+                        unique_coordinates[building_id] = []
 
-                if coordinate_pair not in unique_coordinates[building_id]:
-                    unique_coordinates[building_id].append(coordinate_pair)
+                    if coordinate_pair not in unique_coordinates[building_id]:
+                        unique_coordinates[building_id].append(coordinate_pair)
 
-                if building_id not in building_uprns:
-                    building_uprns[building_id] = []
+                    if building_id not in building_uprns:
+                        building_uprns[building_id] = []
 
-                if entry['uprn'] not in building_uprns[building_id]:
-                    building_uprns[building_id].append(
-                        {
-                            "uprn": entry['uprn'], "longitude": entry['longitude'], "latitude": entry['latitude']
-                        }
-                    )
-
-            solar_panel_configuration = {}
-            for building_id, coordinates in unique_coordinates.items():
-                if len(coordinates) > 1:
-                    raise NotImplementedError("more than one coordinate for a building - handle me")
-
-                coordinates = coordinates[0]
-                energy_consumption = sum(
-                    [entry['energy_consumption'] for entry in building_ids if entry['building_id'] == building_id]
-                )
-                solar_api_client.get(
-                    longitude=coordinates["longitude"],
-                    latitude=coordinates["latitude"],
-                    energy_consumption=energy_consumption,
-                    is_building=True,
-                    session=session
-                )
-                solar_panel_configuration[building_id] = {
-                    "insights_data": solar_api_client.insights_data,
-                    "panel_performance": solar_api_client.panel_performance,
-                    "n_units": len([entry for entry in building_ids if entry['building_id'] == building_id])
-                }
-
-                # Store the data in the database
-                # TODO: Rather than just doing a straight insert, we should overwrite what's already there if it exists
-                solar_api_client.save_to_db(
-                    session=session, uprns_to_location=building_uprns[building_id], scenario_type="building"
-                )
-
-                # Insert this into the properties that have this building id
-                for p in input_properties:
-                    if p.building_id == building_id:
-                        unit_solar_panel_configuration = solar_panel_configuration[building_id].copy()
-
-                        unit_solar_panel_configuration["unit_share_of_energy"] = (
-                            [x for x in building_ids if x["property_id"] == p.id][0]["energy_consumption"] /
-                            energy_consumption
+                    if entry['uprn'] not in building_uprns[building_id]:
+                        building_uprns[building_id].append(
+                            {
+                                "uprn": entry['uprn'], "longitude": entry['longitude'], "latitude": entry['latitude']
+                            }
                         )
-                        p.set_solar_panel_configuration(unit_solar_panel_configuration)
 
-        if individual_units:
-            # Model the solar potential at the property level
-            for unit in individual_units:
-                property_instance = [p for p in input_properties if p.id == unit["property_id"]][0]
-                # At this level, we check if the property is suitable for solar and if now, skip
-                if not property_instance.is_solar_pv_valid():
-                    continue
+                solar_panel_configuration = {}
+                for building_id, coordinates in unique_coordinates.items():
+                    if len(coordinates) > 1:
+                        raise NotImplementedError("more than one coordinate for a building - handle me")
 
-                solar_api_client.get(
-                    longitude=unit["longitude"],
-                    latitude=unit["latitude"],
-                    energy_consumption=unit["energy_consumption"],
-                    is_building=False,
-                    session=session,
-                    uprn=unit["uprn"],
-                    property_instance=property_instance
-                )
-
-                # Store the data in the database
-                # TODO: Rather than just doing a straight insert, we should overwrite what's already there if it exists
-                solar_api_client.save_to_db(
-                    session=session,
-                    uprns_to_location=[
-                        {
-                            "uprn": property_instance.uprn,
-                            "longitude": property_instance.spatial["longitude"],
-                            "latitude": property_instance.spatial["latitude"]
-                        }
-                    ],
-                    scenario_type="unit"
-                )
-
-                property_instance.set_solar_panel_configuration(
-                    solar_panel_configuration={
+                    coordinates = coordinates[0]
+                    energy_consumption = sum(
+                        [entry['energy_consumption'] for entry in building_ids if entry['building_id'] == building_id]
+                    )
+                    solar_api_client.get(
+                        longitude=coordinates["longitude"],
+                        latitude=coordinates["latitude"],
+                        energy_consumption=energy_consumption,
+                        is_building=True,
+                        session=session
+                    )
+                    solar_panel_configuration[building_id] = {
                         "insights_data": solar_api_client.insights_data,
                         "panel_performance": solar_api_client.panel_performance,
-                        "unit_share_of_energy": 1
-                    },
-                    roof_area=solar_api_client.roof_area
-                )
+                        "n_units": len([entry for entry in building_ids if entry['building_id'] == building_id])
+                    }
+
+                    # Store the data in the database
+                    # TODO: Rather than just doing a straight insert, we should overwrite what's already there if it
+                    #  exists
+                    solar_api_client.save_to_db(
+                        session=session, uprns_to_location=building_uprns[building_id], scenario_type="building"
+                    )
+
+                    # Insert this into the properties that have this building id
+                    for p in input_properties:
+                        if p.building_id == building_id:
+                            unit_solar_panel_configuration = solar_panel_configuration[building_id].copy()
+
+                            unit_solar_panel_configuration["unit_share_of_energy"] = (
+                                [x for x in building_ids if x["property_id"] == p.id][0]["energy_consumption"] /
+                                energy_consumption
+                            )
+                            p.set_solar_panel_configuration(unit_solar_panel_configuration)
+
+            if individual_units:
+                # Model the solar potential at the property level
+                for unit in individual_units:
+                    property_instance = [p for p in input_properties if p.id == unit["property_id"]][0]
+                    # At this level, we check if the property is suitable for solar and if not, skip
+                    if not property_instance.is_solar_pv_valid():
+                        continue
+
+                    solar_api_client.get(
+                        longitude=unit["longitude"],
+                        latitude=unit["latitude"],
+                        energy_consumption=unit["energy_consumption"],
+                        is_building=False,
+                        session=session,
+                        uprn=unit["uprn"],
+                        property_instance=property_instance
+                    )
+
+                    # Store the data in the database
+                    # TODO: Rather than just doing a straight insert, we should overwrite what's already there if it
+                    #  exists
+                    solar_api_client.save_to_db(
+                        session=session,
+                        uprns_to_location=[
+                            {
+                                "uprn": property_instance.uprn,
+                                "longitude": property_instance.spatial["longitude"],
+                                "latitude": property_instance.spatial["latitude"]
+                            }
+                        ],
+                        scenario_type="unit"
+                    )
+
+                    property_instance.set_solar_panel_configuration(
+                        solar_panel_configuration={
+                            "insights_data": solar_api_client.insights_data,
+                            "panel_performance": solar_api_client.panel_performance,
+                            "unit_share_of_energy": 1
+                        },
+                        roof_area=solar_api_client.roof_area
+                    )
 
         logger.info("Getting components and epc recommendations")
         recommendations = {}
diff --git a/backend/app/plan/schemas.py b/backend/app/plan/schemas.py
index 108eb1ae..082f46d3 100644
--- a/backend/app/plan/schemas.py
+++ b/backend/app/plan/schemas.py
@@ -33,6 +33,8 @@ class PlanTriggerRequest(BaseModel):
         "solar_pv",
         # Specific measures
         "air_source_heat_pump",
+        "internal_wall_insulation",
+        "external_wall_insulation"
     }
 
     _allowed_goals = {"Increasing EPC"}
diff --git a/etl/bill_savings/EnergyConsumptionModel.py b/etl/bill_savings/EnergyConsumptionModel.py
index ff225073..5922177e 100644
--- a/etl/bill_savings/EnergyConsumptionModel.py
+++ b/etl/bill_savings/EnergyConsumptionModel.py
@@ -6,6 +6,7 @@ from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_percenta
 from sklearn.feature_selection import RFECV
 from utils.s3 import save_pickle_to_s3, read_pickle_from_s3, read_dataframe_from_s3_parquet, read_csv_from_s3
 from utils.logger import setup_logger
+from backend.Property import Property
 
 logger = setup_logger()
 
@@ -506,6 +507,57 @@ class EnergyConsumptionModel:
 
         return prediction
 
+    @staticmethod
+    def prepare_new_data(p: Property):
+        """
+        Prepare a property's EPC record for scoring with the kwh models: a copy of ``p.data`` is taken, the numeric
+        fields are cast to float, the two flag fields are mapped to "Y"/"N" and missing floor fields get a filler
+        :param p: Property instance whose ``data`` attribute holds the EPC record (cleaned upstream in the backend)
+        :return: the converted copy of ``p.data``
+        """
+        # Work on a copy so the conversions below are not written back onto p.data
+        epc = p.data.copy()
+        numeric_cols = [
+            'current-energy-efficiency',
+            'potential-energy-efficiency', 'environment-impact-current',
+            'environment-impact-potential', 'energy-consumption-current',
+            'energy-consumption-potential', 'co2-emissions-current',
+            'co2-emiss-curr-per-floor-area', 'co2-emissions-potential',
+            'lighting-cost-current', 'lighting-cost-potential',
+            'heating-cost-current', 'heating-cost-potential',
+            'hot-water-cost-current', 'hot-water-cost-potential',
+            'total-floor-area', 'multi-glaze-proportion',
+            'extension-count', 'number-habitable-rooms', 'number-heated-rooms',
+            'low-energy-lighting', 'number-open-fireplaces',
+            'wind-turbine-count', 'unheated-corridor-length',
+            'floor-height', 'photo-supply', 'fixed-lighting-outlets-count',
+            'low-energy-fixed-light-count',
+        ]
+        for v in numeric_cols:  # every listed key is looked up directly, so each must be present in the record
+            if epc[v] is not None:
+                epc[v] = float(epc[v])
+        # Flags may arrive as booleans, None or already as "Y"/"N"; any other value raises KeyError in the loop below
+        bools_to_remap = ['mains-gas-flag', 'flat-top-storey']
+        bool_map = {
+            True: "Y",
+            False: "N",
+            None: "N",
+            "Y": "Y",
+            "N": "N"
+        }
+        for v in bools_to_remap:
+            epc[v] = bool_map[epc[v]]
+        # Missing values in these columns get a fixed filler string (note: the two fillers are spelled differently)
+        no_data = {
+            "floor-level": "NODATA!",
+            "floor-energy-eff": "NO DATA!"
+        }
+        for v, fill_val in no_data.items():
+            if pd.isnull(epc[v]):
+                epc[v] = fill_val
+
+        return epc
+
     @staticmethod
     def calculate_percentage_decrease(start_efficiency, end_efficiency, consumption_averages):
 
diff --git a/etl/customers/newhaven/__init__.py b/etl/customers/newhaven/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/etl/customers/newhaven/newhaven_study.py b/etl/customers/newhaven/newhaven_study.py
new file mode 100644
index 00000000..1f3e858f
--- /dev/null
+++ b/etl/customers/newhaven/newhaven_study.py
@@ -0,0 +1,249 @@
+import inspect
+import pandas as pd
+from etl.epc.settings import EARLIEST_EPC_DATE
+from pathlib import Path
+import numpy as np
+from utils.s3 import save_csv_to_s3
+
+src_file_path = inspect.getfile(lambda: None)
+
+EPC_DIRECTORY = Path(src_file_path).parent / "local_data" / "all-domestic-certificates"
+CUSTOMER_DATA_DIRECTORY = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Newhaven/Data"
+
+USER_ID = 8
+PORTFOLIO_ID = 88
+
+
+def make_asset_list():
+    """
+    Set up a small asset list for the Newhaven study: build a 10% sample of Lewes properties rated below B,
+    derive their non-invasive (ASHP / solar PV) recommendations, store both in S3 and print two scenario bodies
+    """
+    # Read in EPC data for Lewes
+    lewes_directory = EPC_DIRECTORY / "domestic-E07000063-Lewes/certificates.csv"
+    epc_data = pd.read_csv(lewes_directory, low_memory=False)
+    # Rename the columns to the same format as the api returns
+    epc_data.columns = [c.replace("_", "-").lower() for c in epc_data.columns]
+
+    # Keep only EPCs lodged on or after the earliest-date threshold
+    epc_data = epc_data[epc_data["lodgement-date"] >= EARLIEST_EPC_DATE]
+
+    epc_data = epc_data[~pd.isnull(epc_data["uprn"])]
+    epc_data["uprn"] = epc_data["uprn"].astype(int).astype(str)
+    # The customer-supplied datasets below are all read from CUSTOMER_DATA_DIRECTORY and keyed on a string UPRN
+    # We read in the multiple data sources
+    address_base = pd.read_csv(
+        f"{CUSTOMER_DATA_DIRECTORY}/OS AddressBase Premium/OS AddressBase Premium.csv",
+        low_memory=False,
+    )
+    # Filter on resi
+    address_base = address_base[address_base["Primary Code Description"] == "Residential"]
+    address_base["UPRN"] = address_base["UPRN"].astype(int).astype(str)
+
+    pv_potential = pd.read_csv(
+        f"{CUSTOMER_DATA_DIRECTORY}/Domestic Rooftop PV Potential/Domestic Rooftop PV Potential.csv",
+        low_memory=False,
+    )
+    pv_potential["UPRN"] = pv_potential["UPRN"].astype(int).astype(str)
+
+    ashp_potential = pd.read_csv(
+        f"{CUSTOMER_DATA_DIRECTORY}/Air Source Heat Pump Potential/Air Source Heat Pump Potential.csv",
+        low_memory=False,
+    )
+    ashp_potential["UPRN"] = ashp_potential["UPRN"].astype(int).astype(str)
+
+    insulation_potential = pd.read_csv(
+        f"{CUSTOMER_DATA_DIRECTORY}/Insulation Potential/Insulation Potential.csv",
+        low_memory=False,
+    )
+    insulation_potential["UPRN"] = insulation_potential["UPRN"].astype(int).astype(str)
+
+    renewables_cost = pd.read_csv(
+        f"{CUSTOMER_DATA_DIRECTORY}/Low Carbon Technology Costs/Low Carbon Technology Costs.csv",
+        low_memory=False,
+    )
+    renewables_cost["UPRN"] = renewables_cost["UPRN"].astype(int).astype(str)
+
+    # Merge the EPC data onto address base
+    asset_list = address_base[
+        [
+            "UPRN", "Class Description", "Relative Height - Eaves",
+        ]
+    ].merge(
+        epc_data[
+            ["uprn", "current-energy-efficiency", "current-energy-rating", "address1", "postcode", "floor-height",
+             "property-type", "built-form"]],
+        how="left",
+        left_on="UPRN",
+        right_on="uprn"
+    ).drop(
+        columns=["uprn"]
+    ).merge(
+        insulation_potential[["UPRN", "EPC Rating", "Wall Area [m^2]", "Building Area [m^2]"]],
+        how="left",
+        on="UPRN"
+    ).rename(
+        columns={"Wall Area [m^2]": "insulation_wall_area", "Building Area [m^2]": "floor_area"}
+    )
+
+    # Take properties below a B - there are 2844 units
+    asset_list = asset_list[asset_list["current-energy-efficiency"].astype(float) <= 80]
+    # Drop caravans
+    asset_list = asset_list[asset_list["Class Description"] != "Caravan"]
+    asset_list = asset_list[~pd.isnull(asset_list["current-energy-efficiency"])]
+
+    # Take a 10% sample, for properties that have an EPC, with a seed
+    asset_list = asset_list.sample(frac=0.1, random_state=42)
+
+    AVG_FLOOR_HEIGHT = asset_list["floor-height"].median()
+
+    def estimate_n_floors(
+        building_height, floor_height, address_base_property_description, epc_property_type,
+    ):
+        """Estimate storeys: 1 or 2 for self-contained flats (by EPC type), else eaves height / floor height."""
+        if address_base_property_description == "Self Contained Flat (Includes Maisonette / Apartment)":
+            if epc_property_type == "Flat":
+                return 1
+            if epc_property_type == "House":
+                return 2
+            raise NotImplementedError("Implement me")
+
+        if pd.isnull(floor_height):
+            return np.round(building_height / AVG_FLOOR_HEIGHT)
+
+        return np.round(building_height / floor_height)
+
+    # Estimate the number of floors
+    asset_list["number_of_floors"] = asset_list.apply(
+        lambda x: estimate_n_floors(
+            building_height=x["Relative Height - Eaves"],
+            floor_height=x["floor-height"],
+            address_base_property_description=x["Class Description"],
+            epc_property_type=x["property-type"],
+        ),
+        axis=1
+    )
+    # Band shares, presumably for the 10% sample (the full-list shares follow):
+    # D    0.419929
+    # C    0.391459
+    # E    0.160142
+    # F    0.017794
+    # G    0.010676
+
+    # Total asset list:
+    # D    0.450409
+    # C    0.412016
+    # E    0.110203
+    # F    0.020263
+    # G    0.007110
+
+    # We do the following:
+    # 1) Create final asset list
+    # 2) Create Non-intrusive recommendations
+    # 3) Create a third party costing object
+    # NOTE: cost_testing is exploratory - nothing below reads it
+    cost_testing = renewables_cost.merge(
+        insulation_potential, how="inner", on="UPRN"
+    )
+
+    cost_testing["cwi_cost_per_m2"] = cost_testing["Insulation - Cavity Wall - Total"] / cost_testing["Wall Area [m^2]"]
+    # Their cavity wall insulation is £8 per m^2
+
+    cost_testing["ewi_cost_per_m2"] = cost_testing["Insulation - External Wall - Total"] / cost_testing[
+        "Wall Area [m^2]"]
+
+    final_asset_list = asset_list.rename(
+        columns={"UPRN": "uprn", "address1": "address", "floor_area": "insulation_floor_area"}
+    )[["uprn", "address", "postcode", "insulation_wall_area", "insulation_floor_area", "number_of_floors"]]
+
+    # Create non-invasive recommendations, which come from the solar potential and ASHP potential data sources
+    non_invasive_recommendations = []
+    for _, row in final_asset_list.iterrows():
+        property_ashp_potential = ashp_potential[
+            (ashp_potential["UPRN"] == row["uprn"]) & ashp_potential["Overall Suitability Rating"]
+            ]
+        property_pv_potential = pv_potential[
+            (pv_potential["UPRN"] == row["uprn"]) & pv_potential["Overall Suitability"]
+            ]
+        property_costs = renewables_cost[renewables_cost["UPRN"] == row["uprn"]]
+
+        property_non_invasive_recs = []
+        if not property_ashp_potential.empty:
+            property_non_invasive_recs.append(
+                {
+                    "type": "air_source_heat_pump",
+                    "size": property_ashp_potential["Recommended Heat Pump Size [kW]"].values[0],
+                    "cost": property_costs["Air Source Heat Pump - Total"].values[0],
+                    "ashp_only_heating_recommendation": True
+                }
+            )
+
+        if not property_pv_potential.empty:
+            property_non_invasive_recs.append(
+                {
+                    "type": "solar_pv",
+                    "array_wattage": property_pv_potential["Recommended Array Size [kW]"].values[0] * 1000,
+                    "initial_ac_kwh_per_year": property_pv_potential["Annual Generation [kWh]"].values[0],
+                    "panneled_roof_area": property_pv_potential["Roof area suitable for PV [m^2]"].values[0],
+                    "cost": property_costs["Rooftop PV - Total"].values[0],
+                }
+            )
+
+        non_invasive_recommendations.append(
+            {
+                "uprn": row["uprn"],
+                "recommendations": property_non_invasive_recs,
+            }
+        )
+
+    # Save the asset list
+
+    # Store the asset list in s3
+    filename = f"{USER_ID}/{PORTFOLIO_ID}/pilot.csv"
+    save_csv_to_s3(
+        dataframe=final_asset_list,
+        bucket_name="retrofit-plan-inputs-dev",
+        file_name=filename
+    )
+
+    # Store non-invasive recommendations in S3
+    non_invasive_recommendations_filename = f"{USER_ID}/{PORTFOLIO_ID}/non_invasive_recommendations.csv"
+    save_csv_to_s3(
+        dataframe=pd.DataFrame(non_invasive_recommendations),
+        bucket_name="retrofit-plan-inputs-dev",
+        file_name=non_invasive_recommendations_filename
+    )
+
+    # Create two scenarios
+    # Scenario A
+    body1 = {
+        "portfolio_id": str(PORTFOLIO_ID),
+        "housing_type": "Private",
+        "goal": "Increasing EPC",
+        "goal_value": "A",
+        "trigger_file_path": filename,
+        "already_installed_file_path": "",
+        "patches_file_path": "",
+        "non_invasive_recommendations_file_path": non_invasive_recommendations_filename,
+        "scenario_name": "Fabric - no solid wall",
+        "multi_plan": True,
+        "exclusions": ["internal_wall_insulation", "external_wall_insulation", "floor_insulation"],
+        "budget": None,
+    }
+    print(body1)
+
+    # Scenario B - deep fabric, no exclusions
+    body2 = {
+        "portfolio_id": str(PORTFOLIO_ID),
+        "housing_type": "Private",
+        "goal": "Increasing EPC",
+        "goal_value": "A",
+        "trigger_file_path": filename,
+        "already_installed_file_path": "",
+        "patches_file_path": "",
+        "non_invasive_recommendations_file_path": non_invasive_recommendations_filename,
+        "scenario_name": "Deep Fabric",
+        "multi_plan": True,
+        "budget": None,
+    }
+    print(body2)
diff --git a/etl/testing_data/bills_model_testing.py b/etl/testing_data/bills_model_testing.py
new file mode 100644
index 00000000..0c9bb06d
--- /dev/null
+++ b/etl/testing_data/bills_model_testing.py
@@ -0,0 +1,60 @@
+# We use some sample properties from Newhaven to use as a testing dataset for implementing the model fixes
+
+
+import inspect
+import pandas as pd
+from etl.epc.settings import EARLIEST_EPC_DATE
+from pathlib import Path
+from utils.s3 import save_csv_to_s3
+
+src_file_path = inspect.getfile(lambda: None)
+
+EPC_DIRECTORY = Path(src_file_path).parent / "local_data" / "all-domestic-certificates"
+
+USER_ID = 8
+PORTFOLIO_ID = -1
+
+
+def app():
+    """
+    Build a small testing asset list for the bills model: sample 10 Lewes EPCs with a current energy efficiency
+    below 52, store them as an asset list in S3 and print the body used to trigger a plan against that file
+    :return:
+    """
+
+    lewes_directory = EPC_DIRECTORY / "domestic-E07000063-Lewes/certificates.csv"
+
+    data = pd.read_csv(lewes_directory, low_memory=False)
+    # Rename the columns to the same format as the api returns
+    data.columns = [c.replace("_", "-").lower() for c in data.columns]
+
+    # Keep only EPCs lodged on or after the earliest-date threshold
+    data = data[data["lodgement-date"] >= EARLIEST_EPC_DATE]
+
+    data = data[~pd.isnull(data["uprn"])]
+    data = data[data["current-energy-efficiency"].astype(float) < 52]
+    data = data.sample(10)
+
+    # Create an asset list; uprn goes via int so a float column does not serialise as e.g. "123.0"
+    asset_list = data[["uprn", "address1", "postcode"]].copy().rename(columns={"address1": "address"})
+    asset_list["uprn"] = asset_list["uprn"].astype(int).astype(str)
+
+    filename = f"{USER_ID}/{PORTFOLIO_ID}/pilot.csv"
+    save_csv_to_s3(
+        dataframe=asset_list,
+        bucket_name="retrofit-plan-inputs-dev",
+        file_name=filename
+    )
+
+    body = {
+        "portfolio_id": str(PORTFOLIO_ID),
+        "housing_type": "Private",
+        "goal": "Increasing EPC",
+        "goal_value": "B",
+        "trigger_file_path": filename,
+        "already_installed_file_path": "",
+        "patches_file_path": "",
+        "non_invasive_recommendations_file_path": "",
+        "budget": None,
+    }
+    print(body)
diff --git a/recommendations/Costs.py b/recommendations/Costs.py
index 738e9b07..c1feb18a 100644
--- a/recommendations/Costs.py
+++ b/recommendations/Costs.py
@@ -1014,7 +1014,7 @@ class Costs:
             "labour_days": labour_days
         }
 
-    def solar_pv(self, wattage: float, has_battery: bool = False):
+    def solar_pv(self, wattage: float, has_battery: bool = False, array_cost=None):
 
         """
         Calculates the total cost for solar PV based data provided by the MCS dashboard, which contains
@@ -1028,13 +1028,17 @@ class Costs:
         https://www.checkatrade.com/blog/cost-guides/cost-of-solar-panel-installation/
         :param wattage: Peak wattage of the solar PV system]
         :param has_battery: Bool, whether the system includes a battery
+        :param array_cost: float, containing the cost of the solar PV array
         """
 
         # Get the cost data relevant to the region
         regional_cost = MCS_SOLAR_PV_COST_DATA["-".join(["average_cost_per_kwh", self.region])]
 
-        kw = wattage / 1000
-        total_cost = kw * regional_cost
+        if array_cost is not None:
+            total_cost = array_cost
+        else:
+            kw = wattage / 1000
+            total_cost = kw * regional_cost
 
         if has_battery:
             # The battery cost is based on the £3500 quote, recieved from installers
diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py
index 4d91f21b..523bfe3b 100644
--- a/recommendations/HeatingRecommender.py
+++ b/recommendations/HeatingRecommender.py
@@ -57,13 +57,20 @@ class HeatingRecommender:
         #       in the Costs class, stored as SYSTEM_FLUSH_COST
 
         exclusions = [] if exclusions is None else exclusions
+        non_invasive_ashp_recommendation = next(
+            (r for r in self.property.non_invasive_recommendations if r["type"] == "air_source_heat_pump"), {}
+        )
 
+        # This option will prevent other heating recommendations from being specified, other than an ASHP
+        ashp_only_heating_recommendation = non_invasive_ashp_recommendation.get(
+            "ashp_only_heating_recommendation", False
+        )
         self.heating_recommendations = []
         self.heating_control_recommendations = []
         # This first iteration of the recommender will provide very basic recommendation
         # We recommend heating controls based on the main heating system
 
-        if self.is_high_heat_retention_valid():
+        if self.is_high_heat_retention_valid() and not ashp_only_heating_recommendation:
             # Recommend high heat retention storage heaters
             # TODO: We need to allow for the possibility that the property aleady has storage heaters, but just
             #       needs the controls
@@ -91,13 +98,13 @@ class HeatingRecommender:
             self.property.data["mains-gas-flag"]
         )
 
-        if (
+        if ((
             has_boiler or
             no_heating_has_mains or
             electic_heating_has_mains or
             has_gas_heaters or
             portable_heaters_has_mains
-        ):
+        ) and not ashp_only_heating_recommendation):
             # This indicates that the home previously did not have a boiler in place and so would require
             # an overhaul to the system - right now, this is all reasons, apart from if there is an existing boiler
             system_change = not has_boiler
@@ -118,7 +125,9 @@ class HeatingRecommender:
 
         if self.property.is_ashp_valid(exclusions=exclusions):
             self.recommend_air_source_heat_pump(
-                phase=phase, has_cavity_or_loft_recommendations=has_cavity_or_loft_recommendations
+                phase=phase,
+                has_cavity_or_loft_recommendations=has_cavity_or_loft_recommendations,
+
             )
 
         return
@@ -194,14 +203,21 @@ class HeatingRecommender:
         :return:
         """
 
+        # Look for a non-intrusive recommendation
+        non_intrusive_recommendation = next((
+            r for r in self.property.non_invasive_recommendations if r["type"] == "air_source_heat_pump"
+        ), {})
+
         controls_recommender = HeatingControlRecommender(self.property)
         controls_recommender.recommend(heating_description="Air source heat pump, radiators, electric")
 
         ashp_costs = self.costs.air_source_heat_pump()
-        # We add the costs of the heating controls, onto each key in the costs dictionary
-        if controls_recommender.recommendation:
-            for key in ashp_costs:
-                ashp_costs[key] += controls_recommender.recommendation[0][key]
+        if non_intrusive_recommendation:
+            # Update with non-intrusive recommendation
+            if non_intrusive_recommendation.get("cost"):
+                ashp_costs.update(
+                    {"total": non_intrusive_recommendation["cost"], "subtotal": None, "vat": None}
+                )
 
         already_installed = "air_source_heat_pump" in self.property.already_installed
 
@@ -213,6 +229,14 @@ class HeatingRecommender:
         if already_installed:
             ashp_costs = override_costs(ashp_costs)
 
+        if non_intrusive_recommendation and not all([x is None for x in controls_recommendations]):
+            # We just use the ttzc control
+            controls_recommendations = [
+                x for x in controls_recommendations if (
+                    x["description_simulation"]["mainheatcont-description"] == "Time and temperature zone control"
+                )
+            ]
+
         # This is a map from the heating controls description to the description of the air source heat pump set up
         ashp_descriptions = {
             "Time and temperature zone control": (
@@ -233,7 +257,8 @@ class HeatingRecommender:
 
             if controls_rec:
                 for key in ashp_costs_with_controls:
-                    ashp_costs_with_controls[key] += controls_rec[key]
+                    if ashp_costs_with_controls[key] is not None:
+                        ashp_costs_with_controls[key] += controls_rec[key]
 
             if controls_rec is None:
                 description = "Install an air source heat pump."
@@ -245,19 +270,19 @@ class HeatingRecommender:
             # If the property does not have existing cavity and loft insulation, we include a note that the cost
             # includes the boiler upgrade scheme and that the cavity and loft need to be treated, to ensure access
             # to the funding
-            if has_cavity_or_loft_recommendations:
-                description = description + (
-                    f" The cost includes the £"
-                    f"{BOILER_UPGRADE_SCHEME_ASHP_VALUE} boiler upgrade scheme grant. "
-                    f"You must ensure that the property has an insulated cavity and "
-                    f"270mm+ loft insulation to qualify for the grant"
-                )
-            else:
-                description = description + (
-                    f" The cost includes the £{BOILER_UPGRADE_SCHEME_ASHP_VALUE} boiler upgrade scheme grant"
-                )
+            if not non_intrusive_recommendation:
+                if has_cavity_or_loft_recommendations:
+                    description = description + (
+                        f" The cost includes the £"
+                        f"{BOILER_UPGRADE_SCHEME_ASHP_VALUE} boiler upgrade scheme grant. "
+                        f"You must ensure that the property has an insulated cavity and "
+                        f"270mm+ loft insulation to qualify for the grant"
+                    )
+                else:
+                    description = description + (
+                        f" The cost includes the £{BOILER_UPGRADE_SCHEME_ASHP_VALUE} boiler upgrade scheme grant"
+                    )
 
-            print("TEMP UPDATED FOR 77 Perryn!!!!!")
             simulation_config = {
                 "mainheat_energy_eff_ending": "Good",
                 "hot_water_energy_eff_ending": "Good"
diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py
index 81c26e15..23b0e7df 100644
--- a/recommendations/Recommendations.py
+++ b/recommendations/Recommendations.py
@@ -66,7 +66,7 @@ class Recommendations:
 
         # Building Fabric
         if "wall_insulation" not in self.exclusions:
-            self.wall_recomender.recommend(phase=phase)
+            self.wall_recomender.recommend(phase=phase, exclusions=self.exclusions)
             if self.wall_recomender.recommendations:
                 property_recommendations.append(self.wall_recomender.recommendations)
                 phase += 1
diff --git a/recommendations/SolarPvRecommendations.py b/recommendations/SolarPvRecommendations.py
index 63519d02..5069b9fb 100644
--- a/recommendations/SolarPvRecommendations.py
+++ b/recommendations/SolarPvRecommendations.py
@@ -1,6 +1,8 @@
 import numpy as np
+import pandas as pd
+
 from recommendations.Costs import Costs
-from recommendations.recommendation_utils import override_costs
+from recommendations.recommendation_utils import override_costs, esimtate_pitched_roof_area
 
 
 class SolarPvRecommendations:
@@ -150,17 +152,37 @@ class SolarPvRecommendations:
             self.recommend_building_analysis(phase)
             return
 
-        panel_performance = self.property.solar_panel_configuration["panel_performance"]
-        roof_area = self.property.roof_area
+        non_invasive_recommendation = next(
+            (r for r in self.property.non_invasive_recommendations if r["type"] == "solar_pv"), {}
+        )
 
-        solar_configurations = panel_performance.head(3).reset_index(drop=True)
+        if non_invasive_recommendation:
+
+            roof_area = esimtate_pitched_roof_area(
+                floor_area=self.property.insulation_floor_area, floor_height=self.property.data["floor-height"]
+            )
+            solar_configurations = pd.DataFrame(
+                [
+                    {
+                        "array_wattage": non_invasive_recommendation["array_wattage"],
+                        "initial_ac_kwh_per_year": non_invasive_recommendation["initial_ac_kwh_per_year"],
+                        "panneled_roof_area": non_invasive_recommendation["panneled_roof_area"]
+                    }
+                ]
+            )
+        else:
+            panel_performance = self.property.solar_panel_configuration["panel_performance"]
+            roof_area = self.property.roof_area
+            solar_configurations = panel_performance.head(3).reset_index(drop=True)
 
         # We combine each of these configurations with estimates with and without a battery
         for rank, recommendation_config in solar_configurations.iterrows():
             roof_coverage_percent = round(recommendation_config["panneled_roof_area"] / roof_area * 100)
             for has_battery in [False, True]:
                 cost_result = self.costs.solar_pv(
-                    wattage=recommendation_config["array_wattage"], has_battery=has_battery
+                    wattage=recommendation_config["array_wattage"],
+                    has_battery=has_battery,
+                    array_cost=non_invasive_recommendation["cost"] if non_invasive_recommendation else None
                 )
                 kw = np.floor(recommendation_config["array_wattage"] / 100) / 10
                 if has_battery:
diff --git a/recommendations/WallRecommendations.py b/recommendations/WallRecommendations.py
index 4ef747f7..7aeabc7a 100644
--- a/recommendations/WallRecommendations.py
+++ b/recommendations/WallRecommendations.py
@@ -184,7 +184,7 @@ class WallRecommendations(Definitions):
 
         return ewi_recommendations
 
-    def recommend(self, phase=0):
+    def recommend(self, phase=0, exclusions=None):
         # if building built after 1990 + we're able to identify U-value +
         # U-value less than 0.18 and if in or close to a conversation area,
         # recommend internal wall insulation as a possible measure
@@ -262,7 +262,7 @@ class WallRecommendations(Definitions):
 
         # Remaining wall types are treated with IWI or EWI
         if (u_value >= self.BUILDING_REGULATIONS_PART_L_MAX_U_VALUE) and self.is_suitable_for_solid_insulation():
-            self.find_insulation(u_value, phase)
+            self.find_insulation(u_value, phase, exclusions=exclusions)
             return
 
         # If the u-value is within regulations, we don't do anything
@@ -552,7 +552,7 @@ class WallRecommendations(Definitions):
 
         return recommendations
 
-    def find_insulation(self, u_value, phase):
+    def find_insulation(self, u_value, phase, exclusions=None):
         """
         This function contains the logic for finding potential insulation measures for a property, depending
         on the parts available and whether the property can have external wall insulation installed
@@ -564,8 +564,10 @@ class WallRecommendations(Definitions):
         # we separate the logic for for recommending them, therefore we don't
         # consider diminishing returns between the two as they are considered to be separate measures
 
+        exclusions = [] if exclusions is None else exclusions
+
         ewi_recommendations = []
-        if self.ewi_valid():
+        if self.ewi_valid() and "external_wall_insulation" not in exclusions:
             ewi_recommendations = self._find_insulation(
                 u_value=u_value,
                 insulation_materials=pd.DataFrame(
@@ -575,12 +577,14 @@ class WallRecommendations(Definitions):
                 phase=phase,
             )
 
-        iwi_recommendations = self._find_insulation(
-            u_value=u_value,
-            insulation_materials=pd.DataFrame(self.internal_wall_insulation_materials),
-            non_insulation_materials=self.internal_wall_non_insulation_materials,
-            phase=phase,
-        )
+        iwi_recommendations = []
+        if "internal_wall_insulation" not in exclusions:
+            iwi_recommendations = self._find_insulation(
+                u_value=u_value,
+                insulation_materials=pd.DataFrame(self.internal_wall_insulation_materials),
+                non_insulation_materials=self.internal_wall_non_insulation_materials,
+                phase=phase,
+            )
 
         self.recommendations += ewi_recommendations + iwi_recommendations
 

From c9d733d76eee9e2fc024a7c243db77cc3910e40f Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 5 Aug 2024 16:51:37 +0100
Subject: [PATCH 058/182] checking costs

---
 backend/app/plan/router.py               | 11 -----------
 etl/customers/newhaven/newhaven_study.py |  5 +++++
 2 files changed, 5 insertions(+), 11 deletions(-)

diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index db0ff552..769b13f7 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -737,17 +737,6 @@ async def trigger_plan(body: PlanTriggerRequest):
             ]
             recommendations[property_id] = final_recommendations
 
-        # df = []
-        # for rec in recommendations[list(recommendations.keys())[0]]:
-        #     df.append(
-        #         {
-        #             "id": rec["recommendation_id"],
-        #             "description": rec["description"],
-        #             "sap": rec["sap_points"],
-        #         }
-        #     )
-        # df = pd.DataFrame(df)
-
         # 1) the property data
         # 2) the property details (epc)
         # 3) the recommendations
diff --git a/etl/customers/newhaven/newhaven_study.py b/etl/customers/newhaven/newhaven_study.py
index 1f3e858f..b10a8bf0 100644
--- a/etl/customers/newhaven/newhaven_study.py
+++ b/etl/customers/newhaven/newhaven_study.py
@@ -152,6 +152,11 @@ def make_asset_list():
     cost_testing["ewi_cost_per_m2"] = cost_testing["Insulation - External Wall - Total"] / cost_testing[
         "Wall Area [m^2]"]
 
+    cost_testing["li_cost_per_m2"] = cost_testing["Insulation - Loft - Total"] / cost_testing["Building Area [m^2]"]
+
+    cost_testing["underfloor_cost_per_m2"] = cost_testing["Insulation - Under Floor- Total"] / cost_testing[
+        "Building Area [m^2]"]
+
     final_asset_list = asset_list.rename(
         columns={"UPRN": "uprn", "address1": "address", "floor_area": "insulation_floor_area"}
     )[["uprn", "address", "postcode", "insulation_wall_area", "insulation_floor_area", "number_of_floors"]]

From 664521856310af676977262fde8cbe2a0aa20641 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 5 Aug 2024 16:55:41 +0100
Subject: [PATCH 059/182] fixing estimate_n_floors on asset list creation

---
 etl/customers/newhaven/newhaven_study.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/etl/customers/newhaven/newhaven_study.py b/etl/customers/newhaven/newhaven_study.py
index b10a8bf0..ab601fdc 100644
--- a/etl/customers/newhaven/newhaven_study.py
+++ b/etl/customers/newhaven/newhaven_study.py
@@ -104,9 +104,9 @@ def make_asset_list():
         if address_base_property_description == "Self Contained Flat (Includes Maisonette / Apartment)":
             if epc_property_type == "Flat":
                 return 1
-            if epc_property_type == "House":
+            if epc_property_type == "Maisonette":
                 return 2
-            return NotImplementedError("Implement me")
+            raise NotImplementedError("Implement me")
 
         if pd.isnull(floor_height):
             return np.round(building_height / AVG_FLOOR_HEIGHT)

From 3c65d1639a40aedcf0230d458b38a4f585f4d3be Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 5 Aug 2024 17:11:53 +0100
Subject: [PATCH 060/182] debugging heatingsource code

---
 backend/Property.py        | 4 ++--
 backend/app/plan/router.py | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/backend/Property.py b/backend/Property.py
index 309fb149..cc392933 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -1293,7 +1293,7 @@ class Property:
             fuel for key, fuel in heating_fuel_mapping.items() if self.main_heating.get(key, False)
         ]
         if len(self.heating_energy_source) == 0 or len(self.heating_energy_source) > 1:
-            raise Exception("Investigate em")
+            raise Exception("Investigate me")
 
         self.heating_energy_source = self.heating_energy_source[0]
 
@@ -1301,7 +1301,7 @@ class Property:
             self.hot_water_energy_source = heater_type_to_fuel[self.hotwater["heater_type"]]
         else:
             fuel = system_type_modification[self.hotwater["system_type"]]
-            if fuel == 'Main System':
+            if fuel in ['Main System', "Community Scheme"]:
                 self.hot_water_energy_source = self.heating_energy_source
             else:
                 raise Exception("Investiage me")
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 769b13f7..3e2c724f 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -452,7 +452,7 @@ async def trigger_plan(body: PlanTriggerRequest):
         # We need to prepare the EPC so it's in the same format as the training data
 
         logger.info("Getting spatial data")
-        for p in input_properties:
+        for p in tqdm(input_properties):
             p.get_components(cleaned=cleaned, energy_consumption_client=energy_consumption_client)
             p.get_spatial_data(uprn_filenames)
 

From ea9086ba37c612ffe6bb517c3d3e655328640396 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 5 Aug 2024 18:06:16 +0100
Subject: [PATCH 061/182] optimise fetching of spatial data

---
 backend/app/plan/router.py                | 27 +++++++++++++++++++++--
 etl/customers/newhaven/newhaven_study.py  | 16 ++++++++++++++
 recommendations/Costs.py                  |  4 ++--
 recommendations/HeatingRecommender.py     |  6 ++++-
 recommendations/SecondaryHeating.py       |  3 +++
 recommendations/SolarPvRecommendations.py |  6 ++++-
 6 files changed, 56 insertions(+), 6 deletions(-)

diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 3e2c724f..d4b2a9a5 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -451,10 +451,34 @@ async def trigger_plan(body: PlanTriggerRequest):
         # )
         # We need to prepare the EPC so it's in the same format as the training data
 
+        # TODO: Move this/tidy it up
+        uprn_map = {}
+        for uprn in [p.uprn for p in input_properties]:
+            filtered_df = uprn_filenames[
+                (uprn_filenames["lower"] <= int(uprn))
+                & (uprn_filenames["upper"] >= int(uprn))
+                ]
+            if filtered_df["filenames"].values[0] in uprn_map:
+                uprn_map[filtered_df["filenames"].values[0]].append(int(uprn))
+            else:
+                uprn_map[filtered_df["filenames"].values[0]] = [int(uprn)]
+
+        for filename, associated_uprn in tqdm(uprn_map.items(), total=len(uprn_map)):
+            # Read in the file
+            spatial_data = read_dataframe_from_s3_parquet(
+                bucket_name="retrofit-data-dev", file_key=f"spatial/{filename}"
+            )
+
+            spatial_df = spatial_data[spatial_data["UPRN"].isin(associated_uprn)]
+            for p in input_properties:
+                if p.uprn in associated_uprn:
+                    p.set_spatial(spatial_df[spatial_df["UPRN"] == p.uprn])
+
         logger.info("Getting spatial data")
         for p in tqdm(input_properties):
+            if p.spatial is None:
+                raise Exception("Missed setting of spatial data for a property")
             p.get_components(cleaned=cleaned, energy_consumption_client=energy_consumption_client)
-            p.get_spatial_data(uprn_filenames)
 
         logger.info("Performing solar analysis")
         # TODO: Tidy this up
@@ -613,7 +637,6 @@ async def trigger_plan(body: PlanTriggerRequest):
         recommendations_scoring_data = []
         representative_recommendations = {}
         for p in tqdm(input_properties):
-
             recommender = Recommendations(property_instance=p, materials=materials, exclusions=body.exclusions)
             property_recommendations, property_representative_recommendations = recommender.recommend()
 
diff --git a/etl/customers/newhaven/newhaven_study.py b/etl/customers/newhaven/newhaven_study.py
index ab601fdc..7c53405f 100644
--- a/etl/customers/newhaven/newhaven_study.py
+++ b/etl/customers/newhaven/newhaven_study.py
@@ -177,22 +177,38 @@ def make_asset_list():
             property_non_invasive_recs.append(
                 {
                     "type": "air_source_heat_pump",
+                    "suitable": True,
                     "size": property_ashp_potential["Recommended Heat Pump Size [kW]"].values[0],
                     "cost": property_costs["Air Source Heat Pump - Total"].values[0],
                     "ashp_only_heating_recommendation": True
                 }
             )
+        else:
+            property_non_invasive_recs.append(
+                {
+                    "type": "air_source_heat_pump",
+                    "suitable": False
+                }
+            )
 
         if not property_pv_potential.empty:
             property_non_invasive_recs.append(
                 {
                     "type": "solar_pv",
+                    "suitable": True,
                     "array_wattage": property_pv_potential["Recommended Array Size [kW]"].values[0] * 1000,
                     "initial_ac_kwh_per_year": property_pv_potential["Annual Generation [kWh]"].values[0],
                     "panneled_roof_area": property_pv_potential["Roof area suitable for PV [m^2]"].values[0],
                     "cost": property_costs["Rooftop PV - Total"].values[0],
                 }
             )
+        else:
+            property_non_invasive_recs.append(
+                {
+                    "type": "solar_pv",
+                    "suitable": False
+                }
+            )
 
         non_invasive_recommendations.append(
             {
diff --git a/recommendations/Costs.py b/recommendations/Costs.py
index c1feb18a..8deed75a 100644
--- a/recommendations/Costs.py
+++ b/recommendations/Costs.py
@@ -100,8 +100,8 @@ CONDENSING_BOILER_COSTS = {
 # The unit is a 15kw boiler, capable of outputting between 3kw and 15kw. Costs seem to be around £1800
 ELECTRIC_BOILER_COSTS = 1800
 
-# Assumes 3 hours to remove each heater (including re-decorating)
-ROOM_HEATER_REMOVAL_COST = 120
+# Assumes 1 hours to remove each heater (including re-decorating)
+ROOM_HEATER_REMOVAL_COST = 50
 ROOM_HEATER_REMOVAL_LABOUR_HOURS = 3
 
 # This is a cost quoted by Jim for a system flush - existig system will run more efficiently
diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py
index 523bfe3b..1a3b6159 100644
--- a/recommendations/HeatingRecommender.py
+++ b/recommendations/HeatingRecommender.py
@@ -58,8 +58,12 @@ class HeatingRecommender:
 
         exclusions = [] if exclusions is None else exclusions
         non_invasive_ashp_recommendation = next(
-            (r for r in self.property.non_invasive_recommendations if r["type"] == "air_source_heat_pump"), {}
+            (r for r in self.property.non_invasive_recommendations if r["type"] == "air_source_heat_pump"),
+            {"suitable": True}
         )
+        # We allow for the non-invasive recommendation to be that ASHP is not suitable
+        if not non_invasive_ashp_recommendation["suitable"]:
+            return
 
         # This option will prevent other heating recommendations from being specified, other than an ASHP
         ashp_only_heating_recommendation = non_invasive_ashp_recommendation.get(
diff --git a/recommendations/SecondaryHeating.py b/recommendations/SecondaryHeating.py
index 5d763510..aed48da2 100644
--- a/recommendations/SecondaryHeating.py
+++ b/recommendations/SecondaryHeating.py
@@ -60,6 +60,9 @@ class SecondaryHeating:
                 **costs,
                 "simulation_config": {
                     "secondheat_description_ending": "None"
+                },
+                "description_simulation": {
+                    "secondheat-description": "None"
                 }
             }
         )
diff --git a/recommendations/SolarPvRecommendations.py b/recommendations/SolarPvRecommendations.py
index 5069b9fb..3e7ede28 100644
--- a/recommendations/SolarPvRecommendations.py
+++ b/recommendations/SolarPvRecommendations.py
@@ -153,9 +153,13 @@ class SolarPvRecommendations:
             return
 
         non_invasive_recommendation = next(
-            (r for r in self.property.non_invasive_recommendations if r["type"] == "solar_pv"), {}
+            (r for r in self.property.non_invasive_recommendations if r["type"] == "solar_pv"), {"suitable": True}
         )
 
+        # We allow for the non-invasive recommendation to be that solar PV is not suitable
+        if not non_invasive_recommendation["suitable"]:
+            return
+
         if non_invasive_recommendation:
 
             roof_area = esimtate_pitched_roof_area(

From 1cbcfefa696df8730e0d5b63f6c14bee0652f8c2 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 5 Aug 2024 18:09:32 +0100
Subject: [PATCH 062/182] handling decently insulated wall

---
 recommendations/WallRecommendations.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/recommendations/WallRecommendations.py b/recommendations/WallRecommendations.py
index 7aeabc7a..edfc4d66 100644
--- a/recommendations/WallRecommendations.py
+++ b/recommendations/WallRecommendations.py
@@ -236,8 +236,8 @@ class WallRecommendations(Definitions):
             # + it already has a U-value better than the building regulations, so we don't need to recommend anything
             if (
                 (not is_cavity_wall)
-                and (self.property.year_built >= self.YEAR_WALLS_BUILT_WITH_INSULATION)
-                and (u_value <= self.BUILDING_REGULATIONS_PART_L_MAX_U_VALUE)
+                and ((self.property.year_built >= self.YEAR_WALLS_BUILT_WITH_INSULATION)
+                     or (u_value <= self.BUILDING_REGULATIONS_PART_L_MAX_U_VALUE))
             ):
                 # Recommend nothing
                 return

From f732ce0be593b5ee7060da0637bc3c22e113e357 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 5 Aug 2024 18:30:07 +0100
Subject: [PATCH 063/182] Adding simulation_config convention to roof
 recommendations

---
 backend/Property.py                    | 66 ++++-----------------
 recommendations/RoofRecommendations.py | 81 ++++++++++++++++----------
 2 files changed, 60 insertions(+), 87 deletions(-)

diff --git a/backend/Property.py b/backend/Property.py
index cc392933..ba22ce60 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -494,61 +494,6 @@ class Property:
                 if output["floor_insulation_thickness_ending"] is None:
                     output["floor_insulation_thickness_ending"] = "none"
 
-            if recommendation["type"] in [
-                "loft_insulation",
-                "room_roof_insulation",
-                "flat_roof_insulation",
-            ]:
-                output["roof_thermal_transmittance_ending"] = recommendation[
-                    "new_u_value"
-                ]
-
-                parts = recommendation["parts"]
-                if len(parts) != 1:
-                    raise ValueError(
-                        "More than one part for roof insulation - investiage me"
-                    )
-
-                # This is based on the values we have in the training data
-                valid_numeric_values = [
-                    12,
-                    25,
-                    50,
-                    75,
-                    100,
-                    150,
-                    200,
-                    250,
-                    270,
-                    300,
-                    350,
-                    400,
-                ]
-
-                proposed_depth = recommendation["new_thickness"]
-                if proposed_depth not in valid_numeric_values:
-                    # Take the nearest value for scoring
-                    proposed_depth = min(
-                        valid_numeric_values, key=lambda x: abs(x - proposed_depth)
-                    )
-
-                output["roof_insulation_thickness_ending"] = str(int(proposed_depth))
-                if recommendation["type"] == "loft_insulation":
-                    if proposed_depth >= 270:
-                        output["roof_energy_eff_ending"] = "Very Good"
-                    else:
-                        if output["roof_energy_eff_ending"] not in ["Good", "Very Good"]:
-                            output["roof_energy_eff_ending"] = "Good"
-                else:
-                    output["roof_energy_eff_ending"] = "Very Good"
-            else:
-                # Fill missing roof u-values - this fill is not based on recommended upgrades
-                if output["roof_thermal_transmittance_ending"] is None:
-                    raise ValueError("We should not have a None value for the u value")
-
-                if output["roof_insulation_thickness_ending"] is None:
-                    output["roof_insulation_thickness_ending"] = "none"
-
             if recommendation["type"] == "sealing_open_fireplace":
                 output["number_open_fireplaces_ending"] = 0
 
@@ -592,12 +537,21 @@ class Property:
             if recommendation["type"] in [
                 "heating", "hot_water_tank_insulation", "heating_control", "secondary_heating",
                 "internal_wall_insulation", "external_wall_insulation", "cavity_wall_insulation",
-                "cylinder_thermostat"
+                "cylinder_thermostat", "loft_insulation", "room_roof_insulation", "flat_roof_insulation",
             ]:
                 # We update the data, as defined in the recommendaton
                 if output["walls_insulation_thickness_ending"] is None:
                     output["walls_insulation_thickness_ending"] = "none"
 
+                if output["walls_thermal_transmittance_ending"] is None:
+                    raise ValueError("We should not have a None value for the u value")
+
+                if output["roof_insulation_thickness_ending"] is None:
+                    output["roof_insulation_thickness_ending"] = "none"
+
+                if output["roof_thermal_transmittance_ending"] is None:
+                    raise ValueError("We should not have a None value for the u value")
+
                 simulation_config = recommendation["simulation_config"]
                 # If any entries in simulation_config are None, we will set them to "Unknown" which is the cleaning
                 # value
diff --git a/recommendations/RoofRecommendations.py b/recommendations/RoofRecommendations.py
index 56f3721a..fa2cb53c 100644
--- a/recommendations/RoofRecommendations.py
+++ b/recommendations/RoofRecommendations.py
@@ -5,9 +5,11 @@ from typing import List
 from datatypes.enums import QuantityUnits
 from recommendations.recommendation_utils import (
     get_roof_u_value, r_value_per_mm_to_u_value, calculate_u_value_uplift, is_diminishing_returns,
-    update_lowest_selected_u_value, get_recommended_part, convert_thickness_to_numeric, override_costs
+    update_lowest_selected_u_value, get_recommended_part, convert_thickness_to_numeric, override_costs,
+    check_simulation_difference
 )
 from recommendations.Costs import Costs
+from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes
 
 
 class RoofRecommendations:
@@ -274,6 +276,40 @@ class RoofRecommendations:
                         if already_installed:
                             cost_result = override_costs(cost_result)
                         new_thickness = insulation_thickness + material["depth"]
+
+                        # This is based on the values we have in the training data
+                        valid_numeric_values = [
+                            12,
+                            25,
+                            50,
+                            75,
+                            100,
+                            150,
+                            200,
+                            250,
+                            270,
+                            300,
+                            350,
+                            400,
+                        ]
+
+                        proposed_depth = new_thickness
+                        if (new_thickness not in valid_numeric_values) and material["type"] == "loft_insulation":
+                            # Take the nearest value for scoring
+                            proposed_depth = min(
+                                valid_numeric_values, key=lambda x: abs(x - proposed_depth)
+                            )
+
+                        if proposed_depth >= 270:
+                            new_efficiency = "Very Good"
+                        else:
+                            if self.property.data["walls-energy-eff"] not in ["Good", "Very Good"]:
+                                new_efficiency = "Good"
+                            else:
+                                new_efficiency = "Very Good"
+
+                        new_description = f"Pitched, {int(proposed_depth)}mm loft insulation"
+
                     elif material["type"] == "flat_roof_insulation":
                         cost_result = self.costs.flat_roof_insulation(
                             floor_area=self.property.insulation_floor_area,
@@ -283,38 +319,21 @@ class RoofRecommendations:
                         already_installed = "flat_roof_insulation" in self.property.already_installed
                         if already_installed:
                             cost_result = override_costs(cost_result)
-                        new_thickness = None
+                        new_description = "Flat, insulated"
+                        new_efficiency = "Good"
                     else:
                         raise ValueError("Invalid material type")
 
-                    # This is based on the values we have in the training data
-                    valid_numeric_values = [
-                        12,
-                        25,
-                        50,
-                        75,
-                        100,
-                        150,
-                        200,
-                        250,
-                        270,
-                        300,
-                        350,
-                        400,
-                    ]
+                    roof_ending_config = RoofAttributes(new_description).process()
+                    roof_simulation_config = check_simulation_difference(
+                        new_config=roof_ending_config, old_config=self.property.roof, prefix="roof_"
+                    )
 
-                    proposed_depth = new_thickness
-                    if new_thickness not in valid_numeric_values:
-                        # Take the nearest value for scoring
-                        proposed_depth = min(
-                            valid_numeric_values, key=lambda x: abs(x - proposed_depth)
-                        )
-
-                    if proposed_depth >= 270:
-                        new_efficiency = "Very Good"
-                    else:
-                        if self.property.data["walls-energy-eff"] not in ["Good", "Very Good"]:
-                            new_efficiency = "Good"
+                    simulation_config = {
+                        **roof_simulation_config,
+                        "roof_thermal_transmittance_ending": new_u_value,
+                        "roof_energy_eff_ending": new_efficiency
+                    }
 
                     recommendations.append(
                         {
@@ -333,9 +352,9 @@ class RoofRecommendations:
                             "new_u_value": new_u_value,
                             "sap_points": None,
                             "already_installed": already_installed,
-                            "new_thickness": new_thickness,
+                            "simulation_config": simulation_config,
                             "description_simulation": {
-                                "roof-description": f"Pitched, {int(proposed_depth)}mm loft insulation",
+                                "roof-description": new_description,
                                 "roof-energy-eff": new_efficiency
                             },
                             **cost_result

From 87c36a80d379a498f8da95233fcf4e6a637152b4 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 5 Aug 2024 20:50:43 +0100
Subject: [PATCH 064/182] tweaking heating rec logic

---
 backend/Property.py                    |   4 +
 recommendations/HeatingRecommender.py  |   4 +-
 recommendations/Recommendations.py     |   4 +-
 recommendations/RoofRecommendations.py | 101 ++++++++++++++++---------
 4 files changed, 74 insertions(+), 39 deletions(-)

diff --git a/backend/Property.py b/backend/Property.py
index ba22ce60..b8563b87 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -438,6 +438,10 @@ class Property:
 
             # Replace the understores with hyphens
             simulation_epc = {k.replace("_", "-"): v for k, v in simulation_epc.items()}
+            # Add in today's costs (unadjusted)
+            simulation_epc["heating-cost-current"] = int(self.energy_cost_estimates["unadjusted"]["heating"])
+            simulation_epc["hot-water-cost-current"] = int(self.energy_cost_estimates["unadjusted"]["hot_water"])
+            simulation_epc["lighting-cost-current"] = int(self.energy_cost_estimates["unadjusted"]["lighting"])
             simulation_epc.update(phase_epc_transformation)
             self.simulation_epcs[phase] = simulation_epc
 
diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py
index 1a3b6159..d8e597e7 100644
--- a/recommendations/HeatingRecommender.py
+++ b/recommendations/HeatingRecommender.py
@@ -62,8 +62,6 @@ class HeatingRecommender:
             {"suitable": True}
         )
         # We allow for the non-invasive recommendation to be that ASHP is not suitable
-        if not non_invasive_ashp_recommendation["suitable"]:
-            return
 
         # This option will prevent other heating recommendations from being specified, other than an ASHP
         ashp_only_heating_recommendation = non_invasive_ashp_recommendation.get(
@@ -127,7 +125,7 @@ class HeatingRecommender:
         # In the future, we'll allow overrides, so that non-intrusive surveys can contradict these conditions
         # and either allow or prevent the recommendation of an air source heat pump
 
-        if self.property.is_ashp_valid(exclusions=exclusions):
+        if self.property.is_ashp_valid(exclusions=exclusions) and non_invasive_ashp_recommendation["suitable"]:
             self.recommend_air_source_heat_pump(
                 phase=phase,
                 has_cavity_or_loft_recommendations=has_cavity_or_loft_recommendations,
diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py
index 23b0e7df..1c12d5eb 100644
--- a/recommendations/Recommendations.py
+++ b/recommendations/Recommendations.py
@@ -488,7 +488,7 @@ class Recommendations:
                         float(property_instance.data["energy-consumption-current"]) - new_heat_demand
                     )
 
-                    if rec["type"] == "lighting":
+                    if rec["type"] == "low_energy_lighting":
                         new_heating_cost = property_instance.energy_cost_estimates["adjusted"]["heating"]
                         new_hot_water_cost = property_instance.energy_cost_estimates["adjusted"]["hot_water"]
                         new_lighting_cost = min(
@@ -556,10 +556,12 @@ class Recommendations:
                     new_heating_kwh = energy_consumption_client.score_new_data(
                         new_data=scoring_df, target="heating_kwh"
                     )[0]
+                    new_heating_kwh = 0 if new_heating_kwh < 0 else new_heating_kwh
 
                     new_hot_water_kwh = energy_consumption_client.score_new_data(
                         new_data=scoring_df, target="hot_water_kwh"
                     )[0]
+                    new_hot_water_kwh = 0 if new_hot_water_kwh < 0 else new_hot_water_kwh
 
                     # Adjust these figures
                     new_heating_kwh_adjusted = AnnualBillSavings.adjust_energy_to_metered(
diff --git a/recommendations/RoofRecommendations.py b/recommendations/RoofRecommendations.py
index fa2cb53c..5075928e 100644
--- a/recommendations/RoofRecommendations.py
+++ b/recommendations/RoofRecommendations.py
@@ -405,18 +405,23 @@ class RoofRecommendations:
         :return:
         """
 
-        roof_roof_insulation_materials = [m for m in self.materials if m["type"] == "room_roof_insulation"]
-        if not roof_roof_insulation_materials:
-            raise ValueError("No room in roof insulation materials found")
+        # TODO: We temporarily use costs from SCIS for RIR insulation. The costing was £180/m2 of floor area
+        roof_roof_insulation_materials = [
+            {
+                "type": "room_roof_insulation",
+                "description": "Insulating the ceiling of the roof roof and re-decorate",
+                "depths": [100],
+                "depth_unit": "mm",
+                "r_value_per_mm": 0.038,
+                "thermal_conductivity": 0.022,
+                "cost": [180],
+            }
+        ]
 
-        if self.property.pitched_roof_area is None:
-            raise ValueError("pitched_roof_area not included as property attribute")
-
-        lowest_selected_u_value = None
+        # lowest_selected_u_value = None
         recommendations = []
         for material in roof_roof_insulation_materials:
             for depth, cost_per_unit in zip(material["depths"], material["cost"]):
-
                 part_u_value = r_value_per_mm_to_u_value(depth, material["r_value_per_mm"])
 
                 _, new_u_value = calculate_u_value_uplift(u_value, part_u_value)
@@ -428,36 +433,62 @@ class RoofRecommendations:
                 # If I have a lowest U value and my new u value is lower than the lowest value, it's
                 # further into the diminishing returns threshold and can shouldn't be
 
-                if is_diminishing_returns(
-                    recommendations, new_u_value, lowest_selected_u_value, self.DIMINISHING_RETURNS_U_VALUE
-                ):
-                    continue
+                # if is_diminishing_returns(
+                #     recommendations, new_u_value, lowest_selected_u_value, self.DIMINISHING_RETURNS_U_VALUE
+                # ):
+                #     continue
 
                 # We allow a small tolerance for error so we don't discount the recommendation entirely
-                if new_u_value <= self.BUILDING_REGULATIONS_PART_L_MAX_U_VALUE:
-                    lowest_selected_u_value = update_lowest_selected_u_value(lowest_selected_u_value, new_u_value)
+                # if new_u_value <= self.BUILDING_REGULATIONS_PART_L_MAX_U_VALUE:
+                # lowest_selected_u_value = update_lowest_selected_u_value(lowest_selected_u_value, new_u_value)
 
-                    estimated_cost = cost_per_unit * self.property.pitched_roof_area
+                estimated_cost = cost_per_unit * self.property.insulation_floor_area
 
-                    recommendations.append(
-                        {
-                            "phase": phase,
-                            "parts": [
-                                get_recommended_part(
-                                    part=material,
-                                    selected_depth=depth,
-                                    quantity=self.property.pitched_roof_area,
-                                    quantity_unit=QuantityUnits.m2.value,
-                                    selected_total_cost=estimated_cost
-                                )
-                            ],
-                            "type": "room_roof_insulation",
-                            "description": self.make_room_roof_insulation_description(material, depth),
-                            "starting_u_value": u_value,
-                            "new_u_value": new_u_value,
-                            "sap_points": None,
-                            "cost": estimated_cost,
-                        }
-                    )
+                # Could also be Roof room(s), ceiling insulated
+                new_descriptin = "Pitched, insulated at rafters"
+                roof_ending_config = RoofAttributes(new_descriptin).process()
+                roof_simulation_config = check_simulation_difference(
+                    new_config=roof_ending_config, old_config=self.property.roof, prefix="roof_"
+                )
+                if self.property.data["roof-energy-eff"] in ["Very Poor", "Poor"]:
+                    new_efficiency = "Average"
+                else:
+                    new_efficiency = self.property.data["roof-energy-eff"]
+
+                simulation_config = {
+                    **roof_simulation_config,
+                    "roof_thermal_transmittance_ending": new_u_value,
+                    "roof_energy_eff_ending": new_efficiency
+                }
+
+                already_installed = "flat_roof_insulation" in self.property.already_installed
+                cost_result = {
+                    "total": estimated_cost,
+                    "labour_hours": 80,
+                    "labour_days": 5,
+                }
+                if already_installed:
+                    cost_result = override_costs(cost_result)
+
+                recommendations.append(
+                    {
+                        "phase": phase,
+                        "parts": [
+                            # TODO
+                        ],
+                        "type": "room_roof_insulation",
+                        "description": "Insulate room in roof at rafters and re-decorate",
+                        "starting_u_value": u_value,
+                        "new_u_value": None,
+                        "sap_points": None,
+                        "simulation_config": simulation_config,
+                        "description_simulation": {
+                            "roof-description": new_descriptin,
+                            "roof-energy-eff": new_efficiency
+                        },
+                        **cost_result,
+                        "already_installed": already_installed
+                    }
+                )
 
         self.recommendations = recommendations

From a3e119001c45b554f22fc8da7fa19fc49ae323d2 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 5 Aug 2024 21:20:48 +0100
Subject: [PATCH 065/182] added simulation_config convention to floor
 recommendations

---
 backend/Property.py                     | 29 +++++++------------------
 recommendations/FloorRecommendations.py | 20 +++++++++++++++--
 2 files changed, 26 insertions(+), 23 deletions(-)

diff --git a/backend/Property.py b/backend/Property.py
index b8563b87..1e241b04 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -478,26 +478,6 @@ class Property:
         for recommendation in recommendations:
             # For the list of recommendations we have, we iteratively update the output
 
-            # Update description to indicate it's insulate
-            if recommendation["type"] in [
-                "solid_floor_insulation",
-                "suspended_floor_insulation",
-                "exposed_floor_insulation",
-            ]:
-                if len(recommendation["parts"]) > 1:
-                    raise NotImplementedError(
-                        "Have more than 1 floor insulation part - handle this case"
-                    )
-
-                # We don't really see above average for this in the training data
-                output["floor_insulation_thickness_ending"] = "average"
-            else:
-                if output["floor_thermal_transmittance_ending"] is None:
-                    raise ValueError("We should not have a None value for the u value")
-
-                if output["floor_insulation_thickness_ending"] is None:
-                    output["floor_insulation_thickness_ending"] = "none"
-
             if recommendation["type"] == "sealing_open_fireplace":
                 output["number_open_fireplaces_ending"] = 0
 
@@ -542,6 +522,7 @@ class Property:
                 "heating", "hot_water_tank_insulation", "heating_control", "secondary_heating",
                 "internal_wall_insulation", "external_wall_insulation", "cavity_wall_insulation",
                 "cylinder_thermostat", "loft_insulation", "room_roof_insulation", "flat_roof_insulation",
+                "solid_floor_insulation", "suspended_floor_insulation",
             ]:
                 # We update the data, as defined in the recommendaton
                 if output["walls_insulation_thickness_ending"] is None:
@@ -556,6 +537,12 @@ class Property:
                 if output["roof_thermal_transmittance_ending"] is None:
                     raise ValueError("We should not have a None value for the u value")
 
+                if output["floor_thermal_transmittance_ending"] is None:
+                    raise ValueError("We should not have a None value for the u value")
+
+                if output["floor_insulation_thickness_ending"] is None:
+                    output["floor_insulation_thickness_ending"] = "none"
+
                 simulation_config = recommendation["simulation_config"]
                 # If any entries in simulation_config are None, we will set them to "Unknown" which is the cleaning
                 # value
@@ -572,7 +559,7 @@ class Property:
                 "sealing_open_fireplace", "low_energy_lighting",
                 "internal_wall_insulation", "external_wall_insulation", "cavity_wall_insulation",
                 "loft_insulation", "room_roof_insulation", "flat_roof_insulation",
-                "solid_floor_insulation", "suspended_floor_insulation", "exposed_floor_insulation",
+                "solid_floor_insulation", "suspended_floor_insulation",
                 "windows_glazing", "solar_pv", "heating", "hot_water_tank_insulation",
                 "heating_control", "secondary_heating", "cylinder_thermostat"
             ]:
diff --git a/recommendations/FloorRecommendations.py b/recommendations/FloorRecommendations.py
index 9faedb89..5a8ad242 100644
--- a/recommendations/FloorRecommendations.py
+++ b/recommendations/FloorRecommendations.py
@@ -8,9 +8,10 @@ from datatypes.enums import QuantityUnits
 from backend.Property import Property
 from recommendations.recommendation_utils import (
     r_value_per_mm_to_u_value, calculate_u_value_uplift, is_diminishing_returns, update_lowest_selected_u_value,
-    get_recommended_part, get_floor_u_value, override_costs
+    get_recommended_part, get_floor_u_value, override_costs, check_simulation_difference
 )
 from recommendations.Costs import Costs
+from etl.epc_clean.epc_attributes.FloorAttributes import FloorAttributes
 
 
 class FloorRecommendations(Definitions):
@@ -118,7 +119,7 @@ class FloorRecommendations(Definitions):
         if u_value < self.BUILDING_REGULATIONS_PART_L_MAX_U_VALUE:
             return
 
-        if self.property.floor["is_suspended"]:
+        if self.property.floor["is_suspended"] or self.property.floor["is_to_unheated_space"]:
             # Given the U-value, we recommend underfloor insulation
             self.recommend_floor_insulation(
                 phase=phase,
@@ -197,6 +198,8 @@ class FloorRecommendations(Definitions):
                         if already_installed:
                             cost_result = override_costs(cost_result)
 
+                        new_description = "Suspended, insulated"
+
                     elif material["type"] == "solid_floor_insulation":
                         cost_result = self.costs.solid_floor_insulation(
                             insulation_floor_area=self.property.insulation_floor_area,
@@ -207,9 +210,21 @@ class FloorRecommendations(Definitions):
                         already_installed = "solid_floor_insulation" in self.property.already_installed
                         if already_installed:
                             cost_result = override_costs(cost_result)
+
+                        new_description = "Solid, insulated"
                     else:
                         raise NotImplementedError("Implement me!")
 
+                    floor_ending_config = FloorAttributes(new_description).process()
+                    floor_simulation_config = check_simulation_difference(
+                        new_config=floor_ending_config, old_config=self.property.floor, prefix="floor_"
+                    )
+
+                    simulation_config = {
+                        **floor_simulation_config,
+                        "floor_thermal_transmittance_ending": new_u_value,
+                    }
+
                     self.recommendations.append(
                         {
                             "phase": phase,
@@ -227,6 +242,7 @@ class FloorRecommendations(Definitions):
                             "new_u_value": new_u_value,
                             "sap_points": None,
                             "already_installed": already_installed,
+                            "simulation_config": simulation_config,
                             "description_simulation": {
                                 "floor-description": "Solid, insulated" if
                                 material["type"] == "solid_floor_insulation"

From 891545804e855c9262045711647ee20be0a53853 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 6 Aug 2024 17:04:58 +0100
Subject: [PATCH 066/182] simplified extraction of costs and kwh predictions

---
 backend/Property.py                        | 166 +++++++++------------
 backend/app/config.py                      |   6 +-
 backend/app/plan/router.py                 |  38 +++--
 backend/ml_models/api.py                   |  34 +++--
 etl/bill_savings/EnergyConsumptionModel.py |  11 +-
 etl/customers/newhaven/newhaven_study.py   |  49 +++++-
 recommendations/WallRecommendations.py     |   4 +
 7 files changed, 174 insertions(+), 134 deletions(-)

diff --git a/backend/Property.py b/backend/Property.py
index 1e241b04..600e9b03 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -235,12 +235,9 @@ class Property:
 
     def parse_kwargs(self, kwargs):
         # We extract the elements from kwargs that we recognise. Anything additional is ignored
-        self.n_bathrooms = kwargs.get("n_bathrooms", None)
-        self.n_bedrooms = kwargs.get("n_bedrooms", None)
-        self.building_id = kwargs.get("building_id", None)
-        self.number_of_floors = kwargs.get("number_of_floors", None)
-        self.insulation_floor_area = kwargs.get("insulation_floor_area", None)
-        self.insulation_wall_area = kwargs.get("insulation_wall_area", None)
+        for arg, val in kwargs.items():
+            if val is not None:
+                setattr(self, arg, val)
 
     def create_base_difference_epc_record(self, cleaned_lookup: dict):
         """
@@ -574,7 +571,8 @@ class Property:
     def get_components(
         self,
         cleaned,
-        energy_consumption_client
+        energy_consumption_client,
+        kwh_predictions
     ):
         """
         Given the cleaning that has been performed, we'll use this to identify the property
@@ -582,6 +580,7 @@ class Property:
         :param cleaned: This is the dictionary of components found in cleaner.cleaned
         :param energy_consumption_client: Contains the heating and hot water kwh models - used to predict current
                                         energy annual consumption in kWh
+        :param kwh_predictions: Contains the kwh predictions for heating and hot water
         :return:
         """
 
@@ -646,7 +645,7 @@ class Property:
         self.set_windows_count()
         self.set_energy_source()
         self.find_energy_sources()
-        self.set_current_energy_bill(energy_consumption_client)
+        self.set_current_energy_bill(energy_consumption_client, kwh_predictions)
 
     def set_solar_panel_configuration(
         self, solar_panel_configuration, roof_area
@@ -659,7 +658,7 @@ class Property:
         # We also set the roof area
         self.roof_area = roof_area
 
-    def set_current_energy_bill(self, energy_consumption_client):
+    def set_current_energy_bill(self, energy_consumption_client, kwh_predictions):
         """
         Given what we know about the property now, estimates the current energy consumption using the UCL paper
         https://www.sciencedirect.com/science/article/pii/S0378778823002542
@@ -687,97 +686,86 @@ class Property:
         # If we have the kwh figures, we don't need to predict them
         condition_data = self.energy_assessment_condition_data.copy()
 
-        scoring_df = pd.DataFrame([self.epc_record.prepared_epc])
-        # Change columns from underscores to hyphens
-        scoring_df.columns = [
-            x.lower().replace("_", "-") for x in scoring_df.columns
-        ]
-        for col in ["heating_kwh", "hot_water_kwh"]:
-            scoring_df[col] = None
-
-        energy_consumption_client.data = None
+        # scoring_df = pd.DataFrame([self.epc_record.prepared_epc])
+        # # Change columns from underscores to hyphens
+        # scoring_df.columns = [
+        #     x.lower().replace("_", "-") for x in scoring_df.columns
+        # ]
+        # for col in ["heating_kwh", "hot_water_kwh"]:
+        #     scoring_df[col] = None
+        #
+        # energy_consumption_client.data = None
+        heating_kwh_predictions = kwh_predictions["heating_kwh_predictions"]
+        hotwater_kwh_predictions = kwh_predictions["hotwater_kwh_predictions"]
 
         heating_prediction = (
-            float(condition_data["space_heating_kwh"]) if condition_data.get("space_heating_kwh") is not None
-            else energy_consumption_client.score_new_data(
-                new_data=scoring_df, target="heating_kwh"
-            )[0]
+            condition_data.get("space_heating_kwh") if condition_data.get("space_heating_kwh") is not None else
+            heating_kwh_predictions[
+                heating_kwh_predictions["id"].astype(int) == self.uprn
+                ]["predictions"].values[0]
         )
 
+        # heating_prediction = (
+        #     float(condition_data["space_heating_kwh"]) if condition_data.get("space_heating_kwh") is not None
+        #     else energy_consumption_client.score_new_data(
+        #         new_data=scoring_df, target="heating_kwh"
+        #     )[0]
+        # )
+
         hot_water_prediction = (
-            float(condition_data["water_heating_kwh"]) if condition_data.get("water_heating_kwh") is not None
-            else energy_consumption_client.score_new_data(
-                new_data=scoring_df, target="hot_water_kwh"
-            )[0]
+            condition_data.get("water_heating_kwh") if condition_data.get("water_heating_kwh") is not None else
+            hotwater_kwh_predictions[
+                hotwater_kwh_predictions["id"].astype(int) == self.uprn
+                ]["predictions"].values[0]
         )
 
+        # hot_water_prediction = (
+        #     float(condition_data["water_heating_kwh"]) if condition_data.get("water_heating_kwh") is not None
+        #     else energy_consumption_client.score_new_data(
+        #         new_data=scoring_df, target="hot_water_kwh"
+        #     )[0]
+        # )
+
         # We convert the lighting cost into kwh, just using the price cap
-        lighting_kwh = float(self.data["lighting-cost-current"]) / AnnualBillSavings.ELECTRICITY_PRICE_CAP
+        lighting_kwh = todays_lighting_cost / AnnualBillSavings.ELECTRICITY_PRICE_CAP
 
         appliances_kwh = AnnualBillSavings.estimate_appliances_energy_use(total_floor_area=self.floor_area)
 
-        adjusted_heating_kwh = AnnualBillSavings.adjust_energy_to_metered(
-            epc_energy=heating_prediction,
-            current_epc_rating=self.data["current-energy-rating"],
-        )
+        unadjusted_kwh_estimates = {
+            "heating": heating_prediction,
+            "hot_water": hot_water_prediction,
+            "lighting": lighting_kwh,
+            "appliances": appliances_kwh
+        }
 
-        adjusted_hot_water_kwh = AnnualBillSavings.adjust_energy_to_metered(
-            epc_energy=hot_water_prediction,
-            current_epc_rating=self.data["current-energy-rating"],
-        )
+        adjusted_kwh_estimates = {
+            k: AnnualBillSavings.adjust_energy_to_metered(
+                epc_energy=v,
+                current_epc_rating=self.data["current-energy-rating"],
+            ) for k, v in unadjusted_kwh_estimates.items()
+        }
 
-        adjusted_lighting_kwh = AnnualBillSavings.adjust_energy_to_metered(
-            epc_energy=lighting_kwh,
-            current_epc_rating=self.data["current-energy-rating"],
-        )
+        unadjusted_heating_costs = {
+            "heating": todays_heating_cost,
+            "hot_water": todays_hot_water_cost,
+            "lighting": todays_lighting_cost,
+            "appliances": appliances_kwh * AnnualBillSavings.ELECTRICITY_PRICE_CAP
+        }
 
-        adjusted_applicances_kwh = AnnualBillSavings.adjust_energy_to_metered(
-            epc_energy=appliances_kwh,
-            current_epc_rating=self.data["current-energy-rating"],
-        )
-
-        # Adjust today's cost figures with the UCL model
-        adjusted_heating_cost = AnnualBillSavings.adjust_energy_to_metered(
-            epc_energy=todays_heating_cost,
-            current_epc_rating=self.data["current-energy-rating"],
-        )
-
-        adjusted_hot_water_cost = AnnualBillSavings.adjust_energy_to_metered(
-            epc_energy=todays_hot_water_cost,
-            current_epc_rating=self.data["current-energy-rating"],
-        )
-
-        adjusted_lighting_cost = AnnualBillSavings.adjust_energy_to_metered(
-            epc_energy=todays_lighting_cost,
-            current_epc_rating=self.data["current-energy-rating"],
-        )
-
-        adjusted_appliances_cost = AnnualBillSavings.adjust_energy_to_metered(
-            epc_energy=appliances_kwh * AnnualBillSavings.ELECTRICITY_PRICE_CAP,
-            current_epc_rating=self.data["current-energy-rating"],
-        )
+        adjusted_heating_costs = {
+            k: AnnualBillSavings.adjust_energy_to_metered(
+                epc_energy=v,
+                current_epc_rating=self.data["current-energy-rating"],
+            ) for k, v in unadjusted_heating_costs.items()
+        }
 
         # Sum up the adjusted kwh figures
-        self.current_adjusted_energy = (
-            adjusted_heating_kwh + adjusted_hot_water_kwh + adjusted_lighting_kwh + adjusted_applicances_kwh
-        )
-        self.current_energy_bill = (
-            adjusted_heating_cost + adjusted_hot_water_cost + adjusted_lighting_cost + adjusted_appliances_cost
-        )
+        self.current_adjusted_energy = sum(list(adjusted_kwh_estimates.values()))
+        self.current_energy_bill = sum(list(adjusted_heating_costs.values()))
 
         self.energy_cost_estimates = {
-            "adjusted": {
-                "heating": adjusted_heating_cost,
-                "hot_water": adjusted_hot_water_cost,
-                "lighting": adjusted_lighting_cost,
-                "appliances": adjusted_appliances_cost
-            },
-            "unadjusted": {
-                "heating": todays_heating_cost,
-                "hot_water": todays_hot_water_cost,
-                "lighting": todays_lighting_cost,
-                "appliances": appliances_kwh * AnnualBillSavings.ELECTRICITY_PRICE_CAP
-            },
+            "adjusted": adjusted_heating_costs,
+            "unadjusted": unadjusted_heating_costs,
             "epc": {
                 "heating": float(self.data["heating-cost-current"]),
                 "hot_water": float(self.data["hot-water-cost-current"]),
@@ -786,18 +774,8 @@ class Property:
         }
 
         self.energy_consumption_estimates = {
-            "adjusted": {
-                "heating": adjusted_heating_kwh,
-                "hot_water": adjusted_hot_water_kwh,
-                "lighting": adjusted_lighting_kwh,
-                "appliances": adjusted_applicances_kwh
-            },
-            "unadjusted": {
-                "heating": heating_prediction,
-                "hot_water": hot_water_prediction,
-                "lighting": lighting_kwh,
-                "appliances": appliances_kwh
-            }
+            "adjusted": adjusted_kwh_estimates,
+            "unadjusted": unadjusted_kwh_estimates
         }
 
     def set_spatial(self, spatial: pd.DataFrame):
diff --git a/backend/app/config.py b/backend/app/config.py
index f80da387..b5ea72fe 100644
--- a/backend/app/config.py
+++ b/backend/app/config.py
@@ -30,6 +30,8 @@ class Settings(BaseSettings):
     LIGHTING_COST_PREDICTIONS_BUCKET: str
     HEATING_COST_PREDICTIONS_BUCKET: str
     HOT_WATER_COST_PREDICTIONS_BUCKET: str
+    HEATING_KWH_PREDICTIONS_BUCKET: str
+    HOTWATER_KWH_PREDICTIONS_BUCKET: str
 
     class Config:
         env_file = "backend/.env"
@@ -48,5 +50,7 @@ def get_prediction_buckets():
         "carbon_change_predictions": get_settings().CARBON_PREDICTIONS_BUCKET,
         "lighting_cost_predictions": get_settings().LIGHTING_COST_PREDICTIONS_BUCKET,
         "heating_cost_predictions": get_settings().HEATING_COST_PREDICTIONS_BUCKET,
-        "hot_water_cost_predictions": get_settings().HOT_WATER_COST_PREDICTIONS_BUCKET
+        "hot_water_cost_predictions": get_settings().HOT_WATER_COST_PREDICTIONS_BUCKET,
+        "heating_kwh_predictions": get_settings().HEATING_KWH_PREDICTIONS_BUCKET,
+        "hotwater_kwh_predictions": get_settings().HOTWATER_KWH_PREDICTIONS_BUCKET,
     }
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index d4b2a9a5..b4d5c774 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -432,24 +432,20 @@ async def trigger_plan(body: PlanTriggerRequest):
             environment=get_settings().ENVIRONMENT
         )
 
-        epcs_for_scoring = pd.DataFrame([energy_consumption_client.prepare_new_data(p) for p in input_properties])
-        # What do we need?
-        # We need an estimate of each properties energy consumption now, as well as the cost of heating and hot water
-        # The newest EPC may have been done quite some time ago, and so we should take this into consideration when
-        # producing the estimate for cost. With that said, we already have a methodology which will re-map the cost
-        # when the EPC was produced to a cost for today, however could we use the ML models.
-        # In theory, we could just score the kwh models via the API, pass the results into the get_components function
-        # and insert the kwh figures into the property and we're done
-        # TODO: Need to check if we need to re-map when scoring new data or not
+        model_api = ModelApi(portfolio_id=body.portfolio_id, timestamp=created_at)
 
-        # We need to prepare the EPC so it's in the same format as the training data
-        # TODO: DELETE ME
-        # from utils.s3 import read_dataframe_from_s3_parquet
-        # train = read_dataframe_from_s3_parquet(
-        #     bucket_name="retrofit-data-dev",
-        #     file_key="energy_consumption/2024-07-08/energy_consumption_dataset.parquet"
-        # )
-        # We need to prepare the EPC so it's in the same format as the training data
+        epcs_for_scoring = energy_consumption_client.prepare_new_data(input_properties)
+
+        # prepare the data
+
+        # TODO: Some junk is being returned by the heating kwh model!
+        kwh_predictions = model_api.predict_all(
+            df=epcs_for_scoring,
+            bucket=get_settings().DATA_BUCKET,
+            prediction_buckets=get_prediction_buckets(),
+            model_prefixes=["heating_kwh_predictions", "hotwater_kwh_predictions"],
+            extract_ids=False
+        )
 
         # TODO: Move this/tidy it up
         uprn_map = {}
@@ -478,7 +474,11 @@ async def trigger_plan(body: PlanTriggerRequest):
         for p in tqdm(input_properties):
             if p.spatial is None:
                 raise Exception("Missed setting of spatial data for a property")
-            p.get_components(cleaned=cleaned, energy_consumption_client=energy_consumption_client)
+            p.get_components(
+                cleaned=cleaned,
+                # energy_consumption_client=energy_consumption_client  # TODO: Full remove me
+                kwh_predictions=kwh_predictions
+            )
 
         logger.info("Performing solar analysis")
         # TODO: Tidy this up
@@ -663,8 +663,6 @@ async def trigger_plan(body: PlanTriggerRequest):
                      "carbon_ending"]
         )
 
-        model_api = ModelApi(portfolio_id=body.portfolio_id, timestamp=created_at)
-
         all_predictions = model_api.predictions_template()
         to_loop_over = range(0, recommendations_scoring_data.shape[0], SCORING_BATCH_SIZE)
         for chunk in tqdm(to_loop_over, total=len(to_loop_over)):
diff --git a/backend/ml_models/api.py b/backend/ml_models/api.py
index 4844d7fd..e4a0715f 100644
--- a/backend/ml_models/api.py
+++ b/backend/ml_models/api.py
@@ -15,6 +15,8 @@ class ModelApi:
         "lighting_cost_predictions",
         "heating_cost_predictions",
         "hot_water_cost_predictions",
+        "hotwater_kwh_predictions",
+        "heating_kwh_predictions",
     ]
 
     MODEL_URLS = {
@@ -24,6 +26,8 @@ class ModelApi:
         "lighting_cost_predictions": "lightingmodel",
         "heating_cost_predictions": "heatingmodel",
         "hot_water_cost_predictions": "hotwatermodel",
+        "hotwater_kwh_predictions": "hotwaterkwhmodel",
+        "heating_kwh_predictions": "heatingkwhmodel",
     }
 
     def __init__(
@@ -123,7 +127,7 @@ class ModelApi:
         else:
             return None
 
-    def predict_all(self, df, bucket, prediction_buckets) -> dict:
+    def predict_all(self, df, bucket, prediction_buckets, model_prefixes=None, extract_ids=True) -> dict:
 
         """
         For each model prefix, this method will upload the scoring data to s3 and then make a request to the
@@ -133,11 +137,17 @@ class ModelApi:
         :param df:  Pandas dataframe with scoring data to be uploaded to s3
         :param bucket: Name of the bucket in s3 to upload to
         :param prediction_buckets: Dictionary containing the prediction buckets for each model prefix
+        :param model_prefixes: List of model prefixes to generate predictions for. If None, all model prefixes will be
+        used
+        :param extract_ids: Boolean to determine if the property_id and recommendation_id should be extracted from the
+        id column
         :return:
         """
 
+        model_prefixes = self.MODEL_PREFIXES if model_prefixes is None else model_prefixes
+
         predictions = {}
-        for model_prefix in self.MODEL_PREFIXES:
+        for model_prefix in model_prefixes:
             logger.info(f"Scoring for model prefix: {model_prefix}")
             file_location = self.upload_scoring_data(df, bucket, model_prefix)
             response = self.predict(
@@ -155,15 +165,17 @@ class ModelApi:
             )
 
             predictions_df['predictions'] = predictions_df["predictions"].astype(float).round(1)
-            predictions_df[['property_id', 'recommendation_id']] = predictions_df['id'].str.split('+', expand=True)
-            # To grab the phase, we pull the integer after "phase=" in the recommendation_id. We can do this with a
-            # string split on phase= and then grab the second element of the resulting list. We could also use a
-            # regular expression to do this but we use the string split method here, for safety.
-            # We may not always have a phase to split on, so we need to handle this case. We can do this by using the
-            # str[1] method to grab the second element of the resulting list. We then grab the first character of this
-            # string to get the phase. We then convert this to an integer.
-            # Convert back to int
-            predictions_df['phase'] = predictions_df['recommendation_id'].apply(self.extract_phase)
+            if extract_ids:
+                predictions_df[['property_id', 'recommendation_id']] = predictions_df['id'].str.split('+', expand=True)
+                # To grab the phase, we pull the integer after "phase=" in the recommendation_id. We can do this with a
+                # string split on phase= and then grab the second element of the resulting list. We could also use a
+                # regular expression to do this but we use the string split method here, for safety.
+                # We may not always have a phase to split on, so we need to handle this case. We can do this by using
+                # the str[1] method to grab the second element of the resulting list. We then grab the first
+                # character of this
+                # string to get the phase. We then convert this to an integer.
+                # Convert back to int
+                predictions_df['phase'] = predictions_df['recommendation_id'].apply(self.extract_phase)
 
             predictions[model_prefix] = predictions_df
 
diff --git a/etl/bill_savings/EnergyConsumptionModel.py b/etl/bill_savings/EnergyConsumptionModel.py
index 5922177e..01dcce7a 100644
--- a/etl/bill_savings/EnergyConsumptionModel.py
+++ b/etl/bill_savings/EnergyConsumptionModel.py
@@ -508,7 +508,7 @@ class EnergyConsumptionModel:
         return prediction
 
     @staticmethod
-    def prepare_new_data(p: Property):
+    def _prepare_new_data(p: Property):
         """
         Given an instance of the property class, this method will ensure that the EPC is ready for scoring with the
         kwh models. In the backend, we perform some cleaning and transformation on an EPC so we just ensure that the
@@ -558,6 +558,15 @@ class EnergyConsumptionModel:
 
         return epc
 
+    def prepare_new_data(self, input_properties: list[Property]):
+        scoring_data = pd.DataFrame([self._prepare_new_data(p) for p in input_properties])
+        scoring_data["lodgement-year"] = pd.to_datetime(scoring_data["lodgement-date"]).dt.year
+        scoring_data["lodgement-month"] = pd.to_datetime(scoring_data["lodgement-date"]).dt.month
+
+        scoring_data["id"] = scoring_data["uprn"].copy()
+
+        return scoring_data
+
     @staticmethod
     def calculate_percentage_decrease(start_efficiency, end_efficiency, consumption_averages):
 
diff --git a/etl/customers/newhaven/newhaven_study.py b/etl/customers/newhaven/newhaven_study.py
index 7c53405f..4092dd87 100644
--- a/etl/customers/newhaven/newhaven_study.py
+++ b/etl/customers/newhaven/newhaven_study.py
@@ -30,6 +30,8 @@ def make_asset_list():
 
     epc_data = epc_data[~pd.isnull(epc_data["uprn"])]
     epc_data["uprn"] = epc_data["uprn"].astype(int).astype(str)
+    # Take the newest EPC per uprn
+    epc_data = epc_data.sort_values("lodgement-date").groupby("uprn").last().reset_index()
     # /Users/khalimconn-kowlessar/Documents/hestia/Customers/Newhaven/Data/
     # We read in the multiple data sources
     address_base = pd.read_csv(
@@ -72,7 +74,7 @@ def make_asset_list():
     ].merge(
         epc_data[
             ["uprn", "current-energy-efficiency", "current-energy-rating", "address1", "postcode", "floor-height",
-             "property-type", "built-form"]],
+             "property-type", "built-form", "co2-emissions-current"]],
         how="left",
         left_on="UPRN",
         right_on="uprn"
@@ -86,6 +88,21 @@ def make_asset_list():
         columns={"Wall Area [m^2]": "insulation_wall_area", "Building Area [m^2]": "floor_area"}
     )
 
+    had_an_epc = asset_list[~pd.isnull(asset_list["current-energy-efficiency"])]
+    below_b = asset_list[asset_list["current-energy-efficiency"].astype(float) <= 80].shape
+    below_c = asset_list[asset_list["current-energy-efficiency"].astype(float) <= 69].shape
+    had_an_epc["energy-efficiency-rating"].value_counts()
+    asset_list["current-energy-rating"].value_counts()
+    asset_list["co2-emissions-current"].mean()
+    # Get the underlying data of a histogram
+    import matplotlib.pyplot as plt
+    n, bins, patches = plt.hist(asset_list["co2-emissions-current"], bins=100, color="blue", alpha=0.7)
+
+    bins = np.arange(0, asset_list["co2-emissions-current"].max(), 1)  # Bins from 0 up to (not including) the max, step 1
+
+    # Calculate the frequency of data in each bin
+    hist, bin_edges = np.histogram(asset_list["co2-emissions-current"], bins=bins)
+
     # Take properties below a B - there are 2844 units
     asset_list = asset_list[asset_list["current-energy-efficiency"].astype(float) <= 80]
     # Drop caravans
@@ -235,8 +252,7 @@ def make_asset_list():
         file_name=non_invasive_recommendations_filename
     )
 
-    # Create two scenarios
-    # Scenario A
+    # Create three scenarios
     body1 = {
         "portfolio_id": str(PORTFOLIO_ID),
         "housing_type": "Private",
@@ -246,14 +262,16 @@ def make_asset_list():
         "already_installed_file_path": "",
         "patches_file_path": "",
         "non_invasive_recommendations_file_path": non_invasive_recommendations_filename,
-        "scenario_name": "Fabric - no solid wall",
+        "scenario_name": "Demand Reduction - no solid wall",
         "multi_plan": True,
-        "exclusions": ["internal_wall_insulation", "external_wall_insulation", "floor_insulation"],
+        "exclusions": [
+            "internal_wall_insulation", "external_wall_insulation", "floor_insulation", "heating", "solar_pv"
+        ],
         "budget": None,
     }
     print(body1)
 
-    # Scenario B - deep fabric, no exclusions
+    # Scenario B
     body2 = {
         "portfolio_id": str(PORTFOLIO_ID),
         "housing_type": "Private",
@@ -263,8 +281,25 @@ def make_asset_list():
         "already_installed_file_path": "",
         "patches_file_path": "",
         "non_invasive_recommendations_file_path": non_invasive_recommendations_filename,
-        "scenario_name": "Deep Fabric",
+        "scenario_name": "Demand Reduction, Heating Systems, Solar PV - no solid wall",
         "multi_plan": True,
+        "exclusions": ["internal_wall_insulation", "external_wall_insulation", "floor_insulation"],
         "budget": None,
     }
     print(body2)
+
+    # Scenario C - deep fabric, no exclusions
+    body3 = {
+        "portfolio_id": str(PORTFOLIO_ID),
+        "housing_type": "Private",
+        "goal": "Increasing EPC",
+        "goal_value": "A",
+        "trigger_file_path": filename,
+        "already_installed_file_path": "",
+        "patches_file_path": "",
+        "non_invasive_recommendations_file_path": non_invasive_recommendations_filename,
+        "scenario_name": "Whole House",
+        "multi_plan": True,
+        "budget": None,
+    }
+    print(body3)
diff --git a/recommendations/WallRecommendations.py b/recommendations/WallRecommendations.py
index edfc4d66..569d7bcb 100644
--- a/recommendations/WallRecommendations.py
+++ b/recommendations/WallRecommendations.py
@@ -61,10 +61,12 @@ class WallRecommendations(Definitions):
         "Cavity wall, as built, insulated": "Cavity wall, filled cavity and external insulation",
         "Solid brick, as built, no insulation": "Solid brick, with external insulation",
         "Solid brick, as built, insulated": "Solid brick, with external insulation",
+        "Solid brick, as built, partial insulation": "Solid brick, with external insulation",
         "Cob, as built": "Cob, with external insulation",
         "System built, as built, no insulation": "System built, with external insulation",
         "Granite or whinstone, as built, no insulation": 'Granite or whinstone, with external insulation',
         "Timber frame, as built, no insulation": "Timber frame, with external insulation",
+        'Timber frame, as built, partial insulation': 'Timber frame, with external insulation',
     }
 
     # These are the ending descriptions we consider for walls with internal insulation
@@ -72,10 +74,12 @@ class WallRecommendations(Definitions):
         "Cavity wall, as built, insulated": "Cavity wall, filled cavity and internal insulation",
         "Solid brick, as built, no insulation": "Solid brick, with internal insulation",
         "Solid brick, as built, insulated": "Solid brick, with internal insulation",
+        "Solid brick, as built, partial insulation": "Solid brick, with internal insulation",
         "Cob, as built": "Cob, with internal insulation",
         "System built, as built, no insulation": "System built, with internal insulation",
         "Granite or whinstone, as built, no insulation": 'Granite or whinstone, with internal insulation',
         "Timber frame, as built, no insulation": "Timber frame, with internal insulation",
+        'Timber frame, as built, partial insulation': 'Timber frame, with internal insulation',
     }
 
     def __init__(

From db3ab9bb4a0ed883561592b0c2e9f4972e046065 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 7 Aug 2024 15:04:19 +0100
Subject: [PATCH 067/182] refactoring calculate_recommendation_impact

---
 backend/Property.py                |   2 +-
 backend/app/plan/router.py         | 137 ++++++-
 backend/ml_models/api.py           |   6 +-
 recommendations/Recommendations.py | 587 ++++++++---------------------
 4 files changed, 285 insertions(+), 447 deletions(-)

diff --git a/backend/Property.py b/backend/Property.py
index 600e9b03..25068f6c 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -1025,7 +1025,7 @@ class Property:
             built_form=self.data["built-form"],
         )
 
-        if self.insulation_floor_area is not None:
+        if self.insulation_floor_area is None:
             self.insulation_floor_area = float(
                 self.energy_assessment_condition_data["main_dwelling_ground_floor_area"]
             ) if (condition_data.get("main_dwelling_ground_floor_area") is not None) else (
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index b4d5c774..fb4ffa14 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -438,7 +438,120 @@ async def trigger_plan(body: PlanTriggerRequest):
 
         # prepare the data
 
-        # TODO: Some junk is being returned by the heating kwh model!
+        # TODO - this needs to be moved to the etl process
+        import numpy as np
+        def add_features_from_code(df):
+
+            FEATURES = {
+                "heating_kwh": [
+                    "lodgement-year", "lodgement-month", "current-energy-efficiency", "energy-consumption-current",
+                    "heating-cost-current", "heating-cost-potential", "total-floor-area", "number-heated-rooms",
+                    "mainheat-description", "mainheat-energy-eff", "main-fuel", "secondheat-description",
+                    "property-type",
+                    "built-form", "mainheatcont-description", "hotwater-description", "hot-water-energy-eff",
+                    "walls-energy-eff",
+                    "roof-energy-eff", "windows-description", "windows-energy-eff", "floor-description",
+                    "flat-top-storey",
+                    "flat-storey-count", "unheated-corridor-length", "solar-water-heating-flag",
+                    "mechanical-ventilation",
+                    "low-energy-lighting", "environment-impact-current", "energy-tariff",
+                    "county", "construction-age-band", "co2-emissions-current",
+                ],
+                "hot_water_kwh": [
+                    "lodgement-year", "lodgement-month",
+                    "current-energy-efficiency",
+                    "energy-consumption-current",
+                    "hot-water-cost-current",
+                    "total-floor-area", "number-heated-rooms",
+                    "hotwater-description", "hot-water-energy-eff", "main-fuel", "property-type", "built-form",
+                    "co2-emissions-current",
+                ]
+            }
+            CATEGORICAL_COLUMNS = [
+                "lodgement-year", "lodgement-month", "main-fuel", "mainheat-description", "number-heated-rooms",
+                "number-habitable-rooms", "mainheat-energy-eff", "mainheatcont-description", "property-type",
+                "built-form",
+                "construction-age-band", "secondheat-description", "hotwater-description", "hot-water-energy-eff",
+                "walls-description", "walls-energy-eff", "roof-description", "roof-energy-eff", "floor-description",
+                "county",
+                "windows-description", "windows-energy-eff", "flat-top-storey",
+                "flat-storey-count", "unheated-corridor-length", "solar-water-heating-flag", "mechanical-ventilation",
+                "low-energy-lighting", "environment-impact-current", "energy-tariff", "current-energy-rating"
+            ]
+
+            NUMERICAL_COLUMNS = list({
+                x for x in FEATURES["heating_kwh"] + FEATURES["hot_water_kwh"]
+                if x not in CATEGORICAL_COLUMNS
+            })
+
+            """Performs feature engineering on the dataset."""
+            df["lodgement-date"] = pd.to_datetime(df["lodgement-date"])
+            df["lodgement-year"] = df["lodgement-date"].dt.year
+            df["lodgement-month"] = df["lodgement-date"].dt.month
+
+            # For walls, roof, floor description where we have average thermal transmittance, we group the values
+            # into a small number of ranges, to avoid too many categories. Descriptions without a matching range
+            # are left unchanged.
+            ranges = {
+                "lessthan 0.1": (0, 0.1),
+                "0.1 - 0.3": (0.1, 0.3),
+                "0.3 - 0.5": (0.3, 0.5),
+                "morethan 0.5": (0.5, 2.5),
+            }
+
+            # Generate the lookup table
+            thermal_transmittance_lookup_table = []
+            for i in range(1, 251):
+                value = i / 100
+                for label, (low, high) in ranges.items():
+                    if low < value <= high:
+                        thermal_transmittance_lookup_table.append({"from": value, "to": label})
+                        break
+
+            # Convert to DataFrame so it can be merged onto the data below
+            thermal_transmittance_lookup_table = pd.DataFrame(thermal_transmittance_lookup_table)
+            thermal_transmittance_lookup_table["from"] = thermal_transmittance_lookup_table["from"].astype(str)
+
+            # Apply the lookup table to the data
+            for feature in ["walls-description", "roof-description", "floor-description"]:
+                cleaned_df = pd.DataFrame(cleaned[feature])[["original_description", "thermal_transmittance"]]
+                # Round to 2 decimal places and convert to string
+                cleaned_df["thermal_transmittance"] = cleaned_df["thermal_transmittance"].round(2).astype(str)
+
+                df = df.merge(
+                    cleaned_df,
+                    how="left",
+                    left_on=feature,
+                    right_on="original_description",
+                )
+                # We now have the thermal transmittance in the data, which we can use to group with the lookup table
+                df = df.merge(
+                    thermal_transmittance_lookup_table,
+                    how="left",
+                    left_on="thermal_transmittance",
+                    right_on="from",
+                )
+                # Where "to" is populated, replace feature with to
+                df[feature] = np.where(
+                    ~pd.isnull(df["to"]),
+                    df["to"],
+                    df[feature]
+                )
+                df = df.drop(columns=["original_description", "thermal_transmittance", "from", "to"])
+
+            # Convert data types
+            df[NUMERICAL_COLUMNS] = df[NUMERICAL_COLUMNS].apply(pd.to_numeric)
+            df[CATEGORICAL_COLUMNS] = df[CATEGORICAL_COLUMNS].astype(str)
+
+            return df
+
+        def add_estimate_annual_kwh(df):
+            df['estimate_annual_kwh'] = df['energy-consumption-current'] * df['total-floor-area']
+            return df
+
+        epcs_for_scoring = add_features_from_code(epcs_for_scoring)
+        epcs_for_scoring = add_estimate_annual_kwh(epcs_for_scoring)
+
         kwh_predictions = model_api.predict_all(
             df=epcs_for_scoring,
             bucket=get_settings().DATA_BUCKET,
@@ -476,7 +589,7 @@ async def trigger_plan(body: PlanTriggerRequest):
                 raise Exception("Missed setting of spatial data for a property")
             p.get_components(
                 cleaned=cleaned,
-                # energy_consumption_client=energy_consumption_client  # TODO: Full remove me
+                energy_consumption_client=energy_consumption_client,  # TODO: Full remove me
                 kwh_predictions=kwh_predictions
             )
 
@@ -676,6 +789,12 @@ async def trigger_plan(body: PlanTriggerRequest):
             for key, scored in predictions_dict.items():
                 all_predictions[key] = pd.concat([all_predictions[key], scored])
 
+        # We now produce predictions for the kwh models
+
+        # TODO: In order to score the kwh models, we need to insert the new SAP, heat demand, carbon, cost
+        #       etc. values into the simulated EPC, otherwise it won't work. We might also want to drop all potential
+        #       columns and env-efficiency columns (potential columns already gone, just need to drop env efficiency)
+
         # Insert the predictions into the recommendations and run the optimiser
         # TODO: If a recommendation has a negative impact on SAP, we should remove it - this seems to have become a
         #       possibility with heating system
@@ -686,26 +805,14 @@ async def trigger_plan(body: PlanTriggerRequest):
 
             property_instance = [p for p in input_properties if p.id == property_id][0]
 
-            (
-                recommendations_with_impact,
-                expected_adjusted_energy,
-                expected_energy_bill
-            ) = (
+            recommendations_with_impact, impact_summary = (
                 Recommendations.calculate_recommendation_impact(
                     property_instance=property_instance,
                     all_predictions=all_predictions,
                     recommendations=recommendations,
-                    representative_recommendations=representative_recommendations,
-                    energy_consumption_client=energy_consumption_client
                 )
             )
 
-            # Store the resulting adjusted energy in the property instance
-            property_instance.set_adjusted_energy(
-                expected_adjusted_energy=expected_adjusted_energy,
-                expected_energy_bill=expected_energy_bill
-            )
-
             input_measures = prepare_input_measures(recommendations_with_impact, body.goal)
 
             current_sap_points = int(property_instance.data["current-energy-efficiency"])
diff --git a/backend/ml_models/api.py b/backend/ml_models/api.py
index e4a0715f..0de7977f 100644
--- a/backend/ml_models/api.py
+++ b/backend/ml_models/api.py
@@ -15,8 +15,6 @@ class ModelApi:
         "lighting_cost_predictions",
         "heating_cost_predictions",
         "hot_water_cost_predictions",
-        "hotwater_kwh_predictions",
-        "heating_kwh_predictions",
     ]
 
     MODEL_URLS = {
@@ -72,8 +70,8 @@ class ModelApi:
         :return:
         """
 
-        if model_prefix not in self.MODEL_PREFIXES:
-            raise ValueError(f"Model prefix specified is not in {self.MODEL_PREFIXES}")
+        # if model_prefix not in self.MODEL_PREFIXES:
+        #     raise ValueError(f"Model prefix specified is not in {self.MODEL_PREFIXES}")
 
         # Store parquet file in s3 for scoring
         file_location = f"{model_prefix}/{self.portfolio_id}/{self.timestamp}.parquet"
diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py
index 1c12d5eb..0de8931a 100644
--- a/recommendations/Recommendations.py
+++ b/recommendations/Recommendations.py
@@ -359,477 +359,210 @@ class Recommendations:
         property_instance,
         all_predictions,
         recommendations,
-        representative_recommendations,
-        energy_consumption_client
     ):
 
         """
         Given predictions from the model apis, with method will update the recommendations with the predicted
         impact of the recommendation on the property
 
+        This function will return two objects:
+        1) Updated recommendations with the predicted impact of the recommendation
+        2) A list of impacts by phase, which will be used for the kwh model scoring
+
         :param property_instance: Instance of the Property class, for the home associated to property_id
         :param all_predictions: dictionary of predictions from the model apis
         :param recommendations: dictionary of recommendations for the property
-        :param representative_recommendations: dictionary of representative recommendations for the property
-        :param energy_consumption_client: Instance of the EnergyConsumptionClient class
         :return:
         """
 
-        property_sap_predictions = all_predictions["sap_change_predictions"][
-            all_predictions["sap_change_predictions"]["property_id"] == str(property_instance.id)
-            ].copy()
-        property_heat_predictions = all_predictions["heat_demand_predictions"][
-            all_predictions["heat_demand_predictions"]["property_id"] == str(property_instance.id)
-            ].copy()
-        property_carbon_predictions = all_predictions["carbon_change_predictions"][
-            all_predictions["carbon_change_predictions"]["property_id"] == str(property_instance.id)
-            ].copy()
-        property_lighting_cost_predictions = all_predictions["lighting_cost_predictions"][
-            all_predictions["lighting_cost_predictions"]["property_id"] == str(property_instance.id)
-            ].copy()
-        property_heating_cost_predictions = all_predictions["heating_cost_predictions"][
-            all_predictions["heating_cost_predictions"]["property_id"] == str(property_instance.id)
-            ].copy()
-        property_hot_water_cost_predictions = all_predictions["hot_water_cost_predictions"][
-            all_predictions["hot_water_cost_predictions"]["property_id"] == str(property_instance.id)
-            ].copy()
+        property_predictions = {
+            prefix + "_predictions": all_predictions[prefix + "_predictions"][
+                all_predictions[prefix + "_predictions"]["property_id"] == str(property_instance.id)
+                ].copy() for prefix in [
+                "sap_change", "heat_demand", "carbon_change", "lighting_cost", "heating_cost", "hot_water_cost"
+            ]
+        }
 
         # We apply adjustments to each of the heating costs
-        property_lighting_cost_predictions["adjusted_cost"] = property_lighting_cost_predictions["predictions"].apply(
-            lambda x: AnnualBillSavings.adjust_energy_to_metered(
-                x, current_epc_rating=property_instance.data["current-energy-rating"]
+        for prefix in ["lighting_cost", "heating_cost", "hot_water_cost"]:
+            property_predictions[f"{prefix}_predictions"]["adjusted_cost"] = (
+                property_predictions[f"{prefix}_predictions"]["predictions"].apply(
+                    lambda x: AnnualBillSavings.adjust_energy_to_metered(
+                        x, current_epc_rating=property_instance.data["current-energy-rating"]
+                    )
+                )
             )
-        )
-
-        property_heating_cost_predictions["adjusted_cost"] = property_heating_cost_predictions["predictions"].apply(
-            lambda x: AnnualBillSavings.adjust_energy_to_metered(
-                x, current_epc_rating=property_instance.data["current-energy-rating"]
-            )
-        )
-
-        property_hot_water_cost_predictions["adjusted_cost"] = property_hot_water_cost_predictions["predictions"].apply(
-            lambda x: AnnualBillSavings.adjust_energy_to_metered(
-                x, current_epc_rating=property_instance.data["current-energy-rating"]
-            )
-        )
 
         property_recommendations = recommendations[property_instance.id].copy()
 
         # We calculate the impact by phase
-        sap_phase_impact = property_sap_predictions.groupby("phase")["predictions"].median().reset_index()
-        heat_phase_impact = property_heat_predictions.groupby("phase")["predictions"].median().reset_index()
-        carbon_phase_impact = property_carbon_predictions.groupby("phase")["predictions"].median().reset_index()
-        # lighting_cost_phase_impact = (
-        #     property_lighting_cost_predictions.groupby("phase")[["adjusted_cost", "predictions"]].median(
-        #     ).reset_index()
-        # )
-        heating_cost_phase_impact = (
-            property_heating_cost_predictions.groupby("phase")[["adjusted_cost", "predictions"]].median().reset_index()
-        )
-        hot_water_cost_phase_impact = (
-            property_hot_water_cost_predictions.groupby("phase")[
-                ["adjusted_cost", "predictions"]
-            ].median().reset_index()
-        )
+        phase_impact = {
+            prefix: property_predictions[prefix + "_predictions"].groupby("phase")["predictions"].median().reset_index()
+            for prefix in [
+                "sap_change", "heat_demand", "carbon_change", "lighting_cost", "heating_cost", "hot_water_cost"
+            ]
+        }
 
-        representative_rec_ids = [
-            rec["recommendation_id"] for rec in representative_recommendations[property_instance.id]
-        ]
+        # TODO: should fabric upgrades have an impact on hot water costs/kwh?
+        # TODO: Generally, the costing models are just increasing. Maybe they're including something in the model
+        #       that they shouldn't e.g. SAP, carbon, heat demand etc?
 
-        phase_lighting_costs = {}
-        phase_kwh_figures = {}
-        bill_savings_list = []
-        kwh_savings_list = []
+        impact_summary = []
         for recommendations_by_type in property_recommendations:
             for rec in recommendations_by_type:
-
                 if rec["type"] == "mechanical_ventilation":
                     # We don't have a percieved sap impact of mechanical ventilation
                     continue
 
-                new_heat_demand = property_heat_predictions[property_heat_predictions["recommendation_id"] == str(
-                    rec["recommendation_id"]
-                )]["predictions"].values[0]
+                phase_energy_efficiency_metrics = {
+                    prefix: property_predictions[prefix + "_predictions"][
+                        property_predictions[prefix + "_predictions"]["recommendation_id"] == str(
+                            rec["recommendation_id"]
+                        )]["predictions"].values[0] for prefix in ["sap_change", "heat_demand", "carbon_change"]
+                }
 
-                new_carbon = property_carbon_predictions[property_carbon_predictions["recommendation_id"] == str(
-                    rec["recommendation_id"]
-                )]["predictions"].values[0]
-
-                new_sap = property_sap_predictions[property_sap_predictions["recommendation_id"] == str(
-                    rec["recommendation_id"]
-                )]["predictions"].values[0]
-
-                # Lighting costs won't change unless we have a lighting recommendation
-                new_lighting_cost_data = property_lighting_cost_predictions[
-                    property_lighting_cost_predictions["recommendation_id"] == str(rec["recommendation_id"])
-                    ]
-
-                new_lighting_cost = new_lighting_cost_data["adjusted_cost"].values[0]
-                new_lighting_cost_unadjusted = new_lighting_cost_data["predictions"].values[0]
-
-                new_heating_cost_data = property_heating_cost_predictions[
-                    property_heating_cost_predictions["recommendation_id"] == str(rec["recommendation_id"])
-                    ]
-
-                new_heating_cost = new_heating_cost_data["adjusted_cost"].values[0]
-                new_heating_cost_unadjusted = new_heating_cost_data["predictions"].values[0]
-
-                new_hot_water_cost_data = property_hot_water_cost_predictions[
-                    property_hot_water_cost_predictions["recommendation_id"] == str(rec["recommendation_id"])
-                    ]
-
-                new_hot_water_cost = new_hot_water_cost_data["adjusted_cost"].values[0]
-                new_hot_water_cost_unadjusted = new_hot_water_cost_data["predictions"].values[0]
+                # For phase costs, we need adjusted and unadjusted values
+                phase_cost = {
+                    prefix: property_predictions[prefix + "_predictions"][
+                        property_predictions[prefix + "_predictions"]["recommendation_id"] ==
+                        str(rec["recommendation_id"])
+                        ] for prefix in ["lighting_cost", "heating_cost", "hot_water_cost"]
+                }
 
+                # We structure this so that depending on the phase, we capture the previous phase impacts and
+                # then just have one piece of code to calculate the difference
                 if rec["phase"] == 0:
-                    predicted_sap_points = new_sap - float(property_instance.data["current-energy-efficiency"])
-                    predicted_co2_savings = float(property_instance.data["co2-emissions-current"]) - new_carbon
-                    predicted_heat_demand = property_instance.floor_area * (
-                        float(property_instance.data["energy-consumption-current"]) - new_heat_demand
-                    )
+                    previous_phase_values = {
+                        "sap": float(property_instance.data["current-energy-efficiency"]),
+                        "carbon": float(property_instance.data["co2-emissions-current"]),
+                        "heat_demand": float(property_instance.data["energy-consumption-current"]),
+                    }
 
                     if rec["type"] == "low_energy_lighting":
-                        new_heating_cost = property_instance.energy_cost_estimates["adjusted"]["heating"]
-                        new_hot_water_cost = property_instance.energy_cost_estimates["adjusted"]["hot_water"]
-                        new_lighting_cost = min(
-                            new_lighting_cost, property_instance.energy_cost_estimates["adjusted"]["lighting"]
-                        )
-                        scoring_heating_cost = property_instance.energy_cost_estimates["unadjusted"]["heating"]
-                        scoring_hot_water_cost = property_instance.energy_cost_estimates["unadjusted"]["hot_water"]
-                        scoring_lighting_cost = min(
-                            property_instance.energy_cost_estimates["unadjusted"]["lighting"],
-                            new_lighting_cost_unadjusted
-                        )
-                    else:
-                        new_heating_cost = min(
-                            new_heating_cost, property_instance.energy_cost_estimates["adjusted"]["heating"]
-                        )
-                        new_hot_water_cost = min(
-                            new_hot_water_cost, property_instance.energy_cost_estimates["adjusted"]["hot_water"]
-                        )
-                        new_lighting_cost = property_instance.energy_cost_estimates["adjusted"]["lighting"]
-
-                        scoring_heating_cost = min(
-                            property_instance.energy_cost_estimates["unadjusted"]["heating"],
-                            new_heating_cost_unadjusted
-                        )
-                        scoring_hot_water_cost = min(
-                            property_instance.energy_cost_estimates["unadjusted"]["hot_water"],
-                            new_hot_water_cost_unadjusted
-                        )
-                        scoring_lighting_cost = property_instance.energy_cost_estimates["unadjusted"]["lighting"]
-
-                    predicted_heating_cost_reduction = (
-                        property_instance.energy_cost_estimates["adjusted"]["heating"] - new_heating_cost
-                    )
-                    predicted_hot_water_cost_reduction = (
-                        property_instance.energy_cost_estimates["adjusted"]["hot_water"] - new_hot_water_cost
-                    )
-
-                    predicted_lighting_cost_reduction = 0 if rec["type"] != "lighting" else (
-                        property_instance.energy_cost_estimates["adjusted"]["lighting"] - new_lighting_cost
-                    )
-                    # We store this value for later
-                    phase_lighting_costs[rec["phase"]] = {
-                        "adjusted": new_lighting_cost,
-                        "unadjusted": scoring_lighting_cost
-                    }
-
-                    # We now predict the kwh savings using the xgb model
-
-                    simulation_epc = property_instance.simulation_epcs[rec["phase"]].copy()
-                    # The current heating, hot water and energy kwh should be based on the new, unadjusted
-                    # costs for lighting, heating, hot water
-                    simulation_epc["heating-cost-current"] = int(scoring_heating_cost)
-                    simulation_epc["hot-water-cost-current"] = int(scoring_hot_water_cost)
-                    simulation_epc["lighting-cost-current"] = int(scoring_lighting_cost)
-                    # We predict with the energy consumption model
-                    scoring_df = pd.DataFrame([simulation_epc])
-                    # Change columns from underscores to hyphens
-                    scoring_df.columns = [
-                        x.lower().replace("_", "-") for x in scoring_df.columns
-                    ]
-                    for col in ["heating_kwh", "hot_water_kwh"]:
-                        scoring_df[col] = None
-
-                    energy_consumption_client.data = None
-                    new_heating_kwh = energy_consumption_client.score_new_data(
-                        new_data=scoring_df, target="heating_kwh"
-                    )[0]
-                    new_heating_kwh = 0 if new_heating_kwh < 0 else new_heating_kwh
-
-                    new_hot_water_kwh = energy_consumption_client.score_new_data(
-                        new_data=scoring_df, target="hot_water_kwh"
-                    )[0]
-                    new_hot_water_kwh = 0 if new_hot_water_kwh < 0 else new_hot_water_kwh
-
-                    # Adjust these figures
-                    new_heating_kwh_adjusted = AnnualBillSavings.adjust_energy_to_metered(
-                        new_heating_kwh, current_epc_rating=property_instance.data["current-energy-rating"]
-                    )
-                    new_hot_water_kwh_adjusted = AnnualBillSavings.adjust_energy_to_metered(
-                        new_hot_water_kwh, current_epc_rating=property_instance.data["current-energy-rating"]
-                    )
-
-                    heating_kwh_reduction = 0 if predicted_heating_cost_reduction == 0 else (
-                        property_instance.energy_consumption_estimates["adjusted"]["heating"] - new_heating_kwh_adjusted
-                    )
-
-                    hot_water_kwh_reduction = 0 if predicted_hot_water_cost_reduction == 0 else (
-                        property_instance.energy_consumption_estimates["adjusted"]["hot_water"] -
-                        new_hot_water_kwh_adjusted
-                    )
-
-                    lighting_kwh_reduction = predicted_lighting_cost_reduction / AnnualBillSavings.ELECTRICITY_PRICE_CAP
-
-                    (
-                        predicted_appliances_cost_reduction,
-                        predicted_appliances_kwh_reduction
-                    ) = cls._calculate_appliance_solar_savings(
-                        rec=rec,
-                        property_instance=property_instance,
-                        heating_kwh_reduction=heating_kwh_reduction,
-                        hot_water_kwh_reduction=hot_water_kwh_reduction,
-                        lighting_kwh_reduction=lighting_kwh_reduction
-                    )
-
-                    kwh_reduction = (
-                        heating_kwh_reduction +
-                        hot_water_kwh_reduction +
-                        lighting_kwh_reduction +
-                        predicted_appliances_kwh_reduction
-                    )
-
-                    predicted_bill_savings = (
-                        predicted_heating_cost_reduction +
-                        predicted_hot_water_cost_reduction +
-                        predicted_lighting_cost_reduction +
-                        predicted_appliances_cost_reduction
-                    )
-
-                    phase_kwh_figures[rec["phase"]] = {
-                        "adjusted": {
-                            "heating": new_heating_kwh_adjusted,
-                            "hot_water": new_hot_water_kwh_adjusted
-                        },
-                        "unadjusted": {
-                            "heating": new_heating_kwh,
-                            "hot_water": new_hot_water_kwh
+                        # In this instance, heating cost and hot water cost should not change so we set the previous
+                        # value to the new one, so the difference is zero
+                        previous_phase_unadjusted_costs = {
+                            "unadjusted_heating_cost": phase_cost["heating_cost"]["predictions"].values[0],
+                            "unadjusted_hot_water_cost": phase_cost["hot_water_cost"]["predictions"].values[0],
+                            "unadjusted_lighting_cost": (
+                                property_instance.energy_cost_estimates["unadjusted"]["lighting"]
+                            )
+                        }
+                    else:
+                        # If the recommendation is not for low energy lighting, we expect the heating/hot water
+                        # costs to change but not the lighting
+                        previous_phase_unadjusted_costs = {
+                            "unadjusted_heating_cost": property_instance.energy_cost_estimates["adjusted"]["heating"],
+                            "unadjusted_hot_water_cost": (
+                                property_instance.energy_cost_estimates["adjusted"]["hot_water"]
+                            ),
+                            "unadjusted_lighting_cost": phase_cost["lighting_cost"]["predictions"].values[0]
                         }
-                    }
-
                 else:
-                    previous_phase = rec["phase"] - 1
-                    predicted_sap_points = (
-                        new_sap - sap_phase_impact[sap_phase_impact["phase"] == previous_phase]["predictions"].values[0]
-                    )
-                    predicted_co2_savings = (
-                        carbon_phase_impact[carbon_phase_impact["phase"] == previous_phase]["predictions"].values[0] -
-                        new_carbon
-                    )
-                    predicted_heat_demand = property_instance.floor_area * (
-                        heat_phase_impact[heat_phase_impact["phase"] == previous_phase]["predictions"].values[0] -
-                        new_heat_demand
-                    )
-
-                    if rec["type"] == "lighting":
-                        # If we have a lighting recommendation, the heating, hot water and lighting costs will
-                        # be from the previous phase - nothing will change
-                        new_heating_cost = heating_cost_phase_impact[
-                            heating_cost_phase_impact["phase"] == previous_phase
-                            ]["adjusted_cost"].values[0]
-                        new_hot_water_cost = hot_water_cost_phase_impact[
-                            hot_water_cost_phase_impact["phase"] == previous_phase
-                            ]["adjusted_cost"].values[0]
-
-                        new_lighting_cost = min(
-                            new_lighting_cost, phase_lighting_costs[previous_phase]["adjusted"]
-                        )
-                        # We also use the unadjusted costs for the scoring from the previous phase
-                        scoring_heating_cost = heating_cost_phase_impact[
-                            heating_cost_phase_impact["phase"] == previous_phase
-                            ]["predictions"].values[0]
-                        scoring_hot_water_cost = hot_water_cost_phase_impact[
-                            hot_water_cost_phase_impact["phase"] == previous_phase
-                            ]["predictions"].values[0]
-                        scoring_lighting_cost = min(
-                            new_lighting_cost_unadjusted,
-                            phase_lighting_costs[previous_phase]["unadjusted"]
-                        )
-                    else:
-                        # Whereas for other recommendations, we use the new costs
-                        new_heating_cost = min(
-                            new_heating_cost,
-                            heating_cost_phase_impact[
-                                heating_cost_phase_impact["phase"] == previous_phase
-                                ]["adjusted_cost"].values[0]
-                        )
-                        new_hot_water_cost = min(
-                            new_hot_water_cost,
-                            hot_water_cost_phase_impact[
-                                hot_water_cost_phase_impact["phase"] == previous_phase
-                                ]["adjusted_cost"].values[0]
-                        )
-                        new_lighting_cost = phase_lighting_costs[previous_phase]["adjusted"]
-
-                        scoring_heating_cost = min(
-                            new_heating_cost_unadjusted,
-                            heating_cost_phase_impact[
-                                heating_cost_phase_impact["phase"] == previous_phase
-                                ]["predictions"].values[0]
-                        )
-                        scoring_hot_water_cost = min(
-                            new_hot_water_cost_unadjusted,
-                            hot_water_cost_phase_impact[
-                                hot_water_cost_phase_impact["phase"] == previous_phase
-                                ]["predictions"].values[0]
-                        )
-                        scoring_lighting_cost = phase_lighting_costs[previous_phase]["unadjusted"]
-
-                    # We now estimate the adjusted cost savings for the recommendation
-                    predicted_heating_cost_reduction = (
-                        heating_cost_phase_impact[heating_cost_phase_impact["phase"] == previous_phase][
-                            "adjusted_cost"
-                        ].values[0] - new_heating_cost
-                    )
-
-                    predicted_hot_water_cost_reduction = (
-                        hot_water_cost_phase_impact[hot_water_cost_phase_impact["phase"] == previous_phase][
-                            "adjusted_cost"
-                        ].values[0] - new_hot_water_cost
-                    )
-
-                    # Only lighting recommendations can have an impact here
-                    predicted_lighting_cost_reduction = (
-                        phase_lighting_costs[previous_phase]["adjusted"] - new_lighting_cost
-                    )
-
-                    # We now predict the kwh savings using the xgb model - this is based on
-                    # the new costs at this phase
-
-                    simulation_epc = property_instance.simulation_epcs[rec["phase"]].copy()
-                    # The current heating, hot water and energy kwh should be based on the new, unadjusted
-                    # costs for lighting, heating, hot water
-                    simulation_epc["heating-cost-current"] = int(scoring_heating_cost)
-                    simulation_epc["hot-water-cost-current"] = int(scoring_hot_water_cost)
-                    simulation_epc["lighting-cost-current"] = int(scoring_lighting_cost)
-                    # We predict with the energy consumption model
-                    scoring_df = pd.DataFrame([simulation_epc])
-                    # Change columns from underscores to hyphens
-                    scoring_df.columns = [
-                        x.lower().replace("_", "-") for x in scoring_df.columns
-                    ]
-                    for col in ["heating_kwh", "hot_water_kwh"]:
-                        scoring_df[col] = None
-
-                    energy_consumption_client.data = None
-                    new_heating_kwh = energy_consumption_client.score_new_data(
-                        new_data=scoring_df, target="heating_kwh"
-                    )[0]
-
-                    new_hot_water_kwh = energy_consumption_client.score_new_data(
-                        new_data=scoring_df, target="hot_water_kwh"
-                    )[0]
-
-                    # Adjust these figures
-                    new_heating_kwh_adjusted = AnnualBillSavings.adjust_energy_to_metered(
-                        new_heating_kwh, current_epc_rating=property_instance.data["current-energy-rating"]
-                    )
-                    new_hot_water_kwh_adjusted = AnnualBillSavings.adjust_energy_to_metered(
-                        new_hot_water_kwh, current_epc_rating=property_instance.data["current-energy-rating"]
-                    )
-
-                    heating_kwh_reduction = 0 if predicted_heating_cost_reduction == 0 else (
-                        phase_kwh_figures[previous_phase]["adjusted"]["heating"] - new_heating_kwh_adjusted
-                    )
-                    if heating_kwh_reduction < 0:
-                        heating_kwh_reduction = 0
-
-                    hot_water_kwh_reduction = 0 if predicted_hot_water_cost_reduction == 0 else (
-                        phase_kwh_figures[previous_phase]["adjusted"]["hot_water"] - new_hot_water_kwh_adjusted
-                    )
-                    if hot_water_kwh_reduction < 0:
-                        hot_water_kwh_reduction = 0
-
-                    lighting_kwh_reduction = predicted_lighting_cost_reduction / AnnualBillSavings.ELECTRICITY_PRICE_CAP
-
-                    (
-                        predicted_appliances_cost_reduction,
-                        predicted_appliances_kwh_reduction
-                    ) = cls._calculate_appliance_solar_savings(
-                        rec=rec,
-                        property_instance=property_instance,
-                        heating_kwh_reduction=heating_kwh_reduction,
-                        hot_water_kwh_reduction=hot_water_kwh_reduction,
-                        lighting_kwh_reduction=lighting_kwh_reduction
-                    )
-
-                    # We now calculate the predicted_bill_savings
-                    predicted_bill_savings = (
-                        predicted_heating_cost_reduction + predicted_hot_water_cost_reduction +
-                        predicted_lighting_cost_reduction + predicted_appliances_cost_reduction
-                    )
-
-                    kwh_reduction = (
-                        heating_kwh_reduction +
-                        hot_water_kwh_reduction +
-                        lighting_kwh_reduction +
-                        predicted_appliances_kwh_reduction
-                    )
-
-                    # We store this value for later
-                    phase_lighting_costs[rec["phase"]] = {
-                        "adjusted": new_lighting_cost,
-                        "unadjusted": scoring_lighting_cost
+                    previous_phase_values = {
+                        "sap": (
+                            phase_impact["sap_change"][phase_impact["sap_change"]["phase"] == (rec["phase"] - 1)]
+                            ["predictions"].values[0]
+                        ),
+                        "carbon": (
+                            phase_impact["carbon_change"][phase_impact["carbon_change"]["phase"] == (rec["phase"] - 1)]
+                            ["predictions"].values[0]
+                        ),
+                        "heat_demand": (
+                            phase_impact["heat_demand"][phase_impact["heat_demand"]["phase"] == (rec["phase"] - 1)]
+                            ["predictions"].values[0]
+                        ),
                     }
 
-                    phase_kwh_figures[rec["phase"]] = {
-                        "adjusted": {
-                            "heating": new_heating_kwh_adjusted,
-                            "hot_water": new_hot_water_kwh_adjusted
-                        },
-                        "unadjusted": {
-                            "heating": new_heating_kwh,
-                            "hot_water": new_hot_water_kwh
+                    if rec["type"] == "low_energy_lighting":
+                        # Heating and hot water costs shouldn't change
+                        # {'unadjusted_heating_cost': 501.8528134938132, 'unadjusted_hot_water_cost':
+                        # 171.22534405283452, 'unadjusted_lighting_cost': 127.2}
+                        previous_phase_unadjusted_costs = {
+                            "unadjusted_heating_cost": phase_cost["heating_cost"]["predictions"].values[0],
+                            "unadjusted_hot_water_cost": phase_cost["hot_water_cost"]["predictions"].values[0],
+                            "unadjusted_lighting_cost": phase_impact["lighting_cost"][
+                                phase_impact["lighting_cost"]["phase"] == (rec["phase"] - 1)
+                                ]["predictions"].values[0]
                         }
-                    }
+                    else:
+                        # update heating and hot water costs
+                        previous_phase_unadjusted_costs = {
+                            "unadjusted_heating_cost": phase_impact["heating_cost"][
+                                phase_impact["heating_cost"]["phase"] == (rec["phase"] - 1)
+                                ]["predictions"].values[0],
+                            "unadjusted_hot_water_cost": phase_impact["hot_water_cost"][
+                                phase_impact["hot_water_cost"]["phase"] == (rec["phase"] - 1)
+                                ]["predictions"].values[0],
+                            "unadjusted_lighting_cost": phase_cost["lighting_cost"]["predictions"].values[0]
+                        }
+
+                previous_phase_values.update(previous_phase_unadjusted_costs)
+
+                # We extract the values for the current phase
+                current_phase_values = {
+                    "sap": phase_energy_efficiency_metrics["sap_change"],
+                    "carbon": phase_energy_efficiency_metrics["carbon_change"],
+                    "heat_demand": phase_energy_efficiency_metrics["heat_demand"],
+                    "unadjusted_heating_cost": phase_cost["heating_cost"]["predictions"].values[0],
+                    "unadjusted_hot_water_cost": phase_cost["hot_water_cost"]["predictions"].values[0],
+                    "unadjusted_lighting_cost": phase_cost["lighting_cost"]["predictions"].values[0]
+                }
+
+                property_phase_impact = {
+                    # Increasing
+                    "sap": current_phase_values["sap"] - previous_phase_values["sap"],
+                    # Decreasing
+                    "carbon": previous_phase_values["carbon"] - current_phase_values["carbon"],
+                    # Decreasing
+                    "heat_demand": previous_phase_values["heat_demand"] - current_phase_values["heat_demand"],
+                    # Decreasing
+                    "unadjusted_heating_cost": (
+                        previous_phase_values["unadjusted_heating_cost"] -
+                        current_phase_values["unadjusted_heating_cost"]
+                    ),
+                    # Decreasing
+                    "unadjusted_hot_water_cost": (
+                        previous_phase_values["unadjusted_hot_water_cost"] -
+                        current_phase_values["unadjusted_hot_water_cost"]
+                    ),
+                    # Decreasing
+                    "unadjusted_lighting_cost": (
+                        previous_phase_values["unadjusted_lighting_cost"] -
+                        current_phase_values["unadjusted_lighting_cost"]
+                    )
+                }
 
                 # Prevent from being negative
-                predicted_sap_points = 0 if predicted_sap_points < 0 else predicted_sap_points
-                predicted_co2_savings = 0 if predicted_co2_savings < 0 else predicted_co2_savings
-                predicted_heat_demand = 0 if predicted_heat_demand < 0 else predicted_heat_demand
+                for metric in ["sap", "carbon", "heat_demand"]:
+                    property_phase_impact[metric] = (
+                        0 if property_phase_impact[metric] < 0 else property_phase_impact[metric]
+                    )
+                    if metric == "sap":
+                        property_phase_impact[metric] = round(property_phase_impact[metric], 2)
 
+                # For the moment, we cap the number of SAP points that can be achieved by LEDs at 2
                 if rec["type"] == "low_energy_lighting":
-                    # For the moment, we cap the number of SAP points that can be achieved by ventilation at 2
-                    rec["sap_points"] = min(predicted_sap_points, LightingRecommendations.SAP_LIMIT)
-                    rec["co2_equivalent_savings"] = min(predicted_co2_savings, rec["co2_equivalent_savings"])
-                    rec["heat_demand"] = predicted_heat_demand
-                else:
-                    rec["sap_points"] = predicted_sap_points
-                    rec["co2_equivalent_savings"] = predicted_co2_savings
-                    rec["heat_demand"] = predicted_heat_demand
+                    property_phase_impact["sap"] = min(property_phase_impact["sap"], LightingRecommendations.SAP_LIMIT)
+                    property_phase_impact["carbon"] = min(
+                        property_phase_impact["carbon"], rec["co2_equivalent_savings"]
+                    )
 
-                # Round to 2 decimal places
-                rec["sap_points"] = round(rec["sap_points"], 2)
-
-                rec["kwh_savings"] = kwh_reduction
-                rec["energy_cost_savings"] = predicted_bill_savings
-
-                if rec["recommendation_id"] in representative_rec_ids:
-                    bill_savings_list.append(predicted_bill_savings)
-                    kwh_savings_list.append(kwh_reduction)
+                # Insert this information into the recommendation
+                rec["sap_points"] = property_phase_impact["sap"]
+                rec["co2_equivalent_savings"] = property_phase_impact["carbon"]
+                rec["heat_demand"] = property_phase_impact["heat_demand"]
 
                 if (rec["sap_points"] is None) and (rec["co2_equivalent_savings"] is None) or (
-                    rec["heat_demand"] is None) or (rec["energy_cost_savings"] is None):
+                    rec["heat_demand"] is None):
                     raise ValueError("sap points, co2 or heat demand is missing")
 
-        # We sum up the total savings for the property and that is our expected energy bill
+                impact_summary.append(
+                    {
+                        "phase": rec["phase"],
+                        "recommendation_id": rec["recommendation_id"],
+                        **current_phase_values
+                    }
+                )
 
-        expected_energy_bill = property_instance.current_energy_bill - sum(bill_savings_list)
-        expected_adjusted_energy = property_instance.current_adjusted_energy - sum(kwh_savings_list)
-
-        return (
-            property_recommendations,
-            expected_adjusted_energy,
-            expected_energy_bill
-        )
+        return property_recommendations, impact_summary

From 48f21e6edf3e65bf725d78282bf4ebf0fbb81c7e Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 7 Aug 2024 16:25:30 +0100
Subject: [PATCH 068/182] insert phase metrics into the simulation epc

---
 backend/Property.py                | 34 ++++++++++++++++++++++++++++++
 backend/app/plan/router.py         |  5 +++++
 recommendations/Recommendations.py |  3 +++
 3 files changed, 42 insertions(+)

diff --git a/backend/Property.py b/backend/Property.py
index 25068f6c..b040ffee 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -22,6 +22,7 @@ from recommendations.recommendation_utils import (
     estimate_windows,
 )
 from backend.ml_models.AnnualBillSavings import AnnualBillSavings
+from backend.app.utils import sap_to_epc
 
 ENVIRONMENT = os.environ.get("ENVIRONMENT", "dev")
 DATA_BUCKET = os.environ.get(
@@ -442,6 +443,39 @@ class Property:
             simulation_epc.update(phase_epc_transformation)
             self.simulation_epcs[phase] = simulation_epc
 
+    def update_simulation_epcs(self, impact_summary):
+        """
+        This method will insert the high level measures, such as SAP, heat demand, carbon, etc
+        :return:
+        """
+        if self.simulation_epcs is None:
+            raise ValueError("Simulation EPCs have not been created")
+
+        phases = sorted(list(self.simulation_epcs.keys()))
+        updated_simulation_epcs = []
+        for phase in phases:
+            sim_epc = self.simulation_epcs[phase].copy()
+            phase_impact = [x for x in impact_summary if x["phase"] == phase][0]
+            # We update all of the features that should have an impact on the kwh model
+
+            sim_epc.update(
+                {
+                    "heating-cost-current": phase_impact["unadjusted_heating_cost"],
+                    "hot-water-cost-current": phase_impact["unadjusted_hot_water_cost"],
+                    # CO₂ emissions per square metre floor area per year in kg/m². Since CO₂ emissions are in tonnes
+                    # per year, we multiply by 1000 to get kg/m²
+                    "co2-emiss-curr-per-floor-area": round(
+                        1000 * (phase_impact["carbon"] / self.data["total-floor-area"])
+                    ),
+                    "co2-emissions-current": phase_impact["carbon"],
+                    "current-energy-rating": sap_to_epc(phase_impact["sap"]),
+                    "current-energy-efficiency": int(np.floor(phase_impact["sap"])),
+                    "current-energy-cost": phase_impact["unadjusted_energy_cost"],
+                    "energy-consumption-current": phase_impact["heat_demand"],
+                    "lighting-cost-current": phase_impact["unadjusted_lighting_cost"],
+                }
+            )
+
     @staticmethod
     def create_recommendation_scoring_data(
         property_id,
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index fb4ffa14..b2c235d3 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -800,6 +800,7 @@ async def trigger_plan(body: PlanTriggerRequest):
         #       possibility with heating system
         # TODO: After optimising, if there are any cheap, quick win measures (e.g. insulate water tank with hot water
         #      cylinder jacket), we should add these to the recommendations as default
+        raise Exception("Add the cost impacts into the cost model")
         logger.info("Optimising recommendations")
         for property_id in recommendations.keys():
 
@@ -813,6 +814,10 @@ async def trigger_plan(body: PlanTriggerRequest):
                 )
             )
 
+            # We use the impact_summary to update the simulation_epcs with the new SAP, heat demand, carbon, cost etc
+            # at each phase
+            property_instance.update_simulation_epcs(impact_summary)
+
             input_measures = prepare_input_measures(recommendations_with_impact, body.goal)
 
             current_sap_points = int(property_instance.data["current-energy-efficiency"])
diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py
index 0de8931a..c099c8a3 100644
--- a/recommendations/Recommendations.py
+++ b/recommendations/Recommendations.py
@@ -500,6 +500,9 @@ class Recommendations:
                 previous_phase_values.update(previous_phase_unadjusted_costs)
 
                 # We extract the values for the current phase
+                # TODO: For things like lighting costs for heating and hot water recommendations, we should actually
+                #       update phase_cost since the phase cost should be the same as the previous phase
+
                 current_phase_values = {
                     "sap": phase_energy_efficiency_metrics["sap_change"],
                     "carbon": phase_energy_efficiency_metrics["carbon_change"],

From 25c07fdc52f4c5b9b60199566a340ed3cfc5262c Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 7 Aug 2024 17:56:07 +0100
Subject: [PATCH 069/182] updating recommender update

---
 backend/Property.py                |  3 ++
 backend/app/plan/router.py         |  4 ++-
 recommendations/Recommendations.py | 54 +++++++++++++++---------------
 3 files changed, 33 insertions(+), 28 deletions(-)

diff --git a/backend/Property.py b/backend/Property.py
index b040ffee..a5346643 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -475,6 +475,9 @@ class Property:
                     "lighting-cost-current": phase_impact["unadjusted_lighting_cost"],
                 }
             )
+            updated_simulation_epcs.append(sim_epc)
+
+        return updated_simulation_epcs
 
     @staticmethod
     def create_recommendation_scoring_data(
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index b2c235d3..e4759b7d 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -802,6 +802,7 @@ async def trigger_plan(body: PlanTriggerRequest):
         #      cylinder jacket), we should add these to the recommendations as default
         raise Exception("Add the cost impacts into the cost model")
         logger.info("Optimising recommendations")
+        scoring_epcs = []  # For scoring the kwh models
         for property_id in recommendations.keys():
 
             property_instance = [p for p in input_properties if p.id == property_id][0]
@@ -816,7 +817,8 @@ async def trigger_plan(body: PlanTriggerRequest):
 
             # We use the impact_summary to update the simulation_epcs with the new SAP, heat demand, carbon, cost etc
             # at each phase
-            property_instance.update_simulation_epcs(impact_summary)
+            property_scoring_epcs = property_instance.update_simulation_epcs(impact_summary)
+            scoring_epcs.extend(property_scoring_epcs)
 
             input_measures = prepare_input_measures(recommendations_with_impact, body.goal)
 
diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py
index c099c8a3..67d38528 100644
--- a/recommendations/Recommendations.py
+++ b/recommendations/Recommendations.py
@@ -432,32 +432,19 @@ class Recommendations:
                 # We structure this so that depending on the phase, we capture the previous phase impacts and
                 # then just have one piece of code to calculate the difference
                 if rec["phase"] == 0:
+                    # These are just the starting values, from the EPC. When we score the ML models,
+                    # heating_cost_starting and heating_cost_ending are just the values in the EPC. However, with
+                    # heating_cost_ending, we expect that the EPC will predict a heating cost based on what would happen
+                    # if we implemented the recommendation today, so our starting value is the EPC
                     previous_phase_values = {
                         "sap": float(property_instance.data["current-energy-efficiency"]),
                         "carbon": float(property_instance.data["co2-emissions-current"]),
                         "heat_demand": float(property_instance.data["energy-consumption-current"]),
+                        "epc_heating_cost": float(property_instance.data["heating-cost-current"]),
+                        "epc_hot_water_cost": float(property_instance.data["hot-water-cost-current"]),
+                        "epc_lighting_cost": float(property_instance.data["lighting-cost-current"])
                     }
 
-                    if rec["type"] == "low_energy_lighting":
-                        # In this instance, heating cost and hot water cost should not change so we set the previous
-                        # value to the new one, so the difference is zero
-                        previous_phase_unadjusted_costs = {
-                            "unadjusted_heating_cost": phase_cost["heating_cost"]["predictions"].values[0],
-                            "unadjusted_hot_water_cost": phase_cost["hot_water_cost"]["predictions"].values[0],
-                            "unadjusted_lighting_cost": (
-                                property_instance.energy_cost_estimates["unadjusted"]["lighting"]
-                            )
-                        }
-                    else:
-                        # If the recommendaiton is not for low energy lighting, we expect the heating/hot water
-                        # costs to change but not te lighting
-                        previous_phase_unadjusted_costs = {
-                            "unadjusted_heating_cost": property_instance.energy_cost_estimates["adjusted"]["heating"],
-                            "unadjusted_hot_water_cost": (
-                                property_instance.energy_cost_estimates["adjusted"]["hot_water"]
-                            ),
-                            "unadjusted_lighting_cost": phase_cost["lighting_cost"]["predictions"].values[0]
-                        }
                 else:
                     previous_phase_values = {
                         "sap": (
@@ -497,8 +484,6 @@ class Recommendations:
                             "unadjusted_lighting_cost": phase_cost["lighting_cost"]["predictions"].values[0]
                         }
 
-                previous_phase_values.update(previous_phase_unadjusted_costs)
-
                 # We extract the values for the current phase
                 # TODO: For things like lighting costs for heating and hot water recommendations, we should actually
                 #       update phase_cost since the phase cost should be the same as the previous phase
@@ -507,11 +492,24 @@ class Recommendations:
                     "sap": phase_energy_efficiency_metrics["sap_change"],
                     "carbon": phase_energy_efficiency_metrics["carbon_change"],
                     "heat_demand": phase_energy_efficiency_metrics["heat_demand"],
-                    "unadjusted_heating_cost": phase_cost["heating_cost"]["predictions"].values[0],
-                    "unadjusted_hot_water_cost": phase_cost["hot_water_cost"]["predictions"].values[0],
-                    "unadjusted_lighting_cost": phase_cost["lighting_cost"]["predictions"].values[0]
                 }
 
+                static_cost_variables = (
+                    ["epc_heating_cost", "epc_hot_water_cost"] if
+                    rec["type"] == "low_energy_lighting" else ["epc_lighting_cost"]
+                )
+                dynamic_cost_variables = [
+                    v for v in ["epc_heating_cost", "epc_hot_water_cost", "epc_lighting_cost"]
+                    if v not in static_cost_variables
+                ]
+                # Take the static variables from the previous phase
+                current_phase_costs = {k: v for k, v in previous_phase_values.items() if k in static_cost_variables}
+                # Insert the dynamic variables from the current phase
+                for v in dynamic_cost_variables:
+                    current_phase_costs[v] = phase_cost[v.split("epc_")[1]]["adjusted_cost"].values[0]
+
+                current_phase_values.update(current_phase_costs)
+
                 property_phase_impact = {
                     # Increasing
                     "sap": current_phase_values["sap"] - previous_phase_values["sap"],
@@ -556,8 +554,10 @@ class Recommendations:
                 rec["co2_equivalent_savings"] = property_phase_impact["carbon"]
                 rec["heat_demand"] = property_phase_impact["heat_demand"]
 
-                if (rec["sap_points"] is None) and (rec["co2_equivalent_savings"] is None) or (
-                    rec["heat_demand"] is None):
+                if (
+                    (rec["sap_points"] is None) and (rec["co2_equivalent_savings"] is None) or
+                    (rec["heat_demand"] is None)
+                ):
                     raise ValueError("sap points, co2 or heat demand is missing")
 
                 impact_summary.append(

From 9a62184ab5adb8705a0382d0e2280a1851f2c3b0 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 7 Aug 2024 18:52:46 +0100
Subject: [PATCH 070/182] updating the simulation epcs

---
 backend/Property.py                |   8 +--
 backend/app/plan/router.py         |   3 +
 recommendations/Recommendations.py | 100 +++++++++++++----------------
 3 files changed, 52 insertions(+), 59 deletions(-)

diff --git a/backend/Property.py b/backend/Property.py
index a5346643..d1858abe 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -460,8 +460,8 @@ class Property:
 
             sim_epc.update(
                 {
-                    "heating-cost-current": phase_impact["unadjusted_heating_cost"],
-                    "hot-water-cost-current": phase_impact["unadjusted_hot_water_cost"],
+                    "heating-cost-current": phase_impact["epc_heating_cost"],
+                    "hot-water-cost-current": phase_impact["epc_hot_water_cost"],
                     # CO₂ emissions per square metre floor area per year in kg/m². Since CO₂ emissions are in tonnes
                     # per year, we multiply by 1000 to get kg/m²
                     "co2-emiss-curr-per-floor-area": round(
@@ -470,9 +470,9 @@ class Property:
                     "co2-emissions-current": phase_impact["carbon"],
                     "current-energy-rating": sap_to_epc(phase_impact["sap"]),
                     "current-energy-efficiency": int(np.floor(phase_impact["sap"])),
-                    "current-energy-cost": phase_impact["unadjusted_energy_cost"],
                     "energy-consumption-current": phase_impact["heat_demand"],
-                    "lighting-cost-current": phase_impact["unadjusted_lighting_cost"],
+                    "lighting-cost-current": phase_impact["epc_lighting_cost"],
+                    "phase": phase
                 }
             )
             updated_simulation_epcs.append(sim_epc)
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index e4759b7d..95ea7d92 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -872,6 +872,9 @@ async def trigger_plan(body: PlanTriggerRequest):
             ]
             recommendations[property_id] = final_recommendations
 
+        # We call the API with the scoring epcs
+        scoring_epcs = pd.DataFrame(scoring_epcs)
+
         # 1) the property data
         # 2) the property details (epc)
         # 3) the recommendations
diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py
index 67d38528..9d709639 100644
--- a/recommendations/Recommendations.py
+++ b/recommendations/Recommendations.py
@@ -1,4 +1,5 @@
 import pandas as pd
+import numpy as np
 from backend.Property import Property
 from typing import List
 from itertools import groupby
@@ -395,14 +396,6 @@ class Recommendations:
 
         property_recommendations = recommendations[property_instance.id].copy()
 
-        # We calculate the impact by phase
-        phase_impact = {
-            prefix: property_predictions[prefix + "_predictions"].groupby("phase")["predictions"].median().reset_index()
-            for prefix in [
-                "sap_change", "heat_demand", "carbon_change", "lighting_cost", "heating_cost", "hot_water_cost"
-            ]
-        }
-
         # TODO: should fabric upgrades have an impact on hot water costs/kwh?
         # TODO: Generally, the costing models are just increasing. Maybe they're including something in the model
         #       that they shouldn't e.g. SAP, carbon, heat demand etc?
@@ -446,48 +439,24 @@ class Recommendations:
                     }
 
                 else:
-                    previous_phase_values = {
-                        "sap": (
-                            phase_impact["sap_change"][phase_impact["sap_change"]["phase"] == (rec["phase"] - 1)]
-                            ["predictions"].values[0]
-                        ),
-                        "carbon": (
-                            phase_impact["carbon_change"][phase_impact["carbon_change"]["phase"] == (rec["phase"] - 1)]
-                            ["predictions"].values[0]
-                        ),
-                        "heat_demand": (
-                            phase_impact["heat_demand"][phase_impact["heat_demand"]["phase"] == (rec["phase"] - 1)]
-                            ["predictions"].values[0]
-                        ),
-                    }
 
-                    if rec["type"] == "low_energy_lighting":
-                        # Heating and hot water costs shouldn't change
-                        # {'unadjusted_heating_cost': 501.8528134938132, 'unadjusted_hot_water_cost':
-                        # 171.22534405283452, 'unadjusted_lighting_cost': 127.2}
-                        previous_phase_unadjusted_costs = {
-                            "unadjusted_heating_cost": phase_cost["heating_cost"]["predictions"].values[0],
-                            "unadjusted_hot_water_cost": phase_cost["hot_water_cost"]["predictions"].values[0],
-                            "unadjusted_lighting_cost": phase_impact["lighting_cost"][
-                                phase_impact["lighting_cost"]["phase"] == (rec["phase"] - 1)
-                                ]["predictions"].values[0]
-                        }
+                    previous_phase_values_multiple = [x for x in impact_summary if x["phase"] == (rec["phase"] - 1)]
+                    if len(previous_phase_values_multiple) != 1:
+                        # Take an average of each of the previous phases
+                        keys_to_median = [
+                            "sap", "carbon", "heat_demand", "epc_heating_cost", "epc_hot_water_cost",
+                            "epc_lighting_cost"
+                        ]
+
+                        previous_phase_values = {}
+                        for key in keys_to_median:
+                            values = [item[key] for item in previous_phase_values_multiple]
+                            previous_phase_values[key] = np.median(values)
+
                     else:
-                        # update heating and hot water costs
-                        previous_phase_unadjusted_costs = {
-                            "unadjusted_heating_cost": phase_impact["heating_cost"][
-                                phase_impact["heating_cost"]["phase"] == (rec["phase"] - 1)
-                                ]["predictions"].values[0],
-                            "unadjusted_hot_water_cost": phase_impact["hot_water_cost"][
-                                phase_impact["hot_water_cost"]["phase"] == (rec["phase"] - 1)
-                                ]["predictions"].values[0],
-                            "unadjusted_lighting_cost": phase_cost["lighting_cost"]["predictions"].values[0]
-                        }
+                        previous_phase_values = previous_phase_values_multiple[0]
 
                 # We extract the values for the current phase
-                # TODO: For things like lighting costs for heating and hot water recommendations, we should actually
-                #       update phase_cost since the phase cost should be the same as the previous phase
-
                 current_phase_values = {
                     "sap": phase_energy_efficiency_metrics["sap_change"],
                     "carbon": phase_energy_efficiency_metrics["carbon_change"],
@@ -510,6 +479,27 @@ class Recommendations:
 
                 current_phase_values.update(current_phase_costs)
 
+                increasing_variables = ["sap"]
+                decreasing_variables = [
+                    "carbon", "heat_demand", "epc_heating_cost", "epc_hot_water_cost", "epc_lighting_cost"
+                ]
+                # For increasing variables, the new value needs to be higher than the previous, otherwise we set it to
+                # the previous
+                # For decreasing variables, the new value should be lower than the previous, otherwise we set it to
+                # the previous
+                # In either case, we adjudge the recommendation to have had no/negligible impact
+                for v in increasing_variables:
+                    current_phase_values[v] = (
+                        current_phase_values[v] if current_phase_values[v] > previous_phase_values[v] else
+                        previous_phase_values[v]
+                    )
+                for v in previous_phase_values:
+                    if v in decreasing_variables:
+                        current_phase_values[v] = (
+                            current_phase_values[v] if current_phase_values[v] < previous_phase_values[v] else
+                            previous_phase_values[v]
+                        )
+
                 property_phase_impact = {
                     # Increasing
                     "sap": current_phase_values["sap"] - previous_phase_values["sap"],
@@ -518,19 +508,19 @@ class Recommendations:
                     # Decreasing
                     "heat_demand": previous_phase_values["heat_demand"] - current_phase_values["heat_demand"],
                     # Decreasing
-                    "unadjusted_heating_cost": (
-                        previous_phase_values["unadjusted_heating_cost"] -
-                        current_phase_values["unadjusted_heating_cost"]
+                    "epc_heating_cost": (
+                        previous_phase_values["epc_heating_cost"] -
+                        current_phase_values["epc_heating_cost"]
                     ),
                     # Decreasing
-                    "unadjusted_hot_water_cost": (
-                        previous_phase_values["unadjusted_hot_water_cost"] -
-                        current_phase_values["unadjusted_hot_water_cost"]
+                    "epc_hot_water_cost": (
+                        previous_phase_values["epc_hot_water_cost"] -
+                        current_phase_values["epc_hot_water_cost"]
                     ),
                     # Decreasing
-                    "unadjusted_lighting_cost": (
-                        previous_phase_values["unadjusted_lighting_cost"] -
-                        current_phase_values["unadjusted_lighting_cost"]
+                    "epc_lighting_cost": (
+                        previous_phase_values["epc_lighting_cost"] -
+                        current_phase_values["epc_lighting_cost"]
                     )
                 }
 

From a2834a180e317d87ed1aabe38caacb120005eba6 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 7 Aug 2024 19:07:08 +0100
Subject: [PATCH 071/182] refactoring simulation_epcs

---
 backend/Property.py | 97 ++++++++++++++++++---------------------------
 1 file changed, 38 insertions(+), 59 deletions(-)

diff --git a/backend/Property.py b/backend/Property.py
index d1858abe..414d0831 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -376,72 +376,51 @@ class Property:
                 )
                 self.recommendations_scoring_data.append(scoring_dict)
 
-            # We also use the representative recommendations to produce transformed EPCs
-            represenative_recs_to_this_phase = [
-                r for r in property_representative_recommendations
-                if r["phase"] <= phase
-            ]
+                simulation_epc = self.epc_record.prepared_epc.copy()
+                # Insert static values
+                simulation_epc["lodgement_date"] = simulation_lodgment_date
+                simulation_epc = {k.replace("_", "-"): v for k, v in simulation_epc.items()}
 
-            # TODO: This is placeholder, but it's to handle the case of having both internal and external wall
-            #       insulation as options. This will cause the process below to fall over, so we take just
-            #       external wall insulation in epc_transformations, if we have both
-            types = [
-                x["type"] for x in represenative_recs_to_this_phase
-            ]
-            if "external_wall_insulation" in types and "internal_wall_insulation" in types:
+                types = [x["type"] for x in previous_phase_representatives]
+                if "external_wall_insulation" in types and "internal_wall_insulation" in types:
+                    raise Exception("We shouldn't have this in the representative recommendations")
                 epc_transformations = [
-                    x["description_simulation"] for x in represenative_recs_to_this_phase if
-                    x["type"] != "internal_wall_insulation"
-                ]
-            else:
-                epc_transformations = [
-                    x["description_simulation"] for x in represenative_recs_to_this_phase
+                    x["description_simulation"] for x in previous_phase_representatives
                 ]
 
-            # It is possible that we could have two simulations applied to the same descriptions
-            # We extract these out
-            phase_epc_transformation = {}
-            for config in epc_transformations:
-                for k, v in config.items():
-                    if k in phase_epc_transformation:
-                        if "-energy-eff" in k:
-                            # We take the highest value
-                            if phase_epc_transformation[k] == "Very Good":
+                # It is possible that we could have two simulations applied to the same descriptions
+                # We extract these out
+                phase_epc_transformation = {}
+                for config in epc_transformations:
+                    for k, v in config.items():
+                        if k in phase_epc_transformation:
+                            if "-energy-eff" in k:
+                                # We take the highest value
+                                if phase_epc_transformation[k] == "Very Good":
+                                    continue
+                                elif phase_epc_transformation[k] == "Good":
+                                    if v == "Very Good":
+                                        phase_epc_transformation[k] = v
+                                elif phase_epc_transformation[k] == "Average":
+                                    if v in ["Good", "Very Good"]:
+                                        phase_epc_transformation[k] = v
+                                elif phase_epc_transformation[k] == "Poor":
+                                    if v in ["Average", "Good", "Very Good"]:
+                                        phase_epc_transformation[k] = v
+                                else:
+                                    phase_epc_transformation[k] = v
+
                                 continue
-                            elif phase_epc_transformation[k] == "Good":
-                                if v == "Very Good":
-                                    phase_epc_transformation[k] = v
-                            elif phase_epc_transformation[k] == "Average":
-                                if v in ["Good", "Very Good"]:
-                                    phase_epc_transformation[k] = v
-                            elif phase_epc_transformation[k] == "Poor":
-                                if v in ["Average", "Good", "Very Good"]:
-                                    phase_epc_transformation[k] = v
-                            else:
-                                phase_epc_transformation[k] = v
 
-                            continue
+                            if phase_epc_transformation[k] == v:
+                                continue
 
-                        if phase_epc_transformation[k] == v:
-                            continue
-
-                        raise NotImplementedError(
-                            "Already have this key in the phase_epc_transformation - implement me"
-                        )
-                    phase_epc_transformation[k] = v
-
-            simulation_epc = self.epc_record.prepared_epc.copy()
-            # Insert static values
-            simulation_epc["lodgement_date"] = simulation_lodgment_date
-
-            # Replace the understores with hyphens
-            simulation_epc = {k.replace("_", "-"): v for k, v in simulation_epc.items()}
-            # Add in today's costs (unadjusted
-            simulation_epc["heating-cost-current"] = int(self.energy_cost_estimates["unadjusted"]["heating"])
-            simulation_epc["hot-water-cost-current"] = int(self.energy_cost_estimates["unadjusted"]["hot_water"])
-            simulation_epc["lighting-cost-current"] = int(self.energy_cost_estimates["unadjusted"]["lighting"])
-            simulation_epc.update(phase_epc_transformation)
-            self.simulation_epcs[phase] = simulation_epc
+                            raise NotImplementedError(
+                                "Already have this key in the phase_epc_transformation - implement me"
+                            )
+                        phase_epc_transformation[k] = v
+                simulation_epc.update(phase_epc_transformation)
+                self.simulation_epcs[rec["recommendation_id"]] = simulation_epc
 
     def update_simulation_epcs(self, impact_summary):
         """

From c77f642861f214c44a378ff46a21bb652c4cf26d Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 8 Aug 2024 11:10:21 +0100
Subject: [PATCH 072/182] inserting the kwh savings into the recommendation

---
 backend/Property.py        | 26 +++++++-------
 backend/app/plan/router.py | 74 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 87 insertions(+), 13 deletions(-)

diff --git a/backend/Property.py b/backend/Property.py
index 414d0831..d66db529 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -430,28 +430,28 @@ class Property:
         if self.simulation_epcs is None:
             raise ValueError("Simulation EPCs have not been created")
 
-        phases = sorted(list(self.simulation_epcs.keys()))
+        rec_ids = sorted(list(self.simulation_epcs.keys()))
         updated_simulation_epcs = []
-        for phase in phases:
-            sim_epc = self.simulation_epcs[phase].copy()
-            phase_impact = [x for x in impact_summary if x["phase"] == phase][0]
+        for rec_id in rec_ids:
+            sim_epc = self.simulation_epcs[rec_id].copy()
+            rec_impact = [x for x in impact_summary if x["recommendation_id"] == rec_id][0]
             # We update all of the features that should have an impact on the kwh model
 
             sim_epc.update(
                 {
-                    "heating-cost-current": phase_impact["epc_heating_cost"],
-                    "hot-water-cost-current": phase_impact["epc_hot_water_cost"],
+                    "heating-cost-current": rec_impact["epc_heating_cost"],
+                    "hot-water-cost-current": rec_impact["epc_hot_water_cost"],
                     # CO₂ emissions per square metre floor area per year in kg/m². Since CO₂ emissions are in tonnes
                     # per year, we multiply by 1000 to get kg/m²
                     "co2-emiss-curr-per-floor-area": round(
-                        1000 * (phase_impact["carbon"] / self.data["total-floor-area"])
+                        1000 * (rec_impact["carbon"] / self.data["total-floor-area"])
                     ),
-                    "co2-emissions-current": phase_impact["carbon"],
-                    "current-energy-rating": sap_to_epc(phase_impact["sap"]),
-                    "current-energy-efficiency": int(np.floor(phase_impact["sap"])),
-                    "energy-consumption-current": phase_impact["heat_demand"],
-                    "lighting-cost-current": phase_impact["epc_lighting_cost"],
-                    "phase": phase
+                    "co2-emissions-current": rec_impact["carbon"],
+                    "current-energy-rating": sap_to_epc(rec_impact["sap"]),
+                    "current-energy-efficiency": int(np.floor(rec_impact["sap"])),
+                    "energy-consumption-current": rec_impact["heat_demand"],
+                    "lighting-cost-current": rec_impact["epc_lighting_cost"],
+                    "id": "+".join([str(self.id), rec_id])
                 }
             )
             updated_simulation_epcs.append(sim_epc)
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 95ea7d92..47478b3c 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -874,6 +874,80 @@ async def trigger_plan(body: PlanTriggerRequest):
 
         # We call the API with the scoring epcs
         scoring_epcs = pd.DataFrame(scoring_epcs)
+        scoring_epcs = add_features_from_code(scoring_epcs)
+        scoring_epcs = add_estimate_annual_kwh(scoring_epcs)
+        # TODO: Drop all potential and env columns
+        kwh_simulation_predictions = model_api.predict_all(
+            df=scoring_epcs,
+            bucket=get_settings().DATA_BUCKET,
+            prediction_buckets=get_prediction_buckets(),
+            model_prefixes=["heating_kwh_predictions", "hotwater_kwh_predictions"],
+            extract_ids=True
+        )
+
+        # We now insert into the recommendations
+        for property_id in recommendations.keys():
+            property_recommendations = recommendations[property_id]
+            property_instance = [p for p in input_properties if p.id == property_id][0]
+            # The predicted kwhs are without appliances
+            consumption = property_instance.energy_consumption_estimates["adjusted"]
+            # Starting consumption is the sum of the consumption values, without appliances
+            starting_heating = consumption["heating"]
+            starting_hotwater = consumption["hot_water"]
+            property_kwh_predictions = {
+                k: kwh_simulation_predictions[k][kwh_simulation_predictions[k]["property_id"] == str(property_id)]
+                for k in ['heating_kwh_predictions', 'hotwater_kwh_predictions']
+            }
+            # We adjust the predictions
+            from backend.ml_models.AnnualBillSavings import AnnualBillSavings
+            for k in ["heating_kwh_predictions", "hotwater_kwh_predictions"]:
+                property_kwh_predictions[k]["adjusted"] = property_kwh_predictions[k]["predictions"].apply(
+                    lambda x: AnnualBillSavings.adjust_energy_to_metered(
+                        epc_energy=x, current_epc_rating=property_instance.data["current-energy-rating"]
+                    )
+                )
+
+            # For each recommendation, we difference the predictions
+            property_kwh_predictions["heating_kwh_predictions"]["savings"] = np.diff(
+                property_kwh_predictions["heating_kwh_predictions"]["adjusted"], prepend=starting_heating
+            )
+            property_kwh_predictions["hotwater_kwh_predictions"]["savings"] = np.diff(
+                property_kwh_predictions["hotwater_kwh_predictions"]["adjusted"], prepend=starting_hotwater
+            )
+
+            for recommendations_by_type in property_recommendations:
+                for rec in recommendations_by_type:
+                    # In the case of mechanical ventilation, there is no impact, and for low energy lighting we
+                    # calculate the savings inside of the recommendation itself
+                    if rec["type"] in ["mechanical_ventilation", "low_energy_lighing"]:
+                        continue
+
+                    heating_kwh_savings = property_kwh_predictions["heating_kwh_predictions"][
+                        (
+                            property_kwh_predictions["heating_kwh_predictions"]["recommendation_id"] ==
+                            rec["recommendation_id"]
+                        )
+                    ]["savings"].values[0]
+                    # This should be negative
+                    if heating_kwh_savings > 0:
+                        print("Positive heating kwh savings")
+                        # TODO: Raise an exception to investigate
+                        # raise Exception("Positive heating kwh savings")
+
+                    hot_water_kwh_savings = property_kwh_predictions["hotwater_kwh_predictions"][
+                        (
+                            property_kwh_predictions["hotwater_kwh_predictions"]["recommendation_id"] ==
+                            rec["recommendation_id"]
+                        )
+                    ]["savings"].values[0]
+
+                    # This should be negative
+                    if hot_water_kwh_savings > 0:
+                        print("Positive hot water kwh savings")
+                        # TODO: Raise an exception to investigate
+                        # raise Exception("Positive hot water kwh savings")
+
+                    rec["kwh_savings"] = abs(heating_kwh_savings + hot_water_kwh_savings)
 
         # 1) the property data
         # 2) the property details (epc)

From 58374e7a6d912fc6ef747084c5713ea763108e5a Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 8 Aug 2024 20:16:05 +0100
Subject: [PATCH 073/182] scrappy testing

---
 backend/Property.py                       |  30 +---
 backend/app/plan/router.py                |   7 +-
 etl/bill_savings/data_collection.py       |   1 -
 etl/testing_data/bills_model_testing.py   | 205 ++++++++++++++++++++++
 etl/xml_survey_extraction/app.py          |   1 +
 recommendations/SolarPvRecommendations.py |   4 +-
 6 files changed, 217 insertions(+), 31 deletions(-)

diff --git a/backend/Property.py b/backend/Property.py
index d66db529..45c7b3e5 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -721,13 +721,6 @@ class Property:
                 ]["predictions"].values[0]
         )
 
-        # heating_prediction = (
-        #     float(condition_data["space_heating_kwh"]) if condition_data.get("space_heating_kwh") is not None
-        #     else energy_consumption_client.score_new_data(
-        #         new_data=scoring_df, target="heating_kwh"
-        #     )[0]
-        # )
-
         hot_water_prediction = (
             condition_data.get("water_heating_kwh") if condition_data.get("water_heating_kwh") is not None else
             hotwater_kwh_predictions[
@@ -735,23 +728,16 @@ class Property:
                 ]["predictions"].values[0]
         )
 
-        # hot_water_prediction = (
-        #     float(condition_data["water_heating_kwh"]) if condition_data.get("water_heating_kwh") is not None
-        #     else energy_consumption_client.score_new_data(
-        #         new_data=scoring_df, target="hot_water_kwh"
-        #     )[0]
-        # )
-
         # We convert the lighting cost into kwh, just using the price cap
         lighting_kwh = todays_lighting_cost / AnnualBillSavings.ELECTRICITY_PRICE_CAP
 
         appliances_kwh = AnnualBillSavings.estimate_appliances_energy_use(total_floor_area=self.floor_area)
 
         unadjusted_kwh_estimates = {
-            "heating": heating_prediction,
-            "hot_water": hot_water_prediction,
-            "lighting": lighting_kwh,
-            "appliances": appliances_kwh
+            "heating": float(heating_prediction),
+            "hot_water": float(hot_water_prediction),
+            "lighting": float(lighting_kwh),
+            "appliances": float(appliances_kwh)
         }
 
         adjusted_kwh_estimates = {
@@ -762,10 +748,10 @@ class Property:
         }
 
         unadjusted_heating_costs = {
-            "heating": todays_heating_cost,
-            "hot_water": todays_hot_water_cost,
-            "lighting": todays_lighting_cost,
-            "appliances": appliances_kwh * AnnualBillSavings.ELECTRICITY_PRICE_CAP
+            "heating": float(todays_heating_cost),
+            "hot_water": float(todays_hot_water_cost),
+            "lighting": float(todays_lighting_cost),
+            "appliances": float(appliances_kwh) * AnnualBillSavings.ELECTRICITY_PRICE_CAP
         }
 
         adjusted_heating_costs = {
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 47478b3c..05f8f88f 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -326,7 +326,6 @@ async def trigger_plan(body: PlanTriggerRequest):
 
         input_properties = []
         for config in tqdm(plan_input):
-
             # We validate each record in the file. If the record is NOT valid, we need to handle this accordingly
             uprn = config.get("uprn", None)
             if uprn:
@@ -782,7 +781,7 @@ async def trigger_plan(body: PlanTriggerRequest):
             predictions_dict = model_api.predict_all(
                 df=recommendations_scoring_data.iloc[chunk:chunk + SCORING_BATCH_SIZE],
                 bucket=get_settings().DATA_BUCKET,
-                prediction_buckets=get_prediction_buckets()
+                prediction_buckets=get_prediction_buckets(),
             )
 
             # Append the predictions to the predictions dictionary
@@ -791,10 +790,6 @@ async def trigger_plan(body: PlanTriggerRequest):
 
         # We now produce predictions for the kwh models
 
-        # TODO!!!!! In order to score the kwh models, we need to insert the new SAP, heat demand, carbon, cost
-        #        etc values, into the simulated EPC, otherwise it won't work. We might also want to drop all potential
-        #       columns and env-efficiency columns (POTENTIAL COLUMNS ALREADY GONE, JUST NEED TO DROP ENV EFFICIENCY)
-
         # Insert the predictions into the recommendations and run the optimiser
         # TODO: If a recommendation has a negative impact on SAP, we should remove it - this seems to have become a
         #       possibility with heating system
diff --git a/etl/bill_savings/data_collection.py b/etl/bill_savings/data_collection.py
index 15a52663..df95f8e2 100644
--- a/etl/bill_savings/data_collection.py
+++ b/etl/bill_savings/data_collection.py
@@ -131,7 +131,6 @@ def app():
     sample_size = 500
 
     energy_consumption_data = []
-    cavity_walls_data = []
     for i, directory in tqdm(enumerate(epc_directories), total=len(epc_directories)):
 
         # Skip the first 50
diff --git a/etl/testing_data/bills_model_testing.py b/etl/testing_data/bills_model_testing.py
index 0c9bb06d..c10bbd8a 100644
--- a/etl/testing_data/bills_model_testing.py
+++ b/etl/testing_data/bills_model_testing.py
@@ -58,3 +58,208 @@ def app():
         "budget": None,
     }
     print(body)
+
+
+# This is some temp code, which is for diagnosing the issues with the bills models
+heating_training_data_filepath = "sap_change_model/2024-08-06-11-19-49/dataset_rooms.parquet"
+
+# For the heating model:
+heating_drop_columns = [
+    "sap_ending", "heat_demand_change", "carbon_change", "rdsap_change", "heat_demand_ending", "carbon_ending",
+    "lighting_cost_ending", "hot_water_cost_ending",
+    # "days_to_ending", "days_to_starting",  # TODO This is in the live version
+    'number_habitable_rooms_starting', 'number_habitable_rooms_ending', 'number_heated_rooms_starting',
+    'number_heated_rooms_ending',
+    'number_habitable_rooms', 'number_heated_rooms'
+]
+
+heating_response = "heating_cost_ending"
+
+# for the hot water model (older dataset)
+hot_water_training_data_filepath = "sap_change_model/2024-07-10-20-28-54/dataset_rooms.parquet"
+
+hot_water_drop_columns = [
+    "sap_ending", "heat_demand_change", "carbon_change", "rdsap_change", "heat_demand_ending", "carbon_ending",
+    "lighting_cost_ending", "heating_cost_ending",
+    "days_to_starting", "days_to_ending",
+    'number_habitable_rooms_starting', 'number_habitable_rooms_ending', 'number_heated_rooms_starting',
+    'number_heated_rooms_ending',
+    'number_habitable_rooms', 'number_heated_rooms'
+]
+
+# Diagnose heating
+from utils.s3 import read_dataframe_from_s3_parquet
+
+train = read_dataframe_from_s3_parquet(
+    bucket_name="retrofit-data-dev",
+    file_key=heating_training_data_filepath
+)
+
+# Drop the columns that aren't used
+train = train.drop(columns=heating_drop_columns)
+
+# if the value is postive, it means the ending cost is bigger than the starting (which means it got more expensive)
+train["cost_diference"] = (train["heating_cost_ending"] - train["heating_cost_starting"])
+change_direction = train["cost_diference"] > 0
+change_direction.value_counts(normalize=True)
+
+average_costs_by_time_starting = train.groupby(
+    ["lodgement_year_starting", "lodgement_month_starting"]
+)["heating_cost_starting"].mean().reset_index().sort_values(["lodgement_year_starting", "lodgement_month_starting"])
+
+average_costs_by_time_ending = train.groupby(
+    ["lodgement_year_ending", "lodgement_month_ending"]
+)["heating_cost_ending"].mean().reset_index().sort_values(["lodgement_year_ending", "lodgement_month_ending"])
+
+# Check by photo supply values - if the property is gas, solar panels won't have an affect on the heating or hot
+# water so let's look for electric homes
+# Across the entire dataset, there is no correlation
+# Even for electric properties, there is no correlation
+photo_supply_averages = train[
+    train["fuel_type_ending"] == "electricity"
+    ].groupby(["photo_supply_ending"])["heating_cost_ending"].mean().reset_index()
+
+photo_supply_to_size = train.groupby("photo_supply_ending")["total_floor_area_ending"].mean().reset_index()
+photo_supply_to_size[["photo_supply_ending", "total_floor_area_ending"]].corr()
+train[["total_floor_area_ending", "heating_cost_ending"]].corr()
+# Bigger properties end up with smaller photo_supply values. This will be because the array size likely remains fairly
+# consistent but takes up a smaller proportion of the roof. Typically, the bigger the floor area, the higher the heating
+# costs, but bigger units also have smaller photo_supply
+adding_solar = train[
+    (train["photo_supply_ending"] > 0) & (train["photo_supply_starting"] == 0)
+    ]
+is_positive = (adding_solar["cost_diference"] > 0)
+is_positive.value_counts(normalize=True)
+
+photo_supply_by_time = (
+    train[
+        train["fuel_type_ending"] == "electricity"
+        ].groupby(
+        ["lodgement_year_ending", "photo_supply_ending"]
+    )["heating_cost_ending"].mean().reset_index().sort_values(
+        ["lodgement_year_ending", "photo_supply_ending"], ascending=True)
+)
+# Plot
+photo_supply_by_time[["photo_supply_ending", "heating_cost_ending"]].corr()
+photo_supply_by_time.plot()
+
+# Observations
+# 1) We retain all of the potential columns, however they are just based on the starting EPC
+# 2) 21% of the the time, the ending heating cost is more than the starting but this is clearly a minority
+# 3) Let's get ride of estimated perimeter starting and ending
+
+# Things I should check
+# 1) Do we updated the lodgment_year_ending and lodgement_month_ending
+# 2) Should we adjust costs to now, as well as lodgement_dates to today? Since 2023, costs have increased a lot so
+#    any savings should be benchmarked against what a customer is paying now
+# 3) It might make sense to create a feature between floor area and photo supply, to give a more consistent estimate
+#    of a panel size for the property
+
+# Get an example and score with the models
+example = train[
+    (train["photo_supply_starting"] == 0) &
+    (train["photo_supply_ending"] > 0) &
+    (train["heating_cost_starting"] > train["heating_cost_ending"])
+    ].sample(1)
+
+# example["lodgement_month_starting"]
+# example["lodgement_year_starting"]
+# example["lodgement_month_ending"]
+# example["lodgement_year_ending"].values[0]
+#
+# example["lodgement_year_ending"] = 2023
+# example["days_to_ending"] = 3500
+# example["days_to_starting"]
+
+# {'heating_cost_predictions':    predictions
+# 0        378.5}
+resp = model_api.predict_all(
+    df=example,
+    bucket="retrofit-data-dev",
+    prediction_buckets=get_prediction_buckets(),
+    model_prefixes=["heating_cost_predictions"],
+    extract_ids=False
+)
+
+# Step 1: get a cost for today
+p.create_base_difference_epc_record(cleaned)
+cwi_impact = p.base_difference_record.df.copy()
+for k in property_recommendations[0][0]["simulation_config"]:
+    cwi_impact[k] = property_recommendations[0][0]["simulation_config"][k]
+
+# 2212.4 - Baseline
+today = model_api.predict_all(
+    df=p.base_difference_record.df.copy(),
+    bucket="retrofit-data-dev",
+    prediction_buckets=get_prediction_buckets(),
+    model_prefixes=["heating_cost_predictions"],
+    extract_ids=False
+)
+
+# impact of CWI - 1908
+cwi_response = model_api.predict_all(
+    df=cwi_impact,
+    bucket="retrofit-data-dev",
+    prediction_buckets=get_prediction_buckets(),
+    model_prefixes=["heating_cost_predictions"],
+    extract_ids=False
+)
+
+pv_impact = cwi_impact.copy()
+pv_impact["photo_supply_ending"] = 50
+pv_impact["heating_cost_starting"] = 2212.4
+
+pv_response = model_api.predict_all(
+    df=pv_impact,
+    bucket="retrofit-data-dev",
+    prediction_buckets=get_prediction_buckets(),
+    model_prefixes=["heating_cost_predictions"],
+    extract_ids=False
+)
+
+# Testing kwh for vde
+base_prediction = model_api.predict_all(
+    df=epcs_for_scoring,
+    bucket=get_settings().DATA_BUCKET,
+    prediction_buckets=get_prediction_buckets(),
+    model_prefixes=["heating_kwh_predictions"],
+    extract_ids=False
+)
+
+cwi_epc = epcs_for_scoring.copy()
+cwi_epc["walls-description"] = "Cavity wall, filled cavity"
+cwi_epc["walls-energy-eff"] = "Good"
+cwi_epc["heating-cost-current"] = 1650
+cwi_epc["current-energy-efficiency"] = 72
+cwi_epc["current-energy-rating"] = "C"
+cwi_epc["co2-emissions-current"] = 3.7
+cwi_epc["energy-consumption-current"] = 121
+cwi_epc["co2-emiss-curr-per-floor-area"] = 19
+cwi_epc["photo-supply"] = 0
+# cwi_epc["energy-consumption-current"] =
+# cwi_epc["roof-description"] = "Pitched, 300 mm loft insulation"
+# cwi_epc["roof-energy-eff"] = "Very Good"
+# cwi_epc["heating-cost-current"] = 1264
+
+# "heating-cost-current": rec_impact["epc_heating_cost"],
+#                     "hot-water-cost-current": rec_impact["epc_hot_water_cost"],
+#                     # CO₂ emissions per square metre floor area per year in kg/m². Since CO₂ emissions are in tonnes
+#                     # per year, we multiply by 1000 to get kg/m²
+#                     "co2-emiss-curr-per-floor-area": round(
+#                         1000 * (rec_impact["carbon"] / self.data["total-floor-area"])
+#                     ),
+#                     "co2-emissions-current": rec_impact["carbon"],
+#                     "current-energy-rating": sap_to_epc(rec_impact["sap"]),
+#                     "current-energy-efficiency": int(np.floor(rec_impact["sap"])),
+#                     "energy-consumption-current": rec_impact["heat_demand"],
+#                     "lighting-cost-current": rec_impact["epc_lighting_cost"],
+#                     "id": "+".join([str(self.id), rec_id])
+
+cwi_prediction = model_api.predict_all(
+    df=cwi_epc,
+    bucket=get_settings().DATA_BUCKET,
+    prediction_buckets=get_prediction_buckets(),
+    model_prefixes=["heating_kwh_predictions"],
+    extract_ids=False
+)
+2344 - 2060
diff --git a/etl/xml_survey_extraction/app.py b/etl/xml_survey_extraction/app.py
index 92451d76..f5394abf 100644
--- a/etl/xml_survey_extraction/app.py
+++ b/etl/xml_survey_extraction/app.py
@@ -166,6 +166,7 @@ def main():
         # For each property, we download the xmls and extract the data
         database_data = []
         for uprn, xmls in assessments_map.items():
+            
             extracted_data = {}
             for xml in xmls:
                 xml_data = read_from_s3(bucket_name=BUCKET, s3_file_name=xml)
diff --git a/recommendations/SolarPvRecommendations.py b/recommendations/SolarPvRecommendations.py
index 3e7ede28..9456519a 100644
--- a/recommendations/SolarPvRecommendations.py
+++ b/recommendations/SolarPvRecommendations.py
@@ -160,7 +160,7 @@ class SolarPvRecommendations:
         if not non_invasive_recommendation["suitable"]:
             return
 
-        if non_invasive_recommendation:
+        if non_invasive_recommendation.get("array_wattage") is not None:
 
             roof_area = esimtate_pitched_roof_area(
                 floor_area=self.property.insulation_floor_area, floor_height=self.property.data["floor-height"]
@@ -186,7 +186,7 @@ class SolarPvRecommendations:
                 cost_result = self.costs.solar_pv(
                     wattage=recommendation_config["array_wattage"],
                     has_battery=has_battery,
-                    array_cost=non_invasive_recommendation["cost"] if non_invasive_recommendation else None
+                    array_cost=non_invasive_recommendation.get("cost", None)
                 )
                 kw = np.floor(recommendation_config["array_wattage"] / 100) / 10
                 if has_battery:

From 935cfb24cf06def4fcffd586b0f87603137b9ebf Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 9 Aug 2024 08:52:34 +0100
Subject: [PATCH 074/182] testing out the new model - more reasonable behaviour

---
 backend/Property.py                     | 26 ++--------
 backend/app/plan/router.py              |  5 +-
 backend/ml_models/api.py                | 12 ++---
 etl/bill_savings/data_collection.py     |  4 +-
 etl/testing_data/bills_model_testing.py | 46 ++++++++++++-----
 recommendations/Recommendations.py      | 65 ++-----------------------
 6 files changed, 51 insertions(+), 107 deletions(-)

diff --git a/backend/Property.py b/backend/Property.py
index 45c7b3e5..497d976a 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -384,8 +384,9 @@ class Property:
                 types = [x["type"] for x in previous_phase_representatives]
                 if "external_wall_insulation" in types and "internal_wall_insulation" in types:
                     raise Exception("We shouldn't have this in the representative recommendations")
+                # We include previous phases + the recommendation itself in the EPC transformations
                 epc_transformations = [
-                    x["description_simulation"] for x in previous_phase_representatives
+                    x["description_simulation"] for x in previous_phase_representatives + [rec]
                 ]
 
                 # It is possible that we could have two simulations applied to the same descriptions
@@ -439,8 +440,6 @@ class Property:
 
             sim_epc.update(
                 {
-                    "heating-cost-current": rec_impact["epc_heating_cost"],
-                    "hot-water-cost-current": rec_impact["epc_hot_water_cost"],
                     # CO₂ emissions per square metre floor area per year in kg/m². Since CO₂ emissions are in tonnes
                     # per year, we multiply by 1000 to get kg/m²
                     "co2-emiss-curr-per-floor-area": round(
@@ -450,7 +449,6 @@ class Property:
                     "current-energy-rating": sap_to_epc(rec_impact["sap"]),
                     "current-energy-efficiency": int(np.floor(rec_impact["sap"])),
                     "energy-consumption-current": rec_impact["heat_demand"],
-                    "lighting-cost-current": rec_impact["epc_lighting_cost"],
                     "id": "+".join([str(self.id), rec_id])
                 }
             )
@@ -594,8 +592,7 @@ class Property:
         Given the cleaning that has been performed, we'll use this to identify the property
         components, from roof to walls to windows, heating and hot water
         :param cleaned: This is the dictionary of components found in cleaner.cleaned
-        :param energy_consumption_client: Contains the heating and hot water kwh models - used to predict current
-                                        energy annual consumption in kWh
+        :param energy_consumption_client: The client that will be used to convert the energy costs to today's costs
         :param kwh_predictions: Contains the kwh predictions for heating and hot water
         :return:
         """
@@ -686,14 +683,6 @@ class Property:
         # 2) Predicted KwH
 
         # Today's costs
-        todays_heating_cost = energy_consumption_client.convert_cost_to_today(
-            original_cost=float(self.data["heating-cost-current"]),
-            lodgement_date=pd.Timestamp(self.epc_record.prepared_epc["lodgement_date"]).tz_localize(None)
-        )
-        todays_hot_water_cost = energy_consumption_client.convert_cost_to_today(
-            original_cost=float(self.data["hot-water-cost-current"]),
-            lodgement_date=pd.Timestamp(self.epc_record.prepared_epc["lodgement_date"]).tz_localize(None)
-        )
         todays_lighting_cost = energy_consumption_client.convert_cost_to_today(
             original_cost=float(self.data["lighting-cost-current"]),
             lodgement_date=pd.Timestamp(self.epc_record.prepared_epc["lodgement_date"]).tz_localize(None)
@@ -702,15 +691,6 @@ class Property:
         # If we have the kwh figures, we don't need to predict them
         condition_data = self.energy_assessment_condition_data.copy()
 
-        # scoring_df = pd.DataFrame([self.epc_record.prepared_epc])
-        # # Change columns from underscores to hyphens
-        # scoring_df.columns = [
-        #     x.lower().replace("_", "-") for x in scoring_df.columns
-        # ]
-        # for col in ["heating_kwh", "hot_water_kwh"]:
-        #     scoring_df[col] = None
-        #
-        # energy_consumption_client.data = None
         heating_kwh_predictions = kwh_predictions["heating_kwh_predictions"]
         hotwater_kwh_predictions = kwh_predictions["hotwater_kwh_predictions"]
 
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 05f8f88f..02e669a5 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -588,7 +588,7 @@ async def trigger_plan(body: PlanTriggerRequest):
                 raise Exception("Missed setting of spatial data for a property")
             p.get_components(
                 cleaned=cleaned,
-                energy_consumption_client=energy_consumption_client,  # TODO: Full remove me
+                energy_consumption_client=energy_consumption_client,
                 kwh_predictions=kwh_predictions
             )
 
@@ -799,7 +799,6 @@ async def trigger_plan(body: PlanTriggerRequest):
         logger.info("Optimising recommendations")
         scoring_epcs = []  # For scoring the kwh models
         for property_id in recommendations.keys():
-
             property_instance = [p for p in input_properties if p.id == property_id][0]
 
             recommendations_with_impact, impact_summary = (
@@ -880,6 +879,8 @@ async def trigger_plan(body: PlanTriggerRequest):
             extract_ids=True
         )
 
+        # TODO: Costing model, which should include today's costs!
+
         # We now insert into the recommendations
         for property_id in recommendations.keys():
             property_recommendations = recommendations[property_id]
diff --git a/backend/ml_models/api.py b/backend/ml_models/api.py
index 0de7977f..c401e0f4 100644
--- a/backend/ml_models/api.py
+++ b/backend/ml_models/api.py
@@ -12,20 +12,20 @@ class ModelApi:
         "sap_change_predictions",
         "heat_demand_predictions",
         "carbon_change_predictions",
-        "lighting_cost_predictions",
-        "heating_cost_predictions",
-        "hot_water_cost_predictions",
+        # "lighting_cost_predictions",
+        # "heating_cost_predictions",
+        # "hot_water_cost_predictions",
     ]
 
     MODEL_URLS = {
         "sap_change_predictions": "sapmodel",
         "heat_demand_predictions": "heatmodel",
         "carbon_change_predictions": "carbonmodel",
-        "lighting_cost_predictions": "lightingmodel",
-        "heating_cost_predictions": "heatingmodel",
-        "hot_water_cost_predictions": "hotwatermodel",
         "hotwater_kwh_predictions": "hotwaterkwhmodel",
         "heating_kwh_predictions": "heatingkwhmodel",
+        # "lighting_cost_predictions": "lightingmodel",
+        # "heating_cost_predictions": "heatingmodel",
+        # "hot_water_cost_predictions": "hotwatermodel",
     }
 
     def __init__(
diff --git a/etl/bill_savings/data_collection.py b/etl/bill_savings/data_collection.py
index df95f8e2..0341b885 100644
--- a/etl/bill_savings/data_collection.py
+++ b/etl/bill_savings/data_collection.py
@@ -134,8 +134,8 @@ def app():
     for i, directory in tqdm(enumerate(epc_directories), total=len(epc_directories)):
 
         # Skip the first 50
-        # if i < 57:
-        #     continue
+        if i < 18:
+            continue
 
         data = pd.read_csv(directory / "certificates.csv", low_memory=False)
         # Rename the columns to the same format as the api returns
diff --git a/etl/testing_data/bills_model_testing.py b/etl/testing_data/bills_model_testing.py
index c10bbd8a..ea13f796 100644
--- a/etl/testing_data/bills_model_testing.py
+++ b/etl/testing_data/bills_model_testing.py
@@ -226,16 +226,18 @@ base_prediction = model_api.predict_all(
     extract_ids=False
 )
 
-cwi_epc = epcs_for_scoring.copy()
-cwi_epc["walls-description"] = "Cavity wall, filled cavity"
-cwi_epc["walls-energy-eff"] = "Good"
-cwi_epc["heating-cost-current"] = 1650
-cwi_epc["current-energy-efficiency"] = 72
-cwi_epc["current-energy-rating"] = "C"
-cwi_epc["co2-emissions-current"] = 3.7
-cwi_epc["energy-consumption-current"] = 121
-cwi_epc["co2-emiss-curr-per-floor-area"] = 19
-cwi_epc["photo-supply"] = 0
+cwi_epc = pd.DataFrame([property_scoring_epcs[1].copy()])
+cwi_epc = add_features_from_code(cwi_epc)
+cwi_epc = add_estimate_annual_kwh(cwi_epc)
+# cwi_epc["walls-description"] = "Cavity wall, filled cavity"
+# cwi_epc["walls-energy-eff"] = "Good"
+# cwi_epc["heating-cost-current"] = 1650
+# cwi_epc["current-energy-efficiency"] = 72
+# cwi_epc["current-energy-rating"] = "C"
+# cwi_epc["co2-emissions-current"] = 3.7
+# cwi_epc["energy-consumption-current"] = 121
+# cwi_epc["co2-emiss-curr-per-floor-area"] = 19
+# cwi_epc["photo-supply"] = 0
 # cwi_epc["energy-consumption-current"] =
 # cwi_epc["roof-description"] = "Pitched, 300 mm loft insulation"
 # cwi_epc["roof-energy-eff"] = "Very Good"
@@ -259,7 +261,27 @@ cwi_prediction = model_api.predict_all(
     df=cwi_epc,
     bucket=get_settings().DATA_BUCKET,
     prediction_buckets=get_prediction_buckets(),
-    model_prefixes=["heating_kwh_predictions"],
+    model_prefixes=["heating_kwh_predictions", "hotwater_kwh_predictions"],
     extract_ids=False
 )
-2344 - 2060
+
+# 77 perryn
+starting_heating = 19837.2
+starting_hot_water = 2974.1
+
+ending_heating = 17041.1
+ending_hot_water = 2735.3
+
+# 44 lindlings
+starting_heating = 13327.1
+starting_hot_water = 2349.5
+
+ending_heating = 9672.3
+ending_hot_water = 2030.2
+
+ending_heating = 8695.1
+ending_hot_water = 2437.0
+
+heating_impact = starting_heating - ending_heating
+hot_water_impact = starting_hot_water - ending_hot_water
+total_impact = heating_impact + hot_water_impact
diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py
index 9d709639..b8174ae0 100644
--- a/recommendations/Recommendations.py
+++ b/recommendations/Recommendations.py
@@ -379,26 +379,13 @@ class Recommendations:
         property_predictions = {
             prefix + "_predictions": all_predictions[prefix + "_predictions"][
                 all_predictions[prefix + "_predictions"]["property_id"] == str(property_instance.id)
-                ].copy() for prefix in [
-                "sap_change", "heat_demand", "carbon_change", "lighting_cost", "heating_cost", "hot_water_cost"
-            ]
+                ].copy() for prefix in ["sap_change", "heat_demand", "carbon_change"]
         }
 
-        # We apply adjustments to each of the heating costs
-        for prefix in ["lighting_cost", "heating_cost", "hot_water_cost"]:
-            property_predictions[f"{prefix}_predictions"]["adjusted_cost"] = (
-                property_predictions[f"{prefix}_predictions"]["predictions"].apply(
-                    lambda x: AnnualBillSavings.adjust_energy_to_metered(
-                        x, current_epc_rating=property_instance.data["current-energy-rating"]
-                    )
-                )
-            )
-
         property_recommendations = recommendations[property_instance.id].copy()
 
-        # TODO: should fabric upgrades have an impact on hot water costs/kwh?
-        # TODO: Generally, the costing models are just increasing. Maybe they're including something in the model
-        #       that they shouldn't e.g. SAP, carbon, heat demand etc?
+        increasing_variables = ["sap"]
+        decreasing_variables = ["carbon", "heat_demand"]
 
         impact_summary = []
         for recommendations_by_type in property_recommendations:
@@ -414,14 +401,6 @@ class Recommendations:
                         )]["predictions"].values[0] for prefix in ["sap_change", "heat_demand", "carbon_change"]
                 }
 
-                # For phase costs, we need adusted and unadjusted values
-                phase_cost = {
-                    prefix: property_predictions[prefix + "_predictions"][
-                        property_predictions[prefix + "_predictions"]["recommendation_id"] ==
-                        str(rec["recommendation_id"])
-                        ] for prefix in ["lighting_cost", "heating_cost", "hot_water_cost"]
-                }
-
                 # We structure this so that depending on the phase, we capture the previous phase impacts and
                 # then just have one piece of code to calculate the difference
                 if rec["phase"] == 0:
@@ -433,9 +412,6 @@ class Recommendations:
                         "sap": float(property_instance.data["current-energy-efficiency"]),
                         "carbon": float(property_instance.data["co2-emissions-current"]),
                         "heat_demand": float(property_instance.data["energy-consumption-current"]),
-                        "epc_heating_cost": float(property_instance.data["heating-cost-current"]),
-                        "epc_hot_water_cost": float(property_instance.data["hot-water-cost-current"]),
-                        "epc_lighting_cost": float(property_instance.data["lighting-cost-current"])
                     }
 
                 else:
@@ -463,26 +439,6 @@ class Recommendations:
                     "heat_demand": phase_energy_efficiency_metrics["heat_demand"],
                 }
 
-                static_cost_variables = (
-                    ["epc_heating_cost", "epc_hot_water_cost"] if
-                    rec["type"] == "low_energy_lighting" else ["epc_lighting_cost"]
-                )
-                dynamic_cost_variables = [
-                    v for v in ["epc_heating_cost", "epc_hot_water_cost", "epc_lighting_cost"]
-                    if v not in static_cost_variables
-                ]
-                # Take the static variables from the previous phase
-                current_phase_costs = {k: v for k, v in previous_phase_values.items() if k in static_cost_variables}
-                # Insert the dynamic variables from the current phase
-                for v in dynamic_cost_variables:
-                    current_phase_costs[v] = phase_cost[v.split("epc_")[1]]["adjusted_cost"].values[0]
-
-                current_phase_values.update(current_phase_costs)
-
-                increasing_variables = ["sap"]
-                decreasing_variables = [
-                    "carbon", "heat_demand", "epc_heating_cost", "epc_hot_water_cost", "epc_lighting_cost"
-                ]
                 # For increasing variables, the new value needs to be higher than the previous, otherwise we set it to
                 # the previous
                 # For decreasing variables, the new value should be lower than the previous, otherwise we set it to
@@ -507,21 +463,6 @@ class Recommendations:
                     "carbon": previous_phase_values["carbon"] - current_phase_values["carbon"],
                     # Decreasing
                     "heat_demand": previous_phase_values["heat_demand"] - current_phase_values["heat_demand"],
-                    # Decreasing
-                    "epc_heating_cost": (
-                        previous_phase_values["epc_heating_cost"] -
-                        current_phase_values["epc_heating_cost"]
-                    ),
-                    # Decreasing
-                    "epc_hot_water_cost": (
-                        previous_phase_values["epc_hot_water_cost"] -
-                        current_phase_values["epc_hot_water_cost"]
-                    ),
-                    # Decreasing
-                    "epc_lighting_cost": (
-                        previous_phase_values["epc_lighting_cost"] -
-                        current_phase_values["epc_lighting_cost"]
-                    )
                 }
 
                 # Prevent from being negative

From 66d2a401e8962cf692a3d8a84d63cc33ccff87ae Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 9 Aug 2024 10:26:25 +0100
Subject: [PATCH 075/182] tidied up kwh model data

---
 backend/app/plan/router.py    | 35 +++++-----------------------
 etl/bill_savings/KwhData.py   |  0
 etl/spatial/OpenUprnClient.py | 44 ++++++++++++++++++++++++++++++++++-
 3 files changed, 49 insertions(+), 30 deletions(-)
 create mode 100644 etl/bill_savings/KwhData.py

diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 02e669a5..9562af86 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -20,7 +20,7 @@ from backend.app.db.functions.property_functions import (
     update_or_create_property_spatial_details
 )
 from backend.app.db.functions.recommendations_functions import (
-    create_plan, create_plan_recommendations, upload_recommendations, create_scenario
+    create_plan, upload_recommendations, create_scenario
 )
 from backend.app.db.functions.energy_assessment_functions import get_latest_assessment_by_uprn
 from backend.app.db.models.portfolio import rating_lookup
@@ -32,7 +32,6 @@ from backend.app.utils import epc_to_sap_lower_bound, sap_to_epc
 from backend.ml_models.api import ModelApi
 from backend.Property import Property
 from backend.apis.GoogleSolarApi import GoogleSolarApi
-from etl.solar.SolarPhotoSupply import SolarPhotoSupply
 
 from recommendations.optimiser.CostOptimiser import CostOptimiser
 from recommendations.optimiser.GainOptimiser import GainOptimiser
@@ -42,7 +41,10 @@ from recommendations.Mds import Mds
 from utils.logger import setup_logger
 from utils.s3 import read_dataframe_from_s3_parquet, read_csv_from_s3
 from backend.ml_models.Valuation import PropertyValuation
+
 from etl.bill_savings.EnergyConsumptionModel import EnergyConsumptionModel
+from etl.spatial.OpenUprnClient import OpenUprnClient
+from etl.solar.SolarPhotoSupply import SolarPhotoSupply
 
 logger = setup_logger()
 
@@ -414,9 +416,6 @@ async def trigger_plan(body: PlanTriggerRequest):
         materials = get_materials(session)
         cleaned = get_cleaned()
 
-        uprn_filenames = read_dataframe_from_s3_parquet(
-            bucket_name=get_settings().DATA_BUCKET, file_key="spatial/filename_meta.parquet"
-        )
         solar_api_client = GoogleSolarApi(api_key=get_settings().GOOGLE_SOLAR_API_KEY)
 
         dataset_version = "2024-07-08"
@@ -559,33 +558,11 @@ async def trigger_plan(body: PlanTriggerRequest):
             extract_ids=False
         )
 
-        # TODO: Move this/tidy it up
-        uprn_map = {}
-        for uprn in [p.uprn for p in input_properties]:
-            filtered_df = uprn_filenames[
-                (uprn_filenames["lower"] <= int(uprn))
-                & (uprn_filenames["upper"] >= int(uprn))
-                ]
-            if filtered_df["filenames"].values[0] in uprn_map:
-                uprn_map[filtered_df["filenames"].values[0]].append(int(uprn))
-            else:
-                uprn_map[filtered_df["filenames"].values[0]] = [int(uprn)]
-
-        for filename, associated_uprn in tqdm(uprn_map.items(), total=len(uprn_map)):
-            # Read in the file
-            spatial_data = read_dataframe_from_s3_parquet(
-                bucket_name="retrofit-data-dev", file_key=f"spatial/{filename}"
-            )
-
-            spatial_df = spatial_data[spatial_data["UPRN"].isin(associated_uprn)]
-            for p in input_properties:
-                if p.uprn in associated_uprn:
-                    p.set_spatial(spatial_df[spatial_df["UPRN"] == p.uprn])
+        # Insert the spatial data
+        input_properties = OpenUprnClient.set_spatial_data(input_properties, bucket_name=get_settings().DATA_BUCKET)
 
         logger.info("Getting spatial data")
         for p in tqdm(input_properties):
-            if p.spatial is None:
-                raise Exception("Missed setting of spatial data for a property")
             p.get_components(
                 cleaned=cleaned,
                 energy_consumption_client=energy_consumption_client,
diff --git a/etl/bill_savings/KwhData.py b/etl/bill_savings/KwhData.py
new file mode 100644
index 00000000..e69de29b
diff --git a/etl/spatial/OpenUprnClient.py b/etl/spatial/OpenUprnClient.py
index 7392c4ac..198f9945 100644
--- a/etl/spatial/OpenUprnClient.py
+++ b/etl/spatial/OpenUprnClient.py
@@ -3,7 +3,8 @@ from tqdm import tqdm
 import pandas as pd
 import geopandas as gpd
 from utils.logger import setup_logger
-from utils.s3 import read_io_from_s3, save_dataframe_to_s3_parquet
+from utils.s3 import read_io_from_s3, save_dataframe_to_s3_parquet, read_dataframe_from_s3_parquet
+from backend.Property import Property
 
 logger = setup_logger()
 
@@ -116,3 +117,44 @@ class OpenUprnClient:
             file_key=file_key,
             bucket_name=bucket_name
         )
+
+    @staticmethod
+    def set_spatial_data(input_properties: list[Property], bucket_name):
+        """
+        Given a list of properties, this method will set the spatial data for each property
+        The method will look for the minimal set of uprn datasets that it needs to read in to get all of the spatial
+        data for the properties
+        """
+
+        uprn_filenames = read_dataframe_from_s3_parquet(
+            bucket_name=bucket_name, file_key="spatial/filename_meta.parquet"
+        )
+
+        uprn_map = {}
+        for uprn in [p.uprn for p in input_properties]:
+            filtered_df = uprn_filenames[
+                (uprn_filenames["lower"] <= int(uprn))
+                & (uprn_filenames["upper"] >= int(uprn))
+                ]
+            if filtered_df["filenames"].values[0] in uprn_map:
+                uprn_map[filtered_df["filenames"].values[0]].append(int(uprn))
+            else:
+                uprn_map[filtered_df["filenames"].values[0]] = [int(uprn)]
+
+        for filename, associated_uprn in tqdm(uprn_map.items(), total=len(uprn_map)):
+            # Read in the file
+            spatial_data = read_dataframe_from_s3_parquet(
+                bucket_name="retrofit-data-dev", file_key=f"spatial/{filename}"
+            )
+
+            spatial_df = spatial_data[spatial_data["UPRN"].isin(associated_uprn)]
+            for p in input_properties:
+                if p.uprn in associated_uprn:
+                    p.set_spatial(spatial_df[spatial_df["UPRN"] == p.uprn])
+
+        # Perform a final check to ensure that all properties have spatial data
+        for p in input_properties:
+            if p.spatial is None:
+                raise Exception(f"Property with UPRN {p.uprn} does not have spatial data")
+
+        return input_properties

From 3002a2c740d6cebc59fd337ef5d4a48032c6433c Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 9 Aug 2024 10:38:40 +0100
Subject: [PATCH 076/182] created KwhData class

---
 etl/bill_savings/KwhData.py        | 118 +++++++++++++++++++++++++++++
 etl/bill_savings/data_combining.py |  98 +-----------------------
 2 files changed, 121 insertions(+), 95 deletions(-)

diff --git a/etl/bill_savings/KwhData.py b/etl/bill_savings/KwhData.py
index e69de29b..ad7a375a 100644
--- a/etl/bill_savings/KwhData.py
+++ b/etl/bill_savings/KwhData.py
@@ -0,0 +1,118 @@
+import re
+import pandas as pd
+from datetime import datetime
+from tqdm import tqdm
+from utils.logger import setup_logger
+from utils.s3 import list_files_in_s3_folder, read_pickle_from_s3, save_dataframe_to_s3_parquet
+
+logger = setup_logger()
+
+
+class KwhData:
+    COLS_TO_STRINGIFY = ["main-heating-controls", "floor-level"]
+
+    def __init__(self, bucket):
+        self.run_date = datetime.now().strftime("%Y-%m-%d")
+        self.bucket = bucket
+        self.data = None
+
+        self.consumption_data_filepath = None
+        self.consumption_averages_filepath = None
+
+    @staticmethod
+    def extract_kwh_value(text: str):
+        """
+        Extract the numerical kWh value from a given string.
+
+        :param text: The input string containing the kWh value.
+        :return: The extracted numerical kWh value as an integer, or None if the text contains no such value.
+        """
+        # Use regular expression to find the numerical value followed by "kWh per year"
+        match = re.search(r'([\d,]+) kWh per year', text)
+
+        if match:
+            # Remove commas from the extracted value and convert to integer
+            kwh_value = int(match.group(1).replace(',', ''))
+            return kwh_value
+        else:
+            # If no match is found, return None
+            return None
+
+    def combine(self):
+        """
+        Given the data that is collected containing the kwh values for heating and hot water, this method will combine
+        and save the data
+        :return:
+        """
+
+        # Firstly, list all of the saved files in s3
+        data_files = list_files_in_s3_folder(bucket_name="retrofit-datalake-dev", folder_name="energy_consumption_data")
+
+        complete_data = []
+        for files in tqdm(data_files):
+            dataset_run_date = files.split("/")[-1].split(".")[0]
+            # Extract the date from the file name
+            dataset_run_date = pd.Timestamp(dataset_run_date)
+
+            # Load the data from the file
+            data = read_pickle_from_s3(bucket_name="retrofit-datalake-dev", s3_file_name=files)
+
+            # We check that the retrieved energy consumption sufficiently matches the EPC data
+            internal_dataset = []
+            for x in data:
+                epc_data = x["epc"]
+                epc_sap = epc_data["current-energy-efficiency"]
+                epc_potential_sap = epc_data["potential-energy-efficiency"]
+                # Make sure this matches the extracted sap
+                if int(epc_sap) != int(x["current_epc_efficiency"]) or int(epc_potential_sap) != int(
+                    x["potential_epc_efficiency"]
+                ):
+                    continue
+
+                heating_kwh = self.extract_kwh_value(x["heating_text"])
+                hot_water_kwh = self.extract_kwh_value(x["hot_water_text"])
+                internal_dataset.append(
+                    {
+                        **epc_data,
+                        "heating_kwh": heating_kwh,
+                        "hot_water_kwh": hot_water_kwh,
+                        "dataset_run_date": dataset_run_date
+                    }
+                )
+
+            complete_data.extend(internal_dataset)
+
+        df = pd.DataFrame(complete_data)
+        # Because we collate multiple runs into a single data source, it's possible that we have duplicated data at
+        # the uprn level, so we dedupe based on the newest dataset_run_date
+
+        df = df.sort_values("dataset_run_date", ascending=False).drop_duplicates(subset="uprn", keep="first")
+        df = df.drop(columns=["dataset_run_date"])
+
+        for col in self.COLS_TO_STRINGIFY:
+            df[col] = df[col].astype(str)
+
+        # Save the data back to s3, but this time as a parquet file
+        self.consumption_data_filepath = f"energy_consumption/{self.run_date}/energy_consumption_dataset.parquet"
+        logger.info(f"Storing energy consumption dataset in s3 at {self.consumption_data_filepath}")
+        save_dataframe_to_s3_parquet(
+            bucket_name=self.bucket,
+            file_key=self.consumption_data_filepath,
+            df=df
+        )
+
+        # We also compute the mean total (heating + hot water) consumption per current-energy-efficiency value
+        df["total_consumption"] = df["heating_kwh"] + df["hot_water_kwh"]
+        consumption_averages = df.groupby("current-energy-efficiency")["total_consumption"].mean().reset_index()
+        df = df.drop(columns=["total_consumption"])
+
+        self.consumption_averages_filepath = f"energy_consumption/{self.run_date}/consumption_averages.parquet"
+        logger.info(f"Storing consumption averages in s3 at {self.consumption_averages_filepath}")
+        # Save the consumption averages back to s3
+        save_dataframe_to_s3_parquet(
+            bucket_name="retrofit-data-dev",
+            file_key=self.consumption_averages_filepath,
+            df=consumption_averages
+        )
+
+        self.data = df
diff --git a/etl/bill_savings/data_combining.py b/etl/bill_savings/data_combining.py
index dece3834..970c92bf 100644
--- a/etl/bill_savings/data_combining.py
+++ b/etl/bill_savings/data_combining.py
@@ -1,32 +1,4 @@
-import re
-from datetime import datetime
-from tqdm import tqdm
-
-import pandas as pd
-
-from utils.s3 import list_files_in_s3_folder, read_pickle_from_s3, save_dataframe_to_s3_parquet
-
-# These columns we co-erce to strings before saving
-PROBLEMATIC_COLUMNS = ["main-heating-controls", "floor-level"]
-
-
-def extract_kwh_value(text):
-    """
-    Extract the numerical kWh value from a given string.
-
-    :param text: The input string containing the kWh value.
-    :return: The extracted numerical kWh value as an integer.
-    """
-    # Use regular expression to find the numerical value followed by "kWh per year"
-    match = re.search(r'([\d,]+) kWh per year', text)
-
-    if match:
-        # Remove commas from the extracted value and convert to integer
-        kwh_value = int(match.group(1).replace(',', ''))
-        return kwh_value
-    else:
-        # If no match is found, return None or raise an exception
-        return None
+from etl.bill_savings.KwhData import KwhData
 
 
 def app():
@@ -36,69 +8,5 @@ def app():
     :return:
     """
 
-    # Firstly, list all of the saved files in s3
-    data_files = list_files_in_s3_folder(bucket_name="retrofit-datalake-dev", folder_name="energy_consumption_data")
-
-    run_date = datetime.now().strftime("%Y-%m-%d")
-
-    complete_data = []
-    for files in tqdm(data_files):
-        dataset_run_date = files.split("/")[-1].split(".")[0]
-        # Extract the date from the file name
-        dataset_run_date = pd.Timestamp(dataset_run_date)
-
-        # Load the data from the file
-        data = read_pickle_from_s3(bucket_name="retrofit-datalake-dev", s3_file_name=files)
-
-        # We check that the retrieved energy consumption sufficiently matches the EPC data
-        internal_dataset = []
-        for x in data:
-            epc_data = x["epc"]
-            epc_sap = epc_data["current-energy-efficiency"]
-            epc_potential_sap = epc_data["potential-energy-efficiency"]
-            # Make sure this matches the extracted sap
-            if int(epc_sap) != int(x["current_epc_efficiency"]) or int(epc_potential_sap) != int(
-                x["potential_epc_efficiency"]
-            ):
-                continue
-
-            heating_kwh = extract_kwh_value(x["heating_text"])
-            hot_water_kwh = extract_kwh_value(x["hot_water_text"])
-            internal_dataset.append(
-                {
-                    **epc_data,
-                    "heating_kwh": heating_kwh,
-                    "hot_water_kwh": hot_water_kwh,
-                    "dataset_run_date": dataset_run_date
-                }
-            )
-
-        complete_data.extend(internal_dataset)
-
-    df = pd.DataFrame(complete_data)
-    # Because we collate multiple runs into a single data source, it's possible that we have duplicated data at
-    # the uprn level, so we dedupe based on the newest dataset_run_date
-
-    df = df.sort_values("dataset_run_date", ascending=False).drop_duplicates(subset="uprn", keep="first")
-    df = df.drop(columns=["dataset_run_date"])
-
-    for col in PROBLEMATIC_COLUMNS:
-        df[col] = df[col].astype(str)
-
-    # Save the data back to s3, but this time as a parquet file
-    save_dataframe_to_s3_parquet(
-        bucket_name="retrofit-data-dev",
-        file_key=f"energy_consumption/{run_date}/energy_consumption_dataset.parquet",
-        df=df
-    )
-
-    # We also estimate the energy consumption reduction from this data, by band
-    df["total_consumption"] = df["heating_kwh"] + df["hot_water_kwh"]
-    consumption_averages = df.groupby("current-energy-efficiency")["total_consumption"].mean().reset_index()
-
-    # Save the consumption averages back to s3
-    save_dataframe_to_s3_parquet(
-        bucket_name="retrofit-data-dev",
-        file_key=f"energy_consumption/{run_date}/consumption_averages.parquet",
-        df=consumption_averages
-    )
+    kwh_data_client = KwhData(bucket="retrofit-datalake-dev")
+    kwh_data_client.combine()

From 73be979c29bc3f49572ef5953ae09bb319a7ac25 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 9 Aug 2024 11:07:16 +0100
Subject: [PATCH 077/182] created transform method

---
 etl/bill_savings/EnergyConsumptionModel.py    |   2 +-
 etl/bill_savings/KwhData.py                   | 100 ++++++++++++++++++
 etl/bill_savings/data_collection.py           |   2 +-
 etl/bill_savings/training.py                  |   2 +-
 .../{data_combining.py => training_data.py}   |  10 ++
 5 files changed, 113 insertions(+), 3 deletions(-)
 rename etl/bill_savings/{data_combining.py => training_data.py} (51%)

diff --git a/etl/bill_savings/EnergyConsumptionModel.py b/etl/bill_savings/EnergyConsumptionModel.py
index 01dcce7a..8aa0cbf8 100644
--- a/etl/bill_savings/EnergyConsumptionModel.py
+++ b/etl/bill_savings/EnergyConsumptionModel.py
@@ -586,7 +586,7 @@ class EnergyConsumptionModel:
 
     def estimate_new_consumption(self, current_energy_efficiency, target_efficiency, current_consumption):
         """
-        Given then consumption_averages dataset, which is produced as a result of the data_combining.py script,
+        Given the consumption_averages dataset, which is produced as a result of the training_data.py script,
         for the energy kwh models, this function will estimate the new consumption based on the current consumption,
         based on the expected reduction in consumption from the current rating to the target rating.
         :param current_energy_efficiency:
diff --git a/etl/bill_savings/KwhData.py b/etl/bill_savings/KwhData.py
index ad7a375a..3c68f33f 100644
--- a/etl/bill_savings/KwhData.py
+++ b/etl/bill_savings/KwhData.py
@@ -1,5 +1,6 @@
 import re
 import pandas as pd
+import numpy as np
 from datetime import datetime
 from tqdm import tqdm
 from utils.logger import setup_logger
@@ -11,6 +12,23 @@ logger = setup_logger()
 class KwhData:
     COLS_TO_STRINGIFY = ["main-heating-controls", "floor-level"]
 
+    CATEGORICAL_COLUMNS = [
+        "lodgement-year", "lodgement-month", "main-fuel", "mainheat-description", "number-heated-rooms",
+        "number-habitable-rooms", "mainheat-energy-eff", "mainheatcont-description", "property-type",
+        "built-form",
+        "construction-age-band", "secondheat-description", "hotwater-description", "hot-water-energy-eff",
+        "walls-description", "walls-energy-eff", "roof-description", "roof-energy-eff", "floor-description",
+        "county",
+        "windows-description", "windows-energy-eff", "flat-top-storey",
+        "flat-storey-count", "unheated-corridor-length", "solar-water-heating-flag", "mechanical-ventilation",
+        "low-energy-lighting", "environment-impact-current", "energy-tariff", "current-energy-rating"
+    ]
+
+    NUMERICAL_COLUMNS = [
+        'heating-cost-current', 'total-floor-area', 'co2-emissions-current', 'energy-consumption-current',
+        'heating-cost-potential', 'hot-water-cost-current', 'current-energy-efficiency'
+    ]
+
     def __init__(self, bucket):
         self.run_date = datetime.now().strftime("%Y-%m-%d")
         self.bucket = bucket
@@ -18,6 +36,7 @@ class KwhData:
 
         self.consumption_data_filepath = None
         self.consumption_averages_filepath = None
+        self.model_training_data_filepath = None
 
     @staticmethod
     def extract_kwh_value(text: str):
@@ -116,3 +135,84 @@ class KwhData:
         )
 
         self.data = df
+
+    def transform(
+        self, data: pd.DataFrame, cleaned, new=False, save=False
+    ):
+        """
+        Given the input EPCs, this method will transform the data into a format that can be used by the model
+        This method can be used to transform the training data, or new epcs within the backend engine
+        :return:
+        """
+
+        # TODO: New is a temporary parameter, which will transform the epc descriptions to their transformed features
+        #       in anticipation of the new model
+
+        data["lodgement-date"] = pd.to_datetime(data["lodgement-date"])
+        data["lodgement-year"] = data["lodgement-date"].dt.year
+        data["lodgement-month"] = data["lodgement-date"].dt.month
+
+        # Walls, roof and floor descriptions can carry an average thermal transmittance. To avoid too many
+        # categories we bucket it into the ranges below: each range is (low, high], and values outside
+        # 0.01 - 2.50 get no label, so the original description is kept for them
+        ranges = {
+            "lessthan 0.1": (0, 0.1),
+            "0.1 - 0.3": (0.1, 0.3),
+            "0.3 - 0.5": (0.3, 0.5),
+            "morethan 0.5": (0.5, 2.5),
+        }
+
+        # Generate the lookup table
+        thermal_transmittance_lookup_table = []
+        for i in range(1, 251):
+            value = i / 100
+            for label, (low, high) in ranges.items():
+                if low < value <= high:
+                    thermal_transmittance_lookup_table.append({"from": value, "to": label})
+                    break
+
+        # Convert to a DataFrame keyed on the string form of the value, so that it can be merged on below
+        thermal_transmittance_lookup_table = pd.DataFrame(thermal_transmittance_lookup_table)
+        thermal_transmittance_lookup_table["from"] = thermal_transmittance_lookup_table["from"].astype(str)
+
+        # Apply the lookup table to the data
+        for feature in ["walls-description", "roof-description", "floor-description"]:
+            cleaned_df = pd.DataFrame(cleaned[feature])[["original_description", "thermal_transmittance"]]
+            # Round to 2 decimal places and convert to string
+            cleaned_df["thermal_transmittance"] = cleaned_df["thermal_transmittance"].round(2).astype(str)
+
+            data = data.merge(
+                cleaned_df,
+                how="left",
+                left_on=feature,
+                right_on="original_description",
+            )
+            # We now have the thermal transmittance in the data, which we can use to group with the lookup table
+            data = data.merge(
+                thermal_transmittance_lookup_table,
+                how="left",
+                left_on="thermal_transmittance",
+                right_on="from",
+            )
+            # Where "to" is populated, replace feature with to
+            data[feature] = np.where(
+                ~pd.isnull(data["to"]),
+                data["to"],
+                data[feature]
+            )
+            data = data.drop(columns=["original_description", "thermal_transmittance", "from", "to"])
+
+        data[self.NUMERICAL_COLUMNS] = data[self.NUMERICAL_COLUMNS].apply(pd.to_numeric)
+        data[self.CATEGORICAL_COLUMNS] = data[self.CATEGORICAL_COLUMNS].astype(str)
+
+        # Create new features:
+        data['estimate_annual_kwh'] = data['energy-consumption-current'] * data['total-floor-area']
+
+        if save:
+            self.model_training_data_filepath = f"energy_consumption/{self.run_date}/training_data.parquet"
+            logger.info(f"Storing energy consumption dataset in s3 at {self.model_training_data_filepath}")
+            save_dataframe_to_s3_parquet(
+                bucket_name=self.bucket,
+                file_key=self.model_training_data_filepath,
+                df=data
+            )
diff --git a/etl/bill_savings/data_collection.py b/etl/bill_savings/data_collection.py
index 0341b885..85a403f1 100644
--- a/etl/bill_savings/data_collection.py
+++ b/etl/bill_savings/data_collection.py
@@ -7,7 +7,7 @@ import inspect
 import pandas as pd
 from tqdm import tqdm
 from bs4 import BeautifulSoup
-from etl.epc.settings import EARLIEST_EPC_DATE
+from training_data.epc.settings import EARLIEST_EPC_DATE
 from pathlib import Path
 import numpy as np
 from utils.s3 import save_pickle_to_s3
diff --git a/etl/bill_savings/training.py b/etl/bill_savings/training.py
index df60298b..5d89a79e 100644
--- a/etl/bill_savings/training.py
+++ b/etl/bill_savings/training.py
@@ -1,7 +1,7 @@
 from pprint import pprint
 import msgpack
 from utils.s3 import read_from_s3
-from etl.bill_savings.EnergyConsumptionModel import EnergyConsumptionModel
+from training_data.bill_savings.EnergyConsumptionModel import EnergyConsumptionModel
 
 
 def handler():
diff --git a/etl/bill_savings/data_combining.py b/etl/bill_savings/training_data.py
similarity index 51%
rename from etl/bill_savings/data_combining.py
rename to etl/bill_savings/training_data.py
index 970c92bf..85b53bca 100644
--- a/etl/bill_savings/data_combining.py
+++ b/etl/bill_savings/training_data.py
@@ -1,4 +1,6 @@
+import msgpack
 from etl.bill_savings.KwhData import KwhData
+from utils.s3 import read_from_s3
 
 
 def app():
@@ -8,5 +10,13 @@ def app():
     :return:
     """
 
+    cleaned = read_from_s3(
+        s3_file_name="cleaned_epc_data/cleaned.bson",
+        bucket_name="retrofit-data-dev"
+    )
+
+    cleaned = msgpack.unpackb(cleaned, raw=False)
+
     kwh_data_client = KwhData(bucket="retrofit-datalake-dev")
     kwh_data_client.combine()
+    kwh_data_client.transform(data=kwh_data_client.data, cleaned=cleaned, save=True)

From fffb1792190c11f2d83a3f65984f4d7154711d9d Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 9 Aug 2024 11:14:48 +0100
Subject: [PATCH 078/182] adding new kwh etl process to backend

---
 backend/app/plan/router.py | 122 ++-----------------------------------
 1 file changed, 5 insertions(+), 117 deletions(-)

diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 9562af86..925bb725 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -43,6 +43,7 @@ from utils.s3 import read_dataframe_from_s3_parquet, read_csv_from_s3
 from backend.ml_models.Valuation import PropertyValuation
 
 from etl.bill_savings.EnergyConsumptionModel import EnergyConsumptionModel
+from etl.bill_savings.KwhData import KwhData
 from etl.spatial.OpenUprnClient import OpenUprnClient
 from etl.solar.SolarPhotoSupply import SolarPhotoSupply
 
@@ -432,123 +433,10 @@ async def trigger_plan(body: PlanTriggerRequest):
 
         model_api = ModelApi(portfolio_id=body.portfolio_id, timestamp=created_at)
 
-        epcs_for_scoring = energy_consumption_client.prepare_new_data(input_properties)
-
-        # prepare the data
-
-        # TODO - this needs to be moved to the etl process
-        import numpy as np
-        def add_features_from_code(df):
-
-            FEATURES = {
-                "heating_kwh": [
-                    "lodgement-year", "lodgement-month", "current-energy-efficiency", "energy-consumption-current",
-                    "heating-cost-current", "heating-cost-potential", "total-floor-area", "number-heated-rooms",
-                    "mainheat-description", "mainheat-energy-eff", "main-fuel", "secondheat-description",
-                    "property-type",
-                    "built-form", "mainheatcont-description", "hotwater-description", "hot-water-energy-eff",
-                    "walls-energy-eff",
-                    "roof-energy-eff", "windows-description", "windows-energy-eff", "floor-description",
-                    "flat-top-storey",
-                    "flat-storey-count", "unheated-corridor-length", "solar-water-heating-flag",
-                    "mechanical-ventilation",
-                    "low-energy-lighting", "environment-impact-current", "energy-tariff",
-                    "county", "construction-age-band", "co2-emissions-current",
-                ],
-                "hot_water_kwh": [
-                    "lodgement-year", "lodgement-month",
-                    "current-energy-efficiency",
-                    "energy-consumption-current",
-                    "hot-water-cost-current",
-                    "total-floor-area", "number-heated-rooms",
-                    "hotwater-description", "hot-water-energy-eff", "main-fuel", "property-type", "built-form",
-                    "co2-emissions-current",
-                ]
-            }
-            CATEGORICAL_COLUMNS = [
-                "lodgement-year", "lodgement-month", "main-fuel", "mainheat-description", "number-heated-rooms",
-                "number-habitable-rooms", "mainheat-energy-eff", "mainheatcont-description", "property-type",
-                "built-form",
-                "construction-age-band", "secondheat-description", "hotwater-description", "hot-water-energy-eff",
-                "walls-description", "walls-energy-eff", "roof-description", "roof-energy-eff", "floor-description",
-                "county",
-                "windows-description", "windows-energy-eff", "flat-top-storey",
-                "flat-storey-count", "unheated-corridor-length", "solar-water-heating-flag", "mechanical-ventilation",
-                "low-energy-lighting", "environment-impact-current", "energy-tariff", "current-energy-rating"
-            ]
-
-            NUMERICAL_COLUMNS = list({
-                x for x in FEATURES["heating_kwh"] + FEATURES["hot_water_kwh"]
-                if x not in CATEGORICAL_COLUMNS
-            })
-
-            """Performs feature engineering on the dataset."""
-            df["lodgement-date"] = pd.to_datetime(df["lodgement-date"])
-            df["lodgement-year"] = df["lodgement-date"].dt.year
-            df["lodgement-month"] = df["lodgement-date"].dt.month
-
-            # For walls, roof, floor description where we have average thermal transmittance, to avoid too many
-            # categories
-            # we group them
-            ranges = {
-                "lessthan 0.1": (0, 0.1),
-                "0.1 - 0.3": (0.1, 0.3),
-                "0.3 - 0.5": (0.3, 0.5),
-                "morethan 0.5": (0.5, 2.5),
-            }
-
-            # Generate the lookup table
-            thermal_transmittance_lookup_table = []
-            for i in range(1, 251):
-                value = i / 100
-                for label, (low, high) in ranges.items():
-                    if low < value <= high:
-                        thermal_transmittance_lookup_table.append({"from": value, "to": label})
-                        break
-
-            # Convert to DataFrame for display
-            thermal_transmittance_lookup_table = pd.DataFrame(thermal_transmittance_lookup_table)
-            thermal_transmittance_lookup_table["from"] = thermal_transmittance_lookup_table["from"].astype(str)
-
-            # Apply the lookup table to the data
-            for feature in ["walls-description", "roof-description", "floor-description"]:
-                cleaned_df = pd.DataFrame(cleaned[feature])[["original_description", "thermal_transmittance"]]
-                # Round to 2 decimal places and convert to string
-                cleaned_df["thermal_transmittance"] = cleaned_df["thermal_transmittance"].round(2).astype(str)
-
-                df = df.merge(
-                    cleaned_df,
-                    how="left",
-                    left_on=feature,
-                    right_on="original_description",
-                )
-                # We now have the thermal transmittance in the data, which we can use to group with the lookup table
-                df = df.merge(
-                    thermal_transmittance_lookup_table,
-                    how="left",
-                    left_on="thermal_transmittance",
-                    right_on="from",
-                )
-                # Where "to" is populated, replace feature with to
-                df[feature] = np.where(
-                    ~pd.isnull(df["to"]),
-                    df["to"],
-                    df[feature]
-                )
-                df = df.drop(columns=["original_description", "thermal_transmittance", "from", "to"])
-
-            # Convert data types
-            df[NUMERICAL_COLUMNS] = df[NUMERICAL_COLUMNS].apply(pd.to_numeric)
-            df[CATEGORICAL_COLUMNS] = df[CATEGORICAL_COLUMNS].astype(str)
-
-            return df
-
-        def add_estimate_annual_kwh(df):
-            df['estimate_annual_kwh'] = df['energy-consumption-current'] * df['total-floor-area']
-            return df
-
-        epcs_for_scoring = add_features_from_code(epcs_for_scoring)
-        epcs_for_scoring = add_estimate_annual_kwh(epcs_for_scoring)
+        epcs_for_scoring = KwhData.transform(
+            data=pd.DataFrame([p.epc_record.original_epc for p in input_properties]),
+            cleaned=cleaned,
+        )
 
         kwh_predictions = model_api.predict_all(
             df=epcs_for_scoring,

From c9720cd78cbb9dd0914f7b23b3d01aec18013dbc Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 9 Aug 2024 12:03:58 +0100
Subject: [PATCH 079/182] Added KwhData client to router

---
 backend/app/plan/router.py                 |  9 +--
 etl/bill_savings/EnergyConsumptionModel.py | 60 -------------------
 etl/bill_savings/KwhData.py                | 68 +++++++++++++++++++++-
 3 files changed, 70 insertions(+), 67 deletions(-)

diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 925bb725..8a9cbd53 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -247,8 +247,8 @@ def create_epc_records(epc_searcher: SearchEpc, energy_assessment: dict):
 
     # We insert county into the epc, since right now this isn't something that we pull out from the energy
     # assessment
-    epc["county"] = epc_searcher.newest_epc["county"]
-    epc["constituency"] = epc_searcher.newest_epc["constituency"]
+    for col in ["county", "constituency", "constituency-label", "local-authority", "local-authority-label"]:
+        epc[col] = epc_searcher.newest_epc[col]
 
     # We check if the energy assessment is newer than the newest EPC
     if pd.to_datetime(energy_assessment_date) > pd.to_datetime(epc_searcher.newest_epc["inspection-date"]):
@@ -433,10 +433,7 @@ async def trigger_plan(body: PlanTriggerRequest):
 
         model_api = ModelApi(portfolio_id=body.portfolio_id, timestamp=created_at)
 
-        epcs_for_scoring = KwhData.transform(
-            data=pd.DataFrame([p.epc_record.original_epc for p in input_properties]),
-            cleaned=cleaned,
-        )
+        epcs_for_scoring = KwhData().transform(data=KwhData().prepare_epc(input_properties), cleaned=cleaned)
 
         kwh_predictions = model_api.predict_all(
             df=epcs_for_scoring,
diff --git a/etl/bill_savings/EnergyConsumptionModel.py b/etl/bill_savings/EnergyConsumptionModel.py
index 8aa0cbf8..25bd04ed 100644
--- a/etl/bill_savings/EnergyConsumptionModel.py
+++ b/etl/bill_savings/EnergyConsumptionModel.py
@@ -507,66 +507,6 @@ class EnergyConsumptionModel:
 
         return prediction
 
-    @staticmethod
-    def _prepare_new_data(p: Property):
-        """
-        Given an instance of the property class, this method will ensure that the EPC is ready for scoring with the
-        kwh models. In the backend, we perform some cleaning and transformation on an EPC so we just ensure that the
-        data is in the format required by the model
-        :return:
-        """
-
-        epc = p.data.copy()
-        numeric_cols = [
-            'current-energy-efficiency',
-            'potential-energy-efficiency', 'environment-impact-current',
-            'environment-impact-potential', 'energy-consumption-current',
-            'energy-consumption-potential', 'co2-emissions-current',
-            'co2-emiss-curr-per-floor-area', 'co2-emissions-potential',
-            'lighting-cost-current', 'lighting-cost-potential',
-            'heating-cost-current', 'heating-cost-potential',
-            'hot-water-cost-current', 'hot-water-cost-potential',
-            'total-floor-area', 'multi-glaze-proportion',
-            'extension-count', 'number-habitable-rooms', 'number-heated-rooms',
-            'low-energy-lighting', 'number-open-fireplaces',
-            'wind-turbine-count', 'unheated-corridor-length',
-            'floor-height', 'photo-supply', 'fixed-lighting-outlets-count',
-            'low-energy-fixed-light-count',
-        ]
-        for v in numeric_cols:
-            if epc[v] is not None:
-                epc[v] = float(epc[v])
-
-        bools_to_remap = ['mains-gas-flag', 'flat-top-storey']
-        bool_map = {
-            True: "Y",
-            False: "N",
-            None: "N",
-            "Y": "Y",
-            "N": "N"
-        }
-        for v in bools_to_remap:
-            epc[v] = bool_map[epc[v]]
-
-        no_data = {
-            "floor-level": "NODATA!",
-            "floor-energy-eff": "NO DATA!"
-        }
-        for v, fill_val in no_data.items():
-            if pd.isnull(epc[v]):
-                epc[v] = fill_val
-
-        return epc
-
-    def prepare_new_data(self, input_properties: list[Property]):
-        scoring_data = pd.DataFrame([self._prepare_new_data(p) for p in input_properties])
-        scoring_data["lodgement-year"] = pd.to_datetime(scoring_data["lodgement-date"]).dt.year
-        scoring_data["lodgement-month"] = pd.to_datetime(scoring_data["lodgement-date"]).dt.month
-
-        scoring_data["id"] = scoring_data["uprn"].copy()
-
-        return scoring_data
-
     @staticmethod
     def calculate_percentage_decrease(start_efficiency, end_efficiency, consumption_averages):
 
diff --git a/etl/bill_savings/KwhData.py b/etl/bill_savings/KwhData.py
index 3c68f33f..39461c81 100644
--- a/etl/bill_savings/KwhData.py
+++ b/etl/bill_savings/KwhData.py
@@ -5,6 +5,7 @@ from datetime import datetime
 from tqdm import tqdm
 from utils.logger import setup_logger
 from utils.s3 import list_files_in_s3_folder, read_pickle_from_s3, save_dataframe_to_s3_parquet
+from backend.Property import Property
 
 logger = setup_logger()
 
@@ -29,7 +30,7 @@ class KwhData:
         'heating-cost-potential', 'hot-water-cost-current', 'current-energy-efficiency'
     ]
 
-    def __init__(self, bucket):
+    def __init__(self, bucket=None):
         self.run_date = datetime.now().strftime("%Y-%m-%d")
         self.bucket = bucket
         self.data = None
@@ -144,6 +145,8 @@ class KwhData:
         This method can be used to transform the training data, or new epcs within the backend engine
         :return:
         """
+        if save and self.bucket is None:
+            raise Exception("bucket not set, cannot save data")
 
         # TODO: New is a temporary parameter, which will transform the epc descriptions to their transformed features
         #       in anticipation of the new model
@@ -216,3 +219,66 @@ class KwhData:
                 file_key=self.model_training_data_filepath,
                 df=data
             )
+            return
+
+        return data
+
+    @staticmethod
+    def _prepare_epc(p: Property):
+        """
+        Given an instance of the property class, this method will ensure that the EPC is ready for scoring with the
+        kwh models. In the backend, we perform some cleaning and transformation on an EPC so we just ensure that the
+        data is in the format required by the model
+        :return: copy of p.data with numeric fields as floats, Y/N flags and placeholders for missing floor data
+        """
+        # Work on a copy of the property's data; the fields below are reassigned on that copy
+        epc = p.data.copy()
+        numeric_cols = [
+            'current-energy-efficiency',
+            'potential-energy-efficiency', 'environment-impact-current',
+            'environment-impact-potential', 'energy-consumption-current',
+            'energy-consumption-potential', 'co2-emissions-current',
+            'co2-emiss-curr-per-floor-area', 'co2-emissions-potential',
+            'lighting-cost-current', 'lighting-cost-potential',
+            'heating-cost-current', 'heating-cost-potential',
+            'hot-water-cost-current', 'hot-water-cost-potential',
+            'total-floor-area', 'multi-glaze-proportion',
+            'extension-count', 'number-habitable-rooms', 'number-heated-rooms',
+            'low-energy-lighting', 'number-open-fireplaces',
+            'wind-turbine-count', 'unheated-corridor-length',
+            'floor-height', 'photo-supply', 'fixed-lighting-outlets-count',
+            'low-energy-fixed-light-count',
+        ]
+        for v in numeric_cols:
+            if epc[v] is not None:  # None is left as-is; every other value is passed to float()
+                epc[v] = float(epc[v])
+
+        bools_to_remap = ['mains-gas-flag', 'flat-top-storey']
+        bool_map = {
+            True: "Y",
+            False: "N",
+            None: "N",  # a missing flag is treated as "N"
+            "Y": "Y",
+            "N": "N"
+        }
+        for v in bools_to_remap:
+            epc[v] = bool_map[epc[v]]  # any value that is not a key of bool_map raises KeyError
+
+        no_data = {  # NOTE(review): the two placeholders are spelled differently - presumably intentional, confirm
+            "floor-level": "NODATA!",
+            "floor-energy-eff": "NO DATA!"
+        }
+        for v, fill_val in no_data.items():
+            if pd.isnull(epc[v]):
+                epc[v] = fill_val
+
+        return epc
+
+    def prepare_epc(self, input_properties: list[Property]):  # builds one prepared EPC row per property
+        scoring_data = pd.DataFrame([self._prepare_epc(p) for p in input_properties])
+        scoring_data["lodgement-year"] = pd.to_datetime(scoring_data["lodgement-date"]).dt.year
+        scoring_data["lodgement-month"] = pd.to_datetime(scoring_data["lodgement-date"]).dt.month
+        # Expose the uprn under an "id" column as well
+        scoring_data["id"] = scoring_data["uprn"].copy()
+
+        return scoring_data

From f3c53847bf9f14e60c7b058d7a2a1283edc0ea0d Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 9 Aug 2024 12:29:20 +0100
Subject: [PATCH 080/182] adding in new kwh client

---
 backend/Property.py                        | 10 +--
 backend/app/plan/router.py                 | 11 +--
 etl/bill_savings/EnergyConsumptionModel.py | 31 --------
 etl/bill_savings/KwhData.py                | 82 +++++++++++++++++++++-
 4 files changed, 91 insertions(+), 43 deletions(-)

diff --git a/backend/Property.py b/backend/Property.py
index 497d976a..5c065458 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -585,14 +585,14 @@ class Property:
     def get_components(
         self,
         cleaned,
-        energy_consumption_client,
+        kwh_client,
         kwh_predictions
     ):
         """
         Given the cleaning that has been performed, we'll use this to identify the property
         components, from roof to walls to windows, heating and hot water
         :param cleaned: This is the dictionary of components found in cleaner.cleaned
-        :param energy_consumption_client: The client that will be used to convert the energy costs to today's costs
+        :param kwh_client: The client that will be used to convert the energy costs to today's costs
         :param kwh_predictions: Contains the kwh predictions for heating and hot water
         :return:
         """
@@ -658,7 +658,7 @@ class Property:
         self.set_windows_count()
         self.set_energy_source()
         self.find_energy_sources()
-        self.set_current_energy_bill(energy_consumption_client, kwh_predictions)
+        self.set_current_energy_bill(kwh_client, kwh_predictions)
 
     def set_solar_panel_configuration(
         self, solar_panel_configuration, roof_area
@@ -671,7 +671,7 @@ class Property:
         # We also set the roof area
         self.roof_area = roof_area
 
-    def set_current_energy_bill(self, energy_consumption_client, kwh_predictions):
+    def set_current_energy_bill(self, kwh_client, kwh_predictions):
         """
         Given what we know about the property now, estimates the current energy consumption using the UCL paper
         https://www.sciencedirect.com/science/article/pii/S0378778823002542
@@ -683,7 +683,7 @@ class Property:
         # 2) Predicted KwH
 
         # Today's costs
-        todays_lighting_cost = energy_consumption_client.convert_cost_to_today(
+        todays_lighting_cost = kwh_client.convert_cost_to_today(
             original_cost=float(self.data["lighting-cost-current"]),
             lodgement_date=pd.Timestamp(self.epc_record.prepared_epc["lodgement_date"]).tz_localize(None)
         )
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 8a9cbd53..56b4909e 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -431,9 +431,11 @@ async def trigger_plan(body: PlanTriggerRequest):
             environment=get_settings().ENVIRONMENT
         )
 
+        kwh_client = KwhData(bucket=get_settings().DATA_BUCKET, read_consumption_data=True)
+
         model_api = ModelApi(portfolio_id=body.portfolio_id, timestamp=created_at)
 
-        epcs_for_scoring = KwhData().transform(data=KwhData().prepare_epc(input_properties), cleaned=cleaned)
+        epcs_for_scoring = kwh_client.transform(data=kwh_client.prepare_epc(input_properties), cleaned=cleaned)
 
         kwh_predictions = model_api.predict_all(
             df=epcs_for_scoring,
@@ -444,14 +446,13 @@ async def trigger_plan(body: PlanTriggerRequest):
         )
 
         # Insert the spatial data
+        logger.info("Getting spatial data")
         input_properties = OpenUprnClient.set_spatial_data(input_properties, bucket_name=get_settings().DATA_BUCKET)
 
-        logger.info("Getting spatial data")
+        logger.info("Setting property components")
         for p in tqdm(input_properties):
             p.get_components(
-                cleaned=cleaned,
-                energy_consumption_client=energy_consumption_client,
-                kwh_predictions=kwh_predictions
+                cleaned=cleaned, kwh_client=kwh_client, kwh_predictions=kwh_predictions
             )
 
         logger.info("Performing solar analysis")
diff --git a/etl/bill_savings/EnergyConsumptionModel.py b/etl/bill_savings/EnergyConsumptionModel.py
index 25bd04ed..4daf2b31 100644
--- a/etl/bill_savings/EnergyConsumptionModel.py
+++ b/etl/bill_savings/EnergyConsumptionModel.py
@@ -126,37 +126,6 @@ class EnergyConsumptionModel:
         self.retail_price_comparison = pd.DataFrame(data_rows, columns=header)
         self.retail_price_comparison['Date'] = pd.to_datetime(self.retail_price_comparison['Date'], errors='coerce')
 
-    def convert_cost_to_today(self, original_cost, lodgement_date):
-        """
-        Given energy costs in an EPC, this function converts that energy cost to a figure based on today's energy costs
-        (or as close to today as possible)
-        :param original_cost: The original energy cost
-        :param lodgement_date: The date the EPC was lodged
-        :return:
-        """
-        closest_date = self.retail_price_comparison.iloc[
-            (self.retail_price_comparison['Date'] - lodgement_date).abs().argsort()[:1]
-        ]['Date'].values[0]
-        closest_date = pd.Timestamp(closest_date)
-
-        # Extract the tariff price on the closest date
-        tariff_2024 = self.retail_price_comparison[
-            self.retail_price_comparison['Date'] == closest_date
-            ]['Average standard variable tariff (Large legacy suppliers)'].values[0]
-
-        # Extract the latest available tariff price
-        latest_tariff = self.retail_price_comparison[
-            'Average standard variable tariff (Large legacy suppliers)'
-        ].iloc[-1]
-
-        # Calculate the ratio
-        ratio = float(latest_tariff) / float(tariff_2024)
-
-        # Calculate the updated heating cost
-        updated_cost = original_cost * ratio
-
-        return updated_cost
-
     def read_dataset(self, file_path):
         """Reads the dataset from the specified file path."""
         logger.info(f"Reading dataset from {file_path}")
diff --git a/etl/bill_savings/KwhData.py b/etl/bill_savings/KwhData.py
index 39461c81..5563014b 100644
--- a/etl/bill_savings/KwhData.py
+++ b/etl/bill_savings/KwhData.py
@@ -4,7 +4,10 @@ import numpy as np
 from datetime import datetime
 from tqdm import tqdm
 from utils.logger import setup_logger
-from utils.s3 import list_files_in_s3_folder, read_pickle_from_s3, save_dataframe_to_s3_parquet
+from utils.s3 import (
+    list_files_in_s3_folder, read_pickle_from_s3, save_dataframe_to_s3_parquet, read_dataframe_from_s3_parquet,
+    read_csv_from_s3
+)
 from backend.Property import Property
 
 logger = setup_logger()
@@ -30,7 +33,7 @@ class KwhData:
         'heating-cost-potential', 'hot-water-cost-current', 'current-energy-efficiency'
     ]
 
-    def __init__(self, bucket=None):
+    def __init__(self, bucket=None, read_consumption_data=False):
         self.run_date = datetime.now().strftime("%Y-%m-%d")
         self.bucket = bucket
         self.data = None
@@ -39,6 +42,50 @@ class KwhData:
         self.consumption_averages_filepath = None
         self.model_training_data_filepath = None
 
+        self.consumption_averages = None
+        self.retail_price_comparison = None
+        if read_consumption_data:
+            self.get_consumption_data()
+            self.read_retail_price_comparison()
+
+    def get_consumption_data(self):
+        """Load the most recent consumption_averages.parquet found under energy_consumption/ in self.bucket."""
+        # Look for the latest version of this file; the run date is parsed from the second path segment of each key
+        s3_contents = list_files_in_s3_folder(bucket_name=self.bucket, folder_name="energy_consumption/")
+        consumption_averages = [
+            {"run_date": pd.to_datetime(x.split("/")[1]), "filepath": x}
+            for x in s3_contents if "consumption_averages.parquet" in x
+        ]
+        # Sort ascending by run date, so that the last entry is the most recent run
+        consumption_averages = sorted(consumption_averages, key=lambda x: x["run_date"])
+        if not consumption_averages:
+            raise ValueError("No consumption averages data found, something went wrong")
+
+        self.consumption_averages = read_dataframe_from_s3_parquet(
+            bucket_name=self.bucket,
+            file_key=consumption_averages[-1]["filepath"]
+        )
+
+    def read_retail_price_comparison(self):  # loads the price csv from s3 into self.retail_price_comparison
+        data = read_csv_from_s3(
+            bucket_name=self.bucket,
+            filepath="energy_consumption/retail-price-comparison.csv"
+        )
+        header = ['Date', 'Average standard variable tariff (Large legacy suppliers)',
+                  'Average standard variable tariff (Other suppliers)', 'Average fixed tariff',
+                  'Cheapest tariff (Large legacy suppliers)', 'Cheapest tariff (All suppliers)',
+                  'Cheapest tariff (Basket)', 'Default tariff cap level']
+
+        # Extract data rows, skipping the first one: the date sits under the BOM-prefixed key, the values under None
+        data_rows = []
+        for row in data[1:]:
+            date = row['\ufeff"']
+            values = row[None]
+            data_rows.append([date] + values)
+
+        self.retail_price_comparison = pd.DataFrame(data_rows, columns=header)
+        self.retail_price_comparison['Date'] = pd.to_datetime(self.retail_price_comparison['Date'], errors='coerce')
+
     @staticmethod
     def extract_kwh_value(text: str):
         """
@@ -282,3 +329,34 @@ class KwhData:
         scoring_data["id"] = scoring_data["uprn"].copy()
 
         return scoring_data
+
+    def convert_cost_to_today(self, original_cost, lodgement_date):
+        """
+        Given energy costs in an EPC, this function converts that energy cost to a figure based on today's energy costs
+        (or as close to today as possible), by scaling it with the change in the average standard variable tariff
+        :param original_cost: The original energy cost
+        :param lodgement_date: The date the EPC was lodged (anything that pd.Timestamp accepts)
+        :return: original_cost multiplied by (most recent tariff / tariff nearest to lodgement_date)
+        :raises ValueError: if no tariff row has a usable date
+        """
+        tariff_column = 'Average standard variable tariff (Large legacy suppliers)'
+
+        # Dates are parsed with errors='coerce', so unparseable rows hold NaT. Drop them and order chronologically
+        # so that neither the closest-date lookup nor the "latest" row depends on the order of the source file
+        tariffs = self.retail_price_comparison.dropna(subset=['Date']).sort_values('Date', kind='stable')
+        if tariffs.empty:
+            raise ValueError("No dated rows in the retail price comparison data")
+
+        # Tariff on the date closest to lodgement (a string lodgement date is coerced to a Timestamp first)
+        date_gaps = (tariffs['Date'] - pd.Timestamp(lodgement_date)).abs()
+        tariff_at_lodgement = tariffs.loc[date_gaps.idxmin(), tariff_column]
+
+        # Latest available tariff price
+        latest_tariff = tariffs[tariff_column].iloc[-1]
+
+        # Scale the original cost by how much the tariff has moved since lodgement
+        # NOTE(review): float() assumes the tariff cells are plain numeric values - confirm against the CSV
+        ratio = float(latest_tariff) / float(tariff_at_lodgement)
+        updated_cost = original_cost * ratio
+
+        return updated_cost

From fe80fa3036689ed7e10887dab534c3f6e217647c Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 9 Aug 2024 12:32:33 +0100
Subject: [PATCH 081/182] renamed set_features

---
 backend/Property.py        | 2 +-
 backend/app/plan/router.py | 9 +++------
 2 files changed, 4 insertions(+), 7 deletions(-)

diff --git a/backend/Property.py b/backend/Property.py
index 5c065458..96e3a308 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -582,7 +582,7 @@ class Property:
 
         return output
 
-    def get_components(
+    def set_features(
         self,
         cleaned,
         kwh_client,
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 56b4909e..d0922a2b 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -437,7 +437,7 @@ async def trigger_plan(body: PlanTriggerRequest):
 
         epcs_for_scoring = kwh_client.transform(data=kwh_client.prepare_epc(input_properties), cleaned=cleaned)
 
-        kwh_predictions = model_api.predict_all(
+        kwh_preds = model_api.predict_all(
             df=epcs_for_scoring,
             bucket=get_settings().DATA_BUCKET,
             prediction_buckets=get_prediction_buckets(),
@@ -449,11 +449,8 @@ async def trigger_plan(body: PlanTriggerRequest):
         logger.info("Getting spatial data")
         input_properties = OpenUprnClient.set_spatial_data(input_properties, bucket_name=get_settings().DATA_BUCKET)
 
-        logger.info("Setting property components")
-        for p in tqdm(input_properties):
-            p.get_components(
-                cleaned=cleaned, kwh_client=kwh_client, kwh_predictions=kwh_predictions
-            )
+        logger.info("Setting property features")
+        [p.set_features(cleaned=cleaned, kwh_client=kwh_client, kwh_predictions=kwh_preds) for p in input_properties]
 
         logger.info("Performing solar analysis")
         # TODO: Tidy this up

From d032263857aef4bc1b74ed1b2a652bb98bfb8813 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 9 Aug 2024 12:37:32 +0100
Subject: [PATCH 082/182] remove missing costs

---
 backend/Property.py        |  4 ++--
 backend/app/plan/router.py | 41 ++++++++++++++++++++++++--------------
 2 files changed, 28 insertions(+), 17 deletions(-)

diff --git a/backend/Property.py b/backend/Property.py
index 96e3a308..062dcc14 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -728,8 +728,8 @@ class Property:
         }
 
         unadjusted_heating_costs = {
-            "heating": float(todays_heating_cost),
-            "hot_water": float(todays_hot_water_cost),
+            "heating": None,
+            "hot_water": None,
             "lighting": float(todays_lighting_cost),
             "appliances": float(appliances_kwh) * AnnualBillSavings.ELECTRICITY_PRICE_CAP
         }
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index d0922a2b..d4dc3054 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -286,6 +286,31 @@ def create_epc_records(epc_searcher: SearchEpc, energy_assessment: dict):
     }, energy_assessment_is_newer
 
 
+def get_on_site_data(body: PlanTriggerRequest):
+    """
+    Load the optional on-site inputs referenced by the request from the plan trigger bucket
+    :param body: The request body; each of its *_file_path attributes is read only when it is truthy
+    :return: Tuple of (patches, already_installed, non_invasive_recommendations), where an entry is an
+        empty list when the corresponding file path was not supplied
+    """
+    optional_paths = (
+        body.patches_file_path,
+        body.already_installed_file_path,
+        body.non_invasive_recommendations_file_path,
+    )
+
+    loaded = []
+    for path in optional_paths:
+        if path:
+            loaded.append(read_csv_from_s3(bucket_name=get_settings().PLAN_TRIGGER_BUCKET, filepath=path))
+        else:
+            loaded.append([])
+
+    patches, already_installed, non_invasive_recommendations = loaded
+
+    return patches, already_installed, non_invasive_recommendations
+
+
 router = APIRouter(
     prefix="/plan",
     tags=["plan"],
@@ -307,21 +332,7 @@ async def trigger_plan(body: PlanTriggerRequest):
         logger.info("Getting the inputs")
         plan_input = read_csv_from_s3(bucket_name=get_settings().PLAN_TRIGGER_BUCKET, filepath=body.trigger_file_path)
         # If we have patches or overrides, we should read them in here
-        patches = []
-        if body.patches_file_path:
-            patches = read_csv_from_s3(bucket_name=get_settings().PLAN_TRIGGER_BUCKET, filepath=body.patches_file_path)
-
-        already_installed = []
-        if body.already_installed_file_path:
-            already_installed = read_csv_from_s3(
-                bucket_name=get_settings().PLAN_TRIGGER_BUCKET, filepath=body.already_installed_file_path
-            )
-
-        non_invasive_recommendations = []
-        if body.non_invasive_recommendations_file_path:
-            non_invasive_recommendations = read_csv_from_s3(
-                bucket_name=get_settings().PLAN_TRIGGER_BUCKET, filepath=body.non_invasive_recommendations_file_path
-            )
+        patches, already_installed, non_invasive_recommendations = get_on_site_data(body)
 
         cleaning_data = read_dataframe_from_s3_parquet(
             bucket_name=get_settings().DATA_BUCKET, file_key="sap_change_model/cleaning_dataset.parquet",

From a5e6dc3399ce98634a0d5f11508c3d0806ea7a33 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 9 Aug 2024 14:52:38 +0100
Subject: [PATCH 083/182] added solar api back

---
 backend/Property.py        |   3 +-
 backend/app/plan/router.py | 198 +++++++++++++++++++------------------
 2 files changed, 101 insertions(+), 100 deletions(-)

diff --git a/backend/Property.py b/backend/Property.py
index 062dcc14..e65ba272 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -738,12 +738,11 @@ class Property:
             k: AnnualBillSavings.adjust_energy_to_metered(
                 epc_energy=v,
                 current_epc_rating=self.data["current-energy-rating"],
-            ) for k, v in unadjusted_heating_costs.items()
+            ) for k, v in unadjusted_heating_costs.items() if v is not None
         }
 
         # Sum up the adjusted kwh figures
         self.current_adjusted_energy = sum(list(adjusted_kwh_estimates.values()))
-        self.current_energy_bill = sum(list(adjusted_heating_costs.values()))
 
         self.energy_cost_estimates = {
             "adjusted": adjusted_heating_costs,
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index d4dc3054..f92d5a10 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -470,7 +470,6 @@ async def trigger_plan(body: PlanTriggerRequest):
         #       extensions, since it doesn't seem to do a great job
         # TODO: For simple properties, we should do a comparison/check between the solar API's roof area and the
         #       basic estimate of roof area
-        # TODO: TEMP SWITCHED OFF
         building_ids = [
             {
                 "building_id": p.building_id,
@@ -508,112 +507,115 @@ async def trigger_plan(body: PlanTriggerRequest):
                 "uprn": p.uprn
             } for p in input_properties if p.building_id is None
         ]
-        if False:
-            if building_ids:
-                # Find the unique longitude and latitude pairs for each building id
-                unique_coordinates = {}
-                building_uprns = {}
-                for entry in building_ids:
-                    building_id = entry['building_id']
-                    coordinate_pair = {'longitude': entry['longitude'], 'latitude': entry['latitude']}
+        if building_ids:
+            # Find the unique longitude and latitude pairs for each building id
+            unique_coordinates = {}
+            building_uprns = {}
+            for entry in building_ids:
+                building_id = entry['building_id']
+                coordinate_pair = {'longitude': entry['longitude'], 'latitude': entry['latitude']}
 
-                    if building_id not in unique_coordinates:
-                        unique_coordinates[building_id] = []
+                if building_id not in unique_coordinates:
+                    unique_coordinates[building_id] = []
 
-                    if coordinate_pair not in unique_coordinates[building_id]:
-                        unique_coordinates[building_id].append(coordinate_pair)
+                if coordinate_pair not in unique_coordinates[building_id]:
+                    unique_coordinates[building_id].append(coordinate_pair)
 
-                    if building_id not in building_uprns:
-                        building_uprns[building_id] = []
+                if building_id not in building_uprns:
+                    building_uprns[building_id] = []
 
-                    if entry['uprn'] not in building_uprns[building_id]:
-                        building_uprns[building_id].append(
-                            {
-                                "uprn": entry['uprn'], "longitude": entry['longitude'], "latitude": entry['latitude']
-                            }
+                if entry['uprn'] not in building_uprns[building_id]:
+                    building_uprns[building_id].append(
+                        {
+                            "uprn": entry['uprn'], "longitude": entry['longitude'], "latitude": entry['latitude']
+                        }
+                    )
+
+            solar_panel_configuration = {}
+            for building_id, coordinates in unique_coordinates.items():
+                if len(coordinates) > 1:
+                    raise NotImplementedError("more than one coordinate for a building - handle me")
+
+                coordinates = coordinates[0]
+                energy_consumption = sum(
+                    [entry['energy_consumption'] for entry in building_ids if entry['building_id'] == building_id]
+                )
+                solar_api_client.get(
+                    longitude=coordinates["longitude"],
+                    latitude=coordinates["latitude"],
+                    energy_consumption=energy_consumption,
+                    is_building=True,
+                    session=session
+                )
+                solar_panel_configuration[building_id] = {
+                    "insights_data": solar_api_client.insights_data,
+                    "panel_performance": solar_api_client.panel_performance,
+                    "n_units": len([entry for entry in building_ids if entry['building_id'] == building_id])
+                }
+
+                # Store the data in the database
+                # TODO: Rather than just doing a straight insert, we should overwrite what's already there if it
+                #  exists
+                solar_api_client.save_to_db(
+                    session=session, uprns_to_location=building_uprns[building_id], scenario_type="building"
+                )
+
+                # Insert this into the properties that have this building id
+                for p in input_properties:
+                    if p.building_id == building_id:
+                        unit_solar_panel_configuration = solar_panel_configuration[building_id].copy()
+
+                        unit_solar_panel_configuration["unit_share_of_energy"] = (
+                            [x for x in building_ids if x["property_id"] == p.id][0]["energy_consumption"] /
+                            energy_consumption
                         )
+                        p.set_solar_panel_configuration(unit_solar_panel_configuration)
 
-                solar_panel_configuration = {}
-                for building_id, coordinates in unique_coordinates.items():
-                    if len(coordinates) > 1:
-                        raise NotImplementedError("more than one coordinate for a building - handle me")
+        if individual_units:
+            # Model the solar potential at the property level
+            for unit in individual_units:
+                property_instance = [p for p in input_properties if p.id == unit["property_id"]][0]
+                # At this level, we check if the property is suitable for solar and if not, skip
+                if not property_instance.is_solar_pv_valid():
+                    continue
 
-                    coordinates = coordinates[0]
-                    energy_consumption = sum(
-                        [entry['energy_consumption'] for entry in building_ids if entry['building_id'] == building_id]
-                    )
-                    solar_api_client.get(
-                        longitude=coordinates["longitude"],
-                        latitude=coordinates["latitude"],
-                        energy_consumption=energy_consumption,
-                        is_building=True,
-                        session=session
-                    )
-                    solar_panel_configuration[building_id] = {
+                # We check if we have a solar non-invasive recommendation
+                if [r for r in property_instance.non_invasive_recommendations if r["type"] == "solar_pv"]:
+                    continue
+
+                solar_api_client.get(
+                    longitude=unit["longitude"],
+                    latitude=unit["latitude"],
+                    energy_consumption=unit["energy_consumption"],
+                    is_building=False,
+                    session=session,
+                    uprn=unit["uprn"],
+                    property_instance=property_instance
+                )
+
+                # Store the data in the database
+                # TODO: Rather than just doing a straight insert, we should overwrite what's already there if it
+                #  exists
+                solar_api_client.save_to_db(
+                    session=session,
+                    uprns_to_location=[
+                        {
+                            "uprn": property_instance.uprn,
+                            "longitude": property_instance.spatial["longitude"],
+                            "latitude": property_instance.spatial["latitude"]
+                        }
+                    ],
+                    scenario_type="unit"
+                )
+
+                property_instance.set_solar_panel_configuration(
+                    solar_panel_configuration={
                         "insights_data": solar_api_client.insights_data,
                         "panel_performance": solar_api_client.panel_performance,
-                        "n_units": len([entry for entry in building_ids if entry['building_id'] == building_id])
-                    }
-
-                    # Store the data in the database
-                    # TODO: Rather than just doing a straight insert, we should overwrite what's already there if it
-                    #  exists
-                    solar_api_client.save_to_db(
-                        session=session, uprns_to_location=building_uprns[building_id], scenario_type="building"
-                    )
-
-                    # Insert this into the properties that have this building id
-                    for p in input_properties:
-                        if p.building_id == building_id:
-                            unit_solar_panel_configuration = solar_panel_configuration[building_id].copy()
-
-                            unit_solar_panel_configuration["unit_share_of_energy"] = (
-                                [x for x in building_ids if x["property_id"] == p.id][0]["energy_consumption"] /
-                                energy_consumption
-                            )
-                            p.set_solar_panel_configuration(unit_solar_panel_configuration)
-
-            if individual_units:
-                # Model the solar potential at the property level
-                for unit in individual_units:
-                    property_instance = [p for p in input_properties if p.id == unit["property_id"]][0]
-                    # At this level, we check if the property is suitable for solar and if now, skip
-                    if not property_instance.is_solar_pv_valid():
-                        continue
-
-                    solar_api_client.get(
-                        longitude=unit["longitude"],
-                        latitude=unit["latitude"],
-                        energy_consumption=unit["energy_consumption"],
-                        is_building=False,
-                        session=session,
-                        uprn=unit["uprn"],
-                        property_instance=property_instance
-                    )
-
-                    # Store the data in the database
-                    # TODO: Rather than just doing a straight insert, we should overwrite what's already there if it
-                    #  exists
-                    solar_api_client.save_to_db(
-                        session=session,
-                        uprns_to_location=[
-                            {
-                                "uprn": property_instance.uprn,
-                                "longitude": property_instance.spatial["longitude"],
-                                "latitude": property_instance.spatial["latitude"]
-                            }
-                        ],
-                        scenario_type="unit"
-                    )
-
-                    property_instance.set_solar_panel_configuration(
-                        solar_panel_configuration={
-                            "insights_data": solar_api_client.insights_data,
-                            "panel_performance": solar_api_client.panel_performance,
-                            "unit_share_of_energy": 1
-                        },
-                        roof_area=solar_api_client.roof_area
-                    )
+                        "unit_share_of_energy": 1
+                    },
+                    roof_area=solar_api_client.roof_area
+                )
 
         logger.info("Getting components and epc recommendations")
         recommendations = {}

From 4ebd516d2eae34b733d4deef0fa56331a81dbbe0 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 9 Aug 2024 14:57:29 +0100
Subject: [PATCH 084/182] refactoring router

---
 backend/app/plan/router.py | 58 +++++++++++++++++++-------------------
 1 file changed, 29 insertions(+), 29 deletions(-)

diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index f92d5a10..fbdc2323 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -661,15 +661,7 @@ async def trigger_plan(body: PlanTriggerRequest):
             for key, scored in predictions_dict.items():
                 all_predictions[key] = pd.concat([all_predictions[key], scored])
 
-        # We now produce predictions for the kwh models
-
-        # Insert the predictions into the recommendations and run the optimiser
-        # TODO: If a recommendation has a negative impact on SAP, we should remove it - this seems to have become a
-        #       possibility with heating system
-        # TODO: After optimising, if there are any cheap, quick win measures (e.g. insulate water tank with hot water
-        #      cylinder jacket), we should add these to the recommendations as default
-        raise Exception("Add the cost impacts into the cost model")
-        logger.info("Optimising recommendations")
+        # Insert the predictions into the recommendations, and get the impact summary
         scoring_epcs = []  # For scoring the kwh models
         for property_id in recommendations.keys():
             property_instance = [p for p in input_properties if p.id == property_id][0]
@@ -686,10 +678,33 @@ async def trigger_plan(body: PlanTriggerRequest):
             # at each phase
             property_scoring_epcs = property_instance.update_simulation_epcs(impact_summary)
             scoring_epcs.extend(property_scoring_epcs)
+            recommendations[property_id] = recommendations_with_impact
 
-            input_measures = prepare_input_measures(recommendations_with_impact, body.goal)
+        # We call the API with the scoring epcs
+        scoring_epcs = pd.DataFrame(scoring_epcs)
+        scoring_epcs = kwh_client.transform(data=scoring_epcs, cleaned=cleaned)
 
-            current_sap_points = int(property_instance.data["current-energy-efficiency"])
+        kwh_simulation_predictions = model_api.predict_all(
+            df=scoring_epcs,
+            bucket=get_settings().DATA_BUCKET,
+            prediction_buckets=get_prediction_buckets(),
+            model_prefixes=["heating_kwh_predictions", "hotwater_kwh_predictions"],
+        )
+
+        # TODO: Costing model, which should include today's costs!
+
+        # Insert the predictions into the recommendations and run the optimiser
+        # TODO: If a recommendation has a negative impact on SAP, we should remove it - this seems to have become a
+        #       possibility with heating system
+        # TODO: After optimising, if there are any cheap, quick win measures (e.g. insulate water tank with hot water
+        #      cylinder jacket), we should add these to the recommendations as default
+
+        for p in input_properties.keys():
+            if not recommendations[p.id]:
+                continue
+            input_measures = prepare_input_measures(recommendations[p.id], body.goal)
+
+            current_sap_points = int(p.data["current-energy-efficiency"])
             target_sap_points = epc_to_sap_lower_bound(body.goal_value)
             sap_gain = CostOptimiser.calculate_sap_gain_with_slack(target_sap_points - current_sap_points)
 
@@ -716,7 +731,7 @@ async def trigger_plan(body: PlanTriggerRequest):
                 "internal_wall_insulation", "external_wall_insulation", "cavity_wall_insulation"
             ]):
                 ventilation_rec = next(
-                    (r[0] for r in recommendations_with_impact if r[0]["type"] == "mechanical_ventilation"),
+                    (r[0] for r in recommendations[p.id] if r[0]["type"] == "mechanical_ventilation"),
                     None
                 )
 
@@ -730,29 +745,14 @@ async def trigger_plan(body: PlanTriggerRequest):
                     {**rec, "default": True if rec["recommendation_id"] in selected_recommendations else False}
                     for rec in recommendations_by_type
                 ]
-                for recommendations_by_type in recommendations_with_impact
+                for recommendations_by_type in recommendations[p.id]
             ]
 
             # We'll also unlist the recommendations so they're a bit easier to handle from here onwards
             final_recommendations = [
                 rec for recommendations_by_type in final_recommendations for rec in recommendations_by_type
             ]
-            recommendations[property_id] = final_recommendations
-
-        # We call the API with the scoring epcs
-        scoring_epcs = pd.DataFrame(scoring_epcs)
-        scoring_epcs = add_features_from_code(scoring_epcs)
-        scoring_epcs = add_estimate_annual_kwh(scoring_epcs)
-        # TODO: Drop all potential and env columns
-        kwh_simulation_predictions = model_api.predict_all(
-            df=scoring_epcs,
-            bucket=get_settings().DATA_BUCKET,
-            prediction_buckets=get_prediction_buckets(),
-            model_prefixes=["heating_kwh_predictions", "hotwater_kwh_predictions"],
-            extract_ids=True
-        )
-
-        # TODO: Costing model, which should include today's costs!
+            recommendations[p.id] = final_recommendations
 
         # We now insert into the recommendations
         for property_id in recommendations.keys():

From aa391966efe99d0697277923ebe9f9d872ae78d3 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 9 Aug 2024 15:15:26 +0100
Subject: [PATCH 085/182] recommendation fuel wip

---
 backend/app/plan/router.py | 125 +++++++++++++++----------------------
 backend/ml_models/api.py   |   7 ++-
 2 files changed, 56 insertions(+), 76 deletions(-)

diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index fbdc2323..e75e65a1 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -444,14 +444,17 @@ async def trigger_plan(body: PlanTriggerRequest):
 
         kwh_client = KwhData(bucket=get_settings().DATA_BUCKET, read_consumption_data=True)
 
-        model_api = ModelApi(portfolio_id=body.portfolio_id, timestamp=created_at)
+        model_api = ModelApi(
+            portfolio_id=body.portfolio_id,
+            timestamp=created_at,
+            prediction_buckets=get_prediction_buckets()
+        )
 
         epcs_for_scoring = kwh_client.transform(data=kwh_client.prepare_epc(input_properties), cleaned=cleaned)
 
         kwh_preds = model_api.predict_all(
             df=epcs_for_scoring,
             bucket=get_settings().DATA_BUCKET,
-            prediction_buckets=get_prediction_buckets(),
             model_prefixes=["heating_kwh_predictions", "hotwater_kwh_predictions"],
             extract_ids=False
         )
@@ -687,11 +690,57 @@ async def trigger_plan(body: PlanTriggerRequest):
         kwh_simulation_predictions = model_api.predict_all(
             df=scoring_epcs,
             bucket=get_settings().DATA_BUCKET,
-            prediction_buckets=get_prediction_buckets(),
             model_prefixes=["heating_kwh_predictions", "hotwater_kwh_predictions"],
         )
 
         # TODO: Costing model, which should include today's costs!
+        # We now insert into the recommendations
+        for property_id in recommendations.keys():
+            property_recommendations = recommendations[property_id]
+            property_instance = [p for p in input_properties if p.id == property_id][0]
+
+            kwh_impact_table = kwh_simulation_predictions["heating_kwh_predictions"][
+                kwh_simulation_predictions["heating_kwh_predictions"]["property_id"] == str(property_id)
+                ].merge(
+                kwh_simulation_predictions["hotwater_kwh_predictions"].drop(
+                    columns=["property_id", "recommendation_id", "phase"]
+                ),
+                how="inner",
+                on="id",
+                suffixes=("_heating", "_hotwater")
+            )
+
+            property_kwh = property_instance.energy_consumption_estimates["unadjusted"]
+
+            kwh_impact_table = pd.concat(
+                [
+                    pd.DataFrame(
+                        [
+                            {
+                                "id": None,
+                                "predictions_heating": property_kwh["heating"],
+                                "predictions_hotwater": property_kwh["hot_water"],
+                            }
+                        ]
+                    ),
+                    kwh_impact_table
+                ]
+            )
+            # We adjust the predictions with the UCL model
+            for k in ["heating", "hotwater"]:
+                kwh_impact_table[f"adjusted_{k}"] = kwh_impact_table[f"predictions_{k}"].apply(
+                    lambda x: AnnualBillSavings.adjust_energy_to_metered(
+                        epc_energy=x, current_epc_rating=property_instance.data["current-energy-rating"]
+                    )
+                )
+
+            kwh_impact_table["heating_fuel"] = property_instance.heating_energy_source
+            kwh_impact_table["hotwater_fuel"] = property_instance.hot_water_energy_source
+
+            # We now deduce if any of the recommendations result in a change of fuel type
+            for recs in property_recommendations:
+                for rec in recs:
+                    print(rec["description_simulation"])
 
         # Insert the predictions into the recommendations and run the optimiser
         # TODO: If a recommendation has a negative impact on SAP, we should remove it - this seems to have become a
@@ -754,70 +803,6 @@ async def trigger_plan(body: PlanTriggerRequest):
             ]
             recommendations[p.id] = final_recommendations
 
-        # We now insert into the recommendations
-        for property_id in recommendations.keys():
-            property_recommendations = recommendations[property_id]
-            property_instance = [p for p in input_properties if p.id == property_id][0]
-            # The predicted kwhs are without appliances
-            consumption = property_instance.energy_consumption_estimates["adjusted"]
-            # Starting consumption is the sum of the consumption values, without appliances
-            starting_heating = consumption["heating"]
-            starting_hotwater = consumption["hot_water"]
-            property_kwh_predictions = {
-                k: kwh_simulation_predictions[k][kwh_simulation_predictions[k]["property_id"] == str(property_id)]
-                for k in ['heating_kwh_predictions', 'hotwater_kwh_predictions']
-            }
-            # We adjust the predictions
-            from backend.ml_models.AnnualBillSavings import AnnualBillSavings
-            for k in ["heating_kwh_predictions", "hotwater_kwh_predictions"]:
-                property_kwh_predictions[k]["adjusted"] = property_kwh_predictions[k]["predictions"].apply(
-                    lambda x: AnnualBillSavings.adjust_energy_to_metered(
-                        epc_energy=x, current_epc_rating=property_instance.data["current-energy-rating"]
-                    )
-                )
-
-            # For each recommendation, we difference the predictions
-            property_kwh_predictions["heating_kwh_predictions"]["savings"] = np.diff(
-                property_kwh_predictions["heating_kwh_predictions"]["adjusted"], prepend=starting_heating
-            )
-            property_kwh_predictions["hotwater_kwh_predictions"]["savings"] = np.diff(
-                property_kwh_predictions["hotwater_kwh_predictions"]["adjusted"], prepend=starting_hotwater
-            )
-
-            for recommendations_by_type in property_recommendations:
-                for rec in recommendations_by_type:
-                    # In the case of mechanical ventilation, there is no impact, and for low energy lighting we
-                    # calculate the savings inside of the recommendation itself
-                    if rec["type"] in ["mechanical_ventilation", "low_energy_lighing"]:
-                        continue
-
-                    heating_kwh_savings = property_kwh_predictions["heating_kwh_predictions"][
-                        (
-                            property_kwh_predictions["heating_kwh_predictions"]["recommendation_id"] ==
-                            rec["recommendation_id"]
-                        )
-                    ]["savings"].values[0]
-                    # This should be negative
-                    if heating_kwh_savings > 0:
-                        print("Positive heating kwh savings")
-                        # TODO: Raise an exception to investigate
-                        # raise Exception("Positive heating kwh savings")
-
-                    hot_water_kwh_savings = property_kwh_predictions["hotwater_kwh_predictions"][
-                        (
-                            property_kwh_predictions["hotwater_kwh_predictions"]["recommendation_id"] ==
-                            rec["recommendation_id"]
-                        )
-                    ]["savings"].values[0]
-
-                    # This should be negative
-                    if hot_water_kwh_savings > 0:
-                        print("Positive hot water kwh savings")
-                        # TODO: Raise an exception to investigate
-                        # raise Exception("Positive hot water kwh savings")
-
-                    rec["kwh_savings"] = abs(heating_kwh_savings + hot_water_kwh_savings)
-
         # 1) the property data
         # 2) the property details (epc)
         # 3) the recommendations
@@ -1154,12 +1139,6 @@ async def build_mds(body: MdsRequest):
         for chunk in tqdm(to_loop_over, total=len(to_loop_over)):
             predictions_dict = model_api.predict_all(
                 df=recommendations_scoring_data.iloc[chunk:chunk + SCORING_BATCH_SIZE],
-                bucket=get_settings().DATA_BUCKET,
-                prediction_buckets={
-                    "sap_change_predictions": get_settings().SAP_PREDICTIONS_BUCKET,
-                    "heat_demand_predictions": get_settings().HEAT_PREDICTIONS_BUCKET,
-                    "carbon_change_predictions": get_settings().CARBON_PREDICTIONS_BUCKET
-                }
             )
 
             # Append the predictions to the predictions dictionary
diff --git a/backend/ml_models/api.py b/backend/ml_models/api.py
index c401e0f4..fab28e89 100644
--- a/backend/ml_models/api.py
+++ b/backend/ml_models/api.py
@@ -32,6 +32,7 @@ class ModelApi:
         self,
         portfolio_id,
         timestamp,
+        prediction_buckets,
         base_url="https://api.dev.hestia.homes",
     ):
         """
@@ -46,6 +47,7 @@ class ModelApi:
         self.base_url = base_url
         self.portfolio_id = portfolio_id
         self.timestamp = timestamp
+        self.prediction_buckets = prediction_buckets
 
     @staticmethod
     def predictions_template():
@@ -125,7 +127,7 @@ class ModelApi:
         else:
             return None
 
-    def predict_all(self, df, bucket, prediction_buckets, model_prefixes=None, extract_ids=True) -> dict:
+    def predict_all(self, df, bucket, model_prefixes=None, extract_ids=True) -> dict:
 
         """
         For each model prefix, this method will upload the scoring data to s3 and then make a request to the
@@ -134,7 +136,6 @@ class ModelApi:
         a dictionary of panaas dataframes
         :param df:  Pandas dataframe with scoring data to be uploaded to s3
         :param bucket: Name of the bucket in s3 to upload to
-        :param prediction_buckets: Dictionary containing the prediction buckets for each model prefix
         :param model_prefixes: List of model prefixes to generate predictions for. If None, all model prefixes will be
         used
         :param extract_ids: Boolean to determine if the property_id and recommendation_id should be extracted from the
@@ -152,7 +153,7 @@ class ModelApi:
                 "s3://{DATA_BUCKET}/".format(DATA_BUCKET=bucket) + file_location, model_prefix
             )
 
-            predictions_bucket = prediction_buckets[model_prefix]
+            predictions_bucket = self.prediction_buckets[model_prefix]
 
             # Retrieve the predictions
             predictions_df = pd.DataFrame(

From a0eabd5f09825f86264d7213eb8cb5237ad7dd90 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 9 Aug 2024 15:45:14 +0100
Subject: [PATCH 086/182] removing weird ashp recommendation

---
 backend/app/plan/router.py                   | 45 ++++++++++++--------
 backend/ml_models/api.py                     | 23 ++++++++--
 recommendations/HeatingControlRecommender.py |  2 +-
 recommendations/Recommendations.py           |  5 +--
 4 files changed, 49 insertions(+), 26 deletions(-)

diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index e75e65a1..985588e9 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -642,7 +642,6 @@ async def trigger_plan(body: PlanTriggerRequest):
             recommendations_scoring_data.extend(p.recommendations_scoring_data)
 
         # TODO: Make sure that number_habitable_rooms has been dropped
-
         logger.info("Preparing data for scoring in sap change api")
         recommendations_scoring_data = pd.DataFrame(recommendations_scoring_data)
 
@@ -651,18 +650,11 @@ async def trigger_plan(body: PlanTriggerRequest):
                      "carbon_ending"]
         )
 
-        all_predictions = model_api.predictions_template()
-        to_loop_over = range(0, recommendations_scoring_data.shape[0], SCORING_BATCH_SIZE)
-        for chunk in tqdm(to_loop_over, total=len(to_loop_over)):
-            predictions_dict = model_api.predict_all(
-                df=recommendations_scoring_data.iloc[chunk:chunk + SCORING_BATCH_SIZE],
-                bucket=get_settings().DATA_BUCKET,
-                prediction_buckets=get_prediction_buckets(),
-            )
-
-            # Append the predictions to the predictions dictionary
-            for key, scored in predictions_dict.items():
-                all_predictions[key] = pd.concat([all_predictions[key], scored])
+        all_predictions = model_api.paginated_predictions(
+            data=recommendations_scoring_data,
+            bucket=get_settings().DATA_BUCKET,
+            batch_size=SCORING_BATCH_SIZE
+        )
 
         # Insert the predictions into the recommendations, and get the impact summary
         scoring_epcs = []  # For scoring the kwh models
@@ -687,14 +679,29 @@ async def trigger_plan(body: PlanTriggerRequest):
         scoring_epcs = pd.DataFrame(scoring_epcs)
         scoring_epcs = kwh_client.transform(data=scoring_epcs, cleaned=cleaned)
 
-        kwh_simulation_predictions = model_api.predict_all(
-            df=scoring_epcs,
+        # There should be no difference between index 9 and index 8, apart from photo-supply (other that sap, etc)
+        a = scoring_epcs[scoring_epcs.index == 6]
+        b = scoring_epcs[scoring_epcs.index == 11]
+        difference = []
+        for col in a.columns:
+            if a[col].values[0] != b[col].values[0]:
+                difference.append(
+                    {
+                        "col": col,
+                        "without_solar": a[col].values[0],
+                        "with_solar": b[col].values[0]
+                    }
+                )
+        difference = pd.DataFrame(difference)
+
+        kwh_simulation_predictions = model_api.paginated_predictions(
+            data=scoring_epcs,
             bucket=get_settings().DATA_BUCKET,
             model_prefixes=["heating_kwh_predictions", "hotwater_kwh_predictions"],
+            batch_size=SCORING_BATCH_SIZE
         )
 
-        # TODO: Costing model, which should include today's costs!
-        # We now insert into the recommendations
+        # We now insert kwh estimates and costs into the recommendations
         for property_id in recommendations.keys():
             property_recommendations = recommendations[property_id]
             property_instance = [p for p in input_properties if p.id == property_id][0]
@@ -1128,7 +1135,9 @@ async def build_mds(body: MdsRequest):
                      "carbon_ending"]
         )
 
-        model_api = ModelApi(portfolio_id=body.portfolio_id, timestamp=created_at)
+        model_api = ModelApi(
+            portfolio_id=body.portfolio_id, timestamp=created_at, prediction_buckets=get_prediction_buckets()
+        )
 
         all_predictions = {
             "sap_change_predictions": pd.DataFrame(),
diff --git a/backend/ml_models/api.py b/backend/ml_models/api.py
index fab28e89..e922d7fc 100644
--- a/backend/ml_models/api.py
+++ b/backend/ml_models/api.py
@@ -1,4 +1,5 @@
 import pandas as pd
+from tqdm import tqdm
 import requests
 from requests.exceptions import RequestException
 from utils.logger import setup_logger
@@ -55,9 +56,8 @@ class ModelApi:
             "sap_change_predictions": pd.DataFrame(),
             "heat_demand_predictions": pd.DataFrame(),
             "carbon_change_predictions": pd.DataFrame(),
-            "lighting_cost_predictions": pd.DataFrame(),
-            "heating_cost_predictions": pd.DataFrame(),
-            "hot_water_cost_predictions": pd.DataFrame(),
+            "hotwater_kwh_predictions": pd.DataFrame(),
+            "heating_kwh_predictions": pd.DataFrame(),
         }
 
     def upload_scoring_data(self, df: pd.DataFrame, bucket: str, model_prefix: str) -> str:
@@ -179,3 +179,20 @@ class ModelApi:
             predictions[model_prefix] = predictions_df
 
         return predictions
+
+    def paginated_predictions(self, data, bucket, batch_size, model_prefixes=None, extract_ids=True):
+        all_predictions = self.predictions_template()
+        to_loop_over = range(0, data.shape[0], batch_size)
+        for chunk in tqdm(to_loop_over, total=len(to_loop_over)):
+            predictions_dict = self.predict_all(
+                df=data.iloc[chunk:chunk + batch_size],
+                bucket=bucket,
+                model_prefixes=model_prefixes,
+                extract_ids=extract_ids
+            )
+
+            # Append the predictions to the predictions dictionary
+            for key, scored in predictions_dict.items():
+                all_predictions[key] = pd.concat([all_predictions[key], scored])
+
+        return all_predictions
diff --git a/recommendations/HeatingControlRecommender.py b/recommendations/HeatingControlRecommender.py
index 6e827084..3e47c355 100644
--- a/recommendations/HeatingControlRecommender.py
+++ b/recommendations/HeatingControlRecommender.py
@@ -43,7 +43,7 @@ class HeatingControlRecommender:
             # For an ASHP, we can recommend time and temperature zone controls, as well as programmer, trvs and a bypass
             # which are common configurations for ASHPs
             self.recommend_time_temperature_zone_controls()
-            self.recommend_programmer_trvs_bypass()
+            # self.recommend_programmer_trvs_bypass()
 
     def recommend_room_heaters_electric_controls(self):
         """
diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py
index b8174ae0..588d2316 100644
--- a/recommendations/Recommendations.py
+++ b/recommendations/Recommendations.py
@@ -419,10 +419,7 @@ class Recommendations:
                     previous_phase_values_multiple = [x for x in impact_summary if x["phase"] == (rec["phase"] - 1)]
                     if len(previous_phase_values_multiple) != 1:
                         # Take an average of each of the previous phases
-                        keys_to_median = [
-                            "sap", "carbon", "heat_demand", "epc_heating_cost", "epc_hot_water_cost",
-                            "epc_lighting_cost"
-                        ]
+                        keys_to_median = ["sap", "carbon", "heat_demand"]
 
                         previous_phase_values = {}
                         for key in keys_to_median:

From 2efb2a4f3ed248ec619b099b91f06227fd5722ad Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 9 Aug 2024 18:21:23 +0100
Subject: [PATCH 087/182] implemented recommendation level kwh and cost savings

---
 backend/Property.py        |   4 +
 backend/app/plan/router.py | 178 ++++++++++++++++++++++++++++++++-----
 2 files changed, 160 insertions(+), 22 deletions(-)

diff --git a/backend/Property.py b/backend/Property.py
index e65ba272..bcb24325 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -183,6 +183,7 @@ class Property:
 
         self.recommendations_scoring_data = []
         self.simulation_epcs = {}
+        self.updated_simulation_epcs = []
 
         # This additional condition data should change how we pass kwargs to this. We should no longer need to pass
         # kwargs to this class, but instead, we should pass the energy assessment condition data
@@ -454,6 +455,9 @@ class Property:
             )
             updated_simulation_epcs.append(sim_epc)
 
+        # Keep the updated simulation EPCs on the instance as well as returning them
+        self.updated_simulation_epcs = updated_simulation_epcs
+
         return updated_simulation_epcs
 
     @staticmethod
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 985588e9..b3a385be 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -671,29 +671,14 @@ async def trigger_plan(body: PlanTriggerRequest):
 
             # We use the impact_summary to update the simulation_epcs with the new SAP, heat demand, carbon, cost etc
             # at each phase
-            property_scoring_epcs = property_instance.update_simulation_epcs(impact_summary)
-            scoring_epcs.extend(property_scoring_epcs)
+            property_instance.update_simulation_epcs(impact_summary)
+            scoring_epcs.extend(property_instance.updated_simulation_epcs)
             recommendations[property_id] = recommendations_with_impact
 
         # We call the API with the scoring epcs
         scoring_epcs = pd.DataFrame(scoring_epcs)
         scoring_epcs = kwh_client.transform(data=scoring_epcs, cleaned=cleaned)
 
-        # There should be no difference between index 9 and index 8, apart from photo-supply (other that sap, etc)
-        a = scoring_epcs[scoring_epcs.index == 6]
-        b = scoring_epcs[scoring_epcs.index == 11]
-        difference = []
-        for col in a.columns:
-            if a[col].values[0] != b[col].values[0]:
-                difference.append(
-                    {
-                        "col": col,
-                        "without_solar": a[col].values[0],
-                        "with_solar": b[col].values[0]
-                    }
-                )
-        difference = pd.DataFrame(difference)
-
         kwh_simulation_predictions = model_api.paginated_predictions(
             data=scoring_epcs,
             bucket=get_settings().DATA_BUCKET,
@@ -715,16 +700,42 @@ async def trigger_plan(body: PlanTriggerRequest):
                 how="inner",
                 on="id",
                 suffixes=("_heating", "_hotwater")
+            ).reset_index(drop=True)
+
+            # We adjust this table with the kwh estimates for low energy lighting kwh values, and solar kwh estimates
+            led_recommendation = pd.DataFrame([
+                {
+                    "phase": r["phase"],
+                    "recommendation_id": r["recommendation_id"],
+                    "lighting_kwh_savings": r["kwh_savings"] * GoogleSolarApi.SOLAR_CONSUMPTION_PROPORTION,
+                } for recs in property_recommendations for r in recs if r["type"] == "low_energy_lighting"
+            ], columns=["phase", "recommendation_id", "lighting_kwh_savings"])
+            solar_recommendations = pd.DataFrame([
+                {
+                    "phase": r["phase"],
+                    "recommendation_id": r["recommendation_id"],
+                    "solar_kwh_savings": r["initial_ac_kwh_per_year"] * GoogleSolarApi.SOLAR_CONSUMPTION_PROPORTION,
+                } for recs in property_recommendations for r in recs if r["type"] == "solar_pv"
+            ], columns=["phase", "recommendation_id", "solar_kwh_savings"])
+
+            # merge them on
+            kwh_impact_table = kwh_impact_table.merge(
+                led_recommendation, how="left", on=["phase", "recommendation_id"]
+            ).merge(
+                solar_recommendations, how="left", on=["phase", "recommendation_id"]
             )
 
             property_kwh = property_instance.energy_consumption_estimates["unadjusted"]
 
+            starting_dummy_id_value = -9999
             kwh_impact_table = pd.concat(
                 [
                     pd.DataFrame(
                         [
                             {
-                                "id": None,
+                                "id": starting_dummy_id_value,
+                                "phase": starting_dummy_id_value,
+                                "recommendation_id": starting_dummy_id_value,
                                 "predictions_heating": property_kwh["heating"],
                                 "predictions_hotwater": property_kwh["hot_water"],
                             }
@@ -732,7 +743,19 @@ async def trigger_plan(body: PlanTriggerRequest):
                     ),
                     kwh_impact_table
                 ]
-            )
+            ).sort_values(["phase", "recommendation_id"], ascending=True).reset_index(drop=True)
+
+            for i in range(0, len(kwh_impact_table)):
+                current_phase = kwh_impact_table.loc[i, 'phase']
+                previous_phase_id = (current_phase - 1) if (current_phase > 0) else -9999
+                previous_phase = kwh_impact_table[kwh_impact_table['phase'] == previous_phase_id]
+
+                if not previous_phase.empty:
+                    for col in ["predictions_heating", "predictions_hotwater"]:
+                        if kwh_impact_table.loc[i, col] > previous_phase[col].max():
+                            kwh_impact_table.loc[i, col] = previous_phase[col].max()
+
+            from backend.ml_models.AnnualBillSavings import AnnualBillSavings
             # We adjust the predictions with the UCL model
             for k in ["heating", "hotwater"]:
                 kwh_impact_table[f"adjusted_{k}"] = kwh_impact_table[f"predictions_{k}"].apply(
@@ -741,13 +764,124 @@ async def trigger_plan(body: PlanTriggerRequest):
                     )
                 )
 
-            kwh_impact_table["heating_fuel"] = property_instance.heating_energy_source
-            kwh_impact_table["hotwater_fuel"] = property_instance.hot_water_energy_source
+            ASHP_COP = 3
+            descriptions_to_fuel_types = {
+                "Air source heat pump, radiators, electric": {"fuel": "Electricity", "cop": ASHP_COP},
+                "Boiler and radiators, mains gas": {"fuel": 'Natural Gas', "cop": 0.9}
+            }
+
+            def map_descriptions_to_fuel(heating_description, hotwater_description):
+                mapped = descriptions_to_fuel_types[heating_description]
+                heating_fuel = mapped["fuel"]
+
+                if hotwater_description == "From main system":
+                    return {
+                        "heating_fuel_type": heating_fuel, "hotwater_fuel_type": heating_fuel,
+                        "heating_cop": mapped["cop"], "hotwater_cop": mapped["cop"]
+                    }
+
+                raise NotImplementedError("Implement me")
+
+            # For heating system recommendations, this could result in a fuel type change so we reflect that
+            fuel_mapping = pd.DataFrame([
+                {
+                    "id": epc["id"],
+                    **map_descriptions_to_fuel(epc["mainheat-description"], epc["hotwater-description"])
+                } for epc in property_instance.updated_simulation_epcs
+            ])
+
+            fuel_mapping = pd.concat(
+                [
+                    pd.DataFrame(
+                        [
+                            {
+                                "id": starting_dummy_id_value,
+                                **map_descriptions_to_fuel(
+                                    property_instance.data["mainheat-description"],
+                                    property_instance.data["hotwater-description"]
+                                )
+                            }
+                        ]
+                    ),
+                    fuel_mapping
+                ]
+            )
+
+            kwh_impact_table = kwh_impact_table.merge(
+                fuel_mapping, how="left", on="id"
+            ).sort_values(["phase", "recommendation_id"], ascending=True).reset_index(drop=True)
+
+            kwh_impact_table["heating_fuel_type"] = np.where(
+                kwh_impact_table["id"] == starting_dummy_id_value,
+                property_instance.heating_energy_source,
+                kwh_impact_table["heating_fuel_type"]
+            )
+
+            kwh_impact_table["hotwater_fuel_type"] = np.where(
+                kwh_impact_table["id"] == starting_dummy_id_value,
+                property_instance.hot_water_energy_source,
+                kwh_impact_table["hotwater_fuel_type"]
+            )
+
+            def calculate_recommendation_fuel_cost(kwh, fuel, cop):
+                if fuel == "Electricity":
+                    return (kwh / cop) * AnnualBillSavings.ELECTRICITY_PRICE_CAP
+
+                if fuel == "Natural Gas":
+                    return (kwh / cop) * AnnualBillSavings.GAS_PRICE_CAP
+
+            # We now calculate the fuel cost
+            for k in ["heating", "hotwater"]:
+                kwh_impact_table[f"{k}_cost"] = kwh_impact_table.apply(
+                    lambda x: calculate_recommendation_fuel_cost(
+                        x[f"adjusted_{k}"], x[f"{k}_fuel_type"], x[f"{k}_cop"]
+                    ), axis=1
+                )
+
+            # TODO: The impact of remapping EPC is huge!
 
             # We now deduce if any of the recommendations result in a change of fuel type
             for recs in property_recommendations:
                 for rec in recs:
-                    print(rec["description_simulation"])
+                    if rec["type"] == "mechanical_ventilation":
+                        continue
+
+                    rec_impact = kwh_impact_table[kwh_impact_table["recommendation_id"] == rec["recommendation_id"]]
+                    prevous_phase_id = (rec["phase"] - 1) if (rec["phase"] > 0) else starting_dummy_id_value
+                    previous_phase_impact = kwh_impact_table[kwh_impact_table["phase"] == prevous_phase_id]
+
+                    if rec["type"] == "solar_pv":
+                        rec["kwh_savings"] = rec_impact["solar_kwh_savings"].values[0]
+                        rec["energy_cost_savings"] = (
+                            rec_impact["solar_kwh_savings"].values[0] * AnnualBillSavings.ELECTRICITY_PRICE_CAP
+                        )
+                        continue
+
+                    heating_kwh_savings = (
+                        previous_phase_impact["adjusted_heating"].mean() - rec_impact["adjusted_heating"].values[0]
+                    )
+                    heating_cost_savings = (
+                        previous_phase_impact["heating_cost"].mean() - rec_impact["heating_cost"].values[0]
+                    )
+
+                    hotwater_kwh_savings = (
+                        previous_phase_impact["adjusted_hotwater"].mean() - rec_impact["adjusted_hotwater"].values[0]
+                    )
+                    hotwater_host = (
+                        previous_phase_impact["hotwater_cost"].mean() - rec_impact["hotwater_cost"].values[0]
+                    )
+
+                    total_kwh_savings = heating_kwh_savings + hotwater_kwh_savings
+                    energy_cost_savings = heating_cost_savings + hotwater_host
+
+                    if rec["type"] == "lighting":
+                        # In this case, we should probably just SKIP but check when we have one!
+                        raise Exception("Implement me 3")
+
+                    rec["kwh_savings"] = total_kwh_savings
+                    rec["energy_cost_savings"] = energy_cost_savings
+
+            # TODO: Given the default recommendations, calcualte a total kwh and cost saving for the property!!!
 
         # Insert the predictions into the recommendations and run the optimiser
         # TODO: If a recommendation has a negative impact on SAP, we should remove it - this seems to have become a

From f985f2b11ccf1f15271185f710e11d56a283b667 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 9 Aug 2024 18:23:39 +0100
Subject: [PATCH 088/182] need to calculate property default savings

---
 backend/app/plan/router.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index b3a385be..675aa59f 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -881,8 +881,6 @@ async def trigger_plan(body: PlanTriggerRequest):
                     rec["kwh_savings"] = total_kwh_savings
                     rec["energy_cost_savings"] = energy_cost_savings
 
-            # TODO: Given the default recommendations, calcualte a total kwh and cost saving for the property!!!
-
         # Insert the predictions into the recommendations and run the optimiser
         # TODO: If a recommendation has a negative impact on SAP, we should remove it - this seems to have become a
         #       possibility with heating system
@@ -944,6 +942,14 @@ async def trigger_plan(body: PlanTriggerRequest):
             ]
             recommendations[p.id] = final_recommendations
 
+        # # TODO: Given the default recommendations, calcualte a total kwh and cost saving for the property!!!
+        # default_savings = [
+        #     {
+        #         "kwh_savings": rec["kwh_savings"],
+        #         "energy_cost_savings": rec["energy_cost_savings"]
+        #     } for recs in property_recommendations for rec in recs if rec["default"]
+        # ]
+
         # 1) the property data
         # 2) the property details (epc)
         # 3) the recommendations

From 28fb19cd6ce0bf369743a62d86a6cf8eb1c31062 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 9 Aug 2024 20:49:04 +0100
Subject: [PATCH 089/182] fixed import error in data collection etl

---
 backend/app/plan/router.py          | 1 +
 etl/bill_savings/data_collection.py | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 675aa59f..dd4fa9a2 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -942,6 +942,7 @@ async def trigger_plan(body: PlanTriggerRequest):
             ]
             recommendations[p.id] = final_recommendations
 
+        raise Exception("Finish me!!")
         # # TODO: Given the default recommendations, calcualte a total kwh and cost saving for the property!!!
         # default_savings = [
         #     {
diff --git a/etl/bill_savings/data_collection.py b/etl/bill_savings/data_collection.py
index 85a403f1..0341b885 100644
--- a/etl/bill_savings/data_collection.py
+++ b/etl/bill_savings/data_collection.py
@@ -7,7 +7,7 @@ import inspect
 import pandas as pd
 from tqdm import tqdm
 from bs4 import BeautifulSoup
-from training_data.epc.settings import EARLIEST_EPC_DATE
+from etl.epc.settings import EARLIEST_EPC_DATE
 from pathlib import Path
 import numpy as np
 from utils.s3 import save_pickle_to_s3

From 5764175e321d8cb08477858a7afbeb1c467d73d3 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 9 Aug 2024 22:37:08 +0100
Subject: [PATCH 090/182] fixed property_non_invasive_recommendations:

---
 backend/app/plan/router.py | 55 ++++++++++++++++++++++----------------
 1 file changed, 32 insertions(+), 23 deletions(-)

diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index dd4fa9a2..3fbc2492 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -311,6 +311,29 @@ def get_on_site_data(body: PlanTriggerRequest):
     return patches, already_installed, non_invasive_recommendations
 
 
+def extract_propert_on_site_recommendations(config, already_installed, non_invasive_recommendations, uprn):
+    property_already_installed = next((
+        x for x in already_installed if
+        (x["address"] == config["address"]) and (x["postcode"] == config["postcode"])
+    ), {})
+
+    # Because we have some non-invasive recommendations that match on address and postcode, but not UPRN
+    # we need to check existence of uprn
+    has_uprn = "non_invasive_recommendations" in non_invasive_recommendations[0]
+    if has_uprn:
+        property_non_invasive_recommendations = next((
+            x for x in non_invasive_recommendations if
+            (x["uprn"] == uprn)
+        ), {})
+    else:
+        property_non_invasive_recommendations = next((
+            x for x in non_invasive_recommendations if
+            (x["address"] == config["address"]) and (x["postcode"] == config["postcode"])
+        ), {})
+
+    return property_already_installed, property_non_invasive_recommendations
+
+
 router = APIRouter(
     prefix="/plan",
     tags=["plan"],
@@ -394,15 +417,9 @@ async def trigger_plan(body: PlanTriggerRequest):
                 cleaning_data=cleaning_data
             )
 
-            property_already_installed = next((
-                x for x in already_installed if
-                (x["address"] == config["address"]) and (x["postcode"] == config["postcode"])
-            ), {})
-
-            property_non_invasive_recommendations = next((
-                x for x in non_invasive_recommendations if
-                (x["uprn"] == config["uprn"])
-            ), {})
+            property_already_installed, property_non_invasive_recommendations = extract_propert_on_site_recommendations(
+                config, already_installed, non_invasive_recommendations, uprn
+            )
 
             input_properties.append(
                 Property(
@@ -573,7 +590,6 @@ async def trigger_plan(body: PlanTriggerRequest):
                             energy_consumption
                         )
                         p.set_solar_panel_configuration(unit_solar_panel_configuration)
-
         if individual_units:
             # Model the solar potential at the property level
             for unit in individual_units:
@@ -887,7 +903,7 @@ async def trigger_plan(body: PlanTriggerRequest):
         # TODO: After optimising, if there are any cheap, quick win measures (e.g. insulate water tank with hot water
         #      cylinder jacket), we should add these to the recommendations as default
 
-        for p in input_properties.keys():
+        for p in input_properties:
             if not recommendations[p.id]:
                 continue
             input_measures = prepare_input_measures(recommendations[p.id], body.goal)
@@ -942,18 +958,11 @@ async def trigger_plan(body: PlanTriggerRequest):
             ]
             recommendations[p.id] = final_recommendations
 
-        raise Exception("Finish me!!")
-        # # TODO: Given the default recommendations, calcualte a total kwh and cost saving for the property!!!
-        # default_savings = [
-        #     {
-        #         "kwh_savings": rec["kwh_savings"],
-        #         "energy_cost_savings": rec["energy_cost_savings"]
-        #     } for recs in property_recommendations for rec in recs if rec["default"]
-        # ]
-
-        # 1) the property data
-        # 2) the property details (epc)
-        # 3) the recommendations
+            # With that complete, we now total the kwh and cost savings for the property
+            # total_kwh_savings = sum([rec["kwh_savings"] for rec in final_recommendations if rec["default"]])
+            # total_energy_cost_savings = sum(
+            #     [rec["energy_cost_savings"] for rec in final_recommendations if rec["default"]]
+            # )
 
         logger.info("Uploading recommendations to the database")
         # If we have any work to do, we create a new scenario

From 85f73eda5c828258efe3921d46465c71c3634aac Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 9 Aug 2024 23:58:21 +0100
Subject: [PATCH 091/182] fixing string non-invasive recs

---
 backend/app/plan/router.py | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 3fbc2492..6367fe1c 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -325,12 +325,26 @@ def extract_propert_on_site_recommendations(config, already_installed, non_invas
             x for x in non_invasive_recommendations if
             (x["uprn"] == uprn)
         ), {})
+
+        # We patch the non-invasive recs that are ['cavity_extract_and_refill']
     else:
         property_non_invasive_recommendations = next((
             x for x in non_invasive_recommendations if
             (x["address"] == config["address"]) and (x["postcode"] == config["postcode"])
         ), {})
 
+    if isinstance(property_non_invasive_recommendations["recommendations"], str):
+        import ast
+        property_non_invasive_recommendations["recommendations"] = ast.literal_eval(
+            property_non_invasive_recommendations["recommendations"]
+        )
+        transformed = []
+        for rec in property_non_invasive_recommendations["recommendations"]:
+            if isinstance(rec, str):
+                transformed.append({"type": rec, })
+
+        property_non_invasive_recommendations["recommendations"] = str(transformed)
+
     return property_already_installed, property_non_invasive_recommendations
 
 
@@ -469,8 +483,8 @@ async def trigger_plan(body: PlanTriggerRequest):
 
         epcs_for_scoring = kwh_client.transform(data=kwh_client.prepare_epc(input_properties), cleaned=cleaned)
 
-        kwh_preds = model_api.predict_all(
-            df=epcs_for_scoring,
+        kwh_preds = model_api.paginated_predictions(
+            data=epcs_for_scoring,
             bucket=get_settings().DATA_BUCKET,
             model_prefixes=["heating_kwh_predictions", "hotwater_kwh_predictions"],
             extract_ids=False

From 8c711c96587ec979d855c54c245d631051022019 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sat, 10 Aug 2024 02:16:17 +0100
Subject: [PATCH 092/182] added total calcs

---
 backend/app/plan/router.py | 40 +++++++++++++++++++++++++++++++++++---
 1 file changed, 37 insertions(+), 3 deletions(-)

diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 6367fe1c..6dce42a9 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -487,7 +487,8 @@ async def trigger_plan(body: PlanTriggerRequest):
             data=epcs_for_scoring,
             bucket=get_settings().DATA_BUCKET,
             model_prefixes=["heating_kwh_predictions", "hotwater_kwh_predictions"],
-            extract_ids=False
+            extract_ids=False,
+            batch_size=SCORING_BATCH_SIZE
         )
 
         # Insert the spatial data
@@ -797,7 +798,13 @@ async def trigger_plan(body: PlanTriggerRequest):
             ASHP_COP = 3
             descriptions_to_fuel_types = {
                 "Air source heat pump, radiators, electric": {"fuel": "Electricity", "cop": ASHP_COP},
-                "Boiler and radiators, mains gas": {"fuel": 'Natural Gas', "cop": 0.9}
+                "Boiler and radiators, mains gas": {"fuel": 'Natural Gas', "cop": 0.9},
+                'Electric storage heaters': {"fuel": 'Electricity', "cop": 1},
+                "Electric immersion, off-peak": {"fuel": 'Electricity', "cop": 1},
+                "Electric storage heaters, radiators": {"fuel": 'Electricity', "cop": 1},
+                "Room heaters, electric": {"fuel": 'Electricity', "cop": 1},
+                "Electric immersion, standard tariff": {"fuel": 'Electricity', "cop": 1},
+                "Portable electric heaters assumed for most rooms": {"fuel": 'Electricity', "cop": 1},
             }
 
             def map_descriptions_to_fuel(heating_description, hotwater_description):
@@ -810,7 +817,12 @@ async def trigger_plan(body: PlanTriggerRequest):
                         "heating_cop": mapped["cop"], "hotwater_cop": mapped["cop"]
                     }
 
-                raise NotImplementedError("Implement me")
+                mapped_hotwater = descriptions_to_fuel_types[hotwater_description]
+
+                return {
+                    "heating_fuel_type": heating_fuel, "hotwater_fuel_type": mapped_hotwater["fuel"],
+                    "heating_cop": mapped["cop"], "hotwater_cop": mapped_hotwater["cop"]
+                }
 
             # For heating system recommendations, this could result in a fuel type change so we reflect that
             fuel_mapping = pd.DataFrame([
@@ -820,6 +832,9 @@ async def trigger_plan(body: PlanTriggerRequest):
                 } for epc in property_instance.updated_simulation_epcs
             ])
 
+            for epc in property_instance.updated_simulation_epcs:
+                map_descriptions_to_fuel(epc["mainheat-description"], epc["hotwater-description"])
+
             fuel_mapping = pd.concat(
                 [
                     pd.DataFrame(
@@ -911,6 +926,25 @@ async def trigger_plan(body: PlanTriggerRequest):
                     rec["kwh_savings"] = total_kwh_savings
                     rec["energy_cost_savings"] = energy_cost_savings
 
+            # Finally, we set the current energy bill
+            starting_figures = kwh_impact_table[kwh_impact_table["id"] == starting_dummy_id_value].squeeze()
+            gas_standing_charge = 0
+            if (
+                (starting_figures["heating_fuel_type"] == "Natural Gas") or
+                (starting_figures["hotwater_fuel_type"] == "Natural Gas")
+            ):
+                gas_standing_charge = AnnualBillSavings.DAILY_STANDARD_CHARGE_GAS * 365
+
+            electricity_standing_charge = AnnualBillSavings.DAILY_STANDARD_CHARGE_ELECTRICITY * 365
+
+            property_instance.current_energy_bill = (
+                starting_figures["heating_cost"].values[0] +
+                starting_figures["hotwater_cost"].values[0] +
+                property_instance.energy_cost_estimates["unadjusted"]["lighting"] +
+                property_instance.energy_cost_estimates["unadjusted"]["appliances"] +
+                gas_standing_charge + electricity_standing_charge
+            )
+
         # Insert the predictions into the recommendations and run the optimiser
         # TODO: If a recommendation has a negative impact on SAP, we should remove it - this seems to have become a
         #       possibility with heating system

From d65ce731c06e8f31f4d6c495da9b9ec86531faf6 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sat, 10 Aug 2024 02:18:27 +0100
Subject: [PATCH 093/182] minor

---
 etl/bill_savings/data_collection.py | 91 +++++++++++++++--------------
 etl/bill_savings/training.py        |  2 +-
 2 files changed, 49 insertions(+), 44 deletions(-)

diff --git a/etl/bill_savings/data_collection.py b/etl/bill_savings/data_collection.py
index 0341b885..a073a70e 100644
--- a/etl/bill_savings/data_collection.py
+++ b/etl/bill_savings/data_collection.py
@@ -132,51 +132,56 @@ def app():
 
     energy_consumption_data = []
     for i, directory in tqdm(enumerate(epc_directories), total=len(epc_directories)):
-
-        # Skip the first 50
-        if i < 18:
-            continue
-
-        data = pd.read_csv(directory / "certificates.csv", low_memory=False)
-        # Rename the columns to the same format as the api returns
-        data.columns = [c.replace("_", "-").lower() for c in data.columns]
-
-        # Take just date before the date threshold
-        data = data[data["lodgement-date"] >= EARLIEST_EPC_DATE]
-
-        data = data[~pd.isnull(data["uprn"])]
-        # Take just the newest EPC per uprn, based on lodgement-date
-        data = data.sort_values("lodgement-date", ascending=False).drop_duplicates("uprn")
-
-        data = data.sample(sample_size, replace=False)
-        # We use the addreess data to find the related information
-
-        collected_data = []
-        for _, property_data in data.iterrows():
-            time.sleep(np.random.uniform(0.2, 1.5))
-
-            uprn = int(property_data["uprn"])
-            address = property_data["address1"]
-            postcode = property_data["postcode"]
-            expected_expiry_date = calculate_expiry_date(property_data["lodgement-date"])
-
-            response = retrieve_find_my_epc_data(
-                uprn=uprn,
-                postcode=postcode,
-                address=address,
-                expected_expiry_date=expected_expiry_date
-            )
-            if response is None:
+        try:
+            # Skip the first 50
+            if i < 40:
                 continue
-            collected_data.append(
-                {
-                    **response,
-                    "epc": property_data.to_dict(),
-                    "epc_directory": str(directory)
-                }
-            )
 
-        energy_consumption_data.extend(collected_data)
+            data = pd.read_csv(directory / "certificates.csv", low_memory=False)
+            # Rename the columns to the same format as the api returns
+            data.columns = [c.replace("_", "-").lower() for c in data.columns]
+
+            # Keep only EPCs lodged on or after the date threshold
+            data = data[data["lodgement-date"] >= EARLIEST_EPC_DATE]
+
+            data = data[~pd.isnull(data["uprn"])]
+            # Take just the newest EPC per uprn, based on lodgement-date
+            data = data.sort_values("lodgement-date", ascending=False).drop_duplicates("uprn")
+
+            data = data.sample(sample_size, replace=False)
+            # We use the address data to find the related information
+
+            collected_data = []
+            for _, property_data in data.iterrows():
+                time.sleep(np.random.uniform(0.2, 1.5))
+
+                uprn = int(property_data["uprn"])
+                address = property_data["address1"]
+                postcode = property_data["postcode"]
+                expected_expiry_date = calculate_expiry_date(property_data["lodgement-date"])
+
+                response = retrieve_find_my_epc_data(
+                    uprn=uprn,
+                    postcode=postcode,
+                    address=address,
+                    expected_expiry_date=expected_expiry_date
+                )
+                if response is None:
+                    continue
+                collected_data.append(
+                    {
+                        **response,
+                        "epc": property_data.to_dict(),
+                        "epc_directory": str(directory)
+                    }
+                )
+
+            energy_consumption_data.extend(collected_data)
+        except Exception as e:
+            print(f"Error for directory {directory}: {e}")
+            # If we have an error, then we wait for a bit since it's likely due to timeout
+            time.sleep(300)
+            continue
 
     # Store the pickle in s3
     save_time = datetime.now()
diff --git a/etl/bill_savings/training.py b/etl/bill_savings/training.py
index 5d89a79e..df60298b 100644
--- a/etl/bill_savings/training.py
+++ b/etl/bill_savings/training.py
@@ -1,7 +1,7 @@
 from pprint import pprint
 import msgpack
 from utils.s3 import read_from_s3
-from training_data.bill_savings.EnergyConsumptionModel import EnergyConsumptionModel
+from etl.bill_savings.EnergyConsumptionModel import EnergyConsumptionModel
 
 
 def handler():

From 6ec7995ac3fb4f660fbca9d3a32dd7afdbe3b9ee Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sat, 10 Aug 2024 02:18:57 +0100
Subject: [PATCH 094/182] deleted training file for redundant kwh model

---
 etl/bill_savings/training.py | 57 ------------------------------------
 1 file changed, 57 deletions(-)
 delete mode 100644 etl/bill_savings/training.py

diff --git a/etl/bill_savings/training.py b/etl/bill_savings/training.py
deleted file mode 100644
index df60298b..00000000
--- a/etl/bill_savings/training.py
+++ /dev/null
@@ -1,57 +0,0 @@
-from pprint import pprint
-import msgpack
-from utils.s3 import read_from_s3
-from etl.bill_savings.EnergyConsumptionModel import EnergyConsumptionModel
-
-
-def handler():
-    """
-    This function is used to train the model and store the final models in s3 as pickles
-    :return:
-    """
-
-    dataset_version = "2024-07-08"
-
-    # Usage:
-    cleaned = read_from_s3(
-        s3_file_name="cleaned_epc_data/cleaned.bson",
-        bucket_name="retrofit-data-dev"
-    )
-
-    cleaned = msgpack.unpackb(cleaned, raw=False)
-
-    model = EnergyConsumptionModel(cleaned=cleaned, n_jobs=2)
-    model.read_dataset(f'energy_consumption/{dataset_version}/energy_consumption_dataset.parquet')
-    model.feature_engineering()
-    model.save_dummy_schema(dataset_version=dataset_version)
-
-    # For heating_kwh
-    model.split_dataset(target='heating_kwh')
-    model.fit_model(target='heating_kwh')
-    model.re_train_final_model(target='heating_kwh')
-    evaluation_results = model.evaluate_model(target='heating_kwh')
-
-    pprint(evaluation_results["train"])
-    pprint(evaluation_results["test"])
-
-    model.save_model(target='heating_kwh', dataset_version=dataset_version)
-
-    # importance_df = evaluation_results["train"]["Feature Importance"]
-    # testing_predictions = model.testing_predictions["heating_kwh"]
-    # testing_predictions = testing_predictions.sort_values("residual", ascending=False)
-    # training_predictions = model.training_predictions["heating_kwh"]
-    # training_predictions = training_predictions.sort_values("residual", ascending=False)
-    # # Merge on model.input_data, by the index
-    # merged_data = testing_predictions.merge(model.input_data, left_index=True, right_index=True)
-    # merged_data_train = training_predictions.merge(model.input_data, left_index=True, right_index=True)
-
-    # For hot_water_kwh
-    model.split_dataset(target='hot_water_kwh')
-    model.fit_model(target='hot_water_kwh')
-    model.re_train_final_model(target='hot_water_kwh')
-    evaluation_results = model.evaluate_model(target='hot_water_kwh')
-
-    pprint(evaluation_results["train"])
-    pprint(evaluation_results["test"])
-
-    model.save_model(target='hot_water_kwh', dataset_version=dataset_version)

From 2cb5308711279ded9986ecf22c61676f1124c3f3 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 12 Aug 2024 10:32:26 +0100
Subject: [PATCH 095/182] cleaning setting of kwh and energy bills

---
 backend/app/plan/router.py             | 229 +------------------------
 backend/ml_models/AnnualBillSavings.py |   8 +
 etl/bill_savings/data_collection.py    |   2 +-
 etl/bill_savings/training_data.py      |   2 +
 recommendations/Recommendations.py     | 218 +++++++++++++++++++++++
 5 files changed, 236 insertions(+), 223 deletions(-)

diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 6dce42a9..53a6d813 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -30,6 +30,7 @@ from backend.app.plan.utils import get_cleaned
 from backend.app.utils import epc_to_sap_lower_bound, sap_to_epc
 
 from backend.ml_models.api import ModelApi
+from backend.ml_models.AnnualBillSavings import AnnualBillSavings
 from backend.Property import Property
 from backend.apis.GoogleSolarApi import GoogleSolarApi
 
@@ -722,228 +723,12 @@ async def trigger_plan(body: PlanTriggerRequest):
             property_recommendations = recommendations[property_id]
             property_instance = [p for p in input_properties if p.id == property_id][0]
 
-            kwh_impact_table = kwh_simulation_predictions["heating_kwh_predictions"][
-                kwh_simulation_predictions["heating_kwh_predictions"]["property_id"] == str(property_id)
-                ].merge(
-                kwh_simulation_predictions["hotwater_kwh_predictions"].drop(
-                    columns=["property_id", "recommendation_id", "phase"]
-                ),
-                how="inner",
-                on="id",
-                suffixes=("_heating", "_hotwater")
-            ).reset_index(drop=True)
-
-            # We adjust this table with the kwh estimates for low energy lighting kwh values, and solar kwh estimates
-            led_recommendation = pd.DataFrame([
-                {
-                    "phase": r["phase"],
-                    "recommendation_id": r["recommendation_id"],
-                    "lighting_kwh_savings": r["kwh_savings"] * GoogleSolarApi.SOLAR_CONSUMPTION_PROPORTION,
-                } for recs in property_recommendations for r in recs if r["type"] == "low_energy_lighting"
-            ], columns=["phase", "recommendation_id", "lighting_kwh_savings"])
-            solar_recommendations = pd.DataFrame([
-                {
-                    "phase": r["phase"],
-                    "recommendation_id": r["recommendation_id"],
-                    "solar_kwh_savings": r["initial_ac_kwh_per_year"] * GoogleSolarApi.SOLAR_CONSUMPTION_PROPORTION,
-                } for recs in property_recommendations for r in recs if r["type"] == "solar_pv"
-            ], columns=["phase", "recommendation_id", "solar_kwh_savings"])
-
-            # merge them on
-            kwh_impact_table = kwh_impact_table.merge(
-                led_recommendation, how="left", on=["phase", "recommendation_id"]
-            ).merge(
-                solar_recommendations, how="left", on=["phase", "recommendation_id"]
-            )
-
-            property_kwh = property_instance.energy_consumption_estimates["unadjusted"]
-
-            starting_dummy_id_value = -9999
-            kwh_impact_table = pd.concat(
-                [
-                    pd.DataFrame(
-                        [
-                            {
-                                "id": starting_dummy_id_value,
-                                "phase": starting_dummy_id_value,
-                                "recommendation_id": starting_dummy_id_value,
-                                "predictions_heating": property_kwh["heating"],
-                                "predictions_hotwater": property_kwh["hot_water"],
-                            }
-                        ]
-                    ),
-                    kwh_impact_table
-                ]
-            ).sort_values(["phase", "recommendation_id"], ascending=True).reset_index(drop=True)
-
-            for i in range(0, len(kwh_impact_table)):
-                current_phase = kwh_impact_table.loc[i, 'phase']
-                previous_phase_id = (current_phase - 1) if (current_phase > 0) else -9999
-                previous_phase = kwh_impact_table[kwh_impact_table['phase'] == previous_phase_id]
-
-                if not previous_phase.empty:
-                    for col in ["predictions_heating", "predictions_hotwater"]:
-                        if kwh_impact_table.loc[i, col] > previous_phase[col].max():
-                            kwh_impact_table.loc[i, col] = previous_phase[col].max()
-
-            from backend.ml_models.AnnualBillSavings import AnnualBillSavings
-            # We adjust the predictions with the UCL model
-            for k in ["heating", "hotwater"]:
-                kwh_impact_table[f"adjusted_{k}"] = kwh_impact_table[f"predictions_{k}"].apply(
-                    lambda x: AnnualBillSavings.adjust_energy_to_metered(
-                        epc_energy=x, current_epc_rating=property_instance.data["current-energy-rating"]
-                    )
-                )
-
-            ASHP_COP = 3
-            descriptions_to_fuel_types = {
-                "Air source heat pump, radiators, electric": {"fuel": "Electricity", "cop": ASHP_COP},
-                "Boiler and radiators, mains gas": {"fuel": 'Natural Gas', "cop": 0.9},
-                'Electric storage heaters': {"fuel": 'Electricity', "cop": 1},
-                "Electric immersion, off-peak": {"fuel": 'Electricity', "cop": 1},
-                "Electric storage heaters, radiators": {"fuel": 'Electricity', "cop": 1},
-                "Room heaters, electric": {"fuel": 'Electricity', "cop": 1},
-                "Electric immersion, standard tariff": {"fuel": 'Electricity', "cop": 1},
-                "Portable electric heaters assumed for most rooms": {"fuel": 'Electricity', "cop": 1},
-            }
-
-            def map_descriptions_to_fuel(heating_description, hotwater_description):
-                mapped = descriptions_to_fuel_types[heating_description]
-                heating_fuel = mapped["fuel"]
-
-                if hotwater_description == "From main system":
-                    return {
-                        "heating_fuel_type": heating_fuel, "hotwater_fuel_type": heating_fuel,
-                        "heating_cop": mapped["cop"], "hotwater_cop": mapped["cop"]
-                    }
-
-                mapped_hotwater = descriptions_to_fuel_types[hotwater_description]
-
-                return {
-                    "heating_fuel_type": heating_fuel, "hotwater_fuel_type": mapped_hotwater["fuel"],
-                    "heating_cop": mapped["cop"], "hotwater_cop": mapped_hotwater["cop"]
-                }
-
-            # For heating system recommendations, this could result in a fuel type change so we reflect that
-            fuel_mapping = pd.DataFrame([
-                {
-                    "id": epc["id"],
-                    **map_descriptions_to_fuel(epc["mainheat-description"], epc["hotwater-description"])
-                } for epc in property_instance.updated_simulation_epcs
-            ])
-
-            for epc in property_instance.updated_simulation_epcs:
-                map_descriptions_to_fuel(epc["mainheat-description"], epc["hotwater-description"])
-
-            fuel_mapping = pd.concat(
-                [
-                    pd.DataFrame(
-                        [
-                            {
-                                "id": starting_dummy_id_value,
-                                **map_descriptions_to_fuel(
-                                    property_instance.data["mainheat-description"],
-                                    property_instance.data["hotwater-description"]
-                                )
-                            }
-                        ]
-                    ),
-                    fuel_mapping
-                ]
-            )
-
-            kwh_impact_table = kwh_impact_table.merge(
-                fuel_mapping, how="left", on="id"
-            ).sort_values(["phase", "recommendation_id"], ascending=True).reset_index(drop=True)
-
-            kwh_impact_table["heating_fuel_type"] = np.where(
-                kwh_impact_table["id"] == starting_dummy_id_value,
-                property_instance.heating_energy_source,
-                kwh_impact_table["heating_fuel_type"]
-            )
-
-            kwh_impact_table["hotwater_fuel_type"] = np.where(
-                kwh_impact_table["id"] == starting_dummy_id_value,
-                property_instance.hot_water_energy_source,
-                kwh_impact_table["hotwater_fuel_type"]
-            )
-
-            def calculate_recommendation_fuel_cost(kwh, fuel, cop):
-                if fuel == "Electricity":
-                    return (kwh / cop) * AnnualBillSavings.ELECTRICITY_PRICE_CAP
-
-                if fuel == "Natural Gas":
-                    return (kwh / cop) * AnnualBillSavings.GAS_PRICE_CAP
-
-            # We now calculate the fuel cost
-            for k in ["heating", "hotwater"]:
-                kwh_impact_table[f"{k}_cost"] = kwh_impact_table.apply(
-                    lambda x: calculate_recommendation_fuel_cost(
-                        x[f"adjusted_{k}"], x[f"{k}_fuel_type"], x[f"{k}_cop"]
-                    ), axis=1
-                )
-
-            # TODO: The impact of remapping EPC is huge!
-
-            # We now deduce if any of the recommendations result in a change of fuel type
-            for recs in property_recommendations:
-                for rec in recs:
-                    if rec["type"] == "mechanical_ventilation":
-                        continue
-
-                    rec_impact = kwh_impact_table[kwh_impact_table["recommendation_id"] == rec["recommendation_id"]]
-                    prevous_phase_id = (rec["phase"] - 1) if (rec["phase"] > 0) else starting_dummy_id_value
-                    previous_phase_impact = kwh_impact_table[kwh_impact_table["phase"] == prevous_phase_id]
-
-                    if rec["type"] == "solar_pv":
-                        rec["kwh_savings"] = rec_impact["solar_kwh_savings"].values[0]
-                        rec["energy_cost_savings"] = (
-                            rec_impact["solar_kwh_savings"].values[0] * AnnualBillSavings.ELECTRICITY_PRICE_CAP
-                        )
-                        continue
-
-                    heating_kwh_savings = (
-                        previous_phase_impact["adjusted_heating"].mean() - rec_impact["adjusted_heating"].values[0]
-                    )
-                    heating_cost_savings = (
-                        previous_phase_impact["heating_cost"].mean() - rec_impact["heating_cost"].values[0]
-                    )
-
-                    hotwater_kwh_savings = (
-                        previous_phase_impact["adjusted_hotwater"].mean() - rec_impact["adjusted_hotwater"].values[0]
-                    )
-                    hotwater_host = (
-                        previous_phase_impact["hotwater_cost"].mean() - rec_impact["hotwater_cost"].values[0]
-                    )
-
-                    total_kwh_savings = heating_kwh_savings + hotwater_kwh_savings
-                    energy_cost_savings = heating_cost_savings + hotwater_host
-
-                    if rec["type"] == "lighting":
-                        # In this case, we should probably just SKIP but check when we have one!
-                        raise Exception("Implement me 3")
-
-                    rec["kwh_savings"] = total_kwh_savings
-                    rec["energy_cost_savings"] = energy_cost_savings
-
-            # Finally, we set the current energy bill
-            starting_figures = kwh_impact_table[kwh_impact_table["id"] == starting_dummy_id_value].squeeze()
-            gas_standing_charge = 0
-            if (
-                (starting_figures["heating_fuel_type"] == "Natural Gas") or
-                (starting_figures["hotwater_fuel_type"] == "Natural Gas")
-            ):
-                gas_standing_charge = AnnualBillSavings.DAILY_STANDARD_CHARGE_GAS * 365
-
-            electricity_standing_charge = AnnualBillSavings.DAILY_STANDARD_CHARGE_ELECTRICITY * 365
-
-            property_instance.current_energy_bill = (
-                starting_figures["heating_cost"].values[0] +
-                starting_figures["hotwater_cost"].values[0] +
-                property_instance.energy_cost_estimates["unadjusted"]["lighting"] +
-                property_instance.energy_cost_estimates["unadjusted"]["appliances"] +
-                gas_standing_charge + electricity_standing_charge
+            property_current_energy_bill = Recommendations.calculate_recommendation_tenant_savings(
+                property_instance=property_instance,
+                kwh_simulation_predictions=kwh_simulation_predictions,
+                property_recommendations=property_recommendations
             )
+            property_instance.current_energy_bill = property_current_energy_bill
 
         # Insert the predictions into the recommendations and run the optimiser
         # TODO: If a recommendation has a negative impact on SAP, we should remove it - this seems to have become a
@@ -1284,7 +1069,7 @@ async def build_mds(body: MdsRequest):
         recommendations = {}
 
         for p in tqdm(input_properties):
-            p.get_components(cleaned, photo_supply_lookup, floor_area_decile_thresholds)
+            p.set_features(cleaned, photo_supply_lookup, floor_area_decile_thresholds)
 
             mds = Mds(property_instance=p, materials=materials, optimise_measures=optimise_measures)
             mds_recommendations, property_representative_recommendations, errors = mds.build()
diff --git a/backend/ml_models/AnnualBillSavings.py b/backend/ml_models/AnnualBillSavings.py
index e4d9d143..0317b9e3 100644
--- a/backend/ml_models/AnnualBillSavings.py
+++ b/backend/ml_models/AnnualBillSavings.py
@@ -199,3 +199,11 @@ class AnnualBillSavings:
             return current_epc_rating
 
         return cls.EPC_BANDS[expected_index - 1]
+
+    @classmethod
+    def calculate_recommendation_fuel_cost(cls, kwh, fuel, cop):
+        if fuel == "Electricity":
+            return (kwh / cop) * cls.ELECTRICITY_PRICE_CAP
+
+        if fuel == "Natural Gas":
+            return (kwh / cop) * cls.GAS_PRICE_CAP
diff --git a/etl/bill_savings/data_collection.py b/etl/bill_savings/data_collection.py
index a073a70e..75fd9df2 100644
--- a/etl/bill_savings/data_collection.py
+++ b/etl/bill_savings/data_collection.py
@@ -134,7 +134,7 @@ def app():
     for i, directory in tqdm(enumerate(epc_directories), total=len(epc_directories)):
         try:
             # Skip the first 50
-            if i < 40:
+            if i < 200:
                 continue
 
             data = pd.read_csv(directory / "certificates.csv", low_memory=False)
diff --git a/etl/bill_savings/training_data.py b/etl/bill_savings/training_data.py
index 85b53bca..a3d58af3 100644
--- a/etl/bill_savings/training_data.py
+++ b/etl/bill_savings/training_data.py
@@ -17,6 +17,8 @@ def app():
 
     cleaned = msgpack.unpackb(cleaned, raw=False)
 
+    # If there is any problematic data, it could be:
+    # s3://retrofit-datalake-dev/energy_consumption_data/2024-08-10 18:48:06.866647.pkl
     kwh_data_client = KwhData(bucket="retrofit-datalake-dev")
     kwh_data_client.combine()
     kwh_data_client.transform(data=kwh_data_client.data, cleaned=cleaned, save=True)
diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py
index 588d2316..21c4f551 100644
--- a/recommendations/Recommendations.py
+++ b/recommendations/Recommendations.py
@@ -17,6 +17,19 @@ from recommendations.SecondaryHeating import SecondaryHeating
 from backend.ml_models.AnnualBillSavings import AnnualBillSavings
 from backend.apis.GoogleSolarApi import GoogleSolarApi
 
+ASHP_COP = 3
+DESCRIPTIONS_TO_FUEL_TYPES = {
+    "Air source heat pump, radiators, electric": {"fuel": "Electricity", "cop": ASHP_COP},
+    "Boiler and radiators, mains gas": {"fuel": 'Natural Gas', "cop": 0.9},
+    'Electric storage heaters': {"fuel": 'Electricity', "cop": 1},
+    "Electric immersion, off-peak": {"fuel": 'Electricity', "cop": 1},
+    "Electric storage heaters, radiators": {"fuel": 'Electricity', "cop": 1},
+    "Room heaters, electric": {"fuel": 'Electricity', "cop": 1},
+    "Electric immersion, standard tariff": {"fuel": 'Electricity', "cop": 1},
+    "Portable electric heaters assumed for most rooms": {"fuel": 'Electricity', "cop": 1},
+}
+STARTING_DUMMY_ID_VALUE = -9999
+
 
 class Recommendations:
     """
@@ -497,3 +510,208 @@ class Recommendations:
                 )
 
         return property_recommendations, impact_summary
+
+    @staticmethod
+    def map_descriptions_to_fuel(heating_description, hotwater_description):
+        mapped = DESCRIPTIONS_TO_FUEL_TYPES[heating_description]
+        heating_fuel = mapped["fuel"]
+
+        if hotwater_description == "From main system":
+            return {
+                "heating_fuel_type": heating_fuel, "hotwater_fuel_type": heating_fuel,
+                "heating_cop": mapped["cop"], "hotwater_cop": mapped["cop"]
+            }
+
+        mapped_hotwater = DESCRIPTIONS_TO_FUEL_TYPES[hotwater_description]
+
+        return {
+            "heating_fuel_type": heating_fuel, "hotwater_fuel_type": mapped_hotwater["fuel"],
+            "heating_cop": mapped["cop"], "hotwater_cop": mapped_hotwater["cop"]
+        }
+
+    @classmethod
+    def calculate_recommendation_tenant_savings(
+        cls, property_instance, kwh_simulation_predictions, property_recommendations
+    ):
+        """
+        This method inserts the kwh savings and the bill savings that the customer will make from the recommendations
+        based on the predictions from the ML model
+        :param property_instance: Instance of the Property class, for the home associated to property_id
+        :param kwh_simulation_predictions: dictionary of predictions from the model apis
+        :param property_recommendations: dictionary of recommendations for the property
+        :return:
+        """
+
+        kwh_impact_table = kwh_simulation_predictions["heating_kwh_predictions"][
+            kwh_simulation_predictions["heating_kwh_predictions"]["property_id"] == str(property_instance.id)
+            ].merge(
+            kwh_simulation_predictions["hotwater_kwh_predictions"].drop(
+                columns=["property_id", "recommendation_id", "phase"]
+            ),
+            how="inner",
+            on="id",
+            suffixes=("_heating", "_hotwater")
+        ).reset_index(drop=True)
+
+        # We adjust this table with the kwh estimates for low energy lighting kwh values, and solar kwh estimates
+        led_recommendation = pd.DataFrame([
+            {
+                "phase": r["phase"],
+                "recommendation_id": r["recommendation_id"],
+                "lighting_kwh_savings": r["kwh_savings"] * GoogleSolarApi.SOLAR_CONSUMPTION_PROPORTION,
+            } for recs in property_recommendations for r in recs if r["type"] == "low_energy_lighting"
+        ], columns=["phase", "recommendation_id", "lighting_kwh_savings"])
+        solar_recommendations = pd.DataFrame([
+            {
+                "phase": r["phase"],
+                "recommendation_id": r["recommendation_id"],
+                "solar_kwh_savings": r["initial_ac_kwh_per_year"] * GoogleSolarApi.SOLAR_CONSUMPTION_PROPORTION,
+            } for recs in property_recommendations for r in recs if r["type"] == "solar_pv"
+        ], columns=["phase", "recommendation_id", "solar_kwh_savings"])
+
+        # merge them on
+        kwh_impact_table = kwh_impact_table.merge(
+            led_recommendation, how="left", on=["phase", "recommendation_id"]
+        ).merge(
+            solar_recommendations, how="left", on=["phase", "recommendation_id"]
+        )
+
+        property_kwh = property_instance.energy_consumption_estimates["unadjusted"]
+
+        kwh_impact_table = pd.concat(
+            [
+                pd.DataFrame(
+                    [
+                        {
+                            "id": STARTING_DUMMY_ID_VALUE,
+                            "phase": STARTING_DUMMY_ID_VALUE,
+                            "recommendation_id": STARTING_DUMMY_ID_VALUE,
+                            "predictions_heating": property_kwh["heating"],
+                            "predictions_hotwater": property_kwh["hot_water"],
+                        }
+                    ]
+                ),
+                kwh_impact_table
+            ]
+        ).sort_values(["phase", "recommendation_id"], ascending=True).reset_index(drop=True)
+
+        for i in range(0, len(kwh_impact_table)):
+            current_phase = kwh_impact_table.loc[i, 'phase']
+            previous_phase_id = (current_phase - 1) if (current_phase > 0) else -9999
+            previous_phase = kwh_impact_table[kwh_impact_table['phase'] == previous_phase_id]
+
+            if not previous_phase.empty:
+                for col in ["predictions_heating", "predictions_hotwater"]:
+                    if kwh_impact_table.loc[i, col] > previous_phase[col].max():
+                        kwh_impact_table.loc[i, col] = previous_phase[col].max()
+
+        # For heating system recommendations, this could result in a fuel type change so we reflect that
+        fuel_mapping = pd.DataFrame([
+            {
+                "id": epc["id"],
+                **cls.map_descriptions_to_fuel(epc["mainheat-description"], epc["hotwater-description"])
+            } for epc in property_instance.updated_simulation_epcs
+        ])
+
+        fuel_mapping = pd.concat(
+            [
+                pd.DataFrame(
+                    [
+                        {
+                            "id": STARTING_DUMMY_ID_VALUE,
+                            **cls.map_descriptions_to_fuel(
+                                property_instance.data["mainheat-description"],
+                                property_instance.data["hotwater-description"]
+                            )
+                        }
+                    ]
+                ),
+                fuel_mapping
+            ]
+        )
+
+        kwh_impact_table = kwh_impact_table.merge(
+            fuel_mapping, how="left", on="id"
+        ).sort_values(["phase", "recommendation_id"], ascending=True).reset_index(drop=True)
+
+        kwh_impact_table["heating_fuel_type"] = np.where(
+            kwh_impact_table["id"] == STARTING_DUMMY_ID_VALUE,
+            property_instance.heating_energy_source,
+            kwh_impact_table["heating_fuel_type"]
+        )
+
+        kwh_impact_table["hotwater_fuel_type"] = np.where(
+            kwh_impact_table["id"] == STARTING_DUMMY_ID_VALUE,
+            property_instance.hot_water_energy_source,
+            kwh_impact_table["hotwater_fuel_type"]
+        )
+
+        # We now calculate the fuel cost
+        for k in ["heating", "hotwater"]:
+            kwh_impact_table[f"{k}_cost"] = kwh_impact_table.apply(
+                lambda x: AnnualBillSavings.calculate_recommendation_fuel_cost(
+                    x[f"adjusted_{k}"], x[f"{k}_fuel_type"], x[f"{k}_cop"]
+                ), axis=1
+            )
+
+        # We now deduce if any of the recommendations result in a change of fuel type
+        for recs in property_recommendations:
+            for rec in recs:
+                if rec["type"] == "mechanical_ventilation":
+                    continue
+
+                rec_impact = kwh_impact_table[kwh_impact_table["recommendation_id"] == rec["recommendation_id"]]
+                prevous_phase_id = (rec["phase"] - 1) if (rec["phase"] > 0) else STARTING_DUMMY_ID_VALUE
+                previous_phase_impact = kwh_impact_table[kwh_impact_table["phase"] == prevous_phase_id]
+
+                if rec["type"] == "solar_pv":
+                    rec["kwh_savings"] = rec_impact["solar_kwh_savings"].values[0]
+                    rec["energy_cost_savings"] = (
+                        rec_impact["solar_kwh_savings"].values[0] * AnnualBillSavings.ELECTRICITY_PRICE_CAP
+                    )
+                    continue
+
+                heating_kwh_savings = (
+                    previous_phase_impact["adjusted_heating"].mean() - rec_impact["adjusted_heating"].values[0]
+                )
+                heating_cost_savings = (
+                    previous_phase_impact["heating_cost"].mean() - rec_impact["heating_cost"].values[0]
+                )
+
+                hotwater_kwh_savings = (
+                    previous_phase_impact["adjusted_hotwater"].mean() - rec_impact["adjusted_hotwater"].values[0]
+                )
+                hotwater_host = (
+                    previous_phase_impact["hotwater_cost"].mean() - rec_impact["hotwater_cost"].values[0]
+                )
+
+                total_kwh_savings = heating_kwh_savings + hotwater_kwh_savings
+                energy_cost_savings = heating_cost_savings + hotwater_host
+
+                if rec["type"] == "lighting":
+                    # In this case, we should probably just SKIP but check when we have one!
+                    raise Exception("Implement me 3")
+
+                rec["kwh_savings"] = total_kwh_savings
+                rec["energy_cost_savings"] = energy_cost_savings
+
+        # Finally, we set the current energy bill
+        starting_figures = kwh_impact_table[kwh_impact_table["id"] == STARTING_DUMMY_ID_VALUE].squeeze()
+        gas_standing_charge = 0
+        if (
+            (starting_figures["heating_fuel_type"] == "Natural Gas") or
+            (starting_figures["hotwater_fuel_type"] == "Natural Gas")
+        ):
+            gas_standing_charge = AnnualBillSavings.DAILY_STANDARD_CHARGE_GAS * 365
+
+        electricity_standing_charge = AnnualBillSavings.DAILY_STANDARD_CHARGE_ELECTRICITY * 365
+
+        current_energy_bill = (
+            starting_figures["heating_cost"].values[0] +
+            starting_figures["hotwater_cost"].values[0] +
+            property_instance.energy_cost_estimates["unadjusted"]["lighting"] +
+            property_instance.energy_cost_estimates["unadjusted"]["appliances"] +
+            gas_standing_charge + electricity_standing_charge
+        )
+
+        return current_energy_bill

From 30c28754b7def39ce495638e68c7b7e6fc7b5ce0 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 12 Aug 2024 10:38:00 +0100
Subject: [PATCH 096/182] fixed bug with adjusted predictions

---
 recommendations/Recommendations.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py
index 21c4f551..0b9e4c7a 100644
--- a/recommendations/Recommendations.py
+++ b/recommendations/Recommendations.py
@@ -650,7 +650,7 @@ class Recommendations:
         for k in ["heating", "hotwater"]:
             kwh_impact_table[f"{k}_cost"] = kwh_impact_table.apply(
                 lambda x: AnnualBillSavings.calculate_recommendation_fuel_cost(
-                    x[f"adjusted_{k}"], x[f"{k}_fuel_type"], x[f"{k}_cop"]
+                    x[f"predictions_{k}"], x[f"{k}_fuel_type"], x[f"{k}_cop"]
                 ), axis=1
             )
 
@@ -672,14 +672,14 @@ class Recommendations:
                     continue
 
                 heating_kwh_savings = (
-                    previous_phase_impact["adjusted_heating"].mean() - rec_impact["adjusted_heating"].values[0]
+                    previous_phase_impact["predictions_heating"].mean() - rec_impact["predictions_heating"].values[0]
                 )
                 heating_cost_savings = (
                     previous_phase_impact["heating_cost"].mean() - rec_impact["heating_cost"].values[0]
                 )
 
                 hotwater_kwh_savings = (
-                    previous_phase_impact["adjusted_hotwater"].mean() - rec_impact["adjusted_hotwater"].values[0]
+                    previous_phase_impact["predictions_hotwater"].mean() - rec_impact["predictions_hotwater"].values[0]
                 )
                 hotwater_host = (
                     previous_phase_impact["hotwater_cost"].mean() - rec_impact["hotwater_cost"].values[0]
@@ -707,11 +707,12 @@ class Recommendations:
         electricity_standing_charge = AnnualBillSavings.DAILY_STANDARD_CHARGE_ELECTRICITY * 365
 
         current_energy_bill = (
-            starting_figures["heating_cost"].values[0] +
-            starting_figures["hotwater_cost"].values[0] +
+            starting_figures["heating_cost"] +
+            starting_figures["hotwater_cost"] +
             property_instance.energy_cost_estimates["unadjusted"]["lighting"] +
             property_instance.energy_cost_estimates["unadjusted"]["appliances"] +
-            gas_standing_charge + electricity_standing_charge
+            gas_standing_charge +
+            electricity_standing_charge
         )
 
         return current_energy_bill

From 3920ad413208067c822ec4db73c643c6da526eec Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 12 Aug 2024 15:15:56 +0100
Subject: [PATCH 097/182] revising scenarios, fixing setting of energy sources

---
 backend/Property.py                      |  8 ++-
 backend/app/plan/router.py               |  8 +--
 backend/app/plan/schemas.py              |  2 +-
 etl/bill_savings/KwhData.py              |  3 +-
 etl/customers/newhaven/newhaven_study.py | 71 +++++++++++++++---------
 5 files changed, 58 insertions(+), 34 deletions(-)

diff --git a/backend/Property.py b/backend/Property.py
index bcb24325..e7341c4d 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -1171,7 +1171,9 @@ class Property:
             'has_exhaust_source_heat_pump': 'Electricity',
             'has_community_heat_pump': 'Electricity',
             'has_wood_pellets': 'Wood Pellets',
-            'has_community_scheme': 'Varied (Community Scheme)'
+            'has_community_scheme': 'Varied (Community Scheme)',
+            "has_dual_fuel_mineral_and_wood": 'Wood Logs',
+            "has_electricaire": 'Electricity',
         }
 
         # Hot water
@@ -1197,9 +1199,9 @@ class Property:
             'community scheme': 'Community Scheme'
         }
 
-        self.heating_energy_source = [
+        self.heating_energy_source = list({
             fuel for key, fuel in heating_fuel_mapping.items() if self.main_heating.get(key, False)
-        ]
+        })
         if len(self.heating_energy_source) == 0 or len(self.heating_energy_source) > 1:
             raise Exception("Investigate me")
 
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 53a6d813..8001e6ef 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -320,11 +320,11 @@ def extract_propert_on_site_recommendations(config, already_installed, non_invas
 
     # Because we have some non-invasive recommendations that match on address and postcode, but not UPRN
     # we need to check existence of uprn
-    has_uprn = "non_invasive_recommendations" in non_invasive_recommendations[0]
+    has_uprn = "uprn" in non_invasive_recommendations[0]
     if has_uprn:
         property_non_invasive_recommendations = next((
             x for x in non_invasive_recommendations if
-            (x["uprn"] == uprn)
+            (str(x["uprn"]) == str(uprn))
         ), {})
 
         # We patch the non-invasive recs that are ['cavity_extract_and_refill']
@@ -334,7 +334,7 @@ def extract_propert_on_site_recommendations(config, already_installed, non_invas
             (x["address"] == config["address"]) and (x["postcode"] == config["postcode"])
         ), {})
 
-    if isinstance(property_non_invasive_recommendations["recommendations"], str):
+    if isinstance(property_non_invasive_recommendations.get("recommendations"), str):
         import ast
         property_non_invasive_recommendations["recommendations"] = ast.literal_eval(
             property_non_invasive_recommendations["recommendations"]
@@ -498,8 +498,8 @@ async def trigger_plan(body: PlanTriggerRequest):
 
         logger.info("Setting property features")
         [p.set_features(cleaned=cleaned, kwh_client=kwh_client, kwh_predictions=kwh_preds) for p in input_properties]
-
         logger.info("Performing solar analysis")
+
         # TODO: Tidy this up
         # TODO: If a property is semi-detached, we might get roof surfaces for the main building + the neighbour
         # TODO: If we can't get high image quality, should we use the solar API? Maybe just for semi-detached units with
diff --git a/backend/app/plan/schemas.py b/backend/app/plan/schemas.py
index 082f46d3..bbcd5a57 100644
--- a/backend/app/plan/schemas.py
+++ b/backend/app/plan/schemas.py
@@ -34,7 +34,7 @@ class PlanTriggerRequest(BaseModel):
         # Specific measures
         "air_source_heat_pump",
         "internal_wall_insulation",
-        "external_wall_insulation"
+        "external_wall_insulation",
     }
 
     _allowed_goals = {"Increasing EPC"}
diff --git a/etl/bill_savings/KwhData.py b/etl/bill_savings/KwhData.py
index 5563014b..6b5f594a 100644
--- a/etl/bill_savings/KwhData.py
+++ b/etl/bill_savings/KwhData.py
@@ -25,7 +25,8 @@ class KwhData:
         "county",
         "windows-description", "windows-energy-eff", "flat-top-storey",
         "flat-storey-count", "unheated-corridor-length", "solar-water-heating-flag", "mechanical-ventilation",
-        "low-energy-lighting", "environment-impact-current", "energy-tariff", "current-energy-rating"
+        "low-energy-lighting", "environment-impact-current", "energy-tariff", "current-energy-rating",
+        "floor-level"
     ]
 
     NUMERICAL_COLUMNS = [
diff --git a/etl/customers/newhaven/newhaven_study.py b/etl/customers/newhaven/newhaven_study.py
index 4092dd87..9faf6a26 100644
--- a/etl/customers/newhaven/newhaven_study.py
+++ b/etl/customers/newhaven/newhaven_study.py
@@ -11,7 +11,7 @@ EPC_DIRECTORY = Path(src_file_path).parent / "local_data" / "all-domestic-certif
 CUSTOMER_DATA_DIRECTORY = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Newhaven/Data"
 
 USER_ID = 8
-PORTFOLIO_ID = 88
+PORTFOLIO_ID = 89
 
 
 def make_asset_list():
@@ -88,20 +88,20 @@ def make_asset_list():
         columns={"Wall Area [m^2]": "insulation_wall_area", "Building Area [m^2]": "floor_area"}
     )
 
-    had_an_epc = asset_list[~pd.isnull(asset_list["current-energy-efficiency"])]
-    below_b = asset_list[asset_list["current-energy-efficiency"].astype(float) <= 80].shape
-    below_c = asset_list[asset_list["current-energy-efficiency"].astype(float) <= 69].shape
-    had_an_epc["energy-efficiency-rating"].value_counts()
-    asset_list["current-energy-rating"].value_counts()
-    asset_list["co2-emissions-current"].mean()
-    # Get the underlying data of a histograme
-    import matplotlib.pyplot as plt
-    n, bins, patches = plt.hist(asset_list["co2-emissions-current"], bins=100, color="blue", alpha=0.7)
-
-    bins = np.arange(0, asset_list["co2-emissions-current"].max(), 1)  # Bins from 50 to 150 with a step of 10
-
-    # Step 3: Calculate the frequency of data in each bin
-    hist, bin_edges = np.histogram(asset_list["co2-emissions-current"], bins=bins)
+    # had_an_epc = asset_list[~pd.isnull(asset_list["current-energy-efficiency"])]
+    # below_b = asset_list[asset_list["current-energy-efficiency"].astype(float) <= 80].shape
+    # below_c = asset_list[asset_list["current-energy-efficiency"].astype(float) <= 69].shape
+    # had_an_epc["energy-efficiency-rating"].value_counts()
+    # asset_list["current-energy-rating"].value_counts()
+    # asset_list["co2-emissions-current"].mean()
+    # # Get the underlying data of a histograme
+    # import matplotlib.pyplot as plt
+    # n, bins, patches = plt.hist(asset_list["co2-emissions-current"], bins=100, color="blue", alpha=0.7)
+    #
+    # bins = np.arange(0, asset_list["co2-emissions-current"].max(), 1)  # Bins from 50 to 150 with a step of 10
+    #
+    # # Step 3: Calculate the frequency of data in each bin
+    # hist, bin_edges = np.histogram(asset_list["co2-emissions-current"], bins=bins)
 
     # Take properties below a B - there are 2844 units
     asset_list = asset_list[asset_list["current-energy-efficiency"].astype(float) <= 80]
@@ -110,7 +110,7 @@ def make_asset_list():
     asset_list = asset_list[~pd.isnull(asset_list["current-energy-efficiency"])]
 
     # Take a 10% sample, for properties that have an EPC, with a seed
-    asset_list = asset_list.sample(frac=0.1, random_state=42)
+    asset_list = asset_list.sample(frac=0.25, random_state=42)
 
     AVG_FLOOR_HEIGHT = asset_list["floor-height"].median()
 
@@ -119,11 +119,11 @@ def make_asset_list():
     ):
 
         if address_base_property_description == "Self Contained Flat (Includes Maisonette / Apartment)":
-            if epc_property_type == "Flat":
+            if epc_property_type in ["Flat"]:
                 return 1
             if epc_property_type == "Maisonette":
                 return 2
-            raise NotImplementedError("Implement me")
+            return None
 
         if pd.isnull(floor_height):
             return np.round(building_height / AVG_FLOOR_HEIGHT)
@@ -140,6 +140,8 @@ def make_asset_list():
         ),
         axis=1
     )
+    # Drop any entires with null floors because that means the ordnance survey data doesn't align with the epc data
+    asset_list = asset_list[~pd.isnull(asset_list["number_of_floors"])]
 
     # D    0.419929
     # C    0.391459
@@ -262,16 +264,16 @@ def make_asset_list():
         "already_installed_file_path": "",
         "patches_file_path": "",
         "non_invasive_recommendations_file_path": non_invasive_recommendations_filename,
-        "scenario_name": "Demand Reduction - no solid wall",
+        "scenario_name": "Demand Reduction - no solid wall, windows, LEDs",
         "multi_plan": True,
         "exclusions": [
-            "internal_wall_insulation", "external_wall_insulation", "floor_insulation", "heating", "solar_pv"
+            "internal_wall_insulation", "external_wall_insulation", "floor_insulation", "heating", "solar_pv",
+            "lighting", "windows"
         ],
         "budget": None,
     }
     print(body1)
 
-    # Scenario B
     body2 = {
         "portfolio_id": str(PORTFOLIO_ID),
         "housing_type": "Private",
@@ -281,15 +283,34 @@ def make_asset_list():
         "already_installed_file_path": "",
         "patches_file_path": "",
         "non_invasive_recommendations_file_path": non_invasive_recommendations_filename,
-        "scenario_name": "Demand Reduction, Heating Systems, Solar PV - no solid wall",
+        "scenario_name": "Demand Reduction - no solid wall, floors or heating",
         "multi_plan": True,
-        "exclusions": ["internal_wall_insulation", "external_wall_insulation", "floor_insulation"],
+        "exclusions": [
+            "internal_wall_insulation", "external_wall_insulation", "floor_insulation", "heating", "solar_pv",
+        ],
         "budget": None,
     }
     print(body2)
 
-    # Scenario C - deep fabric, no exclusions
+    # Scenario B
     body3 = {
+        "portfolio_id": str(PORTFOLIO_ID),
+        "housing_type": "Private",
+        "goal": "Increasing EPC",
+        "goal_value": "A",
+        "trigger_file_path": filename,
+        "already_installed_file_path": "",
+        "patches_file_path": "",
+        "non_invasive_recommendations_file_path": non_invasive_recommendations_filename,
+        "scenario_name": "Demand Reduction, Heating Systems, Solar PV - no solid wall or floors",
+        "multi_plan": True,
+        "exclusions": ["internal_wall_insulation", "external_wall_insulation", "floor_insulation"],
+        "budget": None,
+    }
+    print(body3)
+
+    # Scenario 4 - deep fabric, no IWI, floor
+    body4 = {
         "portfolio_id": str(PORTFOLIO_ID),
         "housing_type": "Private",
         "goal": "Increasing EPC",
@@ -302,4 +323,4 @@ def make_asset_list():
         "multi_plan": True,
         "budget": None,
     }
-    print(body3)
+    print(body4)

From 7107e37027eab1e7dd521bd759659a3b5f1330ef Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 12 Aug 2024 17:24:06 +0100
Subject: [PATCH 098/182] handling fuel sources

---
 backend/Property.py                      | 56 +++++++++++++-----------
 backend/app/assumptions.py               |  3 +-
 backend/app/plan/router.py               | 12 +++++
 etl/customers/newhaven/newhaven_study.py |  2 +
 4 files changed, 47 insertions(+), 26 deletions(-)

diff --git a/backend/Property.py b/backend/Property.py
index e7341c4d..cd2f8469 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -173,7 +173,7 @@ class Property:
         self.windows_area = None
         self.solar_pv_percentage = None
 
-        self.current_adjusted_energy = None
+        self.current_energy_consumption = None
         self.expected_adjusted_energy = None
         self.current_energy_bill = None
         self.expected_energy_bill = None
@@ -746,7 +746,7 @@ class Property:
         }
 
         # Sum up the adjusted kwh figures
-        self.current_adjusted_energy = sum(list(adjusted_kwh_estimates.values()))
+        self.current_energy_consumption = sum(list(unadjusted_kwh_estimates.values()))
 
         self.energy_cost_estimates = {
             "adjusted": adjusted_heating_costs,
@@ -759,7 +759,7 @@ class Property:
         }
 
         self.energy_consumption_estimates = {
-            "adjusted": adjusted_kwh_estimates,
+            # "adjusted": adjusted_kwh_estimates,
             "unadjusted": unadjusted_kwh_estimates
         }
 
@@ -899,7 +899,7 @@ class Property:
             "energy_tariff": self.data["energy-tariff"],
             "primary_energy_consumption": self.energy["primary_energy_consumption"],
             "co2_emissions": self.energy["co2_emissions"],
-            "adjusted_energy_consumption": self.current_adjusted_energy,
+            # "adjusted_energy_consumption": self.current_adjusted_energy,
             "estimated": self.data.get("estimated", False),
         }
 
@@ -1265,29 +1265,35 @@ class Property:
 
         exclusions = [] if exclusions is None else exclusions
 
-        if (self.main_fuel["fuel_type"] == "electricity") or (
-            self.main_fuel["fuel_type"] == "mains gas" and not self.is_ashp_valid(exclusions=exclusions)
+        if not self.is_ashp_valid(exclusions=exclusions):
+            return self.current_energy_consumption
+
+        remap_fuel_sources = ["Natural Gas", "LPG", "Wood Logs"]
+        implemented_fuel_sources = ["Electricity"] + remap_fuel_sources
+
+        heating_energy_source = self.heating_energy_source
+        hot_water_energy_source = self.hot_water_energy_source
+        heating_consumption = self.energy_consumption_estimates["unadjusted"]["heating"]
+        hotwater_consumption = self.energy_consumption_estimates["unadjusted"]["hot_water"]
+
+        if (heating_energy_source not in implemented_fuel_sources) or (
+            hot_water_energy_source not in implemented_fuel_sources
         ):
-            # if the primary fuel is already electricity, we don't need to adjust the consumpion
-            return self.current_adjusted_energy
+            raise NotImplementedError("Have not implemented estimating electrical consumption for this fuel type")
 
-        if self.main_fuel["fuel_type"] == "mains gas" and self.is_ashp_valid(exclusions=exclusions):
-            # if the primary fuel is gas, we need to adjust the consumption to reflect the expected
-            # efficiency of an ASHP.
-            # We should adjust the energy consumption to reflect the 200-400% efficiency of an ASHP with
-            # electrified heating, so that the solar panel can cover heating generation.
-            heating_consumption = self.energy_consumption_estimates["adjusted"]["heating"]
-            hot_water_consumption = self.energy_consumption_estimates["adjusted"]["hot_water"]
+        if heating_energy_source in ["Natural Gas", "LPG", "Wood Logs"]:
+            # Adjust the heating consumption to reflect the expected efficiency of an ASHP
+            heating_consumption = heating_consumption / (assumed_ashp_efficiency / 100)
 
-            systems_consumptions = heating_consumption + hot_water_consumption
+        if hot_water_energy_source in remap_fuel_sources:
+            # Adjust the hot water consumption to reflect the expected efficiency of an ASHP
+            hotwater_consumption = hotwater_consumption / (assumed_ashp_efficiency / 100)
 
-            adjusted_consumption = systems_consumptions / (assumed_ashp_efficiency / 100)
-            electric_consumption = (
-                adjusted_consumption +
-                self.energy_consumption_estimates["adjusted"]["lighting"] +
-                self.energy_consumption_estimates["adjusted"]["appliances"]
-            )
+        electric_consumption = (
+            heating_consumption +
+            hotwater_consumption +
+            self.energy_consumption_estimates["unadjusted"]["lighting"] +
+            self.energy_consumption_estimates["unadjusted"]["appliances"]
+        )
 
-            return electric_consumption
-
-        raise NotImplementedError("Have not implemented estimating electrical consumption for this fuel type")
+        return electric_consumption
diff --git a/backend/app/assumptions.py b/backend/app/assumptions.py
index 13bd913f..d2b7b75a 100644
--- a/backend/app/assumptions.py
+++ b/backend/app/assumptions.py
@@ -1,3 +1,4 @@
-# Assumes that the average efficiency of an air source heat pump is 300%, taking the median of the 200-400% range,
+# Assumes that the average efficiency of an air source heat pump is 250%, taking the median of the 200-400% range,
 # which is often quoted as a sensible efficiency range for air source heat pumps.
+PESSIMISTIC_ASHPY_EFFICIENCY = 200
 AVERAGE_ASHP_EFFICIENCY = 300
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 8001e6ef..a7155de0 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -506,6 +506,18 @@ async def trigger_plan(body: PlanTriggerRequest):
         #       extensions, since it doesn't seem to do a great job
         # TODO: For simple properties, we should do a comparison/check between the solar API's roof area and the
         #       basic estimate of roof area
+
+        for p in tqdm(input_properties):
+            if p.uprn in [100060066017, 10033248492]:
+                continue
+            energy_consumption_client.estimate_new_consumption(
+                current_energy_efficiency=p.data["current-energy-efficiency"],
+                target_efficiency="69",
+                current_consumption=p.estimate_electrical_consumption(
+                    assumed_ashp_efficiency=assumptions.PESSIMISTIC_ASHPY_EFFICIENCY, exclusions=body.exclusions
+                ),
+            )
+
         building_ids = [
             {
                 "building_id": p.building_id,
diff --git a/etl/customers/newhaven/newhaven_study.py b/etl/customers/newhaven/newhaven_study.py
index 9faf6a26..58edf578 100644
--- a/etl/customers/newhaven/newhaven_study.py
+++ b/etl/customers/newhaven/newhaven_study.py
@@ -142,6 +142,8 @@ def make_asset_list():
     )
     # Drop any entires with null floors because that means the ordnance survey data doesn't align with the epc data
     asset_list = asset_list[~pd.isnull(asset_list["number_of_floors"])]
+    # Drop any entries with null insulation wall area
+    asset_list = asset_list[~pd.isnull(asset_list["insulation_wall_area"])]
 
     # D    0.419929
     # C    0.391459

From 18b34874383f503ae769dfb403b8585711d5c298 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 12 Aug 2024 17:37:02 +0100
Subject: [PATCH 099/182] debugging estimating electrical consumption

---
 backend/Property.py        | 11 ++++++-----
 backend/app/plan/router.py | 11 -----------
 2 files changed, 6 insertions(+), 16 deletions(-)

diff --git a/backend/Property.py b/backend/Property.py
index cd2f8469..584f1b23 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -749,7 +749,7 @@ class Property:
         self.current_energy_consumption = sum(list(unadjusted_kwh_estimates.values()))
 
         self.energy_cost_estimates = {
-            "adjusted": adjusted_heating_costs,
+            # "adjusted": adjusted_heating_costs,
             "unadjusted": unadjusted_heating_costs,
             "epc": {
                 "heating": float(self.data["heating-cost-current"]),
@@ -1265,19 +1265,20 @@ class Property:
 
         exclusions = [] if exclusions is None else exclusions
 
+        # If the property currently has an ASHP, we don't gain from any efficiency improvements
         if not self.is_ashp_valid(exclusions=exclusions):
             return self.current_energy_consumption
 
-        remap_fuel_sources = ["Natural Gas", "LPG", "Wood Logs"]
-        implemented_fuel_sources = ["Electricity"] + remap_fuel_sources
+        # If the property currently has an electric boiler, it will still benefit from the ASHP efficiency gain
+        remap_fuel_sources = ["Natural Gas", "LPG", "Wood Logs", "Oil", "Electricity"]
 
         heating_energy_source = self.heating_energy_source
         hot_water_energy_source = self.hot_water_energy_source
         heating_consumption = self.energy_consumption_estimates["unadjusted"]["heating"]
         hotwater_consumption = self.energy_consumption_estimates["unadjusted"]["hot_water"]
 
-        if (heating_energy_source not in implemented_fuel_sources) or (
-            hot_water_energy_source not in implemented_fuel_sources
+        if (heating_energy_source not in remap_fuel_sources) or (
+            hot_water_energy_source not in remap_fuel_sources
         ):
             raise NotImplementedError("Have not implemented estimating electrical consumption for this fuel type")
 
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index a7155de0..90d84bd9 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -507,17 +507,6 @@ async def trigger_plan(body: PlanTriggerRequest):
         # TODO: For simple properties, we should do a comparison/check between the solar API's roof area and the
         #       basic estimate of roof area
 
-        for p in tqdm(input_properties):
-            if p.uprn in [100060066017, 10033248492]:
-                continue
-            energy_consumption_client.estimate_new_consumption(
-                current_energy_efficiency=p.data["current-energy-efficiency"],
-                target_efficiency="69",
-                current_consumption=p.estimate_electrical_consumption(
-                    assumed_ashp_efficiency=assumptions.PESSIMISTIC_ASHPY_EFFICIENCY, exclusions=body.exclusions
-                ),
-            )
-
         building_ids = [
             {
                 "building_id": p.building_id,

From 4b2c4cb0a37fbf9faefcc19fb225c9801fad48db Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 12 Aug 2024 19:09:58 +0100
Subject: [PATCH 100/182] adding fuel pricing table

---
 backend/app/plan/router.py             |  4 ++
 backend/ml_models/AnnualBillSavings.py | 76 ++++++++++++++++++++++++++
 recommendations/Recommendations.py     |  4 +-
 3 files changed, 83 insertions(+), 1 deletion(-)

diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 90d84bd9..6eb5d5ad 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -343,6 +343,8 @@ def extract_propert_on_site_recommendations(config, already_installed, non_invas
         for rec in property_non_invasive_recommendations["recommendations"]:
             if isinstance(rec, str):
                 transformed.append({"type": rec, })
+            else:
+                transformed.append(rec)
 
         property_non_invasive_recommendations["recommendations"] = str(transformed)
 
@@ -720,6 +722,8 @@ async def trigger_plan(body: PlanTriggerRequest):
         )
 
         # We now insert kwh estimates and costs into the recommendations
+        # TODO: We should join the methodology which maps the heating and hot water descriptions to the fuel types in
+        #       Recommendations, but also the Property class
         for property_id in recommendations.keys():
             property_recommendations = recommendations[property_id]
             property_instance = [p for p in input_properties if p.id == property_id][0]
diff --git a/backend/ml_models/AnnualBillSavings.py b/backend/ml_models/AnnualBillSavings.py
index 0317b9e3..50945874 100644
--- a/backend/ml_models/AnnualBillSavings.py
+++ b/backend/ml_models/AnnualBillSavings.py
@@ -1,4 +1,5 @@
 import numpy as np
+import pandas as pd
 
 QUARTERLY_ENERGY_PRICES = [
     # 2024 Q1
@@ -40,6 +41,53 @@ class AnnualBillSavings:
     DAILY_STANDARD_CHARGE_GAS = 0.3143
     DAILY_STANDARD_CHARGE_ELECTRICITY = 0.601
 
+    # Based on https://www.nottenergy.com/advice-and-tools/project-energy-cost-comparison
+    # For July 2024. These quotes are based on the East Midlands region.
+    FUEL_DATA = pd.DataFrame([
+        {"Fuel": "Electricity Standard", "Price (p)": 28.58, "Unit": "kWh", "Boiler Efficiency (%)": 100,
+         "Energy Content, Net Calorific value (kWh/unit)": 1.00, "Price per kWh (p) (inc boiler efficiency)": 28.58,
+         "CO2eq emission factor kgCO2eq/kWh (Gross CV)": 0.275},
+        {"Fuel": "Mains Gas Standard", "Price (p)": 6.31, "Unit": "kWh", "Boiler Efficiency (%)": 90,
+         "Energy Content, Net Calorific value (kWh/unit)": 1.00, "Price per kWh (p) (inc boiler efficiency)": 7.01,
+         "CO2eq emission factor kgCO2eq/kWh (Gross CV)": 0.213},
+        {"Fuel": "Kerosene", "Price (p)": 62.49, "Unit": "Litre", "Boiler Efficiency (%)": 90,
+         "Energy Content, Net Calorific value (kWh/unit)": 9.79, "Price per kWh (p) (inc boiler efficiency)": 7.09,
+         "CO2eq emission factor kgCO2eq/kWh (Gross CV)": 0.298},
+        {"Fuel": "Gas oil", "Price (p)": 94.50, "Unit": "Litre", "Boiler Efficiency (%)": 90,
+         "Energy Content, Net Calorific value (kWh/unit)": 9.96, "Price per kWh (p) (inc boiler efficiency)": 10.54,
+         "CO2eq emission factor kgCO2eq/kWh (Gross CV)": 0.316},
+        {"Fuel": "LPG", "Price (p)": 55.00, "Unit": "Litre", "Boiler Efficiency (%)": 90,
+         "Energy Content, Net Calorific value (kWh/unit)": 6.78, "Price per kWh (p) (inc boiler efficiency)": 9.01,
+         "CO2eq emission factor kgCO2eq/kWh (Gross CV)": 0.240},
+        {"Fuel": "Butane", "Price (p)": 216.58, "Unit": "Litre", "Boiler Efficiency (%)": 90,
+         "Energy Content, Net Calorific value (kWh/unit)": 6.64, "Price per kWh (p) (inc boiler efficiency)": 36.24,
+         "CO2eq emission factor kgCO2eq/kWh (Gross CV)": 0.248},
+        {"Fuel": "Propane", "Price (p)": 157.67, "Unit": "Litre", "Boiler Efficiency (%)": 90,
+         "Energy Content, Net Calorific value (kWh/unit)": 7.22, "Price per kWh (p) (inc boiler efficiency)": 24.25,
+         "CO2eq emission factor kgCO2eq/kWh (Gross CV)": 0.239},
+        {"Fuel": "Kiln Dried (logs)", "Price (p)": 36.52, "Unit": "kg", "Boiler Efficiency (%)": 85,
+         "Energy Content, Net Calorific value (kWh/unit)": 4.09, "Price per kWh (p) (inc boiler efficiency)": 10.51,
+         "CO2eq emission factor kgCO2eq/kWh (Gross CV)": 0.024},
+        {"Fuel": "Pellets (Bagged)", "Price (p)": 39.62, "Unit": "kg", "Boiler Efficiency (%)": 90,
+         "Energy Content, Net Calorific value (kWh/unit)": 4.80, "Price per kWh (p) (inc boiler efficiency)": 9.17,
+         "CO2eq emission factor kgCO2eq/kWh (Gross CV)": 0.049},
+        {"Fuel": "Pellets (Blown bulk)", "Price (p)": 33.92, "Unit": "kg", "Boiler Efficiency (%)": 90,
+         "Energy Content, Net Calorific value (kWh/unit)": 4.80, "Price per kWh (p) (inc boiler efficiency)": 7.85,
+         "CO2eq emission factor kgCO2eq/kWh (Gross CV)": 0.049},
+        {"Fuel": "Smokeless fuel", "Price (p)": 67.26, "Unit": "kg", "Boiler Efficiency (%)": 75,
+         "Energy Content, Net Calorific value (kWh/unit)": 6.70, "Price per kWh (p) (inc boiler efficiency)": 13.38,
+         "CO2eq emission factor kgCO2eq/kWh (Gross CV)": 0.404},
+        {"Fuel": "Coal", "Price (p)": 48.50, "Unit": "kg", "Boiler Efficiency (%)": 75,
+         "Energy Content, Net Calorific value (kWh/unit)": 7.95, "Price per kWh (p) (inc boiler efficiency)": 8.13,
+         "CO2eq emission factor kgCO2eq/kWh (Gross CV)": 0.404},
+        {"Fuel": "GSHP", "Price (p)": 28.58, "Unit": "kWh", "Boiler Efficiency (%)": 350,
+         "Energy Content, Net Calorific value (kWh/unit)": 1.00, "Price per kWh (p) (inc boiler efficiency)": 8.17,
+         "CO2eq emission factor kgCO2eq/kWh (Gross CV)": 0.079},
+        {"Fuel": "ASHP", "Price (p)": 28.58, "Unit": "kWh", "Boiler Efficiency (%)": 294,
+         "Energy Content, Net Calorific value (kWh/unit)": 1.00, "Price per kWh (p) (inc boiler efficiency)": 9.72,
+         "CO2eq emission factor kgCO2eq/kWh (Gross CV)": 0.094}
+    ])
+
     EPC_BANDS = ["G", "F", "E", "D", "C", "B", "A"]
 
     @classmethod
@@ -200,6 +248,15 @@ class AnnualBillSavings:
 
         return cls.EPC_BANDS[expected_index - 1]
 
+    @staticmethod
+    def cost_per_kwh(price_per_unit, energy_content_per_unit):
+        """
+        Return the fuel cost in GBP per kWh, given the price per unit in pence and the energy content per unit in kWh.
+        """
+        cost_per_kwh = price_per_unit / energy_content_per_unit
+        # The price data is in pence, so we convert to pounds
+        return cost_per_kwh / 100
+
     @classmethod
     def calculate_recommendation_fuel_cost(cls, kwh, fuel, cop):
         if fuel == "Electricity":
@@ -207,3 +264,22 @@ class AnnualBillSavings:
 
         if fuel == "Natural Gas":
             return (kwh / cop) * cls.GAS_PRICE_CAP
+
+        if fuel == "LPG":
+            # Get the cost per kwh
+            price_data = cls.FUEL_DATA[cls.FUEL_DATA["Fuel"] == "LPG"].squeeze()
+            cost_per_kwh = cls.cost_per_kwh(
+                price_data["Price (p)"], price_data["Energy Content, Net Calorific value (kWh/unit)"]
+            )
+
+            return (kwh / cop) * cost_per_kwh
+
+        if fuel == "Wood":
+            price_data = cls.FUEL_DATA[cls.FUEL_DATA["Fuel"] == "Pellets (Bagged)"].squeeze()
+            cost_per_kwh = cls.cost_per_kwh(
+                price_data["Price (p)"], price_data["Energy Content, Net Calorific value (kWh/unit)"]
+            )
+
+            return (kwh / cop) * cost_per_kwh
+
+        raise Exception("Fuel not recognised")
diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py
index 0b9e4c7a..93abdcae 100644
--- a/recommendations/Recommendations.py
+++ b/recommendations/Recommendations.py
@@ -27,6 +27,8 @@ DESCRIPTIONS_TO_FUEL_TYPES = {
     "Room heaters, electric": {"fuel": 'Electricity', "cop": 1},
     "Electric immersion, standard tariff": {"fuel": 'Electricity', "cop": 1},
     "Portable electric heaters assumed for most rooms": {"fuel": 'Electricity', "cop": 1},
+    "Boiler and radiators, LPG": {"fuel": 'LPG', "cop": 0.9},
+    "Room heaters, dual fuel (mineral and wood)": {"fuel": 'Wood', "cop": 1},
 }
 STARTING_DUMMY_ID_VALUE = -9999
 
@@ -516,7 +518,7 @@ class Recommendations:
         mapped = DESCRIPTIONS_TO_FUEL_TYPES[heating_description]
         heating_fuel = mapped["fuel"]
 
-        if hotwater_description == "From main system":
+        if hotwater_description in ["From main system", "From main system, no cylinder thermostat"]:
             return {
                 "heating_fuel_type": heating_fuel, "hotwater_fuel_type": heating_fuel,
                 "heating_cop": mapped["cop"], "hotwater_cop": mapped["cop"]

From b4d8959c16cc647f1c4bc1b4305f0e1045ce0f41 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 12 Aug 2024 20:55:32 +0100
Subject: [PATCH 101/182] handling solar thermal water

---
 backend/Property.py                    |  5 +++++
 backend/ml_models/AnnualBillSavings.py |  2 +-
 recommendations/Recommendations.py     | 16 ++++++++++++++--
 3 files changed, 20 insertions(+), 3 deletions(-)

diff --git a/backend/Property.py b/backend/Property.py
index 584f1b23..e0d00427 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -1211,6 +1211,11 @@ class Property:
             self.hot_water_energy_source = heater_type_to_fuel[self.hotwater["heater_type"]]
         else:
             fuel = system_type_modification[self.hotwater["system_type"]]
+
+            if self.hotwater["extra_features"] == "plus solar":
+                self.hot_water_energy_source = self.heating_energy_source + " + Solar Thermal"
+                return
+
             if fuel in ['Main System', "Community Scheme"]:
                 self.hot_water_energy_source = self.heating_energy_source
             else:
diff --git a/backend/ml_models/AnnualBillSavings.py b/backend/ml_models/AnnualBillSavings.py
index 50945874..e41dd01c 100644
--- a/backend/ml_models/AnnualBillSavings.py
+++ b/backend/ml_models/AnnualBillSavings.py
@@ -274,7 +274,7 @@ class AnnualBillSavings:
 
             return (kwh / cop) * cost_per_kwh
 
-        if fuel == "Wood":
+        if fuel == "Wood Logs":
             price_data = cls.FUEL_DATA[cls.FUEL_DATA["Fuel"] == "Pellets (Bagged)"].squeeze()
             cost_per_kwh = cls.cost_per_kwh(
                 price_data["Price (p)"], price_data["Energy Content, Net Calorific value (kWh/unit)"]
diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py
index 93abdcae..636a43e1 100644
--- a/recommendations/Recommendations.py
+++ b/recommendations/Recommendations.py
@@ -28,7 +28,10 @@ DESCRIPTIONS_TO_FUEL_TYPES = {
     "Electric immersion, standard tariff": {"fuel": 'Electricity', "cop": 1},
     "Portable electric heaters assumed for most rooms": {"fuel": 'Electricity', "cop": 1},
     "Boiler and radiators, LPG": {"fuel": 'LPG', "cop": 0.9},
-    "Room heaters, dual fuel (mineral and wood)": {"fuel": 'Wood', "cop": 1},
+    "Room heaters, dual fuel (mineral and wood)": {"fuel": 'Wood Logs', "cop": 1},
+    "Room heaters, mains gas": {"fuel": 'Natural Gas', "cop": 0.9},
+    "Warm air, mains gas": {"fuel": 'Natural Gas', "cop": 0.9},
+    "Boiler, mains gas": {"fuel": 'Natural Gas', "cop": 0.9},
 }
 STARTING_DUMMY_ID_VALUE = -9999
 
@@ -518,12 +521,21 @@ class Recommendations:
         mapped = DESCRIPTIONS_TO_FUEL_TYPES[heating_description]
         heating_fuel = mapped["fuel"]
 
-        if hotwater_description in ["From main system", "From main system, no cylinder thermostat"]:
+        if hotwater_description in [
+            "From main system", "From main system, no cylinder thermostat",
+        ]:
             return {
                 "heating_fuel_type": heating_fuel, "hotwater_fuel_type": heating_fuel,
                 "heating_cop": mapped["cop"], "hotwater_cop": mapped["cop"]
             }
 
+        if hotwater_description == "From main system, plus solar":
+            # The fuel is
+            return {
+                "heating_fuel_type": heating_fuel, "hotwater_fuel_type": heating_fuel + " + Solar Thermal",
+                "heating_cop": mapped["cop"], "hotwater_cop": 1
+            }
+
         mapped_hotwater = DESCRIPTIONS_TO_FUEL_TYPES[hotwater_description]
 
         return {

From 709d4aa958cc0652b65b8874f455e527c886007a Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 12 Aug 2024 20:56:56 +0100
Subject: [PATCH 102/182] added costing method for solar thermal

---
 backend/ml_models/AnnualBillSavings.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/backend/ml_models/AnnualBillSavings.py b/backend/ml_models/AnnualBillSavings.py
index e41dd01c..3ee4b747 100644
--- a/backend/ml_models/AnnualBillSavings.py
+++ b/backend/ml_models/AnnualBillSavings.py
@@ -1,5 +1,6 @@
 import numpy as np
 import pandas as pd
+from backend.apis.GoogleSolarApi import GoogleSolarApi
 
 QUARTERLY_ENERGY_PRICES = [
     # 2024 Q1
@@ -282,4 +283,8 @@ class AnnualBillSavings:
 
             return (kwh / cop) * cost_per_kwh
 
+        if fuel == "Natural Gas + Solar Thermal":
+            # The solar thermal covers a % of the heating kwh, so we need to adjust the cost
+            return (kwh / cop) * GoogleSolarApi.SOLAR_CONSUMPTION_PROPORTION * cls.GAS_PRICE_CAP
+
         raise Exception("Fuel not recognised")

From 61b572481073f5049685dece9e859112f2ce9cd6 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 13 Aug 2024 12:14:09 +0100
Subject: [PATCH 103/182] handling various fuel types

---
 backend/Property.py                    |  6 ++++
 backend/ml_models/AnnualBillSavings.py |  2 +-
 etl/bill_savings/data_collection.py    |  2 +-
 recommendations/Recommendations.py     | 39 ++++++++++++++++++++++----
 4 files changed, 41 insertions(+), 8 deletions(-)

diff --git a/backend/Property.py b/backend/Property.py
index e0d00427..966dd7cb 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -1207,6 +1207,12 @@ class Property:
 
         self.heating_energy_source = self.heating_energy_source[0]
 
+        if self.heating_energy_source == "Varied (Community Scheme)":
+            if self.main_fuel["fuel_type"] == "mains gas":
+                self.heating_energy_source = "Natural Gas (Community Scheme)"
+            else:
+                raise Exception("Implement me")
+
         if self.hotwater["heater_type"] is not None:
             self.hot_water_energy_source = heater_type_to_fuel[self.hotwater["heater_type"]]
         else:
diff --git a/backend/ml_models/AnnualBillSavings.py b/backend/ml_models/AnnualBillSavings.py
index 3ee4b747..bc3a5d32 100644
--- a/backend/ml_models/AnnualBillSavings.py
+++ b/backend/ml_models/AnnualBillSavings.py
@@ -263,7 +263,7 @@ class AnnualBillSavings:
         if fuel == "Electricity":
             return (kwh / cop) * cls.ELECTRICITY_PRICE_CAP
 
-        if fuel == "Natural Gas":
+        if fuel in ["Natural Gas", "Natural Gas (Community Scheme)"]:
             return (kwh / cop) * cls.GAS_PRICE_CAP
 
         if fuel == "LPG":
diff --git a/etl/bill_savings/data_collection.py b/etl/bill_savings/data_collection.py
index 75fd9df2..49bcff82 100644
--- a/etl/bill_savings/data_collection.py
+++ b/etl/bill_savings/data_collection.py
@@ -134,7 +134,7 @@ def app():
     for i, directory in tqdm(enumerate(epc_directories), total=len(epc_directories)):
         try:
             # Skip the first 50
-            if i < 200:
+            if i < 256:
                 continue
 
             data = pd.read_csv(directory / "certificates.csv", low_memory=False)
diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py
index 636a43e1..d689b412 100644
--- a/recommendations/Recommendations.py
+++ b/recommendations/Recommendations.py
@@ -32,6 +32,12 @@ DESCRIPTIONS_TO_FUEL_TYPES = {
     "Room heaters, mains gas": {"fuel": 'Natural Gas', "cop": 0.9},
     "Warm air, mains gas": {"fuel": 'Natural Gas', "cop": 0.9},
     "Boiler, mains gas": {"fuel": 'Natural Gas', "cop": 0.9},
+    "Gas multipoint": {"fuel": "Natural Gas", "cop": 0.9},
+    "Warm air, Electricaire": {"fuel": "Electricity", "cop": 1},
+    "Gas boiler/circulator": {"fuel": "Natural Gas", "cop": 0.9},
+    "Boiler and underfloor heating, mains gas": {"fuel": "Natural Gas", "cop": 0.9},
+    "No system present: electric heaters assumed": {"fuel": "Electricity", "cop": 1},
+    "Electric instantaneous at point of use": {"fuel": "Electricity", "cop": 1},
 }
 STARTING_DUMMY_ID_VALUE = -9999
 
@@ -517,7 +523,19 @@ class Recommendations:
         return property_recommendations, impact_summary
 
     @staticmethod
-    def map_descriptions_to_fuel(heating_description, hotwater_description):
+    def map_descriptions_to_fuel(heating_description, hotwater_description, main_fuel_description):
+
+        # Handle the case of community schemes
+        if (heating_description == "Community scheme") or (hotwater_description == "Community scheme"):
+            if main_fuel_description == "mains gas (community)":
+                return {
+                    "heating_fuel_type": "Natural Gas (Community Scheme)",
+                    "hotwater_fuel_type": "Natural Gas (Community Scheme)",
+                    "heating_cop": 1,
+                    "hotwater_cop": 1
+                }
+            raise NotImplementedError("Handle this case")
+
         mapped = DESCRIPTIONS_TO_FUEL_TYPES[heating_description]
         heating_fuel = mapped["fuel"]
 
@@ -529,7 +547,9 @@ class Recommendations:
                 "heating_cop": mapped["cop"], "hotwater_cop": mapped["cop"]
             }
 
-        if hotwater_description == "From main system, plus solar":
+        if hotwater_description in [
+            "From main system, plus solar", "From main system, plus solar, no cylinder thermostat"
+        ]:
             # The fuel is
             return {
                 "heating_fuel_type": heating_fuel, "hotwater_fuel_type": heating_fuel + " + Solar Thermal",
@@ -623,7 +643,9 @@ class Recommendations:
         fuel_mapping = pd.DataFrame([
             {
                 "id": epc["id"],
-                **cls.map_descriptions_to_fuel(epc["mainheat-description"], epc["hotwater-description"])
+                **cls.map_descriptions_to_fuel(
+                    epc["mainheat-description"], epc["hotwater-description"], epc["main-fuel"]
+                )
             } for epc in property_instance.updated_simulation_epcs
         ])
 
@@ -635,7 +657,8 @@ class Recommendations:
                             "id": STARTING_DUMMY_ID_VALUE,
                             **cls.map_descriptions_to_fuel(
                                 property_instance.data["mainheat-description"],
-                                property_instance.data["hotwater-description"]
+                                property_instance.data["hotwater-description"],
+                                property_instance.data["main-fuel"]
                             )
                         }
                     ]
@@ -710,11 +733,15 @@ class Recommendations:
                 rec["energy_cost_savings"] = energy_cost_savings
 
         # Finally, we set the current energy bill
+        # For a community scheme, there is a standing charge but it's based on the operational cost of the network
+        # and is therefore likely different to the typical standing charge. This cost is typically defined by the
+        # network operator; often, for a building whose residents are on a heat network, the building operator
+        # will purchase energy from the network and re-sell it to the residents
         starting_figures = kwh_impact_table[kwh_impact_table["id"] == STARTING_DUMMY_ID_VALUE].squeeze()
         gas_standing_charge = 0
         if (
-            (starting_figures["heating_fuel_type"] == "Natural Gas") or
-            (starting_figures["hotwater_fuel_type"] == "Natural Gas")
+            (starting_figures["heating_fuel_type"] in ["Natural Gas", "Natural Gas (Community Scheme)"]) or
+            (starting_figures["hotwater_fuel_type"] in ["Natural Gas", "Natural Gas (Community Scheme)"])
         ):
             gas_standing_charge = AnnualBillSavings.DAILY_STANDARD_CHARGE_GAS * 365
 

From e7ab28bd17512ae4ae3a231a9728162af0abb48a Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 13 Aug 2024 12:23:24 +0100
Subject: [PATCH 104/182] Handling different fuel types

---
 backend/app/plan/router.py             | 2 +-
 backend/ml_models/AnnualBillSavings.py | 9 +++++++--
 recommendations/Recommendations.py     | 1 +
 3 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 6eb5d5ad..6f1d9935 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -724,7 +724,7 @@ async def trigger_plan(body: PlanTriggerRequest):
         # We now insert kwh estimates and costs into the recommendations
         # TODO: We should join the methodology which maps the heating and hot water descriptions to the fuel types in
         #       Recommendations, but also the Property class
-        for property_id in recommendations.keys():
+        for property_id in tqdm(recommendations.keys()):
             property_recommendations = recommendations[property_id]
             property_instance = [p for p in input_properties if p.id == property_id][0]
 
diff --git a/backend/ml_models/AnnualBillSavings.py b/backend/ml_models/AnnualBillSavings.py
index bc3a5d32..d018dedb 100644
--- a/backend/ml_models/AnnualBillSavings.py
+++ b/backend/ml_models/AnnualBillSavings.py
@@ -272,7 +272,6 @@ class AnnualBillSavings:
             cost_per_kwh = cls.cost_per_kwh(
                 price_data["Price (p)"], price_data["Energy Content, Net Calorific value (kWh/unit)"]
             )
-
             return (kwh / cop) * cost_per_kwh
 
         if fuel == "Wood Logs":
@@ -280,11 +279,17 @@ class AnnualBillSavings:
             cost_per_kwh = cls.cost_per_kwh(
                 price_data["Price (p)"], price_data["Energy Content, Net Calorific value (kWh/unit)"]
             )
-
             return (kwh / cop) * cost_per_kwh
 
         if fuel == "Natural Gas + Solar Thermal":
             # The solar thermal covers a % of the heating kwh, so we need to adjust the cost
             return (kwh / cop) * GoogleSolarApi.SOLAR_CONSUMPTION_PROPORTION * cls.GAS_PRICE_CAP
 
+        if fuel == "Oil":
+            price_data = cls.FUEL_DATA[cls.FUEL_DATA["Fuel"] == "Kerosene"].squeeze()
+            cost_per_kwh = cls.cost_per_kwh(
+                price_data["Price (p)"], price_data["Energy Content, Net Calorific value (kWh/unit)"]
+            )
+            return (kwh / cop) * cost_per_kwh
+
         raise Exception("Fuel not recognised")
diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py
index d689b412..d8d0ec08 100644
--- a/recommendations/Recommendations.py
+++ b/recommendations/Recommendations.py
@@ -38,6 +38,7 @@ DESCRIPTIONS_TO_FUEL_TYPES = {
     "Boiler and underfloor heating, mains gas": {"fuel": "Natural Gas", "cop": 0.9},
     "No system present: electric heaters assumed": {"fuel": "Electricity", "cop": 1},
     "Electric instantaneous at point of use": {"fuel": "Electricity", "cop": 1},
+    "Boiler and radiators, oil": {"fuel": "Oil", "cop": 0.9},
 }
 STARTING_DUMMY_ID_VALUE = -9999
 

From 5696b03b8ce27ccdc0e0a0a60b56e36aeb311a4e Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 13 Aug 2024 14:30:17 +0100
Subject: [PATCH 105/182] handling bug when no recommendations for a property

---
 backend/apis/GoogleSolarApi.py         | 4 ----
 backend/app/assumptions.py             | 4 ++++
 backend/app/plan/router.py             | 4 ++--
 backend/ml_models/AnnualBillSavings.py | 4 ++--
 recommendations/Recommendations.py     | 1 +
 5 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/backend/apis/GoogleSolarApi.py b/backend/apis/GoogleSolarApi.py
index 1354bbff..905d4975 100644
--- a/backend/apis/GoogleSolarApi.py
+++ b/backend/apis/GoogleSolarApi.py
@@ -17,10 +17,6 @@ logger = setup_logger()
 class GoogleSolarApi:
     NORTH_FACING_AZIMUTH_RANGE = (-30, 30)
 
-    # Conservative estimate of the proportion of electricity that will be consumed, whereas the rest will
-    # be exported
-    SOLAR_CONSUMPTION_PROPORTION = 0.5
-
     # These are variables, described in the documentation for cost analysis for non-us locations, seen here
     # https://developers.google.com/maps/documentation/solar/calculate-costs-non-us
     # We use the default figures that the API uses for US locations
diff --git a/backend/app/assumptions.py b/backend/app/assumptions.py
index d2b7b75a..ffc186df 100644
--- a/backend/app/assumptions.py
+++ b/backend/app/assumptions.py
@@ -2,3 +2,7 @@
 # which is often quoted as a sensible efficiency range for air source heat pumps.
 PESSIMISTIC_ASHPY_EFFICIENCY = 200
 AVERAGE_ASHP_EFFICIENCY = 300
+
+# Conservative estimate of the proportion of electricity that will be consumed, whereas the rest will
+# be exported
+SOLAR_CONSUMPTION_PROPORTION = 0.5
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 6f1d9935..48a78e15 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -655,7 +655,7 @@ async def trigger_plan(body: PlanTriggerRequest):
                     roof_area=solar_api_client.roof_area
                 )
 
-        logger.info("Getting components and epc recommendations")
+        logger.info("Identifying property recommendations")
         recommendations = {}
         recommendations_scoring_data = []
         representative_recommendations = {}
@@ -742,7 +742,7 @@ async def trigger_plan(body: PlanTriggerRequest):
         #      cylinder jacket), we should add these to the recommendations as default
 
         for p in input_properties:
-            if not recommendations[p.id]:
+            if not recommendations.get(p.id):
                 continue
             input_measures = prepare_input_measures(recommendations[p.id], body.goal)
 
diff --git a/backend/ml_models/AnnualBillSavings.py b/backend/ml_models/AnnualBillSavings.py
index d018dedb..f791599a 100644
--- a/backend/ml_models/AnnualBillSavings.py
+++ b/backend/ml_models/AnnualBillSavings.py
@@ -1,6 +1,6 @@
 import numpy as np
 import pandas as pd
-from backend.apis.GoogleSolarApi import GoogleSolarApi
+import backend.app.assumptions as assumptions
 
 QUARTERLY_ENERGY_PRICES = [
     # 2024 Q1
@@ -283,7 +283,7 @@ class AnnualBillSavings:
 
         if fuel == "Natural Gas + Solar Thermal":
             # The solar thermal covers a % of the heating kwh, so we need to adjust the cost
-            return (kwh / cop) * GoogleSolarApi.SOLAR_CONSUMPTION_PROPORTION * cls.GAS_PRICE_CAP
+            return (kwh / cop) * assumptions.SOLAR_CONSUMPTION_PROPORTION * cls.GAS_PRICE_CAP
 
         if fuel == "Oil":
             price_data = cls.FUEL_DATA[cls.FUEL_DATA["Fuel"] == "Kerosene"].squeeze()
diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py
index d8d0ec08..af55031f 100644
--- a/recommendations/Recommendations.py
+++ b/recommendations/Recommendations.py
@@ -39,6 +39,7 @@ DESCRIPTIONS_TO_FUEL_TYPES = {
     "No system present: electric heaters assumed": {"fuel": "Electricity", "cop": 1},
     "Electric instantaneous at point of use": {"fuel": "Electricity", "cop": 1},
     "Boiler and radiators, oil": {"fuel": "Oil", "cop": 0.9},
+    "Electric storage heaters, Electric storage heaters": {"fuel": "Electricity", "cop": 1},
 }
 STARTING_DUMMY_ID_VALUE = -9999
 

From 7275dc8e919455e397ed2056fb00781c766a3769 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 13 Aug 2024 14:47:10 +0100
Subject: [PATCH 106/182] fixing missing bills calcs for properties without
 recs

---
 backend/app/plan/router.py         |  9 +++++----
 recommendations/Recommendations.py | 32 +++++++++++++++++++-----------
 2 files changed, 25 insertions(+), 16 deletions(-)

diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 48a78e15..c26a5217 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -133,8 +133,8 @@ def extract_portfolio_aggregation_data(
             [r["energy_cost_savings"] for r in default_recommendations]
         )
 
-        pre_retrofit_energy_consumption = p.current_adjusted_energy
-        post_retrofit_energy_consumption = p.current_adjusted_energy - sum(
+        pre_retrofit_energy_consumption = p.current_energy_consumption
+        post_retrofit_energy_consumption = p.current_energy_consumption - sum(
             [r["kwh_savings"] for r in default_recommendations]
         )
 
@@ -724,8 +724,9 @@ async def trigger_plan(body: PlanTriggerRequest):
         # We now insert kwh estimates and costs into the recommendations
         # TODO: We should join the methodology which maps the heating and hot water descriptions to the fuel types in
         #       Recommendations, but also the Property class
-        for property_id in tqdm(recommendations.keys()):
-            property_recommendations = recommendations[property_id]
+        logger.info("Calculating tenant savings - kwh and bills")
+        for property_id in tqdm([p.id for p in input_properties]):
+            property_recommendations = recommendations.get(property_id, [])
             property_instance = [p for p in input_properties if p.id == property_id][0]
 
             property_current_energy_bill = Recommendations.calculate_recommendation_tenant_savings(
diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py
index af55031f..ef478426 100644
--- a/recommendations/Recommendations.py
+++ b/recommendations/Recommendations.py
@@ -16,6 +16,7 @@ from recommendations.HotwaterRecommendations import HotwaterRecommendations
 from recommendations.SecondaryHeating import SecondaryHeating
 from backend.ml_models.AnnualBillSavings import AnnualBillSavings
 from backend.apis.GoogleSolarApi import GoogleSolarApi
+import backend.app.assumptions as assumptions
 
 ASHP_COP = 3
 DESCRIPTIONS_TO_FUEL_TYPES = {
@@ -40,6 +41,8 @@ DESCRIPTIONS_TO_FUEL_TYPES = {
     "Electric instantaneous at point of use": {"fuel": "Electricity", "cop": 1},
     "Boiler and radiators, oil": {"fuel": "Oil", "cop": 0.9},
     "Electric storage heaters, Electric storage heaters": {"fuel": "Electricity", "cop": 1},
+    "Boiler and radiators, electric": {"fuel": "Electricity", "cop": 0.9},
+    "Gas boiler/circulator, no cylinder thermostat": {"fuel": "Natural Gas", "cop": 0.9},
 }
 STARTING_DUMMY_ID_VALUE = -9999
 
@@ -594,14 +597,15 @@ class Recommendations:
             {
                 "phase": r["phase"],
                 "recommendation_id": r["recommendation_id"],
-                "lighting_kwh_savings": r["kwh_savings"] * GoogleSolarApi.SOLAR_CONSUMPTION_PROPORTION,
+                "lighting_kwh_savings": r["kwh_savings"]
             } for recs in property_recommendations for r in recs if r["type"] == "low_energy_lighting"
         ], columns=["phase", "recommendation_id", "lighting_kwh_savings"])
+
         solar_recommendations = pd.DataFrame([
             {
                 "phase": r["phase"],
                 "recommendation_id": r["recommendation_id"],
-                "solar_kwh_savings": r["initial_ac_kwh_per_year"] * GoogleSolarApi.SOLAR_CONSUMPTION_PROPORTION,
+                "solar_kwh_savings": r["initial_ac_kwh_per_year"] * assumptions.SOLAR_CONSUMPTION_PROPORTION,
             } for recs in property_recommendations for r in recs if r["type"] == "solar_pv"
         ], columns=["phase", "recommendation_id", "solar_kwh_savings"])
 
@@ -673,17 +677,21 @@ class Recommendations:
             fuel_mapping, how="left", on="id"
         ).sort_values(["phase", "recommendation_id"], ascending=True).reset_index(drop=True)
 
-        kwh_impact_table["heating_fuel_type"] = np.where(
-            kwh_impact_table["id"] == STARTING_DUMMY_ID_VALUE,
-            property_instance.heating_energy_source,
-            kwh_impact_table["heating_fuel_type"]
-        )
+        if (pd.isnull(kwh_impact_table["heating_fuel_type"]).sum() or
+            pd.isnull(kwh_impact_table["hotwater_fuel_type"]).sum()):
+            raise Exception("Fuel type is missing")
 
-        kwh_impact_table["hotwater_fuel_type"] = np.where(
-            kwh_impact_table["id"] == STARTING_DUMMY_ID_VALUE,
-            property_instance.hot_water_energy_source,
-            kwh_impact_table["hotwater_fuel_type"]
-        )
+        # kwh_impact_table["heating_fuel_type"] = np.where(
+        #     kwh_impact_table["id"] == STARTING_DUMMY_ID_VALUE,
+        #     property_instance.heating_energy_source,
+        #     kwh_impact_table["heating_fuel_type"]
+        # )
+        #
+        # kwh_impact_table["hotwater_fuel_type"] = np.where(
+        #     kwh_impact_table["id"] == STARTING_DUMMY_ID_VALUE,
+        #     property_instance.hot_water_energy_source,
+        #     kwh_impact_table["hotwater_fuel_type"]
+        # )
 
         # We now calculate the fuel cost
         for k in ["heating", "hotwater"]:

From 89241f9ae3f009ae1640d7651ebb997979ed0207 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 13 Aug 2024 16:14:20 +0100
Subject: [PATCH 107/182] remove old exposed floor recommendation handling

---
 etl/customers/newhaven/newhaven_study.py |  2 +-
 recommendations/FloorRecommendations.py  | 10 +++++-----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/etl/customers/newhaven/newhaven_study.py b/etl/customers/newhaven/newhaven_study.py
index 58edf578..9cda3d29 100644
--- a/etl/customers/newhaven/newhaven_study.py
+++ b/etl/customers/newhaven/newhaven_study.py
@@ -270,7 +270,7 @@ def make_asset_list():
         "multi_plan": True,
         "exclusions": [
             "internal_wall_insulation", "external_wall_insulation", "floor_insulation", "heating", "solar_pv",
-            "lighting", "windows"
+            "lighting", "windows", "secondary_heating"
         ],
         "budget": None,
     }
diff --git a/recommendations/FloorRecommendations.py b/recommendations/FloorRecommendations.py
index 5a8ad242..74be7d41 100644
--- a/recommendations/FloorRecommendations.py
+++ b/recommendations/FloorRecommendations.py
@@ -119,7 +119,11 @@ class FloorRecommendations(Definitions):
         if u_value < self.BUILDING_REGULATIONS_PART_L_MAX_U_VALUE:
             return
 
-        if self.property.floor["is_suspended"] or self.property.floor["is_to_unheated_space"]:
+        if (
+            self.property.floor["is_suspended"] or
+            self.property.floor["is_to_unheated_space"] or
+            self.property.floor["is_to_external_air"]
+        ):
             # Given the U-value, we recommend underfloor insulation
             self.recommend_floor_insulation(
                 phase=phase,
@@ -139,10 +143,6 @@ class FloorRecommendations(Definitions):
             )
             return
 
-        if self.property.floor["is_to_unheated_space"] or self.property.floor["is_to_external_air"]:
-            self.recommend_floor_insulation(u_value=u_value, parts=self.exposed_floor_insulation_parts)
-            return
-
         raise NotImplementedError("Implement me!")
 
     @staticmethod

From 1e16babab3964c1b9f3b360be52af7b856c196e2 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 13 Aug 2024 18:28:38 +0100
Subject: [PATCH 108/182] added heating and hot water consumption per property
 to db

---
 backend/Property.py                | 34 ++++++------------------------
 backend/app/db/models/portfolio.py |  3 ++-
 2 files changed, 8 insertions(+), 29 deletions(-)

diff --git a/backend/Property.py b/backend/Property.py
index 966dd7cb..c4b1b969 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -174,7 +174,7 @@ class Property:
         self.solar_pv_percentage = None
 
         self.current_energy_consumption = None
-        self.expected_adjusted_energy = None
+        self.current_energy_consumption_heating_hotwater = None
         self.current_energy_bill = None
         self.expected_energy_bill = None
 
@@ -724,13 +724,6 @@ class Property:
             "appliances": float(appliances_kwh)
         }
 
-        adjusted_kwh_estimates = {
-            k: AnnualBillSavings.adjust_energy_to_metered(
-                epc_energy=v,
-                current_epc_rating=self.data["current-energy-rating"],
-            ) for k, v in unadjusted_kwh_estimates.items()
-        }
-
         unadjusted_heating_costs = {
             "heating": None,
             "hot_water": None,
@@ -738,18 +731,13 @@ class Property:
             "appliances": float(appliances_kwh) * AnnualBillSavings.ELECTRICITY_PRICE_CAP
         }
 
-        adjusted_heating_costs = {
-            k: AnnualBillSavings.adjust_energy_to_metered(
-                epc_energy=v,
-                current_epc_rating=self.data["current-energy-rating"],
-            ) for k, v in unadjusted_heating_costs.items() if v is not None
-        }
-
         # Sum up the adjusted kwh figures
         self.current_energy_consumption = sum(list(unadjusted_kwh_estimates.values()))
+        self.current_energy_consumption_heating_hotwater = (
+            unadjusted_kwh_estimates["heating"] + unadjusted_kwh_estimates["hot_water"]
+        )
 
         self.energy_cost_estimates = {
-            # "adjusted": adjusted_heating_costs,
             "unadjusted": unadjusted_heating_costs,
             "epc": {
                 "heating": float(self.data["heating-cost-current"]),
@@ -759,7 +747,6 @@ class Property:
         }
 
         self.energy_consumption_estimates = {
-            # "adjusted": adjusted_kwh_estimates,
             "unadjusted": unadjusted_kwh_estimates
         }
 
@@ -899,7 +886,8 @@ class Property:
             "energy_tariff": self.data["energy-tariff"],
             "primary_energy_consumption": self.energy["primary_energy_consumption"],
             "co2_emissions": self.energy["co2_emissions"],
-            # "adjusted_energy_consumption": self.current_adjusted_energy,
+            "current_energy_demand": self.current_energy_consumption,
+            "current_energy_demand_heating_hotwater": self.current_energy_consumption_heating_hotwater,
             "estimated": self.data.get("estimated", False),
         }
 
@@ -1097,16 +1085,6 @@ class Property:
 
         return component_data
 
-    def set_adjusted_energy(
-        self, expected_adjusted_energy, expected_energy_bill
-    ):
-        """
-        Stores these values for usage later
-        """
-
-        self.expected_adjusted_energy = expected_adjusted_energy
-        self.expected_energy_bill = expected_energy_bill
-
     def set_windows_count(self):
         """
         Using the estimate_windows function, this method will set the number of windows in the property
diff --git a/backend/app/db/models/portfolio.py b/backend/app/db/models/portfolio.py
index aa0146c0..5ac092a7 100644
--- a/backend/app/db/models/portfolio.py
+++ b/backend/app/db/models/portfolio.py
@@ -168,7 +168,8 @@ class PropertyDetailsEpcModel(Base):
     energy_tariff = Column(Text)
     primary_energy_consumption = Column(Float)
     co2_emissions = Column(Float)
-    adjusted_energy_consumption = Column(Float)
+    current_energy_demand = Column(Float)
+    current_energy_demand_heating_hotwater = Column(Float)
     estimated = Column(Boolean, default=False)
 
 

From 7e973d7955dd6efaef26d8aad031238c109234b4 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 14 Aug 2024 12:20:17 +0100
Subject: [PATCH 109/182] added patch to built form for newhaven

---
 backend/Property.py                      |  6 +++
 backend/app/plan/router.py               | 35 ++++++++------
 backend/app/plan/schemas.py              |  1 +
 backend/ml_models/AnnualBillSavings.py   |  4 ++
 etl/customers/newhaven/newhaven_study.py | 59 +++++++++++++++++++++---
 recommendations/Recommendations.py       |  3 ++
 6 files changed, 86 insertions(+), 22 deletions(-)

diff --git a/backend/Property.py b/backend/Property.py
index c4b1b969..649a9547 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -93,6 +93,7 @@ class Property:
         self.data = {
             k.replace("_", "-"): v for k, v in epc_record.get("prepared_epc").items()
         }
+        
         self.old_data = epc_record.get("old_data")
         self.property_dimensions = None
         # This is a list of measures that have already been installed in the property, typically found as a result
@@ -1193,6 +1194,11 @@ class Property:
 
         if self.hotwater["heater_type"] is not None:
             self.hot_water_energy_source = heater_type_to_fuel[self.hotwater["heater_type"]]
+
+            if self.hotwater["extra_features"] == "plus solar":
+                self.hot_water_energy_source = self.heating_energy_source + " + Solar Thermal"
+                return
+
         else:
             fuel = system_type_modification[self.hotwater["system_type"]]
 
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index c26a5217..25e41e52 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -312,7 +312,17 @@ def get_on_site_data(body: PlanTriggerRequest):
     return patches, already_installed, non_invasive_recommendations
 
 
-def extract_propert_on_site_recommendations(config, already_installed, non_invasive_recommendations, uprn):
+def extract_property_on_site_recommendations(config, patches, already_installed, non_invasive_recommendations, uprn):
+    patch_has_uprn = "uprn" in patches[0]
+    if patch_has_uprn:
+        patch = next((
+            x for x in patches if str(x["uprn"]) == str(config["uprn"])
+        ), {})
+    else:
+        patch = next((
+            x for x in patches if (x["address"] == config["address"]) and (x["postcode"] == config["postcode"])
+        ), {})
+
     property_already_installed = next((
         x for x in already_installed if
         (x["address"] == config["address"]) and (x["postcode"] == config["postcode"])
@@ -348,7 +358,7 @@ def extract_propert_on_site_recommendations(config, already_installed, non_invas
 
         property_non_invasive_recommendations["recommendations"] = str(transformed)
 
-    return property_already_installed, property_non_invasive_recommendations
+    return patch, property_already_installed, property_non_invasive_recommendations
 
 
 router = APIRouter(
@@ -423,9 +433,13 @@ async def trigger_plan(body: PlanTriggerRequest):
             epc_records, energy_assessment["energy_assessment_is_newer"] = create_epc_records(
                 epc_searcher, energy_assessment
             )
-            patch = next((
-                x for x in patches if (x["address"] == config["address"]) and (x["postcode"] == config["postcode"])
-            ), {})
+
+            patch, property_already_installed, property_non_invasive_recommendations = (
+                extract_property_on_site_recommendations(
+                    config, patches, already_installed, non_invasive_recommendations, uprn
+                )
+            )
+
             epc_records = patch_epc(patch, epc_records)
 
             prepared_epc = EPCRecord(
@@ -434,10 +448,6 @@ async def trigger_plan(body: PlanTriggerRequest):
                 cleaning_data=cleaning_data
             )
 
-            property_already_installed, property_non_invasive_recommendations = extract_propert_on_site_recommendations(
-                config, already_installed, non_invasive_recommendations, uprn
-            )
-
             input_properties.append(
                 Property(
                     id=property_id,
@@ -509,6 +519,7 @@ async def trigger_plan(body: PlanTriggerRequest):
         # TODO: For simple properties, we should do a comparison/check between the solar API's roof area and the
         #       basic estimate of roof area
 
+        # TODO: Debug this
         building_ids = [
             {
                 "building_id": p.building_id,
@@ -797,12 +808,6 @@ async def trigger_plan(body: PlanTriggerRequest):
             ]
             recommendations[p.id] = final_recommendations
 
-            # With that complete, we now total the kwh and cost savings for the property
-            # total_kwh_savings = sum([rec["kwh_savings"] for rec in final_recommendations if rec["default"]])
-            # total_energy_cost_savings = sum(
-            #     [rec["energy_cost_savings"] for rec in final_recommendations if rec["default"]]
-            # )
-
         logger.info("Uploading recommendations to the database")
         # If we have any work to do, we create a new scenario
         engine_scenario = create_scenario(
diff --git a/backend/app/plan/schemas.py b/backend/app/plan/schemas.py
index bbcd5a57..63ca7834 100644
--- a/backend/app/plan/schemas.py
+++ b/backend/app/plan/schemas.py
@@ -35,6 +35,7 @@ class PlanTriggerRequest(BaseModel):
         "air_source_heat_pump",
         "internal_wall_insulation",
         "external_wall_insulation",
+        "secondary_heating"
     }
 
     _allowed_goals = {"Increasing EPC"}
diff --git a/backend/ml_models/AnnualBillSavings.py b/backend/ml_models/AnnualBillSavings.py
index f791599a..13c9e0a5 100644
--- a/backend/ml_models/AnnualBillSavings.py
+++ b/backend/ml_models/AnnualBillSavings.py
@@ -285,6 +285,10 @@ class AnnualBillSavings:
             # The solar thermal covers a % of the heating kwh, so we need to adjust the cost
             return (kwh / cop) * assumptions.SOLAR_CONSUMPTION_PROPORTION * cls.GAS_PRICE_CAP
 
+        if fuel == "Electricity + Solar Thermal":
+            # The solar thermal covers a % of the heating kwh, so we need to adjust the cost
+            return (kwh / cop) * assumptions.SOLAR_CONSUMPTION_PROPORTION * cls.ELECTRICITY_PRICE_CAP
+
         if fuel == "Oil":
             price_data = cls.FUEL_DATA[cls.FUEL_DATA["Fuel"] == "Kerosene"].squeeze()
             cost_per_kwh = cls.cost_per_kwh(
diff --git a/etl/customers/newhaven/newhaven_study.py b/etl/customers/newhaven/newhaven_study.py
index 9cda3d29..e87705b8 100644
--- a/etl/customers/newhaven/newhaven_study.py
+++ b/etl/customers/newhaven/newhaven_study.py
@@ -11,7 +11,7 @@ EPC_DIRECTORY = Path(src_file_path).parent / "local_data" / "all-domestic-certif
 CUSTOMER_DATA_DIRECTORY = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Newhaven/Data"
 
 USER_ID = 8
-PORTFOLIO_ID = 89
+PORTFOLIO_ID = 90
 
 
 def make_asset_list():
@@ -109,8 +109,8 @@ def make_asset_list():
     asset_list = asset_list[asset_list["Class Description"] != "Caravan"]
     asset_list = asset_list[~pd.isnull(asset_list["current-energy-efficiency"])]
 
-    # Take a 10% sample, for properties that have an EPC, with a seed
-    asset_list = asset_list.sample(frac=0.25, random_state=42)
+    # Take a sample, for properties that have an EPC, with a seed
+    # asset_list = asset_list.sample(frac=0.5, random_state=42)
 
     AVG_FLOOR_HEIGHT = asset_list["floor-height"].median()
 
@@ -195,6 +195,17 @@ def make_asset_list():
 
         property_non_invasive_recs = []
         if not property_ashp_potential.empty:
+
+            if property_costs.empty:
+                similar_properties = ashp_potential[
+                    ashp_potential["Overall Suitability Rating"] &
+                    (ashp_potential["Recommended Heat Pump Size [kW]"] ==
+                     property_ashp_potential["Recommended Heat Pump Size [kW]"].values[0])
+                    ].merge(
+                    renewables_cost, how="inner", on="UPRN"
+                )
+                property_costs = similar_properties[["Air Source Heat Pump - Total"]].mean().to_frame().T
+
             property_non_invasive_recs.append(
                 {
                     "type": "air_source_heat_pump",
@@ -256,6 +267,21 @@ def make_asset_list():
         file_name=non_invasive_recommendations_filename
     )
 
+    # We add a patch to one of the units because there's no data for the built form
+    # We would be able to handle this automatically in the future, when using OS API
+    patches = [{
+        "uprn": "10033266220",
+        "built-form": "Semi-Detached",
+    }]
+
+    # Store patches in s3
+    patches_filename = f"{USER_ID}/{PORTFOLIO_ID}/patches.json"
+    save_csv_to_s3(
+        dataframe=pd.DataFrame(patches),
+        bucket_name="retrofit-plan-inputs-dev",
+        file_name=patches_filename
+    )
+
     # Create three scenarios
     body1 = {
         "portfolio_id": str(PORTFOLIO_ID),
@@ -264,7 +290,7 @@ def make_asset_list():
         "goal_value": "A",
         "trigger_file_path": filename,
         "already_installed_file_path": "",
-        "patches_file_path": "",
+        "patches_file_path": patches_filename,
         "non_invasive_recommendations_file_path": non_invasive_recommendations_filename,
         "scenario_name": "Demand Reduction - no solid wall, windows, LEDs",
         "multi_plan": True,
@@ -283,7 +309,7 @@ def make_asset_list():
         "goal_value": "A",
         "trigger_file_path": filename,
         "already_installed_file_path": "",
-        "patches_file_path": "",
+        "patches_file_path": patches_filename,
         "non_invasive_recommendations_file_path": non_invasive_recommendations_filename,
         "scenario_name": "Demand Reduction - no solid wall, floors or heating",
         "multi_plan": True,
@@ -294,6 +320,25 @@ def make_asset_list():
     }
     print(body2)
 
+    # 2.5 - full fabric, no decant
+    body2_5 = {
+        "portfolio_id": str(PORTFOLIO_ID),
+        "housing_type": "Private",
+        "goal": "Increasing EPC",
+        "goal_value": "A",
+        "trigger_file_path": filename,
+        "already_installed_file_path": "",
+        "patches_file_path": patches_filename,
+        "non_invasive_recommendations_file_path": non_invasive_recommendations_filename,
+        "scenario_name": "Demand Reduction - no solid wall, floors or heating",
+        "multi_plan": True,
+        "exclusions": [
+            "internal_wall_insulation", "floor_insulation", "heating", "solar_pv",
+        ],
+        "budget": None,
+    }
+    print(body2_5)
+
     # Scenario B
     body3 = {
         "portfolio_id": str(PORTFOLIO_ID),
@@ -302,7 +347,7 @@ def make_asset_list():
         "goal_value": "A",
         "trigger_file_path": filename,
         "already_installed_file_path": "",
-        "patches_file_path": "",
+        "patches_file_path": patches_filename,
         "non_invasive_recommendations_file_path": non_invasive_recommendations_filename,
         "scenario_name": "Demand Reduction, Heating Systems, Solar PV - no solid wall or floors",
         "multi_plan": True,
@@ -319,7 +364,7 @@ def make_asset_list():
         "goal_value": "A",
         "trigger_file_path": filename,
         "already_installed_file_path": "",
-        "patches_file_path": "",
+        "patches_file_path": patches_filename,
         "non_invasive_recommendations_file_path": non_invasive_recommendations_filename,
         "scenario_name": "Whole House",
         "multi_plan": True,
diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py
index ef478426..33c8bee4 100644
--- a/recommendations/Recommendations.py
+++ b/recommendations/Recommendations.py
@@ -43,6 +43,9 @@ DESCRIPTIONS_TO_FUEL_TYPES = {
     "Electric storage heaters, Electric storage heaters": {"fuel": "Electricity", "cop": 1},
     "Boiler and radiators, electric": {"fuel": "Electricity", "cop": 0.9},
     "Gas boiler/circulator, no cylinder thermostat": {"fuel": "Natural Gas", "cop": 0.9},
+    "Boiler and radiators, dual fuel (mineral and wood)": {"fuel": "Wood Logs", "cop": 0.9},
+    "Electric immersion, standard tariff, plus solar": {"fuel": "Electricity + Solar Thermal", "cop": 1},
+    "From main system, flue gas heat recovery": {"fuel": "Natural Gas", "cop": 0.9},
 }
 STARTING_DUMMY_ID_VALUE = -9999
 

From 6e802b1f586614ff544b610389d831e85b51f05e Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 14 Aug 2024 21:57:50 +0100
Subject: [PATCH 110/182] working on slides, debugging recommendations

---
 backend/app/assumptions.py               |   2 +-
 etl/customers/newhaven/newhaven_study.py |  11 +-
 etl/customers/newhaven/slides.py         | 214 +++++++++++++++++++++++
 recommendations/FloorRecommendations.py  |  19 +-
 recommendations/Recommendations.py       |   9 +-
 5 files changed, 240 insertions(+), 15 deletions(-)
 create mode 100644 etl/customers/newhaven/slides.py

diff --git a/backend/app/assumptions.py b/backend/app/assumptions.py
index ffc186df..f0ddf868 100644
--- a/backend/app/assumptions.py
+++ b/backend/app/assumptions.py
@@ -1,6 +1,6 @@
 # Assumes that the average efficiency of an air source heat pump is 250%, taking the median of the 200-400% range,
 # which is often quoted as a sensible efficiency range for air source heat pumps.
-PESSIMISTIC_ASHPY_EFFICIENCY = 200
+PESSIMISTIC_ASHP_EFFICIENCY = 200
 AVERAGE_ASHP_EFFICIENCY = 300
 
 # Conservative estimate of the proportion of electricity that will be consumed, whereas the rest will
diff --git a/etl/customers/newhaven/newhaven_study.py b/etl/customers/newhaven/newhaven_study.py
index e87705b8..e6871678 100644
--- a/etl/customers/newhaven/newhaven_study.py
+++ b/etl/customers/newhaven/newhaven_study.py
@@ -269,10 +269,13 @@ def make_asset_list():
 
     # We add a patch to one of the units because there's no data for the built form
     # We would be able to handle this automatically in the future, when using OS API
-    patches = [{
-        "uprn": "10033266220",
-        "built-form": "Semi-Detached",
-    }]
+    patches = [
+        {
+            "uprn": "10033266220",
+            "built-form": "Semi-Detached",
+        },
+        {'uprn': '10033266219', 'built-form': 'Semi-Detached'}
+    ]
 
     # Store patches in s3
     patches_filename = f"{USER_ID}/{PORTFOLIO_ID}/patches.json"
diff --git a/etl/customers/newhaven/slides.py b/etl/customers/newhaven/slides.py
new file mode 100644
index 00000000..3fe27452
--- /dev/null
+++ b/etl/customers/newhaven/slides.py
@@ -0,0 +1,214 @@
+import pandas as pd
+from sqlalchemy.orm import sessionmaker
+from backend.app.db.connection import db_engine
+from backend.app.db.models.recommendations import Recommendation, Plan, PlanRecommendations, Scenario
+from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel
+
+
+def get_data(portfolio_id, scenario_ids):
+    session = sessionmaker(bind=db_engine)()
+    session.begin()
+
+    # Get properties and their details for a specific portfolio
+    properties_query = session.query(
+        PropertyModel,
+        PropertyDetailsEpcModel
+    ).join(
+        PropertyDetailsEpcModel, PropertyModel.id == PropertyDetailsEpcModel.property_id
+    ).filter(
+        PropertyModel.portfolio_id == portfolio_id  # Filter by portfolio ID
+    ).all()
+
+    # Transform properties data to include all fields dynamically
+    properties_data = [
+        {**{col.name: getattr(prop.PropertyModel, col.name) for col in PropertyModel.__table__.columns},
+         **{col.name: getattr(prop.PropertyDetailsEpcModel, col.name) for col in
+            PropertyDetailsEpcModel.__table__.columns}}
+        for prop in properties_query
+    ]
+
+    # Get property IDs from fetched properties
+
+    # Get plans linked to the fetched properties
+    plans_query = session.query(Plan).filter(Plan.scenario_id.in_(scenario_ids)).all()
+
+    # Transform plans data to include all fields dynamically
+    plans_data = [
+        {col.name: getattr(plan, col.name) for col in Plan.__table__.columns}
+        for plan in plans_query
+    ]
+
+    # Extract plan IDs for filtering recommendations through PlanRecommendations
+    plan_ids = [plan['id'] for plan in plans_data]
+
+    # Get recommendations through PlanRecommendations for those plans and that are default
+    recommendations_query = session.query(
+        Recommendation,
+        Plan.scenario_id
+    ).join(
+        PlanRecommendations, Recommendation.id == PlanRecommendations.recommendation_id
+    ).join(
+        Plan, Plan.id == PlanRecommendations.plan_id  # Join with Plan to access scenario_id
+    ).filter(
+        PlanRecommendations.plan_id.in_(plan_ids),
+        Recommendation.default == True  # Filtering for default recommendations
+    ).all()
+
+    # Transform recommendations data to include all fields dynamically and include scenario_id
+    recommendations_data = [
+        {**{col.name: getattr(rec.Recommendation, col.name) if hasattr(rec, 'Recommendation') else getattr(rec,
+                                                                                                           col.name) for
+            col in Recommendation.__table__.columns},
+         "Scenario ID": rec.scenario_id}
+        for rec in recommendations_query
+    ]
+
+    session.close()
+
+    return properties_data, plans_data, recommendations_data
+
+
+def slides():
+    # Prepares the information required for the slides
+
+    # Right now this is the second version of the nehaven portfolio
+    portfolio_id = 90
+    # Look at one scenario at a time, otherwise this is agony
+    scenario_ids = [47, 48, 49]
+
+    properties_data, plans_data, recommendations_data = get_data(portfolio_id, scenario_ids)
+
+    properties_df = pd.DataFrame(properties_data)
+    plans_df = pd.DataFrame(plans_data)
+    recommendations_df = pd.DataFrame(recommendations_data)
+
+    if properties_df.shape[0] != 2553:
+        raise ValueError("The number of unique properties is not 2553")
+
+    def estimate_post_retrofit_heating_hotwater_kwh(recommendations_df, scenario_ids):
+        # Get the recommendations for the scenario, default
+        scenario_comparison_df = []
+        scenario_comparison_df_2 = []
+        for scenario_id in scenario_ids:
+            # Get the recommendations for the scenario, default
+            scenario_recommendations = recommendations_df[
+                (recommendations_df["Scenario ID"] == scenario_id) &
+                (recommendations_df["default"] == True)
+                ].copy()
+
+            scenario_recommendations['ligting_kwh'] = scenario_recommendations.apply(
+                lambda x: x['kwh_savings'] if x['type'] == 'low_energy_lighting' else 0,
+                axis=1)
+            scenario_recommendations['solar_kwh'] = scenario_recommendations.apply(
+                lambda x: x['kwh_savings'] if x['type'] == 'solar_pv' else 0, axis=1)
+
+            if scenario_recommendations['solar_kwh'].sum() > 0:
+                blah
+
+            # Set 'Estimated Kwh Savings' to zero where specific kwh columns are used
+            scenario_recommendations['Estimated Kwh Savings'] = scenario_recommendations.apply(
+                lambda x: 0 if x['type'] in ['low_energy_lighting', 'solar_pv'] else x[
+                    'kwh_savings'], axis=1)
+
+            grouped_data = scenario_recommendations.groupby(['property_id']).agg({
+                'Estimated Kwh Savings': 'sum',
+                'ligting_kwh': 'sum',
+                'solar_kwh': 'sum'
+            }).reset_index()
+
+            comparison = properties_df.drop_duplicates().merge(
+                grouped_data, on=["property_id"], how="left"
+            )
+
+            comparison["Post Retrofit Heating & Hotwater kwh"] = (
+                comparison["current_energy_demand_heating_hotwater"] - \
+                comparison["Estimated Kwh Savings"]
+            )
+
+            avgs = comparison[['current_energy_demand_heating_hotwater', 'Post Retrofit Heating & Hotwater kwh']].mean()
+
+            # We now, for properties that have a plan, do a before and after
+            with_savings = comparison[~pd.isnull(comparison["Estimated Kwh Savings"])]
+
+            avgs2 = with_savings[
+                ['current_energy_demand_heating_hotwater', 'Post Retrofit Heating & Hotwater kwh']].mean()
+            avgs2["difference"] = avgs2["current_energy_demand_heating_hotwater"] - avgs2[
+                "Post Retrofit Heating & Hotwater kwh"]
+            avgs2["percentage_reduction"] = 100 * avgs2["difference"] / avgs2["current_energy_demand_heating_hotwater"]
+
+            scenario_comparison_df.append({"scenario_id": scenario_id, **avgs})
+            scenario_comparison_df_2.append({"scenario_id": scenario_id, **avgs2})
+
+        scenario_comparison_df = pd.DataFrame(scenario_comparison_df)
+        scenario_comparison_df_2 = pd.DataFrame(scenario_comparison_df_2)
+
+        return scenario_comparison_df, scenario_comparison_df_2
+
+        # TODO: How do we factor in solar PV
+
+    # Q1: What is the baseline heating and energy demand for the properties in the portfolio - baseline?
+    heating_hotwater_kwh = (
+        properties_df[['current_energy_demand', 'current_energy_demand_heating_hotwater']]
+        .mean()
+    )
+
+    # Q2: For each scenario, what is the £ per kwh reduction?
+    # Calculate total kwh savings
+    kwh_plan_impact = estimate_post_retrofit_heating_hotwater_kwh(properties_df, recommendations_df)
+
+    z = df[
+        (df["Recommendation Default Status"] == True) &
+        (df["Plan Name"].isin(['Demand Reduction – cavity & roof insulation']))
+        ]
+    z2 = z[z["Property ID"] == 25215]
+    # Find duplicated property ID, recommendationt type combos
+    z = z[z.duplicated(subset=["Property ID", "Recommendation Type"])]
+
+    for plan_name in df["Plan Name"].unique():
+        # Get default recs
+        default_recs = df[
+            (df["Recommendation Default Status"] == True) &
+            (df["Plan Name"] == plan_name)
+            ].copy()
+        if default_recs["Recommendation ID"].duplicated().sum():
+            raise Exception("somethign went wrong")
+
+        default_recs["Recommendation Type"].unique()
+
+        # We now calculate the total savings
+        total_savings = default_recs["Estimated Kwh Savings"].sum()
+        total_cost = default_recs["Recommendation Cost"].sum()
+
+    kwh_savings = df[
+        df["Recommendation Default Status"] == True
+        ].groupby("Plan Name")[["Estimated Kwh Savings", "Recommendation Cost"]].sum().rename(
+        columns={"Estimated Kwh Savings": "Total Kwh Savings", "Recommendation Cost": "Total Cost"}
+    ).reset_index()
+
+    kwh_savings["Cost per Kwh Saved"] = kwh_savings["Total Cost"] / kwh_savings["Total Kwh Savings"]
+
+    # Q3: For each scenario, we want to answer what the heating and hot water kwh looks like after retrofit
+    # We need to take recommndations that affect just the heating and hot water
+
+    # By property
+
+    df["Type Mapped"] = df["Recommendation Type"].copy().replace(
+        {
+            "loft_insulation": "roof_insulation",
+            "room_roof_insulation": "roof_insulation",
+            "flat_roof_insulation": "roof_insulation",
+            "hot_water_tank_insulation": "other",
+            "cylinder_thermostat": "other",
+            "sealing_open_fireplace": "other",
+        }
+    )
+
+    # Group by 'Plan Name' and 'Recommendation Type' and count unique 'Property ID'
+    recommendation_summary = df.groupby(['Plan Name', 'Type Mapped']).agg({
+        'Property ID': 'nunique'
+    }).reset_index()
+
+    recommendation_summary.columns = ['Plan Name', 'Type Mapped', 'Number of Properties']
+    recommendation_summary["Percentage of Properties"] = 100 * (
+        recommendation_summary["Number of Properties"] / df["Property ID"].nunique()
+    )
diff --git a/recommendations/FloorRecommendations.py b/recommendations/FloorRecommendations.py
index 74be7d41..c63d45c2 100644
--- a/recommendations/FloorRecommendations.py
+++ b/recommendations/FloorRecommendations.py
@@ -74,7 +74,6 @@ class FloorRecommendations(Definitions):
         u_value = self.property.floor["thermal_transmittance"]
         property_type = self.property.data["property-type"]
         floor_area = self.property.insulation_floor_area
-        year_built = self.property.year_built
 
         if self.property.floor["another_property_below"] | (self.property.floor["insulation_thickness"] in [
             "average", "above average"
@@ -95,14 +94,16 @@ class FloorRecommendations(Definitions):
 
         if u_value:
 
-            # By being built more recently than this, it means that the property was likely build with soild
-            # concrete floors with insulation already
-            if year_built < self.PART_L_YEAR_CUTOFF:
-                raise NotImplementedError("Not investigated this use case")
-
-            if u_value <= self.BUILDING_REGULATIONS_PART_L_MAX_U_VALUE:
-                # The floor is already compliant
-                return
+            # In this case where we have the u-value of a floor, we likely don't have any other information about it
+            # so there is no recommendation that we can practically make
+            if (
+                self.property.floor["is_suspended"] or
+                self.property.floor["is_to_unheated_space"] or
+                self.property.floor["is_to_external_air"] or
+                self.property.floor["is_solid"]
+            ):
+                raise ValueError("This should not be possible")
+            return
 
         if u_value is None:
             u_value = get_floor_u_value(
diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py
index 33c8bee4..fef7472c 100644
--- a/recommendations/Recommendations.py
+++ b/recommendations/Recommendations.py
@@ -20,7 +20,9 @@ import backend.app.assumptions as assumptions
 
 ASHP_COP = 3
 DESCRIPTIONS_TO_FUEL_TYPES = {
-    "Air source heat pump, radiators, electric": {"fuel": "Electricity", "cop": ASHP_COP},
+    "Air source heat pump, radiators, electric": {
+        "fuel": "Electricity", "cop": assumptions.AVERAGE_ASHP_EFFICIENCY / 100
+    },
     "Boiler and radiators, mains gas": {"fuel": 'Natural Gas', "cop": 0.9},
     'Electric storage heaters': {"fuel": 'Electricity', "cop": 1},
     "Electric immersion, off-peak": {"fuel": 'Electricity', "cop": 1},
@@ -46,6 +48,11 @@ DESCRIPTIONS_TO_FUEL_TYPES = {
     "Boiler and radiators, dual fuel (mineral and wood)": {"fuel": "Wood Logs", "cop": 0.9},
     "Electric immersion, standard tariff, plus solar": {"fuel": "Electricity + Solar Thermal", "cop": 1},
     "From main system, flue gas heat recovery": {"fuel": "Natural Gas", "cop": 0.9},
+    "Electric underfloor heating": {"fuel": "Electricity", "cop": 1},
+    "No system present: electric immersion assumed": {"fuel": "Electricity", "cop": 1},
+    "Air source heat pump, underfloor, electric": {
+        "fuel": "Electricity", "cop": assumptions.AVERAGE_ASHP_EFFICIENCY / 100
+    },
 }
 STARTING_DUMMY_ID_VALUE = -9999
 

From cee16b81664ad7d5c18f939f93e2d6f01871e4a0 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 16 Aug 2024 09:50:37 +0100
Subject: [PATCH 111/182] closing up newhaven study

---
 etl/customers/newhaven/newhaven_study.py |  22 +-
 etl/customers/newhaven/slides.py         | 405 +++++++++++++++++------
 recommendations/WallRecommendations.py   |   2 +
 3 files changed, 318 insertions(+), 111 deletions(-)

diff --git a/etl/customers/newhaven/newhaven_study.py b/etl/customers/newhaven/newhaven_study.py
index e6871678..67471813 100644
--- a/etl/customers/newhaven/newhaven_study.py
+++ b/etl/customers/newhaven/newhaven_study.py
@@ -54,6 +54,8 @@ def make_asset_list():
     )
     ashp_potential["UPRN"] = ashp_potential["UPRN"].astype(int).astype(str)
 
+    ashp_potential[ashp_potential["UPRN"] == "100060067063"].squeeze()
+
     insulation_potential = pd.read_csv(
         f"{CUSTOMER_DATA_DIRECTORY}/Insulation Potential/Insulation Potential.csv",
         low_memory=False,
@@ -88,20 +90,20 @@ def make_asset_list():
         columns={"Wall Area [m^2]": "insulation_wall_area", "Building Area [m^2]": "floor_area"}
     )
 
-    # had_an_epc = asset_list[~pd.isnull(asset_list["current-energy-efficiency"])]
-    # below_b = asset_list[asset_list["current-energy-efficiency"].astype(float) <= 80].shape
-    # below_c = asset_list[asset_list["current-energy-efficiency"].astype(float) <= 69].shape
-    # had_an_epc["energy-efficiency-rating"].value_counts()
-    # asset_list["current-energy-rating"].value_counts()
-    # asset_list["co2-emissions-current"].mean()
+    had_an_epc = asset_list[~pd.isnull(asset_list["current-energy-efficiency"])]
+    below_b = asset_list[asset_list["current-energy-efficiency"].astype(float) <= 80].shape
+    below_c = asset_list[asset_list["current-energy-efficiency"].astype(float) <= 69].shape
+    had_an_epc["energy-efficiency-rating"].value_counts()
+    asset_list["current-energy-rating"].value_counts()
+    asset_list["co2-emissions-current"].mean()
     # # Get the underlying data of a histograme
-    # import matplotlib.pyplot as plt
-    # n, bins, patches = plt.hist(asset_list["co2-emissions-current"], bins=100, color="blue", alpha=0.7)
+    import matplotlib.pyplot as plt
+    n, bins, patches = plt.hist(asset_list["co2-emissions-current"], bins=100, color="blue", alpha=0.7)
     #
-    # bins = np.arange(0, asset_list["co2-emissions-current"].max(), 1)  # Bins from 50 to 150 with a step of 10
+    bins = np.arange(0, asset_list["co2-emissions-current"].max(), 1)  # Bins from 50 to 150 with a step of 10
     #
     # # Step 3: Calculate the frequency of data in each bin
-    # hist, bin_edges = np.histogram(asset_list["co2-emissions-current"], bins=bins)
+    hist, bin_edges = np.histogram(asset_list["co2-emissions-current"], bins=bins)
 
     # Take properties below a B - there are 2844 units
     asset_list = asset_list[asset_list["current-energy-efficiency"].astype(float) <= 80]
diff --git a/etl/customers/newhaven/slides.py b/etl/customers/newhaven/slides.py
index 3fe27452..2fe914e2 100644
--- a/etl/customers/newhaven/slides.py
+++ b/etl/customers/newhaven/slides.py
@@ -1,4 +1,6 @@
+from tqdm import tqdm
 import pandas as pd
+import numpy as np
 from sqlalchemy.orm import sessionmaker
 from backend.app.db.connection import db_engine
 from backend.app.db.models.recommendations import Recommendation, Plan, PlanRecommendations, Scenario
@@ -68,13 +70,101 @@ def get_data(portfolio_id, scenario_ids):
     return properties_data, plans_data, recommendations_data
 
 
+def estimate_post_retrofit_heating_hotwater_kwh(properties_df, recommendations_df, scenario_ids):
+    # properties_starting_with_electric_heating = properties_df[
+    #     properties_df["mainfuel"].isin(
+    #         ["Electricity not community", "Electricity electricity unspecified tariff"]
+    #     )
+    # ]["id"].tolist()
+
+    # Get the recommendations for the scenario, default
+    scenario_comparison_df = []
+    scenario_comparison_df_2 = []
+    cost_per_kwh_saved_table = []
+    for scenario_id in scenario_ids:
+        # Get the recommendations for the scenario, default
+        scenario_recommendations = recommendations_df[
+            (recommendations_df["Scenario ID"] == scenario_id) &
+            (recommendations_df["default"] == True)
+            ].copy()
+
+        scenario_recommendations['ligting_kwh'] = scenario_recommendations.apply(
+            lambda x: x['kwh_savings'] if x['type'] == 'low_energy_lighting' else 0,
+            axis=1)
+        scenario_recommendations['solar_kwh'] = scenario_recommendations.apply(
+            lambda x: x['kwh_savings'] if x['type'] == 'solar_pv' else 0, axis=1)
+
+        # Set 'Estimated Kwh Savings' to zero where specific kwh columns are used
+        scenario_recommendations['Estimated Kwh Savings'] = scenario_recommendations.apply(
+            lambda x: 0 if x['type'] in ['low_energy_lighting', 'solar_pv'] else x[
+                'kwh_savings'], axis=1)
+
+        # We need to determine if any of the properties start with electric heating or end with it
+        # property_electric_heating = []
+        # for pid, recs in scenario_recommendations.groupby("property_id"):
+        #     has_ashp = recs[recs["description"].str.contains("air source heat pump")]
+        #     if not has_ashp.empty:
+        #         property_electric_heating.append(pid)
+        #         continue
+        #     has_heating_rec = recs[recs["description"].str.contains("high heat retention electric")]
+        #     if not has_heating_rec.empty:
+        #         property_electric_heating.append(pid)
+        #         continue
+
+        grouped_data = scenario_recommendations.groupby(['property_id']).agg({
+            'Estimated Kwh Savings': 'sum',
+            'ligting_kwh': 'sum',
+            'solar_kwh': 'sum',
+            "estimated_cost": "sum"
+        }).reset_index()
+
+        comparison = properties_df.drop_duplicates().merge(
+            grouped_data, on=["property_id"], how="left"
+        )
+
+        comparison["Post Retrofit Heating & Hotwater kwh"] = (
+            comparison["current_energy_demand_heating_hotwater"] - \
+            comparison["Estimated Kwh Savings"]
+        )
+
+        avgs = comparison[['current_energy_demand_heating_hotwater', 'Post Retrofit Heating & Hotwater kwh']].mean()
+
+        # We now, for properties that have a plan, do a before and after
+        with_savings = comparison[~pd.isnull(comparison["Estimated Kwh Savings"])]
+
+        avgs2 = with_savings[
+            ['current_energy_demand_heating_hotwater', 'Post Retrofit Heating & Hotwater kwh']].mean()
+        avgs2["difference"] = avgs2["current_energy_demand_heating_hotwater"] - avgs2[
+            "Post Retrofit Heating & Hotwater kwh"]
+        avgs2["percentage_reduction"] = 100 * avgs2["difference"] / avgs2["current_energy_demand_heating_hotwater"]
+
+        # We also calculate the cost per kwh saved
+        total_kwh_saved = (
+            with_savings["Estimated Kwh Savings"].sum() +
+            with_savings["ligting_kwh"].sum() +
+            with_savings["solar_kwh"].sum()
+        )
+        total_cost = with_savings["estimated_cost"].sum()
+        cost_per_kwh_saved = total_cost / total_kwh_saved
+
+        scenario_comparison_df.append({"scenario_id": scenario_id, **avgs})
+        scenario_comparison_df_2.append({"scenario_id": scenario_id, **avgs2})
+        cost_per_kwh_saved_table.append({"scenario_id": scenario_id, "cost_per_kwh_saved": cost_per_kwh_saved})
+
+    scenario_comparison_population = pd.DataFrame(scenario_comparison_df)
+    scenario_comparison_retrofitted_units = pd.DataFrame(scenario_comparison_df_2)
+    cost_per_kwh_saved_table = pd.DataFrame(cost_per_kwh_saved_table)
+
+    return scenario_comparison_population, scenario_comparison_retrofitted_units, cost_per_kwh_saved_table
+
+
 def slides():
     # Prepares the information required for the slides
 
     # Right now this is the second version of the nehaven portfolio
     portfolio_id = 90
     # Look at one scenario at a time, otherwise this is agony
-    scenario_ids = [47, 48, 49]
+    scenario_ids = [47, 48, 49, 50, 51]
 
     properties_data, plans_data, recommendations_data = get_data(portfolio_id, scenario_ids)
 
@@ -85,114 +175,25 @@ def slides():
     if properties_df.shape[0] != 2553:
         raise ValueError("The number of unique properties is not 2553")
 
-    def estimate_post_retrofit_heating_hotwater_kwh(recommendations_df, scenario_ids):
-        # Get the recommendations for the scenario, default
-        scenario_comparison_df = []
-        scenario_comparison_df_2 = []
-        for scenario_id in scenario_ids:
-            # Get the recommendations for the scenario, default
-            scenario_recommendations = recommendations_df[
-                (recommendations_df["Scenario ID"] == scenario_id) &
-                (recommendations_df["default"] == True)
-                ].copy()
-
-            scenario_recommendations['ligting_kwh'] = scenario_recommendations.apply(
-                lambda x: x['kwh_savings'] if x['type'] == 'low_energy_lighting' else 0,
-                axis=1)
-            scenario_recommendations['solar_kwh'] = scenario_recommendations.apply(
-                lambda x: x['kwh_savings'] if x['type'] == 'solar_pv' else 0, axis=1)
-
-            if scenario_recommendations['solar_kwh'].sum() > 0:
-                blah
-
-            # Set 'Estimated Kwh Savings' to zero where specific kwh columns are used
-            scenario_recommendations['Estimated Kwh Savings'] = scenario_recommendations.apply(
-                lambda x: 0 if x['type'] in ['low_energy_lighting', 'solar_pv'] else x[
-                    'kwh_savings'], axis=1)
-
-            grouped_data = scenario_recommendations.groupby(['property_id']).agg({
-                'Estimated Kwh Savings': 'sum',
-                'ligting_kwh': 'sum',
-                'solar_kwh': 'sum'
-            }).reset_index()
-
-            comparison = properties_df.drop_duplicates().merge(
-                grouped_data, on=["property_id"], how="left"
-            )
-
-            comparison["Post Retrofit Heating & Hotwater kwh"] = (
-                comparison["current_energy_demand_heating_hotwater"] - \
-                comparison["Estimated Kwh Savings"]
-            )
-
-            avgs = comparison[['current_energy_demand_heating_hotwater', 'Post Retrofit Heating & Hotwater kwh']].mean()
-
-            # We now, for properties that have a plan, do a before and after
-            with_savings = comparison[~pd.isnull(comparison["Estimated Kwh Savings"])]
-
-            avgs2 = with_savings[
-                ['current_energy_demand_heating_hotwater', 'Post Retrofit Heating & Hotwater kwh']].mean()
-            avgs2["difference"] = avgs2["current_energy_demand_heating_hotwater"] - avgs2[
-                "Post Retrofit Heating & Hotwater kwh"]
-            avgs2["percentage_reduction"] = 100 * avgs2["difference"] / avgs2["current_energy_demand_heating_hotwater"]
-
-            scenario_comparison_df.append({"scenario_id": scenario_id, **avgs})
-            scenario_comparison_df_2.append({"scenario_id": scenario_id, **avgs2})
-
-        scenario_comparison_df = pd.DataFrame(scenario_comparison_df)
-        scenario_comparison_df_2 = pd.DataFrame(scenario_comparison_df_2)
-
-        return scenario_comparison_df, scenario_comparison_df_2
-
-        # TODO: How do we factor in solar PV
-
     # Q1: What is the baseline heating and energy demand for the properties in the portfolio - baseline?
     heating_hotwater_kwh = (
         properties_df[['current_energy_demand', 'current_energy_demand_heating_hotwater']]
         .mean()
     )
 
-    # Q2: For each scenario, what is the £ per kwh reduction?
-    # Calculate total kwh savings
-    kwh_plan_impact = estimate_post_retrofit_heating_hotwater_kwh(properties_df, recommendations_df)
-
-    z = df[
-        (df["Recommendation Default Status"] == True) &
-        (df["Plan Name"].isin(['Demand Reduction – cavity & roof insulation']))
-        ]
-    z2 = z[z["Property ID"] == 25215]
-    # Find duplicated property ID, recommendationt type combos
-    z = z[z.duplicated(subset=["Property ID", "Recommendation Type"])]
-
-    for plan_name in df["Plan Name"].unique():
-        # Get default recs
-        default_recs = df[
-            (df["Recommendation Default Status"] == True) &
-            (df["Plan Name"] == plan_name)
-            ].copy()
-        if default_recs["Recommendation ID"].duplicated().sum():
-            raise Exception("somethign went wrong")
-
-        default_recs["Recommendation Type"].unique()
-
-        # We now calculate the total savings
-        total_savings = default_recs["Estimated Kwh Savings"].sum()
-        total_cost = default_recs["Recommendation Cost"].sum()
-
-    kwh_savings = df[
-        df["Recommendation Default Status"] == True
-        ].groupby("Plan Name")[["Estimated Kwh Savings", "Recommendation Cost"]].sum().rename(
-        columns={"Estimated Kwh Savings": "Total Kwh Savings", "Recommendation Cost": "Total Cost"}
-    ).reset_index()
-
-    kwh_savings["Cost per Kwh Saved"] = kwh_savings["Total Cost"] / kwh_savings["Total Kwh Savings"]
+    # Q2: For each scenario, what is the heating and hot water kwh after retrofit, across the entire
+    # population (incl. those without retrofit) and for just those being retrofitted?
+    # We also calculate the cost per kwh saved
+    scenario_comparison_population, scenario_comparison_retrofitted_units, cost_per_kwh_saved_table = (
+        estimate_post_retrofit_heating_hotwater_kwh(properties_df, recommendations_df, scenario_ids)
+    )
 
     # Q3: For each scenario, we want to answer what the heating and hot water kwh looks like after retrofit
     # We need to take recommndations that affect just the heating and hot water
 
     # By property
 
-    df["Type Mapped"] = df["Recommendation Type"].copy().replace(
+    recommendations_df["type_mapped"] = recommendations_df["type"].copy().replace(
         {
             "loft_insulation": "roof_insulation",
             "room_roof_insulation": "roof_insulation",
@@ -200,15 +201,217 @@ def slides():
             "hot_water_tank_insulation": "other",
             "cylinder_thermostat": "other",
             "sealing_open_fireplace": "other",
+            "suspended_floor_insulation": "floor_insulation",
+            "solid_floor_insulation": "floor_insulation",
         }
     )
 
+    recommendations_df["type_mapped"] = np.where(
+        recommendations_df["description"].str.contains("air source heat pump"),
+        "air_source_heat_pump",
+        recommendations_df["type_mapped"]
+    )
+
     # Group by 'Plan Name' and 'Recommendation Type' and count unique 'Property ID'
-    recommendation_summary = df.groupby(['Plan Name', 'Type Mapped']).agg({
-        'Property ID': 'nunique'
+    recommendation_summary = recommendations_df[recommendations_df["default"] == True].groupby(
+        ['Scenario ID', 'type_mapped']
+    ).agg({
+        'property_id': 'nunique'
     }).reset_index()
 
-    recommendation_summary.columns = ['Plan Name', 'Type Mapped', 'Number of Properties']
+    recommendation_summary.columns = ['Scenario ID', 'Type Mapped', 'Number of Properties']
     recommendation_summary["Percentage of Properties"] = 100 * (
-        recommendation_summary["Number of Properties"] / df["Property ID"].nunique()
+        recommendation_summary["Number of Properties"] / properties_df["id"].nunique()
     )
+
+    recommendation_summary_final_scenario = recommendation_summary[recommendation_summary["Scenario ID"].isin([51])]
+
+    # MVP implementation of funding estimation for the most basic scenario, using GBIS
+
+    project_scores_matrix = pd.read_csv("/Users/khalimconn-kowlessar/Downloads/ECO4 Full Project Scores Matrix.csv")
+
+    def find_abs(sap_movement, starting_sap, floor_area):
+        starting_band = find_band(starting_sap)
+        finishing_band = find_band(starting_sap + sap_movement)
+        if starting_band == finishing_band:
+            return 0
+
+        if floor_area <= 72:
+            floor_area_segment = '0-72'
+        elif (floor_area > 72) and (floor_area <= 97):
+            floor_area_segment = "73-97"
+        elif (floor_area > 97) and (floor_area <= 199):
+            floor_area_segment = "98-199"
+        else:
+            floor_area_segment = "200+"
+
+        return project_scores_matrix[
+            (project_scores_matrix["Floor Area Segment"] == floor_area_segment) &
+            (project_scores_matrix["Starting Band"] == starting_band) &
+            (project_scores_matrix["Finishing Band"] == finishing_band)
+            ].squeeze()["Cost Savings"]
+
+    eco4_scores_sap_table = [
+        {'Band': 'High_A', 'From': 96.0, 'Up to': 100.0, 'Mid-point': 98.0},
+        {'Band': 'Low_A', 'From': 92.0, 'Up to': 96.0, 'Mid-point': 94.0},
+        {'Band': 'High_B', 'From': 86.0, 'Up to': 91.0, 'Mid-point': 88.5},
+        {'Band': 'Low_B', 'From': 81.0, 'Up to': 86.0, 'Mid-point': 83.5},
+        {'Band': 'High_C', 'From': 74.5, 'Up to': 80.0, 'Mid-point': 77.25},
+        {'Band': 'Low_C', 'From': 69.0, 'Up to': 74.5, 'Mid-point': 71.75},
+        {'Band': 'High_D', 'From': 61.5, 'Up to': 68.0, 'Mid-point': 64.75},
+        {'Band': 'Low_D', 'From': 55.0, 'Up to': 61.5, 'Mid-point': 58.25},
+        {'Band': 'High_E', 'From': 46.5, 'Up to': 54.0, 'Mid-point': 50.25},
+        {'Band': 'Low_E', 'From': 39.0, 'Up to': 46.5, 'Mid-point': 42.75},
+        {'Band': 'High_F', 'From': 29.5, 'Up to': 38.0, 'Mid-point': 33.75},
+        {'Band': 'Low_F', 'From': 21.0, 'Up to': 29.5, 'Mid-point': 25.25},
+        {'Band': 'High_G', 'From': 10.5, 'Up to': 20.0, 'Mid-point': 15.25},
+        {'Band': 'Low_G', 'From': 1.0, 'Up to': 10.5, 'Mid-point': 5.75}
+    ]
+    eco4_scores_sap_table = pd.DataFrame(eco4_scores_sap_table)
+
+    def find_band(value):
+        # Iterate through each row in the DataFrame to find the correct band
+        value_floored = np.floor(value)
+        return eco4_scores_sap_table[
+            (eco4_scores_sap_table["From"] <= value_floored) & (eco4_scores_sap_table["Up to"] >= value_floored)
+            ].squeeze()["Band"]
+
+    def identify_funding_measure(p, p_recs, is_social):
+        measures = ["cavity_wall_insulation", "loft_insulation"]
+        property_abs = []
+        for m in measures:
+            funding_measure = p_recs[p_recs["type"] == m]
+            if not funding_measure.empty:
+                funding_measure = funding_measure.squeeze()
+                project_abs = find_abs(
+                    sap_movement=funding_measure["sap_points"],
+                    starting_sap=p["current_sap_points"],
+                    floor_area=p["total_floor_area"]
+                )
+                property_abs.append({
+                    "property_id": p["property_id"],
+                    "measure": funding_measure["type"],
+                    "cost": funding_measure["estimated_cost"],
+                    "abs": project_abs,
+                    "is_social": is_social
+                })
+
+        if not property_abs:
+            return None
+
+        property_abs = pd.DataFrame(property_abs).sort_values("cost", ascending=False)
+        property_abs = property_abs.head(1).to_dict(orient="records")[0]
+        return property_abs
+
+    social_tenure = ["rental (social)", "Rented (social)"]
+    scenario_recs = recommendations_df[recommendations_df["Scenario ID"].isin([47])]
+
+    funding = []
+    for _, p in tqdm(properties_df.iterrows(), total=len(properties_df)):
+        p_recs = scenario_recs[scenario_recs["property_id"] == p["property_id"]]
+        if p_recs.empty:
+            continue
+
+        if (p["tenure"] in social_tenure) and (p["current_sap_points"] < 69):
+            f = identify_funding_measure(p, p_recs, True)
+            if f:
+                funding.append(f)
+                continue
+
+        if p["current_sap_points"] < 69:
+            f = identify_funding_measure(p, p_recs, False)
+            if f:
+                funding.append(f)
+                continue
+
+    funding = pd.DataFrame(funding)
+    conservative_abs = 20
+    funding["expected_funding"] = funding["abs"] * conservative_abs
+    # We take rows where the expected funding is at least the cost of the works + 15% (the 1.15 factor below)
+    funding = funding[funding["expected_funding"] >= (funding["cost"] * 1.15)]
+
+    # From the perspective of the owner of the properties, the funding that they see is just the cost of the works.
+    # The actual funding received will go to the installer
+    # We now look at the social funding
+    social_funding = funding[funding["is_social"]]["cost"].sum()
+    # For the private funding, we need to scale this to consider the fact that only a proportion of the properties
+    # will qualify due to needing the property to fall into council tax bands A - D, and that only some of the tenants
+    # will meet the benefits criteria
+    private_funding = funding[~funding["is_social"]]["cost"].sum()
+
+    # 51% of households are recipients of benefits in the South East, in the UK
+    # (2021/2022 - https://www.statista.com/statistics/382858/uk-state-benefits-by-region/)
+
+    # We also need to deduce the % of properties in council tax bands A - D
+    # 2023 council tax bands:
+    # https://www.gov.uk/government/statistics/council-tax-stock-of-properties-2023/council-tax-stock-of-properties
+    # -statistical-commentary
+    band_a_proportion = 0.239
+    band_b_proportion = 0.195
+    band_c_proportion = 0.219
+    band_d_proportion = 0.156
+    a_to_d_proportion = band_a_proportion + band_b_proportion + band_c_proportion + band_d_proportion
+
+    benefits_proportion = 0.51
+
+    # Note: It's probable that an occupant of a property in council tax bands A-D is more likely to be on benefits,
+    # however we retain the regional average to be conservative
+    # We scale the private funding based on these two factors
+    private_funding_scaled = private_funding * benefits_proportion * a_to_d_proportion
+
+    n_private_projects = np.round((~funding["is_social"]).sum() * benefits_proportion * a_to_d_proportion)
+
+    # Look at the impact of EWI for scenario
+
+    ewi_jobs = recommendations_df[
+        (recommendations_df["Scenario ID"] == 49) & (recommendations_df["type"] == "external_wall_insulation")
+        ]
+    ewi_jobs["estimated_cost"].sum()
+
+    has_cavity = recommendations_df[
+        (recommendations_df["type"] == "cavity_wall_insulation") & (recommendations_df["Scenario ID"] == 47)
+        ]
+    # Take the properties that have one of these cavity wall insulation recommendations
+    cavity_units = properties_df[properties_df["property_id"].isin(has_cavity["property_id"].values)]
+
+    cavity_units[cavity_units.index == 3][["uprn", "property_id"]]
+
+    z = recommendations_df[recommendations_df["property_id"] == 24525]
+
+    # Recommendation type by kwh savings per unit
+    recommendations_final_scenario = recommendations_df[
+        recommendations_df["Scenario ID"].isin([51]) &
+        (recommendations_df["default"] == True)
+        ].copy()
+    # Merge on floor area
+    recommendations_final_scenario = recommendations_final_scenario.merge(
+        properties_df[["property_id", "total_floor_area"]], on="property_id", how="left"
+    )
+    recommendations_final_scenario = recommendations_final_scenario[
+        ~pd.isnull(recommendations_final_scenario["total_floor_area"])]
+    recommendations_final_scenario["kwh_savings_per_unit"] = recommendations_final_scenario["kwh_savings"] / \
+                                                             recommendations_final_scenario["total_floor_area"]
+
+    recommendations_final_scenario["type_mapped2"] = recommendations_df["type"].copy().replace(
+        {
+            "room_roof_insulation": "roof_insulation",
+            "flat_roof_insulation": "roof_insulation",
+            "hot_water_tank_insulation": "other",
+            "cylinder_thermostat": "other",
+            "sealing_open_fireplace": "other",
+            "suspended_floor_insulation": "floor_insulation",
+            "solid_floor_insulation": "floor_insulation",
+        }
+    )
+
+    aggs = recommendations_final_scenario.groupby("type_mapped")[
+        ["kwh_savings_per_unit", "estimated_cost"]].mean().reset_index().sort_values(
+        "kwh_savings_per_unit", ascending=False
+    )
+    aggs["cost_per_kwh_saved"] = aggs["estimated_cost"] / aggs["kwh_savings_per_unit"]
+    # Show more columns with pandas
+    pd.set_option('display.max_columns', None)
+    # Show more rows with pandas
+    pd.set_option('display.max_rows', None)
+    # Show more characters in a column
+    pd.set_option('display.max_colwidth', None)
diff --git a/recommendations/WallRecommendations.py b/recommendations/WallRecommendations.py
index 569d7bcb..b73f187c 100644
--- a/recommendations/WallRecommendations.py
+++ b/recommendations/WallRecommendations.py
@@ -67,6 +67,7 @@ class WallRecommendations(Definitions):
         "Granite or whinstone, as built, no insulation": 'Granite or whinstone, with external insulation',
         "Timber frame, as built, no insulation": "Timber frame, with external insulation",
         'Timber frame, as built, partial insulation': 'Timber frame, with external insulation',
+        "Sandstone or limestone, as built, no insulation": "Sandstone or limestone, with external insulation",
     }
 
     # These are the ending descriptions we consider for walls with internal insulation
@@ -80,6 +81,7 @@ class WallRecommendations(Definitions):
         "Granite or whinstone, as built, no insulation": 'Granite or whinstone, with internal insulation',
         "Timber frame, as built, no insulation": "Timber frame, with internal insulation",
         'Timber frame, as built, partial insulation': 'Timber frame, with internal insulation',
+        "Sandstone or limestone, as built, no insulation": "Sandstone or limestone, with internal insulation",
     }
 
     def __init__(

From 3aa29e18a6629b952601c35a283e6f6dc7e66b62 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 16 Aug 2024 12:43:16 +0100
Subject: [PATCH 112/182] set up template of ownership class

---
 etl/ownership/Ownership.py | 467 +++++++++++++++++++++++++++++++++++++
 etl/ownership/README.md    |  10 +
 2 files changed, 477 insertions(+)
 create mode 100644 etl/ownership/Ownership.py
 create mode 100644 etl/ownership/README.md

diff --git a/etl/ownership/Ownership.py b/etl/ownership/Ownership.py
new file mode 100644
index 00000000..fc5c0632
--- /dev/null
+++ b/etl/ownership/Ownership.py
@@ -0,0 +1,467 @@
+from datetime import datetime
+from typing import List
+from tqdm import tqdm
+import pandas as pd
+import Levenshtein
+import re
+from utils.s3 import save_excel_to_s3
+from utils.logger import setup_logger
+from backend.SearchEpc import SearchEpc
+
+logger = setup_logger()
+
+
+class Ownership:
+    # These are a number of prefix phrases, found in the ownership data. If an address begins with a any of these
+    # terms, we remove them
+    OWNERSHIP_STARTING_TERMS = [
+        "land adjoining", "land on the", "land to the rear of", "land and buildings on the",
+        "garage adjoining", "car park adjoining", "the land adjoining", "land and buildings adjoining",
+        "all royal mines"
+    ]
+
+    def __init__(
+        self, epc_paths: List[str], domestic_ownership_path: str, overseas_ownership_path
+    ):
+        """
+
+        :param epc_paths: A list of strings pointing to the locations of the EPC data to be used. To date, this
+                          data has been held locally, and so will require extension to read from remote locations like
+                          s3. Every path must end with certificates.csv, otherwise a ValueError is raised
+        :param domestic_ownership_path: A string which points to the location of the CCOD ownership data, that details
+                                        corporate ownership of properties in the UK, where the companies are UK based
+        :param overseas_ownership_path: A string which points to the location of the OCOD ownership data, that details
+                                        corporate ownership of properties in the UK, where the companies are overseas
+        """
+
+        # All epc paths should end with certificates.csv
+        if not epc_paths or not all(path.endswith("certificates.csv") for path in epc_paths):
+            raise ValueError("epc_paths contains a path that does not end with certificates.csv")
+        self.epc_paths = epc_paths
+        self.domestic_ownership_path = domestic_ownership_path
+        self.overseas_ownership_path = overseas_ownership_path
+
+        self.run_timestamp = str(datetime.now())
+
+        # Data
+        self.epc_data = None
+        self.ownership_data = None
+        self.freehold_matching_lookup = None
+        self.leasehold_matching_lookup = None
+
+        self.shared_freehold_match = None
+        self.shared_leasehold_match = None
+
+        self.combined_matching_lookup = None
+        self.matched_addresses = None
+
+    def source_epc_properties(self, column_filters=None):
+        """
+        Read each CSV in self.epc_paths, drop rows without a UPRN and keep the newest certificate per UPRN.
+        NOTE(review): unfinished - it raises "IMPLEMENT ME" inside the loop, so nothing after that point runs yet.
+        :param column_filters: Optional filters; defaulted to an empty dict but not yet applied anywhere below
+        :return:
+        """
+
+        column_filters = {} if column_filters is None else column_filters
+
+        # TODO: Do the tenure filtering here!
+        # ["rental (private)", "Rented (private)", "owner-occupied", "Owner-occupied"]
+
+        data = []
+        for path in tqdm(self.epc_paths):
+            epc_data = pd.read_csv(path, low_memory=False)
+
+            epc_data = epc_data[~pd.isnull(epc_data["UPRN"])]
+            epc_data["UPRN"] = epc_data["UPRN"].astype(int).astype(str)
+
+            if pd.isnull(pd.to_datetime(epc_data["LODGEMENT_DATETIME"], errors="coerce")).sum():
+                raise Exception("Lodgement datetime contains ")
+
+            # Get the newest EPC for each UPRN. We use LODGEMENT_DATE as a proxy for this
+            epc_data["LODGEMENT_DATETIME"] = pd.to_datetime(epc_data["LODGEMENT_DATETIME"], errors="coerce")
+
+            epc_data = epc_data.sort_values(
+                ["LODGEMENT_DATE", "LODGEMENT_DATETIME"], ascending=False
+            ).drop_duplicates("UPRN")
+
+            # Get G & F properties
+            raise Exception("IMPLEMENT ME")
+            epc_data = epc_data[epc_data["CURRENT_ENERGY_RATING"].isin(["G", "F"])]
+            data.append(epc_data)
+
+        self.epc_data = pd.concat(data)
+
+        # Save as an excel
+        # TODO: Implement me
+        save_excel_to_s3(
+
+        )
+        # data.to_excel("EPC F & G Properties - V2.xlsx", index=False)
+
+    def load_company_ownership(self):
+        """
+        Load the domestic and overseas ownership CSVs into self.ownership_data, filtered to the EPC postcodes
+        :return:
+        """
+        logger.info("Reading in company ownership data")
+        self.ownership_data = pd.read_csv(self.domestic_ownership_path)
+        self.ownership_data["is_overseas"] = False
+        overseas_company_ownership = pd.read_csv(self.overseas_ownership_path)
+        overseas_company_ownership["is_overseas"] = True
+
+        self.ownership_data = pd.concat([self.ownership_data, overseas_company_ownership])
+
+        # Filter on relevant postcodes - this is done to reduce the large size of the ownership dataset
+        logger.info("Filtering ownership data on EPC postcodes")
+        self.ownership_data = self.ownership_data[
+            self.ownership_data["Postcode"].str.lower().isin(self.epc_data["POSTCODE"].str.lower().unique())
+        ]
+
+    def prepare_for_matching(self):
+        """
+        Given the epc properties and the ownership data, this function performs a number of operations on both datasets
+        to prepare them for matching
+        """
+
+        logger.info("Preparing data for matching")
+        # Now we filter properties the other way around
+        self.epc_data = self.epc_data[
+            self.epc_data["POSTCODE"].str.lower().isin(self.ownership_data["Postcode"].str.lower().unique())
+        ]
+        # We have some duplicates on UPRN
+        # Keep the row with the newest LODGEMENT_DATE for each UPRN
+        self.epc_data = self.epc_data.sort_values("LODGEMENT_DATE", ascending=False).drop_duplicates("UPRN")
+
+        # Remove entries where the address begins with "land adjoining" or another of OWNERSHIP_STARTING_TERMS,
+        # as those records don't reference
+        # the property itself
+
+        for starting_term in self.OWNERSHIP_STARTING_TERMS:
+            self.ownership_data = self.ownership_data[
+                ~self.ownership_data["Property Address"].str.lower().str.startswith(starting_term)
+            ]
+
+    @staticmethod
+    def extract_numeric_part(house_number: str) -> str:
+        """
+        Extracts only the numeric part from a house number that may contain letters.
+
+        Parameters:
+        - house_number (str): The house number string possibly containing letters.
+
+        Returns:
+        - str: The numeric part of the house number.
+        """
+        # Use regular expression to replace all non-digit characters with nothing
+        numeric_part = re.sub(r'\D', '', house_number)
+        return numeric_part
+
+    @staticmethod
+    def remove_text_in_brackets(address: str) -> str:
+        """
+        Removes any text within parentheses, including the parentheses themselves.
+
+        Parameters:
+        - address (str): The address string to clean.
+
+        Returns:
+        - str: The cleaned address with text in parentheses removed.
+        """
+        # Regex to find and remove content in parentheses
+        cleaned_address = re.sub(r'\s*\([^)]*\)', '', address)
+        return cleaned_address
+
+    @staticmethod
+    def extract_range_from_house_number(house_number_range: str):
+        """
+        Detects if the house number includes a numeric range (formatted as 'x-y') and extracts all values within this
+        range.
+        Non-numeric strings containing hyphens are ignored.
+
+        Parameters:
+        - house_number_range (str): The house number string that might contain a range.
+
+        Returns:
+        - list of str: A list of all numbers within the range if it is a range; otherwise, returns None.
+        """
+
+        if not house_number_range:
+            return None
+
+        if '-' in house_number_range:
+            parts = house_number_range.split('-')
+            if len(parts) == 2 and parts[0].isdigit() and parts[1].isdigit():
+                # Both parts are numeric, so it's a valid range
+                start, end = map(int, parts)  # Convert parts to integers
+                return [str(x) for x in range(start, end + 1)]
+            else:
+                # Not a valid numeric range
+                return None
+        else:
+            # No hyphen present or not a range
+            return None
+
+    @staticmethod
+    def is_in_range(row, house_no):
+        """ Check if the house number is within the range provided in the row. """
+        if row and any(house_no == num for num in row):
+            return True
+        return False
+
+    @staticmethod
+    def levenstein_match(matching_string, df, address_col):
+        match_to = df[address_col].tolist()
+        # Strip out punctuation and spaces
+        match_to = [re.sub(r'[^\w\s]', '', x) for x in match_to]
+        match_to = [x.replace(" ", "") for x in match_to]
+
+        # Perform matching between full key and match_to
+        distances = [Levenshtein.distance(matching_string, s) for s in match_to]
+        best_match_index = distances.index(min(distances))
+        # No distance threshold is applied for the moment: the closest candidate row is always returned.
+        # NOTE(review): matching_string is used as passed in - it does not get the stripping applied above
+        df = df.iloc[best_match_index:best_match_index + 1]
+
+        return df
+
+    @classmethod
+    def remove_duplicate_matches(cls, matching_lookup, properties, company_ownership):
+        duplicated_titles = matching_lookup[matching_lookup["Title Number"].duplicated()]["Title Number"].unique()
+
+        to_drop = []
+        for dupe_title in duplicated_titles:
+            dupe_data = matching_lookup[matching_lookup["Title Number"] == dupe_title].copy()
+            matched_addresses = dupe_data.merge(
+                properties[["UPRN", "ADDRESS"]].rename(columns={"ADDRESS": "epc_address"}),
+                how="left", on="UPRN"
+            ).merge(
+                company_ownership[["Title Number", "Property Address"]],
+                how="left", on="Title Number"
+            )
+            # We perform levenstein to get the best match
+            best_match = cls.levenstein_match(
+                matching_string=matched_addresses["Property Address"].values[0],
+                df=matched_addresses,
+                address_col="epc_address"
+            )
+            matches_to_drop = matched_addresses[
+                ~matched_addresses["UPRN"].isin(best_match["UPRN"].values)
+            ]
+
+            to_drop.append(
+                matches_to_drop[["UPRN", "Title Number"]].copy()
+            )
+
+        to_drop = pd.concat(to_drop) if to_drop else pd.DataFrame()
+
+        if not to_drop.empty:
+            merged = pd.merge(matching_lookup, to_drop, on=['UPRN', 'Title Number'], how='left', indicator=True)
+            merged = merged[merged['_merge'] == 'left_only'].drop(columns=['_merge'])
+
+            return merged
+
+        return matching_lookup
+
+    @classmethod
+    def remove_duplicate_uprn_matches(cls, matching_lookup, properties, company_ownership):
+        """For UPRNs matched to several titles, keep only the title whose address best matches the EPC address."""
+        dupe_uprns = matching_lookup[matching_lookup["UPRN"].duplicated()]["UPRN"].unique().tolist()
+
+        to_drop = []
+        for dupe_uprn in dupe_uprns:
+            dupe_data = matching_lookup[matching_lookup["UPRN"] == dupe_uprn].copy()
+            matched_addresses = dupe_data.merge(
+                properties[["UPRN", "ADDRESS"]].rename(columns={"ADDRESS": "epc_address"}),
+                how="left", on="UPRN"
+            ).merge(
+                company_ownership[["Title Number", "Property Address"]],
+                how="left", on="Title Number"
+            )
+            # Every row shares one EPC address here, so compare it against each candidate title's address
+            best_match = cls.levenstein_match(
+                matching_string=matched_addresses["epc_address"].values[0],
+                df=matched_addresses,
+                address_col="Property Address"
+            )
+            matches_to_drop = matched_addresses[
+                ~matched_addresses["Title Number"].isin(best_match["Title Number"].values)
+            ]
+
+            to_drop.append(matches_to_drop[["UPRN", "Title Number"]].copy())
+
+        # pd.concat raises on an empty list, so fall back to an empty frame when there are no duplicates
+        to_drop = pd.concat(to_drop) if to_drop else pd.DataFrame()
+
+        if not to_drop.empty:
+            merged = pd.merge(matching_lookup, to_drop, on=['UPRN', 'Title Number'], how='left', indicator=True)
+            merged = merged[merged['_merge'] == 'left_only'].drop(columns=['_merge'])
+
+            return merged
+
+        return matching_lookup
+
+    def match(self):
+        if (self.epc_data is None) or (self.ownership_data is None):
+            raise ValueError("epc_data and ownership_data should not be null")
+        # For each EPC row, look for ownership titles in the same postcode that share its house number
+        logger.info("Matching EPC data to ownership data")
+        freehold_matching_lookup = []
+        leasehold_matching_lookup = []
+        shared_leasehold_match = []
+        shared_freehold_match = []
+        for _, address in tqdm(self.epc_data.iterrows(), total=len(self.epc_data)):
+            match_type = "exact"
+            filtered = self.ownership_data[
+                self.ownership_data["Postcode"].str.lower() == address["POSTCODE"].lower()
+                ].copy()
+
+            # Remove postcode and remove trailing commas
+            filtered["house_number"] = (
+                filtered["Property Address"]
+                .apply(self.remove_text_in_brackets)
+                .apply(SearchEpc.get_house_number)
+                .str.lower()
+                .str.replace(",", "")
+            )
+            house_no = SearchEpc.get_house_number(address["ADDRESS1"])
+            if house_no is not None:
+                house_no = house_no.replace(",", "")
+
+            if house_no is None:
+                # It's hard for us to get a reliable match
+                # filtered = filtered[filtered["Property Address"].str.contains(address["ADDRESS1"])]
+                # if filtered.shape[0] > 1:
+                #     raise Exception("No valid - maybe we should do levenstein?")
+                continue
+
+            else:
+
+                if house_no not in filtered["house_number"].values:
+                    # If this happens, we check house_number for a x-y range of addresses
+                    filtered["house_number_range"] = filtered["house_number"].apply(
+                        self.extract_range_from_house_number
+                    )
+                    # If we have found a house number range, we check if the house number is in the range and if not,
+                    # we drop the row
+                    filtered['is_in_range'] = filtered['house_number_range'].apply(
+                        lambda x: self.is_in_range(x, house_no)
+                    )
+
+                    if filtered['is_in_range'].any():
+                        # If house_no is found in any range, keep only rows where it is in range
+                        filtered = filtered[filtered['is_in_range']]
+                    else:
+                        # If house_no is not found in any range, filter out rows where 'house_number_range' is not None
+                        filtered = filtered[filtered['house_number_range'].isnull()]
+
+                    # Strip out letters from house_no and house_number
+                    house_no = self.extract_numeric_part(house_no)
+                    filtered["house_number"] = filtered["house_number"].astype(str).apply(self.extract_numeric_part)
+                    match_type = "approximate"
+
+                filtered = filtered[filtered["house_number"] == house_no]
+
+            if filtered.empty:
+                continue
+
+            filtered_freehold = filtered[filtered["Tenure"] == "Freehold"]
+            filtered_leasehold = filtered[filtered["Tenure"] == "Leasehold"]
+
+            if filtered_freehold.shape[0] > 1:
+                matched = filtered_freehold[["Title Number"]].copy()
+                matched.insert(0, "UPRN", address["UPRN"])
+                shared_freehold_match.append(matched)
+            elif not filtered_freehold.empty:
+                freehold_matching_lookup.append(
+                    {
+                        "UPRN": address["UPRN"],
+                        "Title Number": filtered_freehold["Title Number"].values[0],
+                        "match_type": match_type,
+                    }
+                )
+
+            if filtered_leasehold.shape[0] > 1:
+                matched = filtered_leasehold[["Title Number"]].copy()
+                matched.insert(0, "UPRN", address["UPRN"])
+                shared_leasehold_match.append(matched)
+            elif not filtered_leasehold.empty:
+                leasehold_matching_lookup.append(
+                    {
+                        "UPRN": address["UPRN"],
+                        "Title Number": filtered_leasehold["Title Number"].values[0],
+                        "match_type": match_type,
+                    }
+                )
+
+        # Explicit columns keep "match_type" present even when one tenure produced no matches at all
+        lookup_columns = ["UPRN", "Title Number", "match_type"]
+        freehold_lookup = pd.DataFrame(freehold_matching_lookup, columns=lookup_columns)
+        leasehold_lookup = pd.DataFrame(leasehold_matching_lookup, columns=lookup_columns)
+
+        # Only exact house-number matches are kept; approximate matches (where letters were stripped
+        # from the house numbers before comparing) are discarded
+        self.freehold_matching_lookup = freehold_lookup[freehold_lookup["match_type"] == "exact"]
+        self.leasehold_matching_lookup = leasehold_lookup[leasehold_lookup["match_type"] == "exact"]
+
+        self.shared_leasehold_match = shared_leasehold_match
+        self.shared_freehold_match = shared_freehold_match
+
+        # finally, we create matched addresses
+        combined_matching_lookup = pd.concat([self.freehold_matching_lookup, self.leasehold_matching_lookup])
+
+        # Remove duplicates
+        combined_matching_lookup = self.remove_duplicate_matches(
+            matching_lookup=combined_matching_lookup,
+            properties=self.epc_data,
+            company_ownership=self.ownership_data
+        )
+        # We also have duplicates at a UPRN level
+        self.combined_matching_lookup = self.remove_duplicate_uprn_matches(
+            matching_lookup=combined_matching_lookup,
+            properties=self.epc_data,
+            company_ownership=self.ownership_data
+        )
+
+        self.matched_addresses = self.combined_matching_lookup.merge(
+            self.epc_data[
+                [
+                    "UPRN",
+                    "ADDRESS",
+                    "ADDRESS1",
+                    "CURRENT_ENERGY_EFFICIENCY",
+                    "CURRENT_ENERGY_RATING",
+                    "POSTCODE",
+                    "LODGEMENT_DATE",
+                    "TRANSACTION_TYPE"
+                ]
+            ].rename(
+                columns={
+                    "ADDRESS": "epc_address",
+                    "ADDRESS1": "epc_address1",
+                    "POSTCODE": "epc_postcode"
+                }
+            ),
+            how="left", on="UPRN"
+        ).merge(
+            self.ownership_data[
+                [
+                    "Title Number",
+                    "Property Address",
+                    "Postcode",
+                    "Company Registration No. (1)",
+                    "Proprietor Name (1)",
+                    "Date Proprietor Added",
+                ]
+            ],
+            how="left", on="Title Number"
+        )
+
+        # Let's try and get the house number
+        matched_addresses["house_number"] = (
+            matched_addresses["epc_address"]
+            .apply(self.remove_text_in_brackets)
+            .apply(SearchEpc.get_house_number)
+            .str.lower()
+            .str.replace(",", "")
+        )
diff --git a/etl/ownership/README.md b/etl/ownership/README.md
new file mode 100644
index 00000000..38b71474
--- /dev/null
+++ b/etl/ownership/README.md
@@ -0,0 +1,10 @@
+# Ownership Application
+
+This application contains methods that allow us to attempt to discover
+corporate ownership of properties, where possible.
+
+Practically, it's likely that the code within this application will be
+exported into other areas of this repository, and used to assemble
+pipelines that solve specific property ownership questions, and so this
+codebase is set up with the goal of providing fairly easy to use, plug
+and play tools.
\ No newline at end of file

From c6ebcedfce146825822b1ee579568ba65e9d7f9f Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 19 Aug 2024 11:11:32 +0100
Subject: [PATCH 113/182] added multiple ownership mthods

---
 etl/ownership/Ownership.py | 296 ++++++++++++++++++++++++++++++++++++-
 1 file changed, 292 insertions(+), 4 deletions(-)

diff --git a/etl/ownership/Ownership.py b/etl/ownership/Ownership.py
index fc5c0632..90abe147 100644
--- a/etl/ownership/Ownership.py
+++ b/etl/ownership/Ownership.py
@@ -20,8 +20,20 @@ class Ownership:
         "all royal mines"
     ]
 
+    # anything that is sold within this many months is flagged to have sold recently and is then
+    # considered to be dropped from matching
+    SOLD_RECENTLY_MONTHS = 12
+
+    # Anything that has been lodged for a marketed or unmarketed sale within this many months is
+    # flagged as potentially in the process of being sold
+    LODGED_RECENTLY_MONTHS = 12
+
     def __init__(
-        self, epc_paths: List[str], domestic_ownership_path: str, overseas_ownership_path
+        self,
+        epc_paths: List[str],
+        domestic_ownership_path: str,
+        overseas_ownership_path: str,
+        land_registry_path: str
     ):
         """
 
@@ -32,6 +44,7 @@ class Ownership:
                                         corporate ownership of properties in the UK, where the companies are UK based
         :param overseas_ownership_path: A string which points to the location of the OCOD ownership data, that details
                                         corporate ownership of properties in the UK, where the companies are overseas
+        :param land_registry_path: A string that points to the location of the land registry data
         """
 
         # All epc paths should end with certificates.csv
@@ -40,6 +53,7 @@ class Ownership:
         self.epc_paths = epc_paths
         self.domestic_ownership_path = domestic_ownership_path
         self.overseas_ownership_path = overseas_ownership_path
+        self.land_registry_path = land_registry_path
 
         self.run_timestamp = str(datetime.now())
 
@@ -48,12 +62,17 @@ class Ownership:
         self.ownership_data = None
         self.freehold_matching_lookup = None
         self.leasehold_matching_lookup = None
-
         self.shared_freehold_match = None
         self.shared_leasehold_match = None
+        self.land_registry = None
 
+        # Match tables
         self.combined_matching_lookup = None
         self.matched_addresses = None
+        self.land_registry_matches = None
+
+    def pipeline(self):
+        pass
 
     def source_epc_properties(self, column_filters=None):
         """
@@ -301,6 +320,36 @@ class Ownership:
 
         return matching_lookup
 
+    @staticmethod
+    def is_substring(x, match_string):
+        if pd.isnull(x):
+            return False
+        return x in match_string.lower()
+
+    @staticmethod
+    def house_number_match(paon, house_number):
+        """
+        Compare a land registry paon with a house number, numerically when both convert to int.
+
+        :return: True when the two values are equal, either as integers or, failing that, as given
+        """
+        try:
+            return int(paon) == int(house_number)
+        except (TypeError, ValueError, OverflowError):
+            return paon == house_number
+
+    @staticmethod
+    def check_equalities(lr_filtered):
+        all_paon_equal = all(lr_filtered["paon"] == lr_filtered["paon"].values[0])
+        if pd.isnull(lr_filtered["saon"].values[0]):
+            all_saon_equal = all(pd.isnull(lr_filtered["saon"]))
+        else:
+            all_saon_equal = all(lr_filtered["saon"] == lr_filtered["saon"].values[0])
+
+        all_street_equal = all(lr_filtered["street"] == lr_filtered["street"].values[0])
+
+        return all_paon_equal, all_saon_equal, all_street_equal
+
     def match(self):
         if (self.epc_data is None) or (self.ownership_data is None):
             raise ValueError("epc_data and ownership_data should not be null")
@@ -458,10 +507,249 @@ class Ownership:
         )
 
         # Let's try and get the house number
-        matched_addresses["house_number"] = (
-            matched_addresses["epc_address"]
+        self.matched_addresses["house_number"] = (
+            self.matched_addresses["epc_address"]
             .apply(self.remove_text_in_brackets)
             .apply(SearchEpc.get_house_number)
             .str.lower()
             .str.replace(",", "")
         )
+
+    def match_with_land_registry(self):
+        """
+        This function matches the land registry data to the existing matches
+        :return:
+        """
+        # TODO: Refactor this
+        if self.matched_addresses is None:
+            raise ValueError("Run match() first!")
+
+        self.land_registry = pd.read_csv(self.land_registry_path)
+
+        for col in ["postcode", "street", "paon", "saon"]:
+            self.land_registry[col] = self.land_registry[col].str.lower().str.strip()
+
+        self.land_registry["date_of_transfer"] = pd.to_datetime(self.land_registry["date_of_transfer"])
+
+        land_registry_matches = []
+        for _, match in tqdm(self.matched_addresses.iterrows(), total=len(self.matched_addresses)):
+            # Filter land registry on the postcode
+            lr_filtered = self.land_registry[
+                (self.land_registry["postcode"] == match["epc_postcode"].lower().strip())
+            ].copy()
+
+            # Filter further, when the street is in in the address
+            # street should be contained in epc_address
+            lr_filtered = lr_filtered[
+                lr_filtered["street"].apply(lambda x: self.is_substring(x, match["epc_address"].lower())) |
+                lr_filtered["street"].apply(lambda x: self.is_substring(x, match["Property Address"].lower()))
+                ]
+
+            if lr_filtered.empty:
+                continue
+
+            # We now check if paon is in address 1
+            lr_filtered["paon_match"] = lr_filtered["paon"].apply(
+                lambda x: self.house_number_match(x, match["house_number"])
+            )
+            # We also try the secondary match
+            lr_filtered["saon_match"] = (
+                lr_filtered["saon"].apply(
+                    lambda x: False if pd.isnull(x) else self.is_substring(x, match["epc_address1"])
+                )
+            )
+            # We fileter where we have a primary or secondary match
+            lr_filtered = lr_filtered[
+                lr_filtered["paon_match"] | lr_filtered["saon_match"]
+                ]
+
+            if lr_filtered.empty:
+                continue
+            elif lr_filtered.shape[0] == 1:
+                land_registry_matches.append(
+                    {
+                        "uprn": match["UPRN"],
+                        "transaction_id": lr_filtered['transaction_id'].values[0],
+                        "price": lr_filtered["price"].values[0],
+                        "date_of_transfer": lr_filtered["date_of_transfer"].values[0],
+                    }
+                )
+                continue
+            elif lr_filtered.shape[0] > 1:
+                # We make sure all records are the same and take the newest
+                all_paon_equal, all_saon_equal, all_street_equal = self.check_equalities(lr_filtered)
+                has_paon_match = any(lr_filtered["paon_match"])
+
+                if all_paon_equal and all_street_equal and all_saon_equal:
+                    # Take the newest record, append and continue
+                    lr_filtered = lr_filtered.sort_values("date_of_transfer", ascending=False)
+                    lr_filtered = lr_filtered.head(1)
+                    land_registry_matches.append(
+                        {
+                            "uprn": match["UPRN"],
+                            "transaction_id": lr_filtered['transaction_id'].values[0],
+                            "price": lr_filtered["price"].values[0],
+                            "date_of_transfer": lr_filtered["date_of_transfer"].values[0],
+                        }
+                    )
+                    continue
+                elif has_paon_match and all_street_equal:
+                    # Peform filter on paon
+                    lr_filtered = lr_filtered[lr_filtered["paon_match"]]
+                    # Do an addtiioanl equality check
+                    all_paon_equal, all_saon_equal, all_street_equal = self.check_equalities(lr_filtered)
+                    if all_paon_equal and all_street_equal and all_saon_equal:
+                        lr_filtered = lr_filtered.sort_values("date_of_transfer", ascending=False)
+                        lr_filtered = lr_filtered.head(1)
+                        land_registry_matches.append(
+                            {
+                                "uprn": match["UPRN"],
+                                "transaction_id": lr_filtered['transaction_id'].values[0],
+                                "price": lr_filtered["price"].values[0],
+                                "date_of_transfer": lr_filtered["date_of_transfer"].values[0],
+                            }
+                        )
+                    else:
+                        # We do a match on saon
+                        lr_filtered["saon_match2"] = lr_filtered["saon"].apply(
+                            lambda x: False if pd.isnull(x) else self.is_substring(x, match["epc_address"])
+                        )
+
+                        lr_filtered = lr_filtered[lr_filtered["saon_match2"]]
+
+                        if lr_filtered.empty:
+                            continue
+                        elif lr_filtered.shape[0] == 1:
+                            land_registry_matches.append(
+                                {
+                                    "uprn": match["UPRN"],
+                                    "transaction_id": lr_filtered['transaction_id'].values[0],
+                                    "price": lr_filtered["price"].values[0],
+                                    "date_of_transfer": lr_filtered["date_of_transfer"].values[0],
+                                }
+                            )
+                            continue
+                        else:
+                            raise NotImplementedError("wtf")
+                else:
+                    # We have a final check, based on an observed case
+                    lr_address_1 = " ".join([x.lower().strip() for x in match["Property Address"].split(",")[0:2]])
+
+                    lr_filtered["paon_match2"] = lr_filtered["paon"].apply(
+                        lambda x: False if pd.isnull(x) else self.is_substring(x, lr_address_1)
+                    )
+
+                    lr_filtered = lr_filtered[lr_filtered["paon_match2"]]
+
+                    if lr_filtered.empty:
+                        continue
+                    elif lr_filtered.shape[0] == 1:
+                        land_registry_matches.append(
+                            {
+                                "uprn": match["UPRN"],
+                                "transaction_id": lr_filtered['transaction_id'].values[0],
+                                "price": lr_filtered["price"].values[0],
+                                "date_of_transfer": lr_filtered["date_of_transfer"].values[0],
+                            }
+                        )
+                        continue
+                    else:
+                        # Check all the same
+                        all_paon_equal, all_saon_equal, all_street_equal = self.check_equalities(lr_filtered)
+
+                        # Check saon is house number with exact match
+                        lr_filtered["saon_match2"] = lr_filtered["saon"].apply(
+                            lambda x: False if pd.isnull(x) else self.house_number_match(x, match["house_number"])
+                        )
+                        # We check if we have a flat
+                        match_flat_number = re.match("flat (\d+)", match["epc_address1"].lower())
+                        match_apartment_number = re.match("apartment (\d+)", match["epc_address1"].lower())
+                        lr_filtered["saon_match3"] = False
+                        if match_flat_number is not None:
+                            # Get out the match
+                            match_flat_number = "flat " + match_flat_number.group(1)
+                            lr_filtered["saon_match3"] = lr_filtered["saon"].apply(
+                                lambda x: False if pd.isnull(x) else x == match_flat_number
+                            )
+
+                        if match_apartment_number is not None:
+                            # Get out the match
+                            match_apartment_number = "apartment " + match_apartment_number.group(1)
+                            lr_filtered["saon_match3"] = lr_filtered["saon"].apply(
+                                lambda x: False if pd.isnull(x) else x == match_apartment_number
+                            )
+
+                        if all_paon_equal and all_saon_equal and all_street_equal:
+                            # Take the newest record
+                            lr_filtered = lr_filtered.sort_values("date_of_transfer", ascending=False)
+                            lr_filtered = lr_filtered.head(1)
+                            land_registry_matches.append(
+                                {
+                                    "uprn": match["UPRN"],
+                                    "transaction_id": lr_filtered['transaction_id'].values[0],
+                                    "price": lr_filtered["price"].values[0],
+                                    "date_of_transfer": lr_filtered["date_of_transfer"].values[0],
+                                }
+                            )
+                            continue
+                        elif any(lr_filtered["saon_match2"]):
+                            lr_filtered = lr_filtered[lr_filtered["saon_match2"]]
+                            all_saon_equal, all_paon_equal, all_street_equal = self.check_equalities(lr_filtered)
+                            if all_paon_equal and all_saon_equal and all_street_equal:
+                                # Filter on the newest record
+                                lr_filtered = lr_filtered.sort_values("date_of_transfer", ascending=False)
+                                lr_filtered = lr_filtered.head(1)
+                            if lr_filtered.shape[0] == 1:
+                                land_registry_matches.append(
+                                    {
+                                        "uprn": match["UPRN"],
+                                        "transaction_id": lr_filtered['transaction_id'].values[0],
+                                        "price": lr_filtered["price"].values[0],
+                                        "date_of_transfer": lr_filtered["date_of_transfer"].values[0],
+                                    }
+                                )
+                                continue
+                        elif any(lr_filtered["saon_match3"]):
+                            lr_filtered = lr_filtered[lr_filtered["saon_match3"]]
+                            if lr_filtered.shape[0] == 1:
+                                land_registry_matches.append(
+                                    {
+                                        "uprn": match["UPRN"],
+                                        "transaction_id": lr_filtered['transaction_id'].values[0],
+                                        "price": lr_filtered["price"].values[0],
+                                        "date_of_transfer": lr_filtered["date_of_transfer"].values[0],
+                                    }
+                                )
+                                continue
+
+                        raise NotImplementedError("wtf")
+            else:
+                raise NotImplementedError("What happened here?")
+
+        self.land_registry_matches = pd.DataFrame(land_registry_matches)
+
+        # Merge onto the EPC - ownership matches
+        self.matched_addresses = self.matched_addresses.merge(
+            land_registry_matches,
+            how="left",
+            left_on="UPRN",
+            right_on="uprn"
+        ).drop(columns=["uprn"])
+
+        # Flag anything sold within the last SOLD_RECENTLY_MONTHS months (months= is the relative offset)
+        self.matched_addresses["sold_recently"] = (
+            self.matched_addresses["date_of_transfer"] >= pd.Timestamp.now() -
+            pd.DateOffset(months=self.SOLD_RECENTLY_MONTHS)
+        )
+
+        self.matched_addresses["sale_lodged_recently"] = (
+            (
+                pd.to_datetime(
+                    self.matched_addresses["LODGEMENT_DATE"]
+                ) >= pd.Timestamp.now() - pd.DateOffset(months=self.LODGED_RECENTLY_MONTHS)
+            ) &
+            (self.matched_addresses["TRANSACTION_TYPE"].isin(["marketed sale", "non marketed sale"]))
+        )
+
+    def filter_matches(self):
+        pass

From 6f053a20d159ca08a87bed6b245b4ba32fe27d1b Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 19 Aug 2024 11:20:38 +0100
Subject: [PATCH 114/182] completing source_epc_properties

---
 etl/ownership/Ownership.py | 48 ++++++++++++++++++++------------------
 utils/s3.py                | 33 ++++++++++++++++++++++++++
 2 files changed, 58 insertions(+), 23 deletions(-)

diff --git a/etl/ownership/Ownership.py b/etl/ownership/Ownership.py
index 90abe147..3bdae59c 100644
--- a/etl/ownership/Ownership.py
+++ b/etl/ownership/Ownership.py
@@ -33,7 +33,8 @@ class Ownership:
         epc_paths: List[str],
         domestic_ownership_path: str,
         overseas_ownership_path: str,
-        land_registry_path: str
+        land_registry_path: str,
+        project_name: str
     ):
         """
 
@@ -45,6 +46,7 @@ class Ownership:
         :param overseas_ownership_path: A string which points to the location of the OCOD ownership data, that details
                                         corporate ownership of properties in the UK, where the companies are overseas
         :param land_registry_path: A string that points to the location of the land registry data
+        :param project_name: A string that is used to identify the project
         """
 
         # All epc paths should end with certificates.csv
@@ -57,6 +59,9 @@ class Ownership:
 
         self.run_timestamp = str(datetime.now())
 
+        # Data storage paths
+        self.epc_data_filepath = f"ownership/{project_name}/{self.run_timestamp}/epc_data.xlsx"
+
         # Data
         self.epc_data = None
         self.ownership_data = None
@@ -76,47 +81,44 @@ class Ownership:
 
     def source_epc_properties(self, column_filters=None):
         """
-        This function will filter the epc data as specified by column filers, searching across all of the EPC tables
-        as defined by
-        :param column_filters:
-        :return:
+        This function will filter the epc data as specified by column filters, searching across all of the EPC tables
+        :param column_filters: Dictionary with column names as keys and list of acceptable values as values. This
+                                 dictionary is used to filter the EPC data and should look like this:
+                                {"column_name": ["value1", "value2", ...]}, where column_name is the name of the column
+                                in the EPC data and ["value1", "value2", ...] is a list of acceptable values for that
+                                column. If a column is not found in the EPC data, an exception is raised.
         """
 
         column_filters = {} if column_filters is None else column_filters
 
-        # TODO: Do the tenure filtering here!
-        # ["rental (private)", "Rented (private)", "owner-occupied", "Owner-occupied"]
-
         data = []
         for path in tqdm(self.epc_paths):
             epc_data = pd.read_csv(path, low_memory=False)
-
             epc_data = epc_data[~pd.isnull(epc_data["UPRN"])]
             epc_data["UPRN"] = epc_data["UPRN"].astype(int).astype(str)
 
             if pd.isnull(pd.to_datetime(epc_data["LODGEMENT_DATETIME"], errors="coerce")).sum():
-                raise Exception("Lodgement datetime contains ")
+                raise Exception("Lodgement datetime contains invalid data")
 
-            # Get the newest EPC for each UPRN. We use LODGEMENT_DATE as a proxy for this
             epc_data["LODGEMENT_DATETIME"] = pd.to_datetime(epc_data["LODGEMENT_DATETIME"], errors="coerce")
+            epc_data = epc_data.sort_values(["LODGEMENT_DATETIME"], ascending=False).drop_duplicates("UPRN")
 
-            epc_data = epc_data.sort_values(
-                ["LODGEMENT_DATE", "LODGEMENT_DATETIME"], ascending=False
-            ).drop_duplicates("UPRN")
+            # Apply column filters
+            for column, values in column_filters.items():
+                if column in epc_data.columns:
+                    epc_data = epc_data[epc_data[column].isin(values)]
+                else:
+                    raise Exception(f"Column {column} not found in data. column_filters is malformed")
 
-            # Get G & F properties
-            raise Exception("IMPLEMENT ME")
-            epc_data = epc_data[epc_data["CURRENT_ENERGY_RATING"].isin(["G", "F"])]
             data.append(epc_data)
 
-        self.epc_data = pd.concat(data)
-
-        # Save as an excel
-        # TODO: Implement me
+        self.epc_data = pd.concat(data, ignore_index=True)
+        # We now store the data in s3
         save_excel_to_s3(
-
+            df=self.epc_data,
+            bucket_name="epc_data",
+            file_key=self.epc_data_filepath
         )
-        # data.to_excel("EPC F & G Properties - V2.xlsx", index=False)
 
     def load_company_ownership(self):
         """
diff --git a/utils/s3.py b/utils/s3.py
index b3553824..ca0cbfac 100644
--- a/utils/s3.py
+++ b/utils/s3.py
@@ -229,6 +229,39 @@ def read_excel_from_s3(bucket_name, file_key, header_row, drop_all_na=True):
     return df
 
 
+def save_excel_to_s3(df, bucket_name, file_key):
+    """
+    Write a pandas DataFrame (without its index) to an in-memory Excel workbook and upload it to S3.
+
+    :param df: DataFrame to write. Must not be empty.
+    :param bucket_name: Name of the destination S3 bucket.
+    :param file_key: S3 object key, i.e. the path plus the file name. Must end in ".xls" or ".xlsx".
+    :raises ValueError: If df is empty, or if file_key does not end in an Excel extension.
+    """
+    # Both checks run before anything is serialised or uploaded
+    if df.empty:
+        raise ValueError("The DataFrame is empty. Nothing to save to Excel.")
+
+    # Only keys that end in an Excel extension are accepted
+    if not file_key.endswith((".xls", ".xlsx")):
+        raise ValueError("The specified file key does not appear to be an Excel file.")
+
+    # Serialise into a bytes buffer instead of a file on disk
+    excel_buffer = BytesIO()
+    df.to_excel(excel_buffer, index=False)
+    # Rewind to the start of the buffer so that the upload reads the whole workbook
+    excel_buffer.seek(0)
+
+    # A boto3 session provides the S3 resource through which the target bucket is addressed
+    s3_resource = boto3.session.Session().resource('s3')
+    target_bucket = s3_resource.Bucket(bucket_name)
+
+    # Upload the buffer as the body of the object stored under file_key
+    target_bucket.put_object(Body=excel_buffer, Key=file_key)
+
+    logger.info(f"Excel file saved to S3 bucket '{bucket_name}' with key '{file_key}'")
+
+
 def read_csv_from_s3(bucket_name, filepath):
     s3 = boto3.client('s3')
 

From 287960361d28e3c9ca224f7b2cf09e4d617992fe Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 19 Aug 2024 11:32:56 +0100
Subject: [PATCH 115/182] added land registry get and filter

---
 etl/ownership/Ownership.py                    | 44 ++++++++++++-
 .../projects/midlands_portfolio/app.py        | 64 +++++++++++++++++++
 2 files changed, 106 insertions(+), 2 deletions(-)
 create mode 100644 etl/ownership/projects/midlands_portfolio/app.py

diff --git a/etl/ownership/Ownership.py b/etl/ownership/Ownership.py
index 3bdae59c..7403c45c 100644
--- a/etl/ownership/Ownership.py
+++ b/etl/ownership/Ownership.py
@@ -28,6 +28,26 @@ class Ownership:
     # flagged as potentially in the process of being sold
     LODGED_RECENTLY_MONTHS = 12
 
+    # These are the columns in the land registry data
+    LAND_REGISTRY_COLUMNS = [
+        "transaction_id",
+        "price",
+        "date_of_transfer",
+        "postcode",
+        "property_type",
+        "old_new",
+        "duration",
+        "paon",
+        "saon",
+        "street",
+        "locality",
+        "town_city",
+        "district",
+        "county",
+        "ppd_category_type",
+        "record_status",
+    ]
+
     def __init__(
         self,
         epc_paths: List[str],
@@ -517,16 +537,36 @@ class Ownership:
             .str.replace(",", "")
         )
 
+    def get_land_registry(self):
+        """
+        This function reads in the land registry data and filters it on the postcodes found in the EPC data
+        :return: DataFrame of the rows for EPC postcodes whose date_of_transfer is within the last 5 years
+        """
+        # The file is read with header=None, so the column names are assigned from LAND_REGISTRY_COLUMNS
+        transactions = pd.read_csv(self.land_registry_path, header=None)
+        transactions.columns = self.LAND_REGISTRY_COLUMNS
+
+        # Case-insensitive postcode filter against the postcodes present in the EPC data
+        epc_postcodes = self.epc_data["POSTCODE"].str.lower().unique()
+        transactions = transactions[transactions["postcode"].str.lower().isin(epc_postcodes)]
+
+        # Dates that cannot be parsed become NaT (errors="coerce") and fail the comparison further down
+        transfer_dates = pd.to_datetime(transactions["date_of_transfer"], format="%Y-%m-%d", errors="coerce")
+        transactions["date_of_transfer"] = transfer_dates
+        cutoff = datetime.now() - pd.DateOffset(years=5)
+        return transactions[transactions["date_of_transfer"] >= cutoff]
+
     def match_with_land_registry(self):
         """
         This function matches the land registry data to the existing matches
         :return:
         """
-        # TODO: Refactor this
+        # TODO: Refactor this entire function
         if self.matched_addresses is None:
             raise ValueError("Run match() first!")
 
-        self.land_registry = pd.read_csv(self.land_registry_path)
+        logger.info("Reading land registry data")
+        self.land_registry = self.get_land_registry()
 
         for col in ["postcode", "street", "paon", "saon"]:
             self.land_registry[col] = self.land_registry[col].str.lower().str.strip()
diff --git a/etl/ownership/projects/midlands_portfolio/app.py b/etl/ownership/projects/midlands_portfolio/app.py
new file mode 100644
index 00000000..d370ba1e
--- /dev/null
+++ b/etl/ownership/projects/midlands_portfolio/app.py
@@ -0,0 +1,64 @@
+from etl.ownership.Ownership import Ownership
+
+# Set up the project configuration
+USER_IDS = [
+    2,  # Khalim
+    3,  # Chenai
+    5,  # Anna
+    30,  # Patricia
+]
+PORTFOLIO_ID = None
+
+EPC_PATHS = [
+    "local_data/all-domestic-certificates/domestic-E08000025-Birmingham/certificates.csv",
+    "local_data/all-domestic-certificates/domestic-E08000031-Wolverhampton/certificates.csv",
+    "local_data/all-domestic-certificates/domestic-E08000026-Coventry/certificates.csv",
+    "local_data/all-domestic-certificates/domestic-E06000016-Leicester/certificates.csv",
+    "local_data/all-domestic-certificates/domestic-E06000015-Derby/certificates.csv",
+    "local_data/all-domestic-certificates/domestic-E06000021-Stoke-on-Trent/certificates.csv",
+    "local_data/all-domestic-certificates/domestic-E06000018-Nottingham/certificates.csv",
+    "local_data/all-domestic-certificates/domestic-E07000154-Northampton/certificates.csv",
+    "local_data/all-domestic-certificates/domestic-E06000061-North-Northamptonshire/certificates.csv",
+    "local_data/all-domestic-certificates/domestic-E06000062-West-Northamptonshire/certificates.csv",
+    "local_data/all-domestic-certificates/domestic-E07000152-East-Northamptonshire/certificates.csv",
+    "local_data/all-domestic-certificates/domestic-E07000155-South-Northamptonshire/certificates.csv",
+    #
+    "local_data/all-domestic-certificates/domestic-E08000027-Dudley/certificates.csv",
+    "local_data/all-domestic-certificates/domestic-E08000029-Solihull/certificates.csv",
+    "local_data/all-domestic-certificates/domestic-E07000234-Bromsgrove/certificates.csv",
+    "local_data/all-domestic-certificates/domestic-E08000030-Walsall/certificates.csv",
+    "local_data/all-domestic-certificates/domestic-E08000028-Sandwell/certificates.csv",
+    "local_data/all-domestic-certificates/domestic-E06000019-Herefordshire-County-of/certificates.csv",
+    "local_data/all-domestic-certificates/domestic-E06000020-Telford-and-Wrekin/certificates.csv",
+    "local_data/all-domestic-certificates/domestic-E07000218-North-Warwickshire/certificates.csv",
+    "local_data/all-domestic-certificates/domestic-E07000222-Warwick/certificates.csv",
+    "local_data/all-domestic-certificates/domestic-E07000237-Worcester/certificates.csv",
+    # East midlands
+    "local_data/all-domestic-certificates/domestic-E07000035-Derbyshire-Dales/certificates.csv",
+    "local_data/all-domestic-certificates/domestic-E07000038-North-East-Derbyshire/certificates.csv",
+    "local_data/all-domestic-certificates/domestic-E07000039-South-Derbyshire/certificates.csv",
+    "local_data/all-domestic-certificates/domestic-E06000012-North-East-Lincolnshire/certificates.csv",
+    "local_data/all-domestic-certificates/domestic-E06000013-North-Lincolnshire/certificates.csv",
+    "local_data/all-domestic-certificates/domestic-E07000138-Lincoln/certificates.csv",
+    "local_data/all-domestic-certificates/domestic-E07000134-North-West-Leicestershire/certificates.csv",
+    "local_data/all-domestic-certificates/domestic-E06000017-Rutland/certificates.csv",
+]
+
+DOMESTIC_OWNERSHIP_PATH = "/Users/khalimconn-kowlessar/Downloads/CCOD_FULL_2024_07.csv"
+OVERSEAS_OWNERSHIP_PATH = "/Users/khalimconn-kowlessar/Downloads/OCOD_FULL_2024_07.csv"
+LAND_REGISTRY_PATH = "/Users/khalimconn-kowlessar/Downloads/pp-complete.csv"
+
+PROJECT_NAME = "Midlands Portfolio"
+
+
+def app():
+    ownership_instance = Ownership(
+        epc_paths=EPC_PATHS,
+        domestic_ownership_path=DOMESTIC_OWNERSHIP_PATH,
+        overseas_ownership_path=OVERSEAS_OWNERSHIP_PATH,
+        land_registry_path=LAND_REGISTRY_PATH,
+        project_name=PROJECT_NAME
+    )
+    ownership_instance.pipeline()
+
+    # TODO: Create portfolio and payload

From 56889fa4b008bdff389e5ea4e83c1d15545ee077 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 19 Aug 2024 11:34:47 +0100
Subject: [PATCH 116/182] added land registry store

---
 etl/ownership/Ownership.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/etl/ownership/Ownership.py b/etl/ownership/Ownership.py
index 7403c45c..0bbb4689 100644
--- a/etl/ownership/Ownership.py
+++ b/etl/ownership/Ownership.py
@@ -81,6 +81,9 @@ class Ownership:
 
         # Data storage paths
         self.epc_data_filepath = f"ownership/{project_name}/{self.run_timestamp}/epc_data.xlsx"
+        self.filtered_land_registry_filepath = (
+            f"ownership/{project_name}/{self.run_timestamp}/filtered_land_registry.xlsx"
+        )
 
         # Data
         self.epc_data = None
@@ -567,12 +570,19 @@ class Ownership:
 
         logger.info("Reading land registry data")
         self.land_registry = self.get_land_registry()
+        # Store this fitereed version in s3
+        save_excel_to_s3(
+            df=self.land_registry,
+            bucket_name="epc_data",
+            file_key=self.filtered_land_registry_filepath,
+        )
 
         for col in ["postcode", "street", "paon", "saon"]:
             self.land_registry[col] = self.land_registry[col].str.lower().str.strip()
 
         self.land_registry["date_of_transfer"] = pd.to_datetime(self.land_registry["date_of_transfer"])
 
+        logger.info("Performing land registry matching")
         land_registry_matches = []
         for _, match in tqdm(self.matched_addresses.iterrows(), total=len(self.matched_addresses)):
             # Filter land registry on the postcode

From aca7e6935ea72f3cfb869ace1e529b5b95e9973c Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 19 Aug 2024 11:52:12 +0100
Subject: [PATCH 117/182] working on filtering methodology

---
 etl/ownership/Ownership.py                    | 155 ++++++++++++++++--
 .../projects/midlands_portfolio/app.py        |  17 +-
 2 files changed, 159 insertions(+), 13 deletions(-)

diff --git a/etl/ownership/Ownership.py b/etl/ownership/Ownership.py
index 0bbb4689..122c36e5 100644
--- a/etl/ownership/Ownership.py
+++ b/etl/ownership/Ownership.py
@@ -54,7 +54,10 @@ class Ownership:
         domestic_ownership_path: str,
         overseas_ownership_path: str,
         land_registry_path: str,
-        project_name: str
+        project_name: str,
+        bucket: str,
+        average_property_value: float,
+        portfolio_value: float
     ):
         """
 
@@ -67,6 +70,8 @@ class Ownership:
                                         corporate ownership of properties in the UK, where the companies are overseas
         :param land_registry_path: A string that points to the location of the land registry data
         :param project_name: A string that is used to identify the project
+        :param bucket: The name of the s3 bucket where the data will be stored
+        :param average_property_value: The average property value in the area
         """
 
         # All epc paths should end with certificates.csv
@@ -78,13 +83,23 @@ class Ownership:
         self.land_registry_path = land_registry_path
 
         self.run_timestamp = str(datetime.now())
+        self.project_name = project_name
+        self.bucket = bucket
+
+        self.average_property_value = average_property_value
+        self.portfolio_value = portfolio_value
 
         # Data storage paths
-        self.epc_data_filepath = f"ownership/{project_name}/{self.run_timestamp}/epc_data.xlsx"
+        self.epc_data_filepath = f"ownership/{self.project_name}/{self.run_timestamp}/epc_data.xlsx"
         self.filtered_land_registry_filepath = (
-            f"ownership/{project_name}/{self.run_timestamp}/filtered_land_registry.xlsx"
+            f"ownership/{self.project_name}/{self.run_timestamp}/filtered_land_registry.xlsx"
+        )
+        self.matched_addresses_pre_filter_filepath = (
+            f"ownership/{self.project_name}/{self.run_timestamp}/matched_addresses_pre_filter.xlsx"
+        )
+        self.combined_matching_lookup_pre_filter_filepath = (
+            f"ownership/{self.project_name}/{self.run_timestamp}/combined_matching_lookup_pre_filter.xlsx"
         )
-
         # Data
         self.epc_data = None
         self.ownership_data = None
@@ -99,8 +114,40 @@ class Ownership:
         self.matched_addresses = None
         self.land_registry_matches = None
 
-    def pipeline(self):
-        pass
+    def pipeline(self, column_filters=None):
+        """
+        Runs the full ownership process
+        :param column_filters: Optional filters for the EPC data, handed on to source_epc_properties. Keys are
+                                EPC column names and each value is the list of values accepted for that
+                                column, e.g. {"column_name": ["value1", "value2", ...]}. An exception is
+                                raised if a column is not found in the EPC data.
+        """
+        # Step 1: Get EPC data
+        self.source_epc_properties(column_filters=column_filters)
+
+        # Step 2: Get company ownership data
+        self.load_company_ownership()
+
+        # Step 3: Prepare data for matching
+        self.prepare_for_matching()
+
+        # Step 4: Match EPC data to ownership data
+        self.match()
+
+        # Step 5: Match land registry data to existing matches
+        self.match_with_land_registry()
+
+        # Both tables are stored in s3 before any filtering is performed. Each pair is (table, file key)
+        pre_filter_outputs = (
+            (self.matched_addresses, self.matched_addresses_pre_filter_filepath),
+            (self.combined_matching_lookup, self.combined_matching_lookup_pre_filter_filepath),
+        )
+        for table, key in pre_filter_outputs:
+            save_excel_to_s3(
+                df=table,
+                bucket_name=self.bucket,
+                file_key=key
+            )
 
     def source_epc_properties(self, column_filters=None):
         """
@@ -139,7 +186,7 @@ class Ownership:
         # We now store the data in s3
         save_excel_to_s3(
             df=self.epc_data,
-            bucket_name="epc_data",
+            bucket_name=self.bucket,
             file_key=self.epc_data_filepath
         )
 
@@ -169,7 +216,8 @@ class Ownership:
         """
 
         logger.info("Preparing data for matching")
-        # Now we filter properties the other way around
+        # Now we filter properties the other way around, since the ownership data might not have all of the
+        # postcodes that appear in the EPC data
         self.epc_data = self.epc_data[
             self.epc_data["POSTCODE"].str.lower().isin(self.ownership_data["Postcode"].str.lower().unique())
         ]
@@ -468,6 +516,8 @@ class Ownership:
                     }
                 )
 
+        logger.info("Matching complete - creating lookup tables")
+
         self.freehold_matching_lookup = pd.DataFrame(freehold_matching_lookup)
         self.leasehold_matching_lookup = pd.DataFrame(leasehold_matching_lookup)
 
@@ -540,6 +590,8 @@ class Ownership:
             .str.replace(",", "")
         )
 
+        logger.info("Successfully completed matching")
+
     def get_land_registry(self):
         """
         This function reads in the land registry data and filters it on the postcodes found in the EPC data
@@ -573,7 +625,7 @@ class Ownership:
         # Store this fitereed version in s3
         save_excel_to_s3(
             df=self.land_registry,
-            bucket_name="epc_data",
+            bucket_name=self.bucket,
             file_key=self.filtered_land_registry_filepath,
         )
 
@@ -780,6 +832,7 @@ class Ownership:
 
         self.land_registry_matches = pd.DataFrame(land_registry_matches)
 
+        logger.info("Sucessfully completed land registry matching - merging onto matched_addresses")
         # Merge onto the EPC - ownership matches
         self.matched_addresses = self.matched_addresses.merge(
             land_registry_matches,
@@ -803,5 +856,85 @@ class Ownership:
             (self.matched_addresses["TRANSACTION_TYPE"].isin(["marketed sale", "non marketed sale"]))
         )
 
-    def filter_matches(self):
-        pass
+    def aggregate_matches(self, matching_lookup, company_ownership, properties):
+        """
+        Counts the matched properties per company, split by local authority, and estimates their value
+        :param matching_lookup: DataFrame of matches; it is joined to company_ownership on "Title Number" and
+                                to properties on "UPRN"
+        :param company_ownership: DataFrame of company ownership records, joined on "Title Number"
+        :param properties: DataFrame that provides the "UPRN" and "LOCAL_AUTHORITY_LABEL" columns
+        :return: DataFrame with one row per "Company Registration No. (1)", sorted by
+                 total_number_of_properties (descending). It holds one count column per local authority,
+                 "Proprietor Name (1)", total_number_of_properties, approx_value and cumulative_value
+        """
+        company_col = "Company Registration No. (1)"
+
+        # Attach the ownership record (via title number) and the local authority (via UPRN) to every match
+        with_ownership = matching_lookup.merge(company_ownership, how="left", on="Title Number")
+        local_authorities = properties[["UPRN", "LOCAL_AUTHORITY_LABEL"]]
+        df = with_ownership.merge(local_authorities, how="left", on="UPRN")
+
+        # Number of properties for each (company, local authority) pair
+        pair_counts = df.groupby([company_col, "LOCAL_AUTHORITY_LABEL"])["UPRN"].count()
+        counts = pair_counts.reset_index(name="number_of_properties")
+        counts = counts.sort_values("number_of_properties", ascending=False)
+
+        # Reshape to one row per company and one column per local authority. fill_value=0 covers the local
+        # authorities in which a company has no properties
+        pivot_counts = counts.pivot_table(
+            index=[company_col], columns="LOCAL_AUTHORITY_LABEL", values="number_of_properties", fill_value=0
+        ).reset_index()
+
+        # Number of properties per company across all local authorities
+        by_company = df.groupby([company_col])
+        total_counts = by_company["UPRN"].count().reset_index(name="total_number_of_properties")
+
+        # Presumably one registration number can be listed with several proprietor names, so a single name
+        # is picked per company: the first non-null one in its group
+        best_names = by_company["Proprietor Name (1)"].first().reset_index()
+
+        # Put the name and the total next to the per local authority counts
+        total_counts = best_names.merge(total_counts, how="left", on=[company_col])
+        pivot_counts = pivot_counts.merge(total_counts, how="left", on=[company_col])
+
+        # Largest owners first, so that cumulative_value accumulates from the biggest owner downwards
+        pivot_counts = pivot_counts.sort_values("total_number_of_properties", ascending=False)
+
+        # approx_value is a rough estimate: the same average property value is applied to every property
+        pivot_counts["approx_value"] = self.average_property_value * pivot_counts["total_number_of_properties"]
+        pivot_counts["cumulative_value"] = pivot_counts["approx_value"].cumsum()
+
+        return pivot_counts
+
+    def create_final_matches(self):
+        """
+        Given the matching to this point, this method creates the final matching tables
+        :return: None. The resulting tables are only held in local variables so far (work in progress)
+        """
+        logger.info("Creating final matches")
+        matched_addresses_final = self.matched_addresses[
+            ~self.matched_addresses["sold_recently"] &
+            ~self.matched_addresses["sale_lodged_recently"]
+            ]
+
+        # Keep only the lookup rows whose UPRN survived the filtering of matched_addresses above
+        combined_matching_lookup_final = self.combined_matching_lookup[
+            self.combined_matching_lookup["UPRN"].isin(matched_addresses_final["UPRN"])
+        ]
+        # aggregate_matches indexes properties by column, so it needs a DataFrame rather than the list of
+        # file paths. NOTE(review): assumes the EPC data carries LOCAL_AUTHORITY_LABEL - confirm
+        combined_aggregate = self.aggregate_matches(
+            matching_lookup=combined_matching_lookup_final,
+            company_ownership=self.ownership_data,
+            properties=self.epc_data
+        )
+        # Owners are kept while the cumulative estimated value stays within portfolio_value
+        investment_owners = combined_aggregate[combined_aggregate["cumulative_value"] <= self.portfolio_value]
+
+        investment_properties = matched_addresses_final[
+            matched_addresses_final["Company Registration No. (1)"].isin(
+                investment_owners["Company Registration No. (1)"])
+        ]
+
+        portfolio_epc_data = self.epc_data[self.epc_data["UPRN"].isin(investment_properties["UPRN"])]
+        #
diff --git a/etl/ownership/projects/midlands_portfolio/app.py b/etl/ownership/projects/midlands_portfolio/app.py
index d370ba1e..17baed07 100644
--- a/etl/ownership/projects/midlands_portfolio/app.py
+++ b/etl/ownership/projects/midlands_portfolio/app.py
@@ -49,16 +49,29 @@ OVERSEAS_OWNERSHIP_PATH = "/Users/khalimconn-kowlessar/Downloads/OCOD_FULL_2024_
 LAND_REGISTRY_PATH = "/Users/khalimconn-kowlessar/Downloads/pp-complete.csv"
 
 PROJECT_NAME = "Midlands Portfolio"
+DATA_BUCKET = "retrofit-data-dev"
+
+# We use this as a rough figure, which helps us shape the portfolio
+PROPERTY_VALUE_ESTIMATE = 200_000
+# We want a 50m portfolio, but we create a bigger portfolio than needed, since properties will be filtered out
+PORTFOLIO_VALUE = 75_000_000
 
 
 def app():
+    epc_column_filters = {
+        "CURRENT_ENERGY_RATING": ["F", "G"]
+    }
+
     ownership_instance = Ownership(
         epc_paths=EPC_PATHS,
         domestic_ownership_path=DOMESTIC_OWNERSHIP_PATH,
         overseas_ownership_path=OVERSEAS_OWNERSHIP_PATH,
         land_registry_path=LAND_REGISTRY_PATH,
-        project_name=PROJECT_NAME
+        project_name=PROJECT_NAME,
+        bucket=DATA_BUCKET,
+        average_property_value=PROPERTY_VALUE_ESTIMATE,
+        portfolio_value=PORTFOLIO_VALUE
     )
-    ownership_instance.pipeline()
+    ownership_instance.pipeline(column_filters=epc_column_filters)
 
     # TODO: Create portfolio and payload

From 0db2f592305f6af112a94beb71897c75279dfc79 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 19 Aug 2024 12:07:18 +0100
Subject: [PATCH 118/182] set up structure

---
 etl/ownership/Ownership.py    | 62 ++++++++++++++++++++++++++++++++---
 etl/spatial/OpenUprnClient.py | 59 ++++++++++++++++++++++++++-------
 2 files changed, 105 insertions(+), 16 deletions(-)

diff --git a/etl/ownership/Ownership.py b/etl/ownership/Ownership.py
index 122c36e5..cfa3e3b3 100644
--- a/etl/ownership/Ownership.py
+++ b/etl/ownership/Ownership.py
@@ -7,6 +7,7 @@ import re
 from utils.s3 import save_excel_to_s3
 from utils.logger import setup_logger
 from backend.SearchEpc import SearchEpc
+from etl.spatial.OpenUprnClient import OpenUprnClient
 
 logger = setup_logger()
 
@@ -100,6 +101,12 @@ class Ownership:
         self.combined_matching_lookup_pre_filter_filepath = (
             f"ownership/{self.project_name}/{self.run_timestamp}/combined_matching_lookup_pre_filter.xlsx"
         )
+        # Final output paths
+        self.portfolio_owners_filepath = f"ownership/{self.project_name}/{self.run_timestamp}/portfolio_owners.xlsx"
+        self.portfolio_properties_filepath = (
+            f"ownership/{self.project_name}/{self.run_timestamp}/portfolio_properties.xlsx"
+        )
+
         # Data
         self.epc_data = None
         self.ownership_data = None
@@ -114,6 +121,11 @@ class Ownership:
         self.matched_addresses = None
         self.land_registry_matches = None
 
+        # Final outputs data
+        self.portfolio_owners = None
+        self.portfolio_properties = None
+        self.portfolio_epc_data = None
+
     def pipeline(self, column_filters=None):
         """
         Runs the full ownership process
@@ -917,24 +929,64 @@ class Ownership:
             ~self.matched_addresses["sale_lodged_recently"]
             ]
 
+        logger.info("Performing conservation area and listed/herigage building filtering")
+
+        portfolio_spatial_data = OpenUprnClient.get_spatial_data(
+            self.epc_data["UPRN"].tolist(), bucket_name="retrofit-data-dev"
+        )
+
+        portfolio_spatial_data = portfolio_spatial_data[
+            ["UPRN", "conservation_status", "is_listed_building", "is_heritage_building"]
+        ]
+
+        # Filter matched_addresses_final and filter combined_matching_lookup_final
+        matched_addresses_final = matched_addresses_final.merge(
+            portfolio_spatial_data, how="left", on="UPRN"
+        )
+        matched_addresses_final = matched_addresses_final[
+            ~matched_addresses_final["conservation_status"] &
+            ~matched_addresses_final["is_listed_building"] &
+            ~matched_addresses_final["is_heritage_building"]
+            ]
+
         # Filter combined_matching_lookup accordingly
         combined_matching_lookup_final = self.combined_matching_lookup[
             self.combined_matching_lookup["UPRN"].isin(self.combined_matching_lookup["UPRN"])
         ]
 
+        # Roll up portfolio
         combined_aggregate = self.aggregate_matches(
             matching_lookup=combined_matching_lookup_final,
             company_ownership=self.ownership_data,
             properties=self.epc_paths
         )
 
-        investment_owners = combined_aggregate[combined_aggregate["cumulative_value"] <= self.portfolio_value]
+        self.portfolio_owners = combined_aggregate[combined_aggregate["cumulative_value"] <= self.portfolio_value]
 
-        investment_properties = matched_addresses_final[
+        self.portfolio_properties = matched_addresses_final[
             matched_addresses_final["Company Registration No. (1)"].isin(
-                investment_owners["Company Registration No. (1)"])
+                self.portfolio_owners["Company Registration No. (1)"]
+            )
         ]
 
-        portfolio_epc_data = self.epc_data[self.epc_data["UPRN"].isin(investment_properties["UPRN"])]
+        self.portfolio_epc_data = self.epc_data[self.epc_data["UPRN"].isin(self.portfolio_properties["UPRN"])]
 
-        #
+        logger.info("Storing final outptus")
+        # Store data
+        save_excel_to_s3(
+            df=self.portfolio_owners,
+            bucket_name=self.bucket,
+            file_key=self.portfolio_owners_filepath,
+        )
+
+        save_excel_to_s3(
+            df=self.portfolio_properties,
+            bucket_name=self.bucket,
+            file_key=self.portfolio_properties_filepath,
+        )
+
+        save_excel_to_s3(
+            df=self.portfolio_epc_data,
+            bucket_name=self.bucket,
+            file_key=self.portfolio_epc_data_filepath,
+        )
diff --git a/etl/spatial/OpenUprnClient.py b/etl/spatial/OpenUprnClient.py
index 198f9945..11827f8d 100644
--- a/etl/spatial/OpenUprnClient.py
+++ b/etl/spatial/OpenUprnClient.py
@@ -119,7 +119,28 @@ class OpenUprnClient:
         )
 
     @staticmethod
-    def set_spatial_data(input_properties: list[Property], bucket_name):
+    def make_uprn_map(uprns, uprn_filenames):
+        """
+        Given a list of UPRNs, this method will return a map from each filename to the list of UPRNs contained in it
+        :param uprns: List of UPRNs
+        :param uprn_filenames: Lookup from UPRN range to filename
+        :return:
+        """
+        uprn_map = {}
+        for uprn in uprns:
+            filtered_df = uprn_filenames[
+                (uprn_filenames["lower"] <= int(uprn))
+                & (uprn_filenames["upper"] >= int(uprn))
+                ]
+            if filtered_df["filenames"].values[0] in uprn_map:
+                uprn_map[filtered_df["filenames"].values[0]].append(int(uprn))
+            else:
+                uprn_map[filtered_df["filenames"].values[0]] = [int(uprn)]
+
+        return uprn_map
+
+    @classmethod
+    def set_spatial_data(cls, input_properties: list[Property], bucket_name):
         """
         Given a list of properties, this method will set the spatial data for each property
         The method will look for the minimal set of uprn datasets that it needs to read in to get all of the spatial
@@ -130,16 +151,8 @@ class OpenUprnClient:
             bucket_name=bucket_name, file_key="spatial/filename_meta.parquet"
         )
 
-        uprn_map = {}
-        for uprn in [p.uprn for p in input_properties]:
-            filtered_df = uprn_filenames[
-                (uprn_filenames["lower"] <= int(uprn))
-                & (uprn_filenames["upper"] >= int(uprn))
-                ]
-            if filtered_df["filenames"].values[0] in uprn_map:
-                uprn_map[filtered_df["filenames"].values[0]].append(int(uprn))
-            else:
-                uprn_map[filtered_df["filenames"].values[0]] = [int(uprn)]
+        uprns = [p.uprn for p in input_properties]
+        uprn_map = cls.make_uprn_map(uprns, uprn_filenames)
 
         for filename, associated_uprn in tqdm(uprn_map.items(), total=len(uprn_map)):
             # Read in the file
@@ -158,3 +171,27 @@ class OpenUprnClient:
                 raise Exception(f"Property with UPRN {p.uprn} does not have spatial data")
 
         return input_properties
+
+    @classmethod
+    def get_spatial_data(cls, uprns: list[int], bucket_name):
+        """
+        Similar method to set_spatial_data, but designed to work more generally on a list of uprns
+        :return:
+        """
+        uprn_filenames = read_dataframe_from_s3_parquet(
+            bucket_name=bucket_name, file_key="spatial/filename_meta.parquet"
+        )
+
+        uprn_map = cls.make_uprn_map(uprns, uprn_filenames)
+
+        uprn_spatial_table = []
+        for filename, associated_uprn in tqdm(uprn_map.items(), total=len(uprn_map)):
+            # Read in the file
+            spatial_data = read_dataframe_from_s3_parquet(
+                bucket_name="retrofit-data-dev", file_key=f"spatial/{filename}"
+            )
+
+            spatial_df = spatial_data[spatial_data["UPRN"].isin(associated_uprn)]
+            uprn_spatial_table.append(spatial_df)
+
+        return pd.concat(uprn_spatial_table)

From 36b18876d66687997e17040ec10b7feb64414a64 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 19 Aug 2024 13:30:32 +0100
Subject: [PATCH 119/182] Added excluded owners

---
 etl/ownership/Ownership.py                       | 11 ++++++++++-
 etl/ownership/config.py                          |  5 +++++
 etl/ownership/projects/midlands_portfolio/app.py |  4 +++-
 3 files changed, 18 insertions(+), 2 deletions(-)
 create mode 100644 etl/ownership/config.py

diff --git a/etl/ownership/Ownership.py b/etl/ownership/Ownership.py
index cfa3e3b3..25ba7cea 100644
--- a/etl/ownership/Ownership.py
+++ b/etl/ownership/Ownership.py
@@ -58,7 +58,8 @@ class Ownership:
         project_name: str,
         bucket: str,
         average_property_value: float,
-        portfolio_value: float
+        portfolio_value: float,
+        excluded_owners: List[str] = None,
     ):
         """
 
@@ -83,6 +84,8 @@ class Ownership:
         self.overseas_ownership_path = overseas_ownership_path
         self.land_registry_path = land_registry_path
 
+        self.excluded_owners = [] if excluded_owners is None else excluded_owners
+
         self.run_timestamp = str(datetime.now())
         self.project_name = project_name
         self.bucket = bucket
@@ -221,6 +224,12 @@ class Ownership:
             self.ownership_data["Postcode"].str.lower().isin(self.epc_data["POSTCODE"].str.lower().unique())
         ]
 
+        logger.info("Removing excluded owners")
+        # Use the company registration number to filter out excluded owners
+        self.ownership_data = self.ownership_data[
+            ~self.ownership_data["Company Registration No. (1)"].astype(str).isin(self.excluded_owners)
+        ]
+
     def prepare_for_matching(self):
         """
         Given the epc properties and the ownership data, this function performs a number of operations on both datasets
diff --git a/etl/ownership/config.py b/etl/ownership/config.py
new file mode 100644
index 00000000..c737d532
--- /dev/null
+++ b/etl/ownership/config.py
@@ -0,0 +1,5 @@
+# These are the registration numbers for companies we've heard a response from, and who cannot sell
+OWNERS_WHO_CANT_SELL = [
+    # Al Rayan
+    "4483430"
+]
diff --git a/etl/ownership/projects/midlands_portfolio/app.py b/etl/ownership/projects/midlands_portfolio/app.py
index 17baed07..5a4cf3f3 100644
--- a/etl/ownership/projects/midlands_portfolio/app.py
+++ b/etl/ownership/projects/midlands_portfolio/app.py
@@ -1,4 +1,5 @@
 from etl.ownership.Ownership import Ownership
+from etl.ownership.config import OWNERS_WHO_CANT_SELL as EXCLUDED_OWNERS
 
 # Set up the project configuration
 USER_IDS = [
@@ -70,7 +71,8 @@ def app():
         project_name=PROJECT_NAME,
         bucket=DATA_BUCKET,
         average_property_value=PROPERTY_VALUE_ESTIMATE,
-        portfolio_value=PORTFOLIO_VALUE
+        portfolio_value=PORTFOLIO_VALUE,
+        EXCLUDED_OWNERS=EXCLUDED_OWNERS
     )
     ownership_instance.pipeline(column_filters=epc_column_filters)
 

From c48fb674ded97a44815eb3511a2c520f06789370 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 19 Aug 2024 13:43:18 +0100
Subject: [PATCH 120/182] Added PortfolioUsers table

---
 backend/app/db/models/portfolio.py               | 10 ++++++++++
 etl/ownership/projects/midlands_portfolio/app.py | 10 +++++++---
 2 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/backend/app/db/models/portfolio.py b/backend/app/db/models/portfolio.py
index 5ac092a7..8a22e98a 100644
--- a/backend/app/db/models/portfolio.py
+++ b/backend/app/db/models/portfolio.py
@@ -205,3 +205,13 @@ class PropertyTargetsModel(Base):
     created_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc))
     epc = Column(Enum(Epc))
     heat_demand = Column(Text)
+
+
+class PortfolioUsers(Base):
+    __table_args__ = "portfolioUsers"
+    id = Column(Integer, primary_key=True, autoincrement=True)
+    userId = Column(Integer, ForeignKey('user.id'), nullable=False)
+    portfolioId = Column(Integer, ForeignKey('portfolio.id'), nullable=False)
+    role = Column(Text, nullable=False)
+    created_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc))
+    updated_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc))
diff --git a/etl/ownership/projects/midlands_portfolio/app.py b/etl/ownership/projects/midlands_portfolio/app.py
index 5a4cf3f3..8cad3c3e 100644
--- a/etl/ownership/projects/midlands_portfolio/app.py
+++ b/etl/ownership/projects/midlands_portfolio/app.py
@@ -1,3 +1,5 @@
+from sqlalchemy.orm import sessionmaker
+from backend.app.db.connection import db_engine
 from etl.ownership.Ownership import Ownership
 from etl.ownership.config import OWNERS_WHO_CANT_SELL as EXCLUDED_OWNERS
 
@@ -8,7 +10,6 @@ USER_IDS = [
     5,  # Anna
     30,  # Patricia
 ]
-PORTFOLIO_ID = None
 
 EPC_PATHS = [
     "local_data/all-domestic-certificates/domestic-E08000025-Birmingham/certificates.csv",
@@ -72,8 +73,11 @@ def app():
         bucket=DATA_BUCKET,
         average_property_value=PROPERTY_VALUE_ESTIMATE,
         portfolio_value=PORTFOLIO_VALUE,
-        EXCLUDED_OWNERS=EXCLUDED_OWNERS
+        excluded_owners=EXCLUDED_OWNERS
     )
     ownership_instance.pipeline(column_filters=epc_column_filters)
 
-    # TODO: Create portfolio and payload
+    session = sessionmaker(bind=db_engine)()
+    session.begin()
+
+    # Create the project, if a portfolio doesn't exist for the project name

From 46c7a7c0d1ccf525a784879668d1723230bccff8 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 19 Aug 2024 13:46:03 +0100
Subject: [PATCH 121/182] added create_sfr_portfolio

---
 .../projects/midlands_portfolio/app.py        | 47 +++++++++++++++++--
 1 file changed, 44 insertions(+), 3 deletions(-)

diff --git a/etl/ownership/projects/midlands_portfolio/app.py b/etl/ownership/projects/midlands_portfolio/app.py
index 8cad3c3e..2165dc94 100644
--- a/etl/ownership/projects/midlands_portfolio/app.py
+++ b/etl/ownership/projects/midlands_portfolio/app.py
@@ -1,5 +1,7 @@
 from sqlalchemy.orm import sessionmaker
+from sqlalchemy.orm.exc import NoResultFound
 from backend.app.db.connection import db_engine
+from backend.app.db.models.portfolio import Portfolio, PortfolioUsers
 from etl.ownership.Ownership import Ownership
 from etl.ownership.config import OWNERS_WHO_CANT_SELL as EXCLUDED_OWNERS
 
@@ -59,6 +61,48 @@ PROPERTY_VALUE_ESTIMATE = 200_000
 PORTFOLIO_VALUE = 75_000_000
 
 
+def create_sfr_portfolio(project_name, user_ids):
+    session = sessionmaker(bind=db_engine)()
+    session.begin()
+
+    # Check for an existing portfolio by name
+    try:
+        portfolio = session.query(Portfolio).filter_by(name=project_name).one()
+    except NoResultFound:
+        portfolio = None
+
+    if portfolio:
+        # Fetch the associated users
+        existing_user_ids = {
+            pu.userId for pu in session.query(PortfolioUsers.userId).filter_by(portfolioId=portfolio.id)
+        }
+
+        # Check if the specified user_ids match any existing associations
+        if existing_user_ids.intersection(set(user_ids)):
+            print("Portfolio already exists under this name, for specified users.")
+        else:
+            print("Portfolio already exists under this name, for different users.")
+            return None  # Optional: You could also update the user associations here if needed
+
+        return portfolio  # Return the existing portfolio data
+
+    # If portfolio does not exist, create a new one
+    new_portfolio = Portfolio(name=project_name)
+    session.add(new_portfolio)
+    session.flush()  # Ensures that 'id' is available before committing if needed
+
+    # Create new user associations in PortfolioUsers
+    for user_id in user_ids:
+        new_association = PortfolioUsers(userId=user_id, portfolioId=new_portfolio.id)
+        session.add(new_association)
+
+    session.commit()
+    print(f"New portfolio created with ID: {new_portfolio.id}")
+    session.close()
+
+    return new_portfolio
+
+
 def app():
     epc_column_filters = {
         "CURRENT_ENERGY_RATING": ["F", "G"]
@@ -77,7 +121,4 @@ def app():
     )
     ownership_instance.pipeline(column_filters=epc_column_filters)
 
-    session = sessionmaker(bind=db_engine)()
-    session.begin()
-
     # Create the project, if a portfolio doesn't exist for the project name

From 34464267dcea14ac20e643988580721e43ac1851 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 19 Aug 2024 13:48:18 +0100
Subject: [PATCH 122/182] added get_asset_list

---
 etl/ownership/Ownership.py                    | 19 +++++++++++++++++++
 .../projects/midlands_portfolio/app.py        |  3 +++
 2 files changed, 22 insertions(+)

diff --git a/etl/ownership/Ownership.py b/etl/ownership/Ownership.py
index 25ba7cea..8221f8f7 100644
--- a/etl/ownership/Ownership.py
+++ b/etl/ownership/Ownership.py
@@ -109,6 +109,9 @@ class Ownership:
         self.portfolio_properties_filepath = (
             f"ownership/{self.project_name}/{self.run_timestamp}/portfolio_properties.xlsx"
         )
+        self.portfolio_epc_data_filepath = (
+            f"ownership/{self.project_name}/{self.run_timestamp}/portfolio_epc_data.xlsx"
+        )
 
         # Data
         self.epc_data = None
@@ -999,3 +1002,19 @@ class Ownership:
             bucket_name=self.bucket,
             file_key=self.portfolio_epc_data_filepath,
         )
+
+    def get_asset_list(self):
+        """
+        From the EPC data, creates the asset list
+        :return:
+        """
+
+        asset_list = self.portfolio_epc_data[["UPRN", "ADDRESS1", "POSTCODE"]].copy().rename(
+            columns={
+                "UPRN": "uprn",
+                "ADDRESS1": "address",
+                "POSTCODE": "postcode"
+            }
+        )
+
+        return asset_list
diff --git a/etl/ownership/projects/midlands_portfolio/app.py b/etl/ownership/projects/midlands_portfolio/app.py
index 2165dc94..905ff0e4 100644
--- a/etl/ownership/projects/midlands_portfolio/app.py
+++ b/etl/ownership/projects/midlands_portfolio/app.py
@@ -122,3 +122,6 @@ def app():
     ownership_instance.pipeline(column_filters=epc_column_filters)
 
     # Create the project, if a portfolio doesn't exist for the project name
+
+    # Create the asset list and the body of the portfolio
+    asset_list = ownership_instance.get_asset_list()

From 308e028605d3c4ea60e70fd8a2112594c8a10b90 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 19 Aug 2024 16:54:19 +0100
Subject: [PATCH 123/182] fixing dupes

---
 backend/app/db/models/portfolio.py |  2 +-
 etl/ownership/Ownership.py         | 30 +++++++++++++++++-------------
 2 files changed, 18 insertions(+), 14 deletions(-)

diff --git a/backend/app/db/models/portfolio.py b/backend/app/db/models/portfolio.py
index 8a22e98a..f1355197 100644
--- a/backend/app/db/models/portfolio.py
+++ b/backend/app/db/models/portfolio.py
@@ -208,7 +208,7 @@ class PropertyTargetsModel(Base):
 
 
 class PortfolioUsers(Base):
-    __table_args__ = "portfolioUsers"
+    __tablename__ = "portfolioUsers"
     id = Column(Integer, primary_key=True, autoincrement=True)
     userId = Column(Integer, ForeignKey('user.id'), nullable=False)
     portfolioId = Column(Integer, ForeignKey('portfolio.id'), nullable=False)
diff --git a/etl/ownership/Ownership.py b/etl/ownership/Ownership.py
index 8221f8f7..9e328452 100644
--- a/etl/ownership/Ownership.py
+++ b/etl/ownership/Ownership.py
@@ -167,6 +167,9 @@ class Ownership:
             file_key=self.combined_matching_lookup_pre_filter_filepath
         )
 
+        # Prepare the final outputs:
+        self.create_final_matches()
+
     def source_epc_properties(self, column_filters=None):
         """
         This function will filter the epc data as specified by column filters, searching across all of the EPC tables
@@ -556,22 +559,22 @@ class Ownership:
         self.shared_freehold_match = shared_freehold_match
 
         # finally, we create matched addresses
-        combined_matching_lookup = pd.concat([self.freehold_matching_lookup, self.leasehold_matching_lookup])
+        self.combined_matching_lookup = pd.concat([self.freehold_matching_lookup, self.leasehold_matching_lookup])
 
         # Remove duplicates
-        combined_matching_lookup = self.remove_duplicate_matches(
-            matching_lookup=combined_matching_lookup,
+        self.combined_matching_lookup = self.remove_duplicate_matches(
+            matching_lookup=self.combined_matching_lookup,
             properties=self.epc_data,
             company_ownership=self.ownership_data
         )
         # We also have duplicates at a UPRN level
         self.combined_matching_lookup = self.remove_duplicate_uprn_matches(
-            matching_lookup=combined_matching_lookup,
+            matching_lookup=self.combined_matching_lookup,
             properties=self.epc_data,
             company_ownership=self.ownership_data
         )
 
-        self.matched_addresses = combined_matching_lookup.merge(
+        self.matched_addresses = self.combined_matching_lookup.merge(
             self.epc_data[
                 [
                     "UPRN",
@@ -859,7 +862,7 @@ class Ownership:
         logger.info("Sucessfully completed land registry matching - merging onto matched_addresses")
         # Merge onto the EPC - ownership matches
         self.matched_addresses = self.matched_addresses.merge(
-            land_registry_matches,
+            self.land_registry_matches,
             how="left",
             left_on="UPRN",
             right_on="uprn"
@@ -944,21 +947,22 @@ class Ownership:
         logger.info("Performing conservation area and listed/herigage building filtering")
 
         portfolio_spatial_data = OpenUprnClient.get_spatial_data(
-            self.epc_data["UPRN"].tolist(), bucket_name="retrofit-data-dev"
+            matched_addresses_final["UPRN"].unique().tolist(), bucket_name="retrofit-data-dev"
         )
 
         portfolio_spatial_data = portfolio_spatial_data[
             ["UPRN", "conservation_status", "is_listed_building", "is_heritage_building"]
-        ]
+        ].copy()
+        portfolio_spatial_data["UPRN"] = portfolio_spatial_data["UPRN"].astype(str)
 
         # Filter matched_addresses_final and filter combined_matching_lookup_final
         matched_addresses_final = matched_addresses_final.merge(
             portfolio_spatial_data, how="left", on="UPRN"
         )
         matched_addresses_final = matched_addresses_final[
-            ~matched_addresses_final["conservation_status"] &
-            ~matched_addresses_final["is_listed_building"] &
-            ~matched_addresses_final["is_heritage_building"]
+            matched_addresses_final["conservation_status"].isin([None, False]) &
+            matched_addresses_final["is_listed_building"].isin([None, False]) &
+            matched_addresses_final["is_heritage_building"].isin([None, False])
             ]
 
         # Filter combined_matching_lookup accordingly
@@ -970,7 +974,7 @@ class Ownership:
         combined_aggregate = self.aggregate_matches(
             matching_lookup=combined_matching_lookup_final,
             company_ownership=self.ownership_data,
-            properties=self.epc_paths
+            properties=self.epc_data
         )
 
         self.portfolio_owners = combined_aggregate[combined_aggregate["cumulative_value"] <= self.portfolio_value]
@@ -983,7 +987,7 @@ class Ownership:
 
         self.portfolio_epc_data = self.epc_data[self.epc_data["UPRN"].isin(self.portfolio_properties["UPRN"])]
 
-        logger.info("Storing final outptus")
+        logger.info("Storing final outpus")
         # Store data
         save_excel_to_s3(
             df=self.portfolio_owners,

From 1fda0dc2375e132ac4f58989e8536e0c9e780933 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 19 Aug 2024 17:55:14 +0100
Subject: [PATCH 124/182] debugged extract_property_on_site_recommendations

---
 backend/app/db/models/portfolio.py            |  3 +-
 backend/app/plan/router.py                    |  4 +-
 .../projects/midlands_portfolio/app.py        | 99 +++++++++++++------
 3 files changed, 71 insertions(+), 35 deletions(-)

diff --git a/backend/app/db/models/portfolio.py b/backend/app/db/models/portfolio.py
index f1355197..7580a27d 100644
--- a/backend/app/db/models/portfolio.py
+++ b/backend/app/db/models/portfolio.py
@@ -3,6 +3,7 @@ import pytz
 import datetime
 from sqlalchemy import Column, Integer, Text, Boolean, Float, DateTime, Enum, ForeignKey, CheckConstraint
 from sqlalchemy.ext.declarative import declarative_base
+from backend.app.db.models.users import UserModel  # noqa
 
 Base = declarative_base()
 
@@ -210,7 +211,7 @@ class PropertyTargetsModel(Base):
 class PortfolioUsers(Base):
     __tablename__ = "portfolioUsers"
     id = Column(Integer, primary_key=True, autoincrement=True)
-    userId = Column(Integer, ForeignKey('user.id'), nullable=False)
+    user_id = Column(Integer, ForeignKey('user.id'), nullable=False)
     portfolioId = Column(Integer, ForeignKey('portfolio.id'), nullable=False)
     role = Column(Text, nullable=False)
     created_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc))
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 25e41e52..23d3f5d2 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -313,7 +313,7 @@ def get_on_site_data(body: PlanTriggerRequest):
 
 
 def extract_property_on_site_recommendations(config, patches, already_installed, non_invasive_recommendations, uprn):
-    patch_has_uprn = "uprn" in patches[0]
+    patch_has_uprn = "uprn" in patches[0] if patches else True
     if patch_has_uprn:
         patch = next((
             x for x in patches if str(x["uprn"]) == str(config["uprn"])
@@ -330,7 +330,7 @@ def extract_property_on_site_recommendations(config, patches, already_installed,
 
     # Because we have some non-invasive recommendations that match on address and postcode, but not UPRN
     # we need to check existence of uprn
-    has_uprn = "uprn" in non_invasive_recommendations[0]
+    has_uprn = "uprn" in non_invasive_recommendations[0] if non_invasive_recommendations else True
     if has_uprn:
         property_non_invasive_recommendations = next((
             x for x in non_invasive_recommendations if
diff --git a/etl/ownership/projects/midlands_portfolio/app.py b/etl/ownership/projects/midlands_portfolio/app.py
index 905ff0e4..bf18d846 100644
--- a/etl/ownership/projects/midlands_portfolio/app.py
+++ b/etl/ownership/projects/midlands_portfolio/app.py
@@ -1,9 +1,9 @@
 from sqlalchemy.orm import sessionmaker
-from sqlalchemy.orm.exc import NoResultFound
 from backend.app.db.connection import db_engine
 from backend.app.db.models.portfolio import Portfolio, PortfolioUsers
 from etl.ownership.Ownership import Ownership
 from etl.ownership.config import OWNERS_WHO_CANT_SELL as EXCLUDED_OWNERS
+from utils.s3 import save_csv_to_s3
 
 # Set up the project configuration
 USER_IDS = [
@@ -61,46 +61,51 @@ PROPERTY_VALUE_ESTIMATE = 200_000
 PORTFOLIO_VALUE = 75_000_000
 
 
-def create_sfr_portfolio(project_name, user_ids):
+def create_sfr_portfolio(project_name, user_ids, status, goal):
     session = sessionmaker(bind=db_engine)()
-    session.begin()
-
-    # Check for an existing portfolio by name
     try:
-        portfolio = session.query(Portfolio).filter_by(name=project_name).one()
-    except NoResultFound:
-        portfolio = None
+        session.begin()
 
-    if portfolio:
-        # Fetch the associated users
-        existing_user_ids = {
-            pu.userId for pu in session.query(PortfolioUsers.userId).filter_by(portfolioId=portfolio.id)
-        }
+        # Check for an existing portfolio by name
+        portfolio = session.query(Portfolio).filter_by(name=project_name).one_or_none()
 
-        # Check if the specified user_ids match any existing associations
-        if existing_user_ids.intersection(set(user_ids)):
-            print("Portfolio already exists under this name, for specified users.")
-        else:
-            print("Portfolio already exists under this name, for different users.")
-            return None  # Optional: You could also update the user associations here if needed
+        if portfolio:
+            # Fetch the associated users
+            existing_user_ids = {
+                pu.user_id for pu in session.query(PortfolioUsers.user_id).filter_by(portfolioId=portfolio.id)
+            }
 
-        return portfolio  # Return the existing portfolio data
+            # Check if the specified user_ids match any existing associations
+            if existing_user_ids.intersection(set(user_ids)):
+                print("Portfolio already exists under this name, for specified users.")
+            else:
+                print("Portfolio already exists under this name, for different users.")
+                session.rollback()  # No changes to be committed
+                return None  # Optional: You could also update the user associations here if needed
 
-    # If portfolio does not exist, create a new one
-    new_portfolio = Portfolio(name=project_name)
-    session.add(new_portfolio)
-    session.flush()  # Ensures that 'id' is available before committing if needed
+            return portfolio  # Return the existing portfolio data
 
-    # Create new user associations in PortfolioUsers
-    for user_id in user_ids:
-        new_association = PortfolioUsers(userId=user_id, portfolioId=new_portfolio.id)
-        session.add(new_association)
+        # If portfolio does not exist, create a new one with the provided status and goal
+        new_portfolio = Portfolio(name=project_name, status=status, goal=goal)
+        session.add(new_portfolio)
+        session.flush()  # Ensures that 'id' is available before committing if needed
 
-    session.commit()
-    print(f"New portfolio created with ID: {new_portfolio.id}")
-    session.close()
+        # Create new user associations in PortfolioUsers
+        for user_id in user_ids:
+            new_association = PortfolioUsers(user_id=user_id, portfolioId=new_portfolio.id)  # corrected attribute name
+            session.add(new_association)
 
-    return new_portfolio
+        session.commit()
+        print(f"New portfolio created with ID: {new_portfolio.id}")
+        return new_portfolio
+
+    except Exception as e:
+        session.rollback()  # Ensure no partial changes are committed
+        print(f"An error occurred: {e}")
+        raise
+
+    finally:
+        session.close()
 
 
 def app():
@@ -125,3 +130,33 @@ def app():
 
     # Create the asset list and the body of the portfolio
     asset_list = ownership_instance.get_asset_list()
+
+    # Create the portfolio
+    # TODO: Wasn't working
+    # create_sfr_portfolio(project_name=PROJECT_NAME, user_ids=USER_IDS, status="scoping", goal="Increasing EPC")
+
+    portfolio_id = 99
+    user_id = 8
+
+    filename = f"{user_id}/{portfolio_id}/asset_list.csv"
+    save_csv_to_s3(
+        dataframe=asset_list,
+        bucket_name="retrofit-plan-inputs-dev",
+        file_name=filename
+    )
+
+    body = {
+        "portfolio_id": str(portfolio_id),
+        "housing_type": "Private",
+        "goal": "Increasing EPC",
+        "goal_value": "C",
+        "trigger_file_path": filename,
+        "already_installed_file_path": "",
+        "patches_file_path": "",
+        "non_invasive_recommendations_file_path": "",
+        "scenario_name": "Hit EPC C",
+        "multi_plan": True,
+        "exclusions": ["fireplace", "floor_insulation"],
+        "budget": None,
+    }
+    print(body)

From 38eaa52e6c5bbc068a4008cd425e7a56966f3936 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 19 Aug 2024 18:09:10 +0100
Subject: [PATCH 125/182] handled bug for dual primary heating

---
 backend/Property.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/backend/Property.py b/backend/Property.py
index 649a9547..f8b40872 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -93,7 +93,7 @@ class Property:
         self.data = {
             k.replace("_", "-"): v for k, v in epc_record.get("prepared_epc").items()
         }
-        
+
         self.old_data = epc_record.get("old_data")
         self.property_dimensions = None
         # This is a list of measures that have already been installed in the property, typically found as a result
@@ -1181,6 +1181,14 @@ class Property:
         self.heating_energy_source = list({
             fuel for key, fuel in heating_fuel_mapping.items() if self.main_heating.get(key, False)
         })
+
+        if set(self.heating_energy_source) == {'Electricity', 'Natural Gas'}:
+            # It means they have mixed heating so we take the primary one, based on main fuel
+            if self.main_fuel["clean_description"] == "Mains gas not community":
+                self.heating_energy_source = ['Natural Gas']
+            else:
+                self.heating_energy_source = ['Electricity']
+
         if len(self.heating_energy_source) == 0 or len(self.heating_energy_source) > 1:
             raise Exception("Investigate me")
 

From 6bd66d83f5f6964206ffe623f4096a749af3176e Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 19 Aug 2024 18:59:21 +0100
Subject: [PATCH 126/182] handling odd heating systems

---
 backend/Property.py                    | 27 ++++++++++++++-----
 backend/app/assumptions.py             | 36 +++++++++++++++++++++++++
 backend/app/plan/router.py             |  8 +++++-
 backend/ml_models/AnnualBillSavings.py | 17 +++++++++++-
 recommendations/Recommendations.py     | 37 +-------------------------
 recommendations/rdsap_tables.py        |  8 +++---
 6 files changed, 85 insertions(+), 48 deletions(-)

diff --git a/backend/Property.py b/backend/Property.py
index f8b40872..5bca434f 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -9,7 +9,6 @@ from etl.epc.Dataset import TrainingDataset
 from etl.epc.Record import EPCRecord
 from etl.epc.settings import LATEST_FIELD, MANDATORY_FIXED_FEATURES
 from etl.epc_clean.epc_attributes.all_cleaners import all_cleaner_map
-from etl.solar.SolarPhotoSupply import SolarPhotoSupply
 from utils.logger import setup_logger
 from utils.s3 import read_dataframe_from_s3_parquet
 from etl.epc.settings import DATA_ANOMALY_MATCHES
@@ -18,11 +17,11 @@ from recommendations.recommendation_utils import (
     estimate_perimeter,
     get_wall_type,
     estimate_external_wall_area,
-    esimtate_pitched_roof_area,
     estimate_windows,
 )
 from backend.ml_models.AnnualBillSavings import AnnualBillSavings
 from backend.app.utils import sap_to_epc
+import backend.app.assumptions as assumptions
 
 ENVIRONMENT = os.environ.get("ENVIRONMENT", "dev")
 DATA_BUCKET = os.environ.get(
@@ -1184,11 +1183,20 @@ class Property:
 
         if set(self.heating_energy_source) == {'Electricity', 'Natural Gas'}:
             # It means they have mixed heating so we take the primary one, based on main fuel
-            if self.main_fuel["clean_description"] == "Mains gas not community":
+            # This will probably happen in the case of an extension
+            if self.main_fuel["clean_description"] in ["Mains gas not community", "Mains gas community"]:
                 self.heating_energy_source = ['Natural Gas']
             else:
                 self.heating_energy_source = ['Electricity']
 
+        if set(self.heating_energy_source) == {'Natural Gas', 'Wood Logs'}:
+            # It means they have mixed heating so we take the primary one, based on main fuel
+            # This will probably happen in the case of an extension
+            if self.main_fuel["clean_description"] in ["Mains gas not community", "Mains gas community"]:
+                self.heating_energy_source = ['Natural Gas']
+            else:
+                self.heating_energy_source = ['Wood Logs']
+
         if len(self.heating_energy_source) == 0 or len(self.heating_energy_source) > 1:
             raise Exception("Investigate me")
 
@@ -1216,6 +1224,10 @@ class Property:
 
             if fuel in ['Main System', "Community Scheme"]:
                 self.hot_water_energy_source = self.heating_energy_source
+            elif fuel in ['Secondary System']:
+                # Check the secondary heating system
+                secondary_heating = self.data["secondheat-description"]
+                self.hot_water_energy_source = assumptions.DESCRIPTIONS_TO_FUEL_TYPES[secondary_heating]["fuel"]
             else:
                 raise Exception("Investiage me")
 
@@ -1273,7 +1285,10 @@ class Property:
             return self.current_energy_consumption
 
         # If the property currently has an electric boiler, it will still benefit from the ASHP efficiency gain
-        remap_fuel_sources = ["Natural Gas", "LPG", "Wood Logs", "Oil", "Electricity"]
+        remap_fuel_sources = [
+            "Natural Gas", "LPG", "Wood Logs", "Oil", "Electricity", "Coal", "Smokeless Fuel",
+            "Natural Gas + Solar Thermal", "Anthracite", "Wood Pellets",
+        ]
 
         heating_energy_source = self.heating_energy_source
         hot_water_energy_source = self.hot_water_energy_source
@@ -1281,11 +1296,11 @@ class Property:
         hotwater_consumption = self.energy_consumption_estimates["unadjusted"]["hot_water"]
 
         if (heating_energy_source not in remap_fuel_sources) or (
-            hot_water_energy_source not in remap_fuel_sources
+            hot_water_energy_source not in remap_fuel_sources + ["Electricity + Solar Thermal"]
         ):
             raise NotImplementedError("Have not implemented estimating electrical consumption for this fuel type")
 
-        if heating_energy_source in ["Natural Gas", "LPG", "Wood Logs"]:
+        if heating_energy_source in remap_fuel_sources:
             # Adjust the heating consumption to reflect the expected efficiency of an ASHP
             heating_consumption = heating_consumption / (assumed_ashp_efficiency / 100)
 
diff --git a/backend/app/assumptions.py b/backend/app/assumptions.py
index f0ddf868..5f8cb85c 100644
--- a/backend/app/assumptions.py
+++ b/backend/app/assumptions.py
@@ -6,3 +6,39 @@ AVERAGE_ASHP_EFFICIENCY = 300
 # Conservative estimate of the proportion of electricity that will be consumed, whereas the rest will
 # be exported
 SOLAR_CONSUMPTION_PROPORTION = 0.5
+
+DESCRIPTIONS_TO_FUEL_TYPES = {
+    "Air source heat pump, radiators, electric": {
+        "fuel": "Electricity", "cop": AVERAGE_ASHP_EFFICIENCY / 100
+    },
+    "Boiler and radiators, mains gas": {"fuel": 'Natural Gas', "cop": 0.9},
+    'Electric storage heaters': {"fuel": 'Electricity', "cop": 1},
+    "Electric immersion, off-peak": {"fuel": 'Electricity', "cop": 1},
+    "Electric storage heaters, radiators": {"fuel": 'Electricity', "cop": 1},
+    "Room heaters, electric": {"fuel": 'Electricity', "cop": 1},
+    "Electric immersion, standard tariff": {"fuel": 'Electricity', "cop": 1},
+    "Portable electric heaters assumed for most rooms": {"fuel": 'Electricity', "cop": 1},
+    "Boiler and radiators, LPG": {"fuel": 'LPG', "cop": 0.9},
+    "Room heaters, dual fuel (mineral and wood)": {"fuel": 'Wood Logs', "cop": 1},
+    "Room heaters, mains gas": {"fuel": 'Natural Gas', "cop": 0.9},
+    "Warm air, mains gas": {"fuel": 'Natural Gas', "cop": 0.9},
+    "Boiler, mains gas": {"fuel": 'Natural Gas', "cop": 0.9},
+    "Gas multipoint": {"fuel": "Natural Gas", "cop": 0.9},
+    "Warm air, Electricaire": {"fuel": "Electricity", "cop": 1},
+    "Gas boiler/circulator": {"fuel": "Natural Gas", "cop": 0.9},
+    "Boiler and underfloor heating, mains gas": {"fuel": "Natural Gas", "cop": 0.9},
+    "No system present: electric heaters assumed": {"fuel": "Electricity", "cop": 1},
+    "Electric instantaneous at point of use": {"fuel": "Electricity", "cop": 1},
+    "Boiler and radiators, oil": {"fuel": "Oil", "cop": 0.9},
+    "Electric storage heaters, Electric storage heaters": {"fuel": "Electricity", "cop": 1},
+    "Boiler and radiators, electric": {"fuel": "Electricity", "cop": 0.9},
+    "Gas boiler/circulator, no cylinder thermostat": {"fuel": "Natural Gas", "cop": 0.9},
+    "Boiler and radiators, dual fuel (mineral and wood)": {"fuel": "Wood Logs", "cop": 0.9},
+    "Electric immersion, standard tariff, plus solar": {"fuel": "Electricity + Solar Thermal", "cop": 1},
+    "From main system, flue gas heat recovery": {"fuel": "Natural Gas", "cop": 0.9},
+    "Electric underfloor heating": {"fuel": "Electricity", "cop": 1},
+    "No system present: electric immersion assumed": {"fuel": "Electricity", "cop": 1},
+    "Air source heat pump, underfloor, electric": {
+        "fuel": "Electricity", "cop": AVERAGE_ASHP_EFFICIENCY / 100
+    },
+}
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 23d3f5d2..f6e98918 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -508,7 +508,6 @@ async def trigger_plan(body: PlanTriggerRequest):
         logger.info("Getting spatial data")
         input_properties = OpenUprnClient.set_spatial_data(input_properties, bucket_name=get_settings().DATA_BUCKET)
 
-        logger.info("Setting property features")
         [p.set_features(cleaned=cleaned, kwh_client=kwh_client, kwh_predictions=kwh_preds) for p in input_properties]
         logger.info("Performing solar analysis")
 
@@ -520,6 +519,13 @@ async def trigger_plan(body: PlanTriggerRequest):
         #       basic estimate of roof area
 
         # TODO: Debug this
+        for p in input_properties:
+            if p.uprn in [10002634631, 100031601798, 10009574286, 10007366417]:
+                continue
+            p.estimate_electrical_consumption(
+                assumed_ashp_efficiency=assumptions.AVERAGE_ASHP_EFFICIENCY, exclusions=body.exclusions
+            )
+
         building_ids = [
             {
                 "building_id": p.building_id,
diff --git a/backend/ml_models/AnnualBillSavings.py b/backend/ml_models/AnnualBillSavings.py
index 13c9e0a5..d72feed7 100644
--- a/backend/ml_models/AnnualBillSavings.py
+++ b/backend/ml_models/AnnualBillSavings.py
@@ -274,7 +274,7 @@ class AnnualBillSavings:
             )
             return (kwh / cop) * cost_per_kwh
 
-        if fuel == "Wood Logs":
+        if fuel in ["Wood Logs", "Wood Pellets"]:
             price_data = cls.FUEL_DATA[cls.FUEL_DATA["Fuel"] == "Pellets (Bagged)"].squeeze()
             cost_per_kwh = cls.cost_per_kwh(
                 price_data["Price (p)"], price_data["Energy Content, Net Calorific value (kWh/unit)"]
@@ -296,4 +296,19 @@ class AnnualBillSavings:
             )
             return (kwh / cop) * cost_per_kwh
 
+        if fuel in ["Smokeless Fuel", "Anthracite"]:
+            price_data = cls.FUEL_DATA[cls.FUEL_DATA["Fuel"] == "Smokeless fuel"].squeeze()
+            cost_per_kwh = cls.cost_per_kwh(
+                price_data["Price (p)"], price_data["Energy Content, Net Calorific value (kWh/unit)"]
+            )
+            return (kwh / cop) * cost_per_kwh
+
+        # Coal is costed from its own "Coal" row in FUEL_DATA
+        if fuel == "Coal":
+            price_data = cls.FUEL_DATA[cls.FUEL_DATA["Fuel"] == "Coal"].squeeze()
+            cost_per_kwh = cls.cost_per_kwh(
+                price_data["Price (p)"], price_data["Energy Content, Net Calorific value (kWh/unit)"]
+            )
+            return (kwh / cop) * cost_per_kwh
+
         raise Exception("Fuel not recognised")
diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py
index fef7472c..4f75b30b 100644
--- a/recommendations/Recommendations.py
+++ b/recommendations/Recommendations.py
@@ -19,41 +19,6 @@ from backend.apis.GoogleSolarApi import GoogleSolarApi
 import backend.app.assumptions as assumptions
 
 ASHP_COP = 3
-DESCRIPTIONS_TO_FUEL_TYPES = {
-    "Air source heat pump, radiators, electric": {
-        "fuel": "Electricity", "cop": assumptions.AVERAGE_ASHP_EFFICIENCY / 100
-    },
-    "Boiler and radiators, mains gas": {"fuel": 'Natural Gas', "cop": 0.9},
-    'Electric storage heaters': {"fuel": 'Electricity', "cop": 1},
-    "Electric immersion, off-peak": {"fuel": 'Electricity', "cop": 1},
-    "Electric storage heaters, radiators": {"fuel": 'Electricity', "cop": 1},
-    "Room heaters, electric": {"fuel": 'Electricity', "cop": 1},
-    "Electric immersion, standard tariff": {"fuel": 'Electricity', "cop": 1},
-    "Portable electric heaters assumed for most rooms": {"fuel": 'Electricity', "cop": 1},
-    "Boiler and radiators, LPG": {"fuel": 'LPG', "cop": 0.9},
-    "Room heaters, dual fuel (mineral and wood)": {"fuel": 'Wood Logs', "cop": 1},
-    "Room heaters, mains gas": {"fuel": 'Natural Gas', "cop": 0.9},
-    "Warm air, mains gas": {"fuel": 'Natural Gas', "cop": 0.9},
-    "Boiler, mains gas": {"fuel": 'Natural Gas', "cop": 0.9},
-    "Gas multipoint": {"fuel": "Natural Gas", "cop": 0.9},
-    "Warm air, Electricaire": {"fuel": "Electricity", "cop": 1},
-    "Gas boiler/circulator": {"fuel": "Natural Gas", "cop": 0.9},
-    "Boiler and underfloor heating, mains gas": {"fuel": "Natural Gas", "cop": 0.9},
-    "No system present: electric heaters assumed": {"fuel": "Electricity", "cop": 1},
-    "Electric instantaneous at point of use": {"fuel": "Electricity", "cop": 1},
-    "Boiler and radiators, oil": {"fuel": "Oil", "cop": 0.9},
-    "Electric storage heaters, Electric storage heaters": {"fuel": "Electricity", "cop": 1},
-    "Boiler and radiators, electric": {"fuel": "Electricity", "cop": 0.9},
-    "Gas boiler/circulator, no cylinder thermostat": {"fuel": "Natural Gas", "cop": 0.9},
-    "Boiler and radiators, dual fuel (mineral and wood)": {"fuel": "Wood Logs", "cop": 0.9},
-    "Electric immersion, standard tariff, plus solar": {"fuel": "Electricity + Solar Thermal", "cop": 1},
-    "From main system, flue gas heat recovery": {"fuel": "Natural Gas", "cop": 0.9},
-    "Electric underfloor heating": {"fuel": "Electricity", "cop": 1},
-    "No system present: electric immersion assumed": {"fuel": "Electricity", "cop": 1},
-    "Air source heat pump, underfloor, electric": {
-        "fuel": "Electricity", "cop": assumptions.AVERAGE_ASHP_EFFICIENCY / 100
-    },
-}
 STARTING_DUMMY_ID_VALUE = -9999
 
 
@@ -551,7 +516,7 @@ class Recommendations:
                 }
             raise NotImplementedError("Handle this case")
 
-        mapped = DESCRIPTIONS_TO_FUEL_TYPES[heating_description]
+        mapped = assumptions.DESCRIPTIONS_TO_FUEL_TYPES[heating_description]
         heating_fuel = mapped["fuel"]
 
         if hotwater_description in [
diff --git a/recommendations/rdsap_tables.py b/recommendations/rdsap_tables.py
index 98cda9ab..5110764b 100644
--- a/recommendations/rdsap_tables.py
+++ b/recommendations/rdsap_tables.py
@@ -514,8 +514,8 @@ FLOOR_LEVEL_MAP = {
     "top floor": 5,
     "20+": 20,
     "21st or above": 21,
-    **{str(i).zfill(2): i for i in range(0, 21)},
-    **{ordinal(i): i for i in range(-1, 21)},
-    **{str(i): i for i in range(-1, 21)},
-    **{i: i for i in range(-1, 21)},
+    **{str(i).zfill(2): i for i in range(0, 51)},
+    **{ordinal(i): i for i in range(-1, 51)},
+    **{str(i): i for i in range(-1, 51)},
+    **{i: i for i in range(-1, 51)},
 }

From f13ce39bed7fce84a7c013d9dce2cc5fc0e50db9 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 19 Aug 2024 19:06:45 +0100
Subject: [PATCH 127/182] estimate_new_consumption debugging

---
 backend/Property.py                    | 2 +-
 backend/app/plan/router.py             | 8 --------
 backend/ml_models/AnnualBillSavings.py | 8 ++++++++
 3 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/backend/Property.py b/backend/Property.py
index 5bca434f..19e5cb2e 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -1287,7 +1287,7 @@ class Property:
         # If the property currently has an electric boiler, it will still benefit from the ASHP efficiency gain
         remap_fuel_sources = [
             "Natural Gas", "LPG", "Wood Logs", "Oil", "Electricity", "Coal", "Smokeless Fuel",
-            "Natural Gas + Solar Thermal", "Anthracite", "Wood Pellets",
+            "Natural Gas + Solar Thermal", "Anthracite", "Wood Pellets", "LPG + Solar Thermal"
         ]
 
         heating_energy_source = self.heating_energy_source
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index f6e98918..0a29f67c 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -518,14 +518,6 @@ async def trigger_plan(body: PlanTriggerRequest):
         # TODO: For simple properties, we should do a comparison/check between the solar API's roof area and the
         #       basic estimate of roof area
 
-        # TODO: Debug this
-        for p in input_properties:
-            if p.uprn in [10002634631, 100031601798, 10009574286, 10007366417]:
-                continue
-            p.estimate_electrical_consumption(
-                assumed_ashp_efficiency=assumptions.AVERAGE_ASHP_EFFICIENCY, exclusions=body.exclusions
-            )
-
         building_ids = [
             {
                 "building_id": p.building_id,
diff --git a/backend/ml_models/AnnualBillSavings.py b/backend/ml_models/AnnualBillSavings.py
index d72feed7..211e5ea6 100644
--- a/backend/ml_models/AnnualBillSavings.py
+++ b/backend/ml_models/AnnualBillSavings.py
@@ -289,6 +289,14 @@ class AnnualBillSavings:
             # The solar thermal covers a % of the heating kwh, so we need to adjust the cost
             return (kwh / cop) * assumptions.SOLAR_CONSUMPTION_PROPORTION * cls.ELECTRICITY_PRICE_CAP
 
+        if fuel == "LPG + Solar Thermal":
+            # The solar thermal covers a % of the heating kwh, so we need to adjust the cost
+            price_data = cls.FUEL_DATA[cls.FUEL_DATA["Fuel"] == "LPG"].squeeze()
+            cost_per_kwh = cls.cost_per_kwh(
+                price_data["Price (p)"], price_data["Energy Content, Net Calorific value (kWh/unit)"]
+            )
+            return (kwh / cop) * cost_per_kwh * assumptions.SOLAR_CONSUMPTION_PROPORTION
+
         if fuel == "Oil":
             price_data = cls.FUEL_DATA[cls.FUEL_DATA["Fuel"] == "Kerosene"].squeeze()
             cost_per_kwh = cls.cost_per_kwh(

From cb993024bbfd84fea6425864039acd43d5ec96e1 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 19 Aug 2024 19:19:25 +0100
Subject: [PATCH 128/182] Adding missing regions

---
 backend/app/plan/router.py          | 2 +-
 recommendations/county_to_region.py | 3 +++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 0a29f67c..a4292265 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -620,7 +620,7 @@ async def trigger_plan(body: PlanTriggerRequest):
                         p.set_solar_panel_configuration(unit_solar_panel_configuration)
         if individual_units:
             # Model the solar potential at the property level
-            for unit in individual_units:
+            for unit in tqdm(individual_units):
                 property_instance = [p for p in input_properties if p.id == unit["property_id"]][0]
                 # At this level, we check if the property is suitable for solar and if now, skip
                 if not property_instance.is_solar_pv_valid():
diff --git a/recommendations/county_to_region.py b/recommendations/county_to_region.py
index 7ca86715..f7d5193f 100644
--- a/recommendations/county_to_region.py
+++ b/recommendations/county_to_region.py
@@ -161,6 +161,9 @@ county_to_region_map = {
 
     # Additional mappings requried, based on what we find in the EPC database
     'Greater London Authority': 'Inner London',
+    'Herefordshire, County of': 'West Midlands',
+    "North Northamptonshire": 'East Midlands',
+    "West Northamptonshire": 'East Midlands',
     # We have a bunch of inner London local authority mappings, which can be used if the county is not found
     'Barking and Dagenham': 'Inner London', 'Barnet': 'Inner London', 'Bexley': 'Inner London',
     'Brent': 'Inner London', 'Bromley': 'Inner London', 'Camden': 'Inner London', 'City of London': 'Inner London',

From 01c688da23de301b5d1b63d94a0485e7158a67a5 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 19 Aug 2024 21:38:53 +0100
Subject: [PATCH 129/182] debugging solar api

---
 backend/apis/GoogleSolarApi.py | 22 ++++++++++++++++++++--
 backend/app/plan/router.py     |  5 ++---
 2 files changed, 22 insertions(+), 5 deletions(-)

diff --git a/backend/apis/GoogleSolarApi.py b/backend/apis/GoogleSolarApi.py
index 905d4975..13c7abb4 100644
--- a/backend/apis/GoogleSolarApi.py
+++ b/backend/apis/GoogleSolarApi.py
@@ -258,7 +258,7 @@ class GoogleSolarApi:
 
         # Remove any north facing roof segments
         panel_performance = []
-        for config in self.insights_data["solarPotential"]["solarPanelConfigs"]:
+        for config in self.insights_data["solarPotential"].get("solarPanelConfigs", []):
             roof_segment_summaries = config["roofSegmentSummaries"]
             # Filter on just the segments in self.roof_segment_indexes
             roof_segment_summaries = [
@@ -310,7 +310,25 @@ class GoogleSolarApi:
             )
 
         panel_performance = pd.DataFrame(panel_performance)
-        # We can have duplicate configurations
+
+        if panel_performance.empty:
+            self.panel_performance = pd.DataFrame(
+                columns=[
+                    "n_panels",
+                    "yearly_dc_energy",
+                    "total_cost",
+                    "panneled_roof_area",
+                    "array_wattage",
+                    "initial_ac_kwh_per_year",
+                    "lifetime_ac_kwh",
+                    "roi",
+                    "expected_payback_years",
+                    "lifetime_dc_kwh"
+                ]
+            )
+            return
+
+            # We can have duplicate configurations
         panel_performance = panel_performance.drop_duplicates()
         # If we look at the building level, we don't include any projects fewer than 10 panels, otherwise the
         # minimum is 4
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index a4292265..e773e303 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -472,8 +472,6 @@ async def trigger_plan(body: PlanTriggerRequest):
         materials = get_materials(session)
         cleaned = get_cleaned()
 
-        solar_api_client = GoogleSolarApi(api_key=get_settings().GOOGLE_SOLAR_API_KEY)
-
         dataset_version = "2024-07-08"
         energy_consumption_client = EnergyConsumptionModel(
             model_paths={
@@ -588,6 +586,7 @@ async def trigger_plan(body: PlanTriggerRequest):
                 energy_consumption = sum(
                     [entry['energy_consumption'] for entry in building_ids if entry['building_id'] == building_id]
                 )
+                solar_api_client = GoogleSolarApi(api_key=get_settings().GOOGLE_SOLAR_API_KEY)
                 solar_api_client.get(
                     longitude=coordinates["longitude"],
                     latitude=coordinates["latitude"],
@@ -629,7 +628,7 @@ async def trigger_plan(body: PlanTriggerRequest):
                 # We check if we have a solar non-invasive recommendation
                 if [r for r in property_instance.non_invasive_recommendations if r["type"] == "solar_pv"]:
                     continue
-
+                solar_api_client = GoogleSolarApi(api_key=get_settings().GOOGLE_SOLAR_API_KEY)
                 solar_api_client.get(
                     longitude=unit["longitude"],
                     latitude=unit["latitude"],

From 9436bfe7d6322269ee9a5dabf3e2e134d99f1b39 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 19 Aug 2024 21:54:33 +0100
Subject: [PATCH 130/182] Debugging solar api for missing panel config

---
 backend/apis/GoogleSolarApi.py | 20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/backend/apis/GoogleSolarApi.py b/backend/apis/GoogleSolarApi.py
index 13c7abb4..e930fcff 100644
--- a/backend/apis/GoogleSolarApi.py
+++ b/backend/apis/GoogleSolarApi.py
@@ -309,7 +309,7 @@ class GoogleSolarApi:
                 }
             )
 
-        panel_performance = pd.DataFrame(panel_performance)
+        panel_performance = pd.DataFrame([panel_performance])
 
         if panel_performance.empty:
             self.panel_performance = pd.DataFrame(
@@ -329,12 +329,30 @@ class GoogleSolarApi:
             return
 
             # We can have duplicate configurations
+
         panel_performance = panel_performance.drop_duplicates()
         # If we look at the building level, we don't include any projects fewer than 10 panels, otherwise the
         # minimum is 4
         min_panels = 10 if is_building else 4
         panel_performance = panel_performance[panel_performance["n_panels"] >= min_panels]
 
+        if panel_performance.empty:
+            self.panel_performance = pd.DataFrame(
+                columns=[
+                    "n_panels",
+                    "yearly_dc_energy",
+                    "total_cost",
+                    "panneled_roof_area",
+                    "array_wattage",
+                    "initial_ac_kwh_per_year",
+                    "lifetime_ac_kwh",
+                    "roi",
+                    "expected_payback_years",
+                    "lifetime_dc_kwh"
+                ]
+            )
+            return
+
         panel_performance["initial_ac_kwh_per_year"] = panel_performance["yearly_dc_energy"] * self.dc_to_ac_rate
 
         # Remove anything where the total ac energy is less than half of the array wattage

From 9938dea1904e6cd14fc882d65ea5ca0ed1329967 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 20 Aug 2024 15:50:50 +0100
Subject: [PATCH 131/182] added excluded uprns

---
 etl/ownership/Ownership.py                       |  6 ++++++
 etl/ownership/config.py                          | 11 +++++++++--
 etl/ownership/projects/midlands_portfolio/app.py |  6 ++++--
 3 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/etl/ownership/Ownership.py b/etl/ownership/Ownership.py
index 9e328452..a3aa9e15 100644
--- a/etl/ownership/Ownership.py
+++ b/etl/ownership/Ownership.py
@@ -60,6 +60,7 @@ class Ownership:
         average_property_value: float,
         portfolio_value: float,
         excluded_owners: List[str] = None,
+        excluded_uprns: List[int] = None,
     ):
         """
 
@@ -85,6 +86,7 @@ class Ownership:
         self.land_registry_path = land_registry_path
 
         self.excluded_owners = [] if excluded_owners is None else excluded_owners
+        self.excluded_uprns = [] if excluded_uprns is None else excluded_uprns
 
         self.run_timestamp = str(datetime.now())
         self.project_name = project_name
@@ -204,6 +206,10 @@ class Ownership:
             data.append(epc_data)
 
         self.epc_data = pd.concat(data, ignore_index=True)
+
+        if self.excluded_uprns:
+            self.epc_data = self.epc_data[~self.epc_data["UPRN"].astype(float).isin(self.excluded_uprns)]
+
         # We now store the data in s3
         save_excel_to_s3(
             df=self.epc_data,
diff --git a/etl/ownership/config.py b/etl/ownership/config.py
index c737d532..1940e06d 100644
--- a/etl/ownership/config.py
+++ b/etl/ownership/config.py
@@ -1,5 +1,12 @@
 # These are the registration numbers for companies we've heard a reponse from, and cannot sell
 OWNERS_WHO_CANT_SELL = [
-    # Al Rayan
-    "4483430"
+    # Al Rayan - they're the senior lender, not able to sell
+    "4483430",
+    # Ultrabarn - they're unwilling to sell and will sort any retrofits themselves
+    "2794851"
+]
+
+EXCLUDED_UPRNS = [
+    # This property no longer exists
+    200003827624
 ]
diff --git a/etl/ownership/projects/midlands_portfolio/app.py b/etl/ownership/projects/midlands_portfolio/app.py
index bf18d846..99b8fc48 100644
--- a/etl/ownership/projects/midlands_portfolio/app.py
+++ b/etl/ownership/projects/midlands_portfolio/app.py
@@ -1,8 +1,9 @@
+import pandas as pd
 from sqlalchemy.orm import sessionmaker
 from backend.app.db.connection import db_engine
 from backend.app.db.models.portfolio import Portfolio, PortfolioUsers
 from etl.ownership.Ownership import Ownership
-from etl.ownership.config import OWNERS_WHO_CANT_SELL as EXCLUDED_OWNERS
+from etl.ownership.config import OWNERS_WHO_CANT_SELL as EXCLUDED_OWNERS, EXCLUDED_UPRNS
 from utils.s3 import save_csv_to_s3
 
 # Set up the project configuration
@@ -122,7 +123,8 @@ def app():
         bucket=DATA_BUCKET,
         average_property_value=PROPERTY_VALUE_ESTIMATE,
         portfolio_value=PORTFOLIO_VALUE,
-        excluded_owners=EXCLUDED_OWNERS
+        excluded_owners=EXCLUDED_OWNERS,
+        excluded_uprns=EXCLUDED_UPRNS
     )
     ownership_instance.pipeline(column_filters=epc_column_filters)
 

From a69ec1dd6b53422490bf9171c5965573f7f110c5 Mon Sep 17 00:00:00 2001
From: Michael Duong <michaelduong22@gmail.com>
Date: Tue, 20 Aug 2024 16:28:39 +0100
Subject: [PATCH 132/182] add basic script for scraping zoopla

---
 etl/webscrape/Zoopla.py | 38 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 38 insertions(+)
 create mode 100644 etl/webscrape/Zoopla.py

diff --git a/etl/webscrape/Zoopla.py b/etl/webscrape/Zoopla.py
new file mode 100644
index 00000000..bb86c759
--- /dev/null
+++ b/etl/webscrape/Zoopla.py
@@ -0,0 +1,38 @@
+# Initial Code
+
+from seleniumbase import SB
+import time
+
+uprns = [
+    100071297618,
+    100080893397,
+    100060778033,
+    200004793081,
+    100071265143,
+    100071297618,
+    100080893397,
+    100060778033,
+    200004793081,
+    100071265143,
+]
+
+estimate_list = []
+
+for uprn in uprns:
+
+    # Probably can change the timings here
+    time.sleep(5)
+    with SB(uc=True) as sb:
+        sb.uc_open_with_reconnect(
+            f"https://www.zoopla.co.uk/property/uprn/{uprn}/",
+            3,
+        )
+
+        soup = sb.get_beautiful_soup()
+
+        estimates = soup.find_all("div", {"data-testid": "sale-estimate"})
+        # Can change the way we extract the text here
+        estimate_text = (
+            estimates[-1].find_all("p")[-1].find_all("span")[-1]["aria-label"]
+        )
+        estimate_list.append(estimate_text)

From 41c38e622de7ffddd7e9680b44648afacbe8dd79 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 20 Aug 2024 17:43:00 +0100
Subject: [PATCH 133/182] debugging ownership class

---
 etl/ownership/Ownership.py                    | 60 ++++++++++++++++++-
 .../projects/midlands_portfolio/app.py        |  3 +
 2 files changed, 60 insertions(+), 3 deletions(-)

diff --git a/etl/ownership/Ownership.py b/etl/ownership/Ownership.py
index a3aa9e15..5f506881 100644
--- a/etl/ownership/Ownership.py
+++ b/etl/ownership/Ownership.py
@@ -4,7 +4,7 @@ from tqdm import tqdm
 import pandas as pd
 import Levenshtein
 import re
-from utils.s3 import save_excel_to_s3
+from utils.s3 import save_excel_to_s3, read_excel_from_s3
 from utils.logger import setup_logger
 from backend.SearchEpc import SearchEpc
 from etl.spatial.OpenUprnClient import OpenUprnClient
@@ -948,7 +948,7 @@ class Ownership:
         matched_addresses_final = self.matched_addresses[
             ~self.matched_addresses["sold_recently"] &
             ~self.matched_addresses["sale_lodged_recently"]
-            ]
+            ].copy()
 
         logger.info("Performing conservation area and listed/herigage building filtering")
 
@@ -973,7 +973,7 @@ class Ownership:
 
         # Filter combined_matching_lookup accordingly
         combined_matching_lookup_final = self.combined_matching_lookup[
-            self.combined_matching_lookup["UPRN"].isin(self.combined_matching_lookup["UPRN"])
+            self.combined_matching_lookup["UPRN"].isin(matched_addresses_final["UPRN"])
         ]
 
         # Roll up portfolio
@@ -991,8 +991,16 @@ class Ownership:
             )
         ]
 
+        # We perform some checks
+        if self.portfolio_owners["total_number_of_properties"].sum() != self.portfolio_properties["UPRN"].nunique():
+            raise ValueError("Portfolio owners and properties don't match")
+
         self.portfolio_epc_data = self.epc_data[self.epc_data["UPRN"].isin(self.portfolio_properties["UPRN"])]
 
+        # Additional checks
+        if self.portfolio_properties["UPRN"].nunique() != self.portfolio_epc_data["UPRN"].nunique():
+            raise ValueError("Portfolio properties and epc data don't match")
+
         logger.info("Storing final outpus")
         # Store data
         save_excel_to_s3(
@@ -1028,3 +1036,49 @@ class Ownership:
         )
 
         return asset_list
+
+    def create_final_outputs(self, portfolio_timestamp):
+        """
+        Given the completed outputs of the matching process, this function creates the final outputs, after matching
+        valuation data, and creates a "working" directory, which is our current view of the sfr portfolio. This means
+        that we can iterate on the portfolio without affecting the final outputs, and then once we're happy with the
+        new version, we can commit those files to the "working" directory. This inforamtion shouldn't update very
+        often and so we're ok to store this at a daily level
+        :return:
+        """
+
+        # Step 1: Read in the valuations data
+        valuations = read_excel_from_s3(
+            bucket_name=self.bucket,
+            file_key=f"ownership/{self.project_name}/sfr property valuations.xlsx",
+            header_row=0
+        )
+
+        # Load in the portfolio data
+        # 1) owners
+        portfolio_owners = read_excel_from_s3(
+            bucket_name=self.bucket,
+            file_key=f"ownership/{self.project_name}/{portfolio_timestamp}/portfolio_owners.xlsx",
+            header_row=0
+        )
+        # 2) EPC
+        portfolio_epc_data = read_excel_from_s3(
+            bucket_name=self.bucket,
+            file_key=f"ownership/{self.project_name}/{portfolio_timestamp}/portfolio_epc_data.xlsx",
+            header_row=0
+        )
+
+        # 3) properties
+        portfolio_properties = read_excel_from_s3(
+            bucket_name=self.bucket,
+            file_key=f"ownership/{self.project_name}/{portfolio_timestamp}/portfolio_properties.xlsx",
+            header_row=0
+        )
+
+        portfolio_epc_data["UPRN"].duplicated().sum()
+        portfolio_properties["UPRN"].duplicated().sum()
+        portfolio_properties[~portfolio_properties["UPRN"].astype(str).isin(portfolio_epc_data["UPRN"].astype(str))]
+
+        portfolio_properties[~portfolio_properties["UPRN"].astype(str).isin(portfolio_epc_data["UPRN"].astype(str))]
+
+        portfolio_epc_data.shape
diff --git a/etl/ownership/projects/midlands_portfolio/app.py b/etl/ownership/projects/midlands_portfolio/app.py
index 99b8fc48..ae7822a6 100644
--- a/etl/ownership/projects/midlands_portfolio/app.py
+++ b/etl/ownership/projects/midlands_portfolio/app.py
@@ -162,3 +162,6 @@ def app():
         "budget": None,
     }
     print(body)
+
+    # We now need a distinct step to prepare final outputs
+    portfolio_timestamp = "2024-08-20 15:51:10.292075"

From 3799a780b46012179adeb84d1dd81e31761d86b3 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 20 Aug 2024 17:45:19 +0100
Subject: [PATCH 134/182] finding missed uprns

---
 etl/ownership/projects/midlands_portfolio/app.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/etl/ownership/projects/midlands_portfolio/app.py b/etl/ownership/projects/midlands_portfolio/app.py
index ae7822a6..f868bd3b 100644
--- a/etl/ownership/projects/midlands_portfolio/app.py
+++ b/etl/ownership/projects/midlands_portfolio/app.py
@@ -163,5 +163,10 @@ def app():
     }
     print(body)
 
+    # We read in the current valuation data and identify if there are any uprns that need to be added
+    previous_valuations = pd.read_excel(
+        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/sfr/sfr property valuations.xlsx")
+    asset_list[~asset_list["uprn"].astype(str).isin(previous_valuations["uprn"].astype(str))]
+
     # We now need a distinct step to prepare final outputs
     portfolio_timestamp = "2024-08-20 15:51:10.292075"

From a153de51c31fd8174540b3cb7a8c4e6abfa24d61 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 20 Aug 2024 18:52:52 +0100
Subject: [PATCH 135/182] added mountview and more uprns to exclusions

---
 etl/ownership/config.py                       | 21 ++++++++++++++++++-
 .../projects/midlands_portfolio/app.py        |  3 ++-
 2 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/etl/ownership/config.py b/etl/ownership/config.py
index 1940e06d..1b67e742 100644
--- a/etl/ownership/config.py
+++ b/etl/ownership/config.py
@@ -4,9 +4,28 @@ OWNERS_WHO_CANT_SELL = [
     "4483430",
     # Ultrabarn - they're unwilling to sell and will sort any retrofits themselves
     "2794851"
+    # Mountview - Anna spoke with someone from Mounview - they acquire tenancies and sell them as soon as they become
+    # vacant. They have no immediate opportunities but we may come back and remove this
+    "328090"
 ]
 
 EXCLUDED_UPRNS = [
     # This property no longer exists
-    200003827624
+    200003827624,
+    # This property doesn't seem to exist
+    90070698,
+    # Can't really find a solid record on Zoopla/Rightmove
+    10090437990,
+    # This property doesn't seem to exist
+    100070902790,
+    # This property doesn't seem to exist
+    100070902791,
+    # This property doesn't seem to exist
+    100031997775,
+    # Can't find reliable information to this property on zoopla/rightmove
+    200001372608,
+    # Can't find reliable information to this property on zoopla/rightmove
+    100031592801,
+    # Can't find reliable information to this property on zoopla/rightmove
+    100031579087,
 ]
diff --git a/etl/ownership/projects/midlands_portfolio/app.py b/etl/ownership/projects/midlands_portfolio/app.py
index f868bd3b..e79d86d2 100644
--- a/etl/ownership/projects/midlands_portfolio/app.py
+++ b/etl/ownership/projects/midlands_portfolio/app.py
@@ -166,7 +166,8 @@ def app():
     # We read in the current valuation data and identify if there are any uprns that need to be added
     previous_valuations = pd.read_excel(
         "/Users/khalimconn-kowlessar/Documents/hestia/Customers/sfr/sfr property valuations.xlsx")
-    asset_list[~asset_list["uprn"].astype(str).isin(previous_valuations["uprn"].astype(str))]
+    missed = asset_list[~asset_list["uprn"].astype(str).isin(previous_valuations["uprn"].astype(str))]
+    missed.to_csv("missed_valuations.csv")
 
     # We now need a distinct step to prepare final outputs
     portfolio_timestamp = "2024-08-20 15:51:10.292075"

From 520aa430b77462666ac2ca9405a5d7349172224f Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 20 Aug 2024 19:27:34 +0100
Subject: [PATCH 136/182] added step to remove owners with just 1 property

---
 etl/ownership/Ownership.py                    | 31 ++++++++++++++-----
 etl/ownership/config.py                       |  4 +++
 .../projects/midlands_portfolio/app.py        | 14 +++++----
 3 files changed, 36 insertions(+), 13 deletions(-)

diff --git a/etl/ownership/Ownership.py b/etl/ownership/Ownership.py
index 5f506881..5b421e7b 100644
--- a/etl/ownership/Ownership.py
+++ b/etl/ownership/Ownership.py
@@ -933,6 +933,7 @@ class Ownership:
         )
 
         pivot_counts = pivot_counts.sort_values("total_number_of_properties", ascending=False)
+        pivot_counts = pivot_counts[pivot_counts["total_number_of_properties"] > 1]
 
         pivot_counts["approx_value"] = self.average_property_value * pivot_counts["total_number_of_properties"]
         pivot_counts["cumulative_value"] = pivot_counts["approx_value"].cumsum()
@@ -1037,7 +1038,7 @@ class Ownership:
 
         return asset_list
 
-    def create_final_outputs(self, portfolio_timestamp):
+    def create_final_outputs(self, portfolio_timestamp, exclusion_uprns=None):
         """
         Given the completed outputs of the matching process, this function creates the final outputs, after matching
         valuation data, and creates a "working" directory, which is our current view of the sfr portfolio. This means
@@ -1047,8 +1048,10 @@ class Ownership:
         :return:
         """
 
+        exclusion_uprns = [] if exclusion_uprns is None else exclusion_uprns
+
         # Step 1: Read in the valuations data
-        valuations = read_excel_from_s3(
+        valuatio_ns = read_excel_from_s3(
             bucket_name=self.bucket,
             file_key=f"ownership/{self.project_name}/sfr property valuations.xlsx",
             header_row=0
@@ -1075,10 +1078,24 @@ class Ownership:
             header_row=0
         )
 
-        portfolio_epc_data["UPRN"].duplicated().sum()
-        portfolio_properties["UPRN"].duplicated().sum()
-        portfolio_properties[~portfolio_properties["UPRN"].astype(str).isin(portfolio_epc_data["UPRN"].astype(str))]
+        # Check they're the right size
+        if portfolio_owners["total_number_of_properties"].sum() != portfolio_properties["UPRN"].nunique():
+            raise ValueError("Portfolio owners and properties don't match")
 
-        portfolio_properties[~portfolio_properties["UPRN"].astype(str).isin(portfolio_epc_data["UPRN"].astype(str))]
+        if portfolio_properties["UPRN"].nunique() != portfolio_epc_data["UPRN"].nunique():
+            raise ValueError("Portfolio properties and epc data don't match")
 
-        portfolio_epc_data.shape
+        # We make some final cuts based on UPRNs that at a later stage are found to be odd
+        if portfolio_properties["UPRN"].isin(exclusion_uprns).sum():
+            # Identify who the owners are for thes uprns
+            owners = portfolio_properties[portfolio_properties["UPRN"].isin(exclusion_uprns)].groupby(
+                "Company Registration No. (1)"
+            )["UPRN"].nunique().reset_index().rename(
+                columns={"UPRN": "number_of_properties_to_exclude"}
+            )
+
+            min_owners_threshold = portfolio_owners["total_number_of_properties"].min()
+
+            portfolio_owners = portfolio_owners.merge(
+                owners, how="left", on="Company Registration No. (1)", suffixes=("", "_excluded")
+            )
diff --git a/etl/ownership/config.py b/etl/ownership/config.py
index 1b67e742..3f153817 100644
--- a/etl/ownership/config.py
+++ b/etl/ownership/config.py
@@ -28,4 +28,8 @@ EXCLUDED_UPRNS = [
     100031592801,
     # Can't find reliable information to this property on zoopla/rightmove
     100031579087,
+    # Can't find reliable information to this property on zoopla/rightmove
+    200000877273,
+    # Can't find reliable information to this property on zoopla/rightmove - seems like a post office!
+    100071391639
 ]
diff --git a/etl/ownership/projects/midlands_portfolio/app.py b/etl/ownership/projects/midlands_portfolio/app.py
index e79d86d2..8a2abe48 100644
--- a/etl/ownership/projects/midlands_portfolio/app.py
+++ b/etl/ownership/projects/midlands_portfolio/app.py
@@ -163,11 +163,13 @@ def app():
     }
     print(body)
 
-    # We read in the current valuation data and identify if there are any uprns that need to be added
-    previous_valuations = pd.read_excel(
-        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/sfr/sfr property valuations.xlsx")
-    missed = asset_list[~asset_list["uprn"].astype(str).isin(previous_valuations["uprn"].astype(str))]
-    missed.to_csv("missed_valuations.csv")
+    # # We read in the current valuation data and identify if there are any uprns that need to be added
+    # previous_valuations = pd.read_excel(
+    #     "/Users/khalimconn-kowlessar/Documents/hestia/Customers/sfr/sfr property valuations.xlsx")
+    # missed = asset_list[~asset_list["uprn"].astype(str).isin(previous_valuations["uprn"].astype(str))]
+    # missed.to_csv("missed_valuations.csv")
 
     # We now need a distinct step to prepare final outputs
-    portfolio_timestamp = "2024-08-20 15:51:10.292075"
+    portfolio_timestamp = "2024-08-20 18:53:08.326351"
+
+    exclusion_uprns = EXCLUDED_UPRNS

From f186c00df519258b305599bbf10f78b20a6ce0aa Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 20 Aug 2024 19:51:18 +0100
Subject: [PATCH 137/182] corrected property exclusions

---
 etl/ownership/Ownership.py                    | 47 ++++++++++++++-----
 etl/ownership/config.py                       |  4 +-
 .../projects/midlands_portfolio/app.py        | 12 ++++-
 3 files changed, 48 insertions(+), 15 deletions(-)

diff --git a/etl/ownership/Ownership.py b/etl/ownership/Ownership.py
index 5b421e7b..b05ef22a 100644
--- a/etl/ownership/Ownership.py
+++ b/etl/ownership/Ownership.py
@@ -1038,7 +1038,7 @@ class Ownership:
 
         return asset_list
 
-    def create_final_outputs(self, portfolio_timestamp, exclusion_uprns=None):
+    def create_final_outputs(self, portfolio_timestamp, storage_date, exclusion_uprns=None):
         """
         Given the completed outputs of the matching process, this function creates the final outputs, after matching
         valuation data, and creates a "working" directory, which is our current view of the sfr portfolio. This means
@@ -1051,7 +1051,7 @@ class Ownership:
         exclusion_uprns = [] if exclusion_uprns is None else exclusion_uprns
 
         # Step 1: Read in the valuations data
-        valuatio_ns = read_excel_from_s3(
+        valuations = read_excel_from_s3(
             bucket_name=self.bucket,
             file_key=f"ownership/{self.project_name}/sfr property valuations.xlsx",
             header_row=0
@@ -1087,15 +1087,40 @@ class Ownership:
 
         # We make some final cuts based on UPRNs that at a later stage are found to be odd
         if portfolio_properties["UPRN"].isin(exclusion_uprns).sum():
+            raise Exception("Implement me!")
             # Identify who the owners are for thes uprns
-            owners = portfolio_properties[portfolio_properties["UPRN"].isin(exclusion_uprns)].groupby(
-                "Company Registration No. (1)"
-            )["UPRN"].nunique().reset_index().rename(
-                columns={"UPRN": "number_of_properties_to_exclude"}
-            )
+            # owners = portfolio_properties[portfolio_properties["UPRN"].isin(exclusion_uprns)].groupby(
+            #     "Company Registration No. (1)"
+            # )["UPRN"].nunique().reset_index().rename(
+            #     columns={"UPRN": "number_of_properties_to_exclude"}
+            # )
+            #
+            # min_owners_threshold = portfolio_owners["total_number_of_properties"].min()
+            #
+            # portfolio_owners = portfolio_owners.merge(
+            #     owners, how="left", on="Company Registration No. (1)", suffixes=("", "_excluded")
+            # )
 
-            min_owners_threshold = portfolio_owners["total_number_of_properties"].min()
+        # Step 2: Merge in the valuations data
+        portfolio_properties = portfolio_properties.merge(
+            valuations, how="left", on="UPRN"
+        )
 
-            portfolio_owners = portfolio_owners.merge(
-                owners, how="left", on="Company Registration No. (1)", suffixes=("", "_excluded")
-            )
+        # Step 3: Store the final outputs
+        save_excel_to_s3(
+            df=portfolio_owners,
+            bucket_name=self.bucket,
+            file_key=f"ownership/{self.project_name}/current/{storage_date}/portfolio_owners.xlsx",
+        )
+
+        save_excel_to_s3(
+            df=portfolio_properties,
+            bucket_name=self.bucket,
+            file_key=f"ownership/{self.project_name}/current/{storage_date}/portfolio_properties.xlsx",
+        )
+
+        save_excel_to_s3(
+            df=portfolio_epc_data,
+            bucket_name=self.bucket,
+            file_key=f"ownership/{self.project_name}/current/{storage_date}/portfolio_epc_data.xlsx",
+        )
diff --git a/etl/ownership/config.py b/etl/ownership/config.py
index 3f153817..ac92693a 100644
--- a/etl/ownership/config.py
+++ b/etl/ownership/config.py
@@ -3,10 +3,10 @@ OWNERS_WHO_CANT_SELL = [
     # Al Rayan - they're the senior lender, not able to sell
     "4483430",
     # Ultrabarn - they're unwilling to sell and will sort any retrofits themselves
-    "2794851"
+    "2794851",
     # Mountview - Anna spoke with someone from Mounview - they acquire tenancies and sell them as soon as they become
     # vacant. They have no immediate opportunities but we may come back and remove this
-    "328090"
+    "328090",
 ]
 
 EXCLUDED_UPRNS = [
diff --git a/etl/ownership/projects/midlands_portfolio/app.py b/etl/ownership/projects/midlands_portfolio/app.py
index 8a2abe48..19a52357 100644
--- a/etl/ownership/projects/midlands_portfolio/app.py
+++ b/etl/ownership/projects/midlands_portfolio/app.py
@@ -1,4 +1,5 @@
-import pandas as pd
+import datetime
+
 from sqlalchemy.orm import sessionmaker
 from backend.app.db.connection import db_engine
 from backend.app.db.models.portfolio import Portfolio, PortfolioUsers
@@ -170,6 +171,13 @@ def app():
     # missed.to_csv("missed_valuations.csv")
 
     # We now need a distinct step to prepare final outputs
-    portfolio_timestamp = "2024-08-20 18:53:08.326351"
+    portfolio_timestamp = "2024-08-20 19:28:18.260205"
 
     exclusion_uprns = EXCLUDED_UPRNS
+
+    # Create a date in the yyyy-mm-dd format to store the data against
+    storage_date = datetime.datetime.now().strftime("%Y-%m-%d")
+
+    ownership_instance.create_final_outputs(
+        portfolio_timestamp=portfolio_timestamp, storage_date=storage_date, exclusion_uprns=EXCLUDED_UPRNS
+    )

From ceda01b77fb5a13f74aaed97e2704727f1bfa4c2 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 20 Aug 2024 20:25:53 +0100
Subject: [PATCH 138/182] completed portfolio refresh

---
 etl/ownership/Ownership.py                       | 2 +-
 etl/ownership/projects/midlands_portfolio/app.py | 4 +---
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/etl/ownership/Ownership.py b/etl/ownership/Ownership.py
index b05ef22a..3bc4b60d 100644
--- a/etl/ownership/Ownership.py
+++ b/etl/ownership/Ownership.py
@@ -1103,7 +1103,7 @@ class Ownership:
 
         # Step 2: Merge in the valuations data
         portfolio_properties = portfolio_properties.merge(
-            valuations, how="left", on="UPRN"
+            valuations.rename(columns={"uprn": "UPRN"}).drop(columns=['address', 'postcode']), how="left", on="UPRN"
         )
 
         # Step 3: Store the final outputs
diff --git a/etl/ownership/projects/midlands_portfolio/app.py b/etl/ownership/projects/midlands_portfolio/app.py
index 19a52357..d004965f 100644
--- a/etl/ownership/projects/midlands_portfolio/app.py
+++ b/etl/ownership/projects/midlands_portfolio/app.py
@@ -171,9 +171,7 @@ def app():
     # missed.to_csv("missed_valuations.csv")
 
     # We now need a distinct step to prepare final outputs
-    portfolio_timestamp = "2024-08-20 19:28:18.260205"
-
-    exclusion_uprns = EXCLUDED_UPRNS
+    portfolio_timestamp = "2024-08-20 19:51:33.884145"
 
     # Create a date in the yyyy-mm-dd format to store the data against
     storage_date = datetime.datetime.now().strftime("%Y-%m-%d")

From 1d82433b066d8eacab88ad005339761f5ac1c852 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 22 Aug 2024 08:44:38 +0100
Subject: [PATCH 139/182] heating recs testing wip

---
 .../test_data/heating_recommendations_data.py | 60 +++++++++++++++++++
 .../tests/test_heating_recommendations.py     | 51 ++++++++++++++++
 2 files changed, 111 insertions(+)
 create mode 100644 recommendations/tests/test_data/heating_recommendations_data.py
 create mode 100644 recommendations/tests/test_heating_recommendations.py

diff --git a/recommendations/tests/test_data/heating_recommendations_data.py b/recommendations/tests/test_data/heating_recommendations_data.py
new file mode 100644
index 00000000..3cb80876
--- /dev/null
+++ b/recommendations/tests/test_data/heating_recommendations_data.py
@@ -0,0 +1,60 @@
+import random
+from pathlib import Path
+import inspect
+import pandas as pd
+
+# this can be used to get example data to build the test cases
+src_file_path = inspect.getfile(lambda: None)
+EPC_DIRECTORY = Path(src_file_path).parent / "local_data" / "all-domestic-certificates"
+epc_directories = [entry for entry in EPC_DIRECTORY.iterdir() if entry.is_dir()]
+directory = random.sample(epc_directories, 1)[0]
+data = pd.read_csv(directory / "certificates.csv", low_memory=False)
+# Rename the columns to the same format as the api returns
+data.columns = [c.replace("_", "-").lower() for c in data.columns]
+
+eg = data.sample(1).to_dict("records")[0]
+
+testing_examples = [
+    {
+        "epc": {
+            'lmk-key': '948324269042014090409224502942098', 'address1': '15, Ringwood Crescent', 'address2': None,
+            'address3': None, 'postcode': 'TS19 9DN', 'building-reference-number': 1016769078,
+            'current-energy-rating': 'C', 'potential-energy-rating': 'B', 'current-energy-efficiency': 79,
+            'potential-energy-efficiency': 85, 'property-type': 'House', 'built-form': 'Semi-Detached',
+            'inspection-date': '2014-08-21', 'local-authority': 'E06000004', 'constituency': 'E14000970',
+            'county': None,
+            'lodgement-date': '2014-09-04', 'transaction-type': 'none of the above', 'environment-impact-current': 77,
+            'environment-impact-potential': 85, 'energy-consumption-current': 152,
+            'energy-consumption-potential': 103.0, 'co2-emissions-current': 2.2, 'co2-emiss-curr-per-floor-area': 30,
+            'co2-emissions-potential': 1.5, 'lighting-cost-current': 61.0, 'lighting-cost-potential': 47.0,
+            'heating-cost-current': 625.0, 'heating-cost-potential': 522.0, 'hot-water-cost-current': 100.0,
+            'hot-water-cost-potential': 71.0, 'total-floor-area': 74.0, 'energy-tariff': 'Single',
+            'mains-gas-flag': 'Y', 'floor-level': 'NODATA!', 'flat-top-storey': None, 'flat-storey-count': None,
+            'main-heating-controls': 2106.0, 'multi-glaze-proportion': 100.0,
+            'glazed-type': 'double glazing installed before 2002', 'glazed-area': 'Normal', 'extension-count': 0.0,
+            'number-habitable-rooms': 3.0, 'number-heated-rooms': 3.0, 'low-energy-lighting': 70.0,
+            'number-open-fireplaces': 0.0, 'hotwater-description': 'From main system', 'hot-water-energy-eff': 'Good',
+            'hot-water-env-eff': 'Good', 'floor-description': 'Solid, no insulation (assumed)',
+            'floor-energy-eff': None,
+            'floor-env-eff': None, 'windows-description': 'Fully double glazed', 'windows-energy-eff': 'Average',
+            'windows-env-eff': 'Average', 'walls-description': 'Cavity wall, filled cavity', 'walls-energy-eff': 'Good',
+            'walls-env-eff': 'Good', 'secondheat-description': 'Room heaters, mains gas', 'sheating-energy-eff': None,
+            'sheating-env-eff': None, 'roof-description': 'Pitched, 50 mm loft insulation', 'roof-energy-eff': 'Poor',
+            'roof-env-eff': 'Poor', 'mainheat-description': 'Boiler and radiators, mains gas',
+            'mainheat-energy-eff': 'Good', 'mainheat-env-eff': 'Good',
+            'mainheatcont-description': 'Programmer, room thermostat and TRVs', 'mainheatc-energy-eff': 'Good',
+            'mainheatc-env-eff': 'Good', 'lighting-description': 'Low energy lighting in 70% of fixed outlets',
+            'lighting-energy-eff': 'Very Good', 'lighting-env-eff': 'Very Good',
+            'main-fuel': 'mains gas (not community)', 'wind-turbine-count': 0.0, 'heat-loss-corridor': 'NO DATA!',
+            'unheated-corridor-length': None, 'floor-height': 2.5, 'photo-supply': 50.0,
+            'solar-water-heating-flag': None,
+            'mechanical-ventilation': 'natural', 'address': '15, Ringwood Crescent',
+            'local-authority-label': 'Stockton-on-Tees', 'constituency-label': 'Stockton North',
+            'posttown': 'STOCKTON-ON-TEES', 'construction-age-band': 'England and Wales: 1950-1966',
+            'lodgement-datetime': '2014-09-04 09:22:45', 'tenure': 'owner-occupied',
+            'fixed-lighting-outlets-count': 10.0, 'low-energy-fixed-light-count': 7.0, 'uprn': 100110195416.0,
+            'uprn-source': 'Address Matched'
+        }
+
+    }
+]
diff --git a/recommendations/tests/test_heating_recommendations.py b/recommendations/tests/test_heating_recommendations.py
new file mode 100644
index 00000000..8857c343
--- /dev/null
+++ b/recommendations/tests/test_heating_recommendations.py
@@ -0,0 +1,51 @@
+import pandas as pd
+from utils.s3 import read_dataframe_from_s3_parquet
+import pytest
+from backend.Property import Property
+from etl.epc.Record import EPCRecord
+from recommendations.HeatingRecommender import HeatingRecommender
+from recommendations.tests.test_data.heating_recommendations_data import testing_examples
+
+
+class TestHeatingRecommendations:
+
+    @pytest.fixture
+    def cleaning_data(self):
+        return read_dataframe_from_s3_parquet(
+            bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet",
+        )
+
+    @pytest.mark.parametrize(
+        "test_case",
+        testing_examples
+    )
+    def test_recommend(self, test_case, cleaning_data):
+        """
+        With this function, we test out multiple heating descriptions and check which recomendations
+        we retrieve alongside them
+        :return:
+        """
+
+        epc_records = {"original_epc": test_case["epc"], "full_sap_epc": {}, "old_data": []}
+
+        epc_record = EPCRecord(
+            epc_records=epc_records,
+            run_mode="newdata",
+            cleaning_data=cleaning_data
+        )
+
+        p = Property(
+            id=0,
+            postcode=test_case["epc"]["postcode"],
+            address=test_case["epc"]["address"],
+            epc_record=epc_record
+        )
+
+        recommender = HeatingRecommender(property_instance=p)
+        # Check they're empty
+        assert not recommender.heating_recommendations
+        assert not recommender.heating_control_recommendations
+
+        recommender.recommend(has_cavity_or_loft_recommendations=False)
+
+        # TODO: We check results against expected behaviour

From 71d82edb49ac3df51ba013ca335ee7b9f99fb0d8 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 22 Aug 2024 10:31:03 +0100
Subject: [PATCH 140/182] commented out epc data reading code

---
 .../test_data/heating_recommendations_data.py | 37 +++++++++++--------
 .../tests/test_heating_recommendations.py     | 31 ++++++++++++++--
 2 files changed, 48 insertions(+), 20 deletions(-)

diff --git a/recommendations/tests/test_data/heating_recommendations_data.py b/recommendations/tests/test_data/heating_recommendations_data.py
index 3cb80876..0656e917 100644
--- a/recommendations/tests/test_data/heating_recommendations_data.py
+++ b/recommendations/tests/test_data/heating_recommendations_data.py
@@ -1,18 +1,18 @@
-import random
-from pathlib import Path
-import inspect
-import pandas as pd
-
-# this can be used to get example data to build the test cases
-src_file_path = inspect.getfile(lambda: None)
-EPC_DIRECTORY = Path(src_file_path).parent / "local_data" / "all-domestic-certificates"
-epc_directories = [entry for entry in EPC_DIRECTORY.iterdir() if entry.is_dir()]
-directory = random.sample(epc_directories, 1)[0]
-data = pd.read_csv(directory / "certificates.csv", low_memory=False)
-# Rename the columns to the same format as the api returns
-data.columns = [c.replace("_", "-").lower() for c in data.columns]
-
-eg = data.sample(1).to_dict("records")[0]
+# import random
+# from pathlib import Path
+# import inspect
+# import pandas as pd
+#
+# # this can be used to get example data to build the test cases
+# src_file_path = inspect.getfile(lambda: None)
+# EPC_DIRECTORY = Path(src_file_path).parent / "local_data" / "all-domestic-certificates"
+# epc_directories = [entry for entry in EPC_DIRECTORY.iterdir() if entry.is_dir()]
+# directory = random.sample(epc_directories, 1)[0]
+# data = pd.read_csv(directory / "certificates.csv", low_memory=False)
+# # Rename the columns to the same format as the api returns
+# data.columns = [c.replace("_", "-").lower() for c in data.columns]
+#
+# eg = data.sample(1).to_dict("records")[0]
 
 testing_examples = [
     {
@@ -54,7 +54,12 @@ testing_examples = [
             'lodgement-datetime': '2014-09-04 09:22:45', 'tenure': 'owner-occupied',
             'fixed-lighting-outlets-count': 10.0, 'low-energy-fixed-light-count': 7.0, 'uprn': 100110195416.0,
             'uprn-source': 'Address Matched'
-        }
+        },
+        "kwh": {
 
+        },
+        "recommendation_descripptions": [
+
+        ]
     }
 ]
diff --git a/recommendations/tests/test_heating_recommendations.py b/recommendations/tests/test_heating_recommendations.py
index 8857c343..76927702 100644
--- a/recommendations/tests/test_heating_recommendations.py
+++ b/recommendations/tests/test_heating_recommendations.py
@@ -1,8 +1,10 @@
 import pandas as pd
-from utils.s3 import read_dataframe_from_s3_parquet
+import msgpack
+from utils.s3 import read_dataframe_from_s3_parquet, read_from_s3
 import pytest
 from backend.Property import Property
 from etl.epc.Record import EPCRecord
+from etl.bill_savings.KwhData import KwhData
 from recommendations.HeatingRecommender import HeatingRecommender
 from recommendations.tests.test_data.heating_recommendations_data import testing_examples
 
@@ -15,18 +17,32 @@ class TestHeatingRecommendations:
             bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet",
         )
 
+    @pytest.fixture
+    def cleaned(self):
+        df = read_from_s3(
+            s3_file_name="cleaned_epc_data/cleaned.bson",
+            bucket_name="retrofit-data-dev"
+        )
+
+        df = msgpack.unpackb(df, raw=False)
+        return df
+
+    @pytest.fixture
+    def kwh_client(self):
+        return KwhData(bucket="retrofit-data-dev", read_consumption_data=True)
+
     @pytest.mark.parametrize(
         "test_case",
         testing_examples
     )
-    def test_recommend(self, test_case, cleaning_data):
+    def test_recommend(self, test_case, cleaning_data, cleaned, kwh_client):
         """
         With this function, we test out multiple heating descriptions and check which recomendations
         we retrieve alongside them
         :return:
         """
 
-        epc_records = {"original_epc": test_case["epc"], "full_sap_epc": {}, "old_data": []}
+        epc_records = {"original_epc": test_case["epc"].copy(), "full_sap_epc": {}, "old_data": []}
 
         epc_record = EPCRecord(
             epc_records=epc_records,
@@ -38,8 +54,15 @@ class TestHeatingRecommendations:
             id=0,
             postcode=test_case["epc"]["postcode"],
             address=test_case["epc"]["address"],
-            epc_record=epc_record
+            epc_record=epc_record,
+            energy_assessment={
+                "condition": {},
+                "energy_assessment_is_newer": False
+            }
         )
+        # TODO: Implement me
+        kwh_predictions = test_case["kwhs"]
+        p.set_features(cleaned=cleaned, kwh_client=kwh_client, kwh_predictions=kwh_predictions)
 
         recommender = HeatingRecommender(property_instance=p)
         # Check they're empty

From f122ae32693372f54552f6976eceaa9f1218bf58 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sat, 24 Aug 2024 15:07:51 +0100
Subject: [PATCH 141/182] minor prep ahead of orbit & wates

---
 etl/customers/orbit/archetypes.py | 40 +++++++++++++++++++++++++++++++
 1 file changed, 40 insertions(+)
 create mode 100644 etl/customers/orbit/archetypes.py

diff --git a/etl/customers/orbit/archetypes.py b/etl/customers/orbit/archetypes.py
new file mode 100644
index 00000000..2a2e0baf
--- /dev/null
+++ b/etl/customers/orbit/archetypes.py
@@ -0,0 +1,40 @@
+import pandas as pd
+
+
+def main():
+    """
+    Some rough and ready analysis to get a view of what the achetypes could be, ahead of a meeting with Wates
+    on the 28th Aug 2024
+    :return:
+    """
+
+    all_assets = pd.read_excel(
+        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Orbit - Wates/Bexley Wave 3 Project - external - "
+        "reduced.xlsx",
+        sheet_name="Full Property List",
+        header=1
+    )
+
+    secondary_cols = ["" if pd.isnull(x) else x for x in all_assets.iloc[0, :].values]
+    new_colnames = [
+        "+".join([all_assets.columns[i], secondary_cols[i]]) if secondary_cols[i] else all_assets.columns[i]
+        for i, c in enumerate(all_assets.columns)
+    ]
+    # Drop row 0
+
+    locations = {
+        location_name: pd.read_excel(
+            "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Orbit - Wates/Bexley Wave 3 Project - external - "
+            "reduced"
+            ".xlsx",
+            sheet_name=location_name
+        ) for location_name in [
+            "Forest Road Erith",
+            "Lesney Farms",
+            "Brook Street 155 - 243",
+            "Hazel Drive",
+            "Page Crescent",
+            "Brook Salmon Roberts and Chapma",
+            "Beacon Road"
+        ]
+    }

From 4c71342cfb0c9487c243c79025f86ce4ce03171a Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 27 Aug 2024 14:43:18 +0100
Subject: [PATCH 142/182] improving SearchEpc matching algorithm

---
 backend/SearchEpc.py              |  16 +-
 etl/customers/orbit/archetypes.py | 264 +++++++++++++++++++++++++++---
 2 files changed, 257 insertions(+), 23 deletions(-)

diff --git a/backend/SearchEpc.py b/backend/SearchEpc.py
index 37c2b7f9..fd6ea032 100644
--- a/backend/SearchEpc.py
+++ b/backend/SearchEpc.py
@@ -292,8 +292,7 @@ class SearchEpc:
                         "error": str(e)
                     }
 
-    @staticmethod
-    def filter_rows(rows, property_type=None, address=None):
+    def filter_rows(self, rows, property_type=None, address=None):
         """
         This method should not be used when property_type and address are both not None
         :param rows:
@@ -321,7 +320,18 @@ class SearchEpc:
 
         if address is not None:
             # We can do a filter on the property type
-            best_match = process.extractOne(address, [r["address"] for r in rows], score_cutoff=0)
+            # We check if the full address contains the postcode and if it does, remove
+            if self.postcode in address:
+                address = address.replace(self.postcode, "").strip().rstrip(",")
+
+            # We check if post town is included in the address
+            if any([r["posttown"].lower() in address.lower() for r in rows]):
+                best_match = process.extractOne(
+                    address, [", ".join([r["address"], r["posttown"]]) for r in rows], score_cutoff=0
+                )
+            else:
+                best_match = process.extractOne(address, [r["address"] for r in rows], score_cutoff=0)
+            # Get all of the scores
             rows_filtered = [r for r in rows if r["address"] == best_match[0]]
 
             if rows_filtered:
diff --git a/etl/customers/orbit/archetypes.py b/etl/customers/orbit/archetypes.py
index 2a2e0baf..e0f5e995 100644
--- a/etl/customers/orbit/archetypes.py
+++ b/etl/customers/orbit/archetypes.py
@@ -1,4 +1,24 @@
 import pandas as pd
+import numpy as np
+from backend.SearchEpc import SearchEpc
+from dotenv import load_dotenv
+from tqdm import tqdm
+import os
+
+load_dotenv(dotenv_path="backend/.env")
+EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
+
+
+def clean_colnames(df):
+    secondary_cols = ["" if pd.isnull(x) else x for x in df.iloc[0, :].values]
+    new_colnames = [
+        "+".join([df.columns[i], secondary_cols[i]]) if secondary_cols[i] else df.columns[i]
+        for i, c in enumerate(df.columns)
+    ]
+    # Drop row 0
+    df = df.drop(0)
+    df.columns = new_colnames
+    return df
 
 
 def main():
@@ -8,33 +28,237 @@ def main():
     :return:
     """
 
+    all_locations = [
+        "Forest Road Erith",
+        "Lesney Farms",
+        "Brook Street 155 - 243",
+        "Hazel Drive",
+        "Page Crescent",
+        "Brook Salmon Roberts and Chapma",
+        "Beacon Road"
+    ]
+
     all_assets = pd.read_excel(
         "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Orbit - Wates/Bexley Wave 3 Project - external - "
         "reduced.xlsx",
         sheet_name="Full Property List",
         header=1
     )
-
-    secondary_cols = ["" if pd.isnull(x) else x for x in all_assets.iloc[0, :].values]
-    new_colnames = [
-        "+".join([all_assets.columns[i], secondary_cols[i]]) if secondary_cols[i] else all_assets.columns[i]
-        for i, c in enumerate(all_assets.columns)
-    ]
-    # Drop row 0
+    all_assets = clean_colnames(all_assets)
+    all_assets["Location"] = None
 
     locations = {
-        location_name: pd.read_excel(
+        location_name: clean_colnames(pd.read_excel(
             "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Orbit - Wates/Bexley Wave 3 Project - external - "
-            "reduced"
-            ".xlsx",
-            sheet_name=location_name
-        ) for location_name in [
-            "Forest Road Erith",
-            "Lesney Farms",
-            "Brook Street 155 - 243",
-            "Hazel Drive",
-            "Page Crescent",
-            "Brook Salmon Roberts and Chapma",
-            "Beacon Road"
-        ]
+            "reduced.xlsx",
+            sheet_name=location_name,
+            header=1
+        )) for location_name in all_locations
+    }
+
+    for loc in all_locations:
+        all_assets["Location"] = np.where(
+            all_assets["Asset Reference"].isin(locations[loc]["Asset Reference"]),
+            loc,
+            all_assets["Location"]
+        )
+
+    if pd.isnull(all_assets["Location"]).sum():
+        raise Exception("something went wrong")
+
+    # 234 properties below EPC C
+    below_epc_c = all_assets[all_assets["PRE CALCULATED EPC"].isin(["D", "E", "F", "G"])].copy()
+
+    # We simplify wall type
+    below_epc_c["wall_type_simplified"] = below_epc_c["Wall Type"].str.split(" ").str[0]
+
+    known_no_epc = [
+        28679,  # These is no EPC for 11 Page Crescent, Erith, Kent, DA8 2HJ, just 11A
+        29291,  # No EPC for 225 Slade Green Road, Erith, Kent, DA8 2JW
+    ]
+    # Get the EPC data
+    epc_data = []
+    for _, home in tqdm(all_assets.iterrows(), total=len(all_assets)):
+        if home["Asset Reference"] in known_no_epc:
+            continue
+
+        address = home["Address"]
+        # Spelling error
+        if "Frinstead" in address:
+            address = address.replace("Frinstead", "Frinsted")
+
+        address1 = address.split(",")[0]
+
+        searcher = SearchEpc(
+            address1=address1,
+            postcode=home["Address - Postcode"],
+            auth_token=EPC_AUTH_TOKEN,
+            os_api_key="",
+            full_address=address,
+        )
+        searcher.ordnance_survey_client.property_type = None
+        searcher.ordnance_survey_client.built_form = None
+
+        searcher.find_property(skip_os=True)
+        if searcher.newest_epc is None:
+            raise Exception("Couldn't find")
+
+        epc_data.append(
+            {
+                "Asset Reference": home["Asset Reference"],
+                **searcher.newest_epc.copy()
+            }
+        )
+
+    epc_data = pd.DataFrame(epc_data)
+    # epc_data.to_csv(
+    #     "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Orbit - Wates/Bexley EPC data.csv", index=False
+    # )
+
+    epc_comparison = all_assets[
+        ['Asset Reference', 'Address', 'PRE CALCULATED EPC']
+    ].merge(
+        epc_data[["Asset Reference", "current-energy-rating", "lodgement-date"]],
+        on='Asset Reference',
+        how="left"
+    )
+
+    # There are a large # of properties (147) that have different pre calcualted EPC rating, to what's on the registry
+    # These may be internally held EPRs but this may inform which properties we might want to prioritise for survey
+    different_epcs = epc_comparison[
+        epc_comparison["PRE CALCULATED EPC"] != epc_comparison["current-energy-rating"]
+        ]
+
+    not_c = different_epcs[
+        (different_epcs["PRE CALCULATED EPC"] == "C") &
+        (different_epcs["current-energy-rating"] != "C")
+        ]
+
+    system_builds = below_epc_c[
+        below_epc_c["Wall Type"].str.contains("SystemBuilt")
+    ].copy()
+
+    combinations = system_builds[
+        ['Asset Type', 'Property Type', 'Location', 'PRE CALCULATED EPC', 'Wall Type', ]
+    ].drop_duplicates()
+
+    system_build_data_comparison = system_builds.merge(
+        epc_data[["Asset Reference", "walls-description", "roof-description", "current-energy-rating"]],
+        left_on='Asset Reference',
+        right_on='Asset Reference',
+        how="left"
+    )
+
+    system_build_data_comparison["PRE CALCULATED EPC"].value_counts()
+    system_build_data_comparison["current-energy-rating"].value_counts()
+
+    epc_cs_system_builds = system_build_data_comparison[system_build_data_comparison["current-energy-rating"] == "C"]
+
+    archetype_columns = [
+        ["Asset Type", "Property Type", "Wall Type", "Location"],
+        ["Asset Type", "Property Type", "Location"],
+        ["Asset Type", "Property Type", "Wall Type", "Location", "PRE CALCULATED EPC", "roof-description"],
+        ["Asset Type", "Property Type", "Location", "PRE CALCULATED EPC"]
+    ]
+
+    summary = []
+    for cols in archetype_columns:
+        combinations = system_build_data_comparison[cols].drop_duplicates()
+        summary.append(
+            {
+                "cols": cols,
+                "number_archetypes": len(combinations),
+            }
+        )
+
+    summary = pd.DataFrame(summary)
+
+    # Let's use this column combination
+    chosen_combination = [
+        "Asset Type", "Property Type", "Wall Type", "Location", "PRE CALCULATED EPC", "roof-description"
+    ]
+
+    # For this combination, let's find the properties
+    archetype_combinations = system_build_data_comparison[chosen_combination].drop_duplicates().reset_index(drop=True)
+    archetype_combinations["archetype ID"] = archetype_combinations.index
+
+    archetyped_data = system_build_data_comparison.merge(
+        archetype_combinations, how="left", on=chosen_combination
+    )
+
+    counts = archetyped_data["archetype ID"].value_counts()
+    # Archetype 0: Semi D, Uninsulated system built, Pre calculated EPC D, flat insulated roof, (Lesney-0)
+    # Archetype 1: Semi D, Externally insulated system built, Pre calculated EPC D, flat insulated roof (Lesney-1)
+    # Archetype 5: Semi D, System built with unknown insulation, Pre calculated EPC D, flat roof insulated (Lesney-2)
+    # Archetype 3: Semi D, Externally insulated system built, Pre calculated EPC D, flat roof uninsulated (assumed) (
+    # Lesney-3)
+    # 0     21
+    # 1     10
+    # 5     10
+    # 3      3
+    # 2      1
+    # 4      1
+    # 6      1
+    # 7      1
+    # 8      1
+    # 9      1
+    # 10     1
+    # 11     1
+
+    # This archetype is the same as 0, apart from the pre calculate EPC being an E. The registry says this is a D
+    # This has been added to additonal units
+    eg1 = archetyped_data[archetyped_data["archetype ID"] == 2]
+
+    # This archetype is the same as 3, apart from it having limited flat roof insulation.
+    # TODO: The insulation status of this property should be confirmed
+    eg2 = archetyped_data[archetyped_data["archetype ID"] == 4]
+    eg2["roof-description"]
+    z = epc_data[epc_data["Asset Reference"] == eg2["Asset Reference"].values[0]]
+
+    # This is the one mid-terrace - the EPC data indicates that this is Semi-detached
+    # Otherwise this is archetype 5
+    # this should be semi-detached
+    eg3 = archetyped_data[archetyped_data["archetype ID"] == 6]
+    eg3_epc_data = epc_data[epc_data["Asset Reference"] == eg3["Asset Reference"].values[0]]
+
+    # This warrants its own archetype
+    # Semi D, System built with unknown insulation, Pre calculated EPC D, flat uninsulated roof
+    eg4 = archetyped_data[archetyped_data["archetype ID"] == 7]
+
+    # This property stands out due to the mixed cavity and system built wall, but besides that it's similar to
+    # archetype 0
+    # The latest EPC agrees that this is a mixed wall type but the EPC suggests solid and cavity, with an assumed
+    # insulated cavity, as built
+    eg5 = archetyped_data[archetyped_data["archetype ID"] == 8]
+
+    # Archetypes 9, 10, 11 are all similar, Semi D, Uninsulated system built, with pitched lofts with up to 200mm
+    # insulation in the lofts
+    eg6 = archetyped_data[archetyped_data["archetype ID"] == 9]
+
+    # It's just the three units
+    # They're all labelled as
+    pitched_system_built_properties = archetyped_data[archetyped_data["archetype ID"].isin([9, 10, 11])]
+    pitched_system_built_properties["Address"]
+
+    notes = [
+        {
+            "Asset Reference": 27445,
+            "note": "Confirmed this has a pitched roof on Maps"
+        },
+        {
+            "Asset Reference": 27443,
+            "note": "Confirmed this has a pitched roof on Maps"
+        },
+        {
+            "Asset Reference": 27442,
+            "note": "Confirmed this has a pitched roof on Maps"
+        },
+        {
+            "Asset Reference": 25847,
+            "note": "This is labelled as a mid-terrace but the EPC data + Maps suggest it's a semi-detached"
+        }
+    ]
+
+    patches = {
+        25847: {"Property Type": "Semi Detached House", "archetype ID": 5},
     }

From 2890ff13cdf6cb7d1bdb3aa6624b2821327d0b80 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 27 Aug 2024 14:48:40 +0100
Subject: [PATCH 143/182] fixed new bug I added

---
 backend/SearchEpc.py              | 6 ++++--
 etl/customers/orbit/archetypes.py | 9 ++++++++-
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/backend/SearchEpc.py b/backend/SearchEpc.py
index fd6ea032..5f101d81 100644
--- a/backend/SearchEpc.py
+++ b/backend/SearchEpc.py
@@ -329,10 +329,12 @@ class SearchEpc:
                 best_match = process.extractOne(
                     address, [", ".join([r["address"], r["posttown"]]) for r in rows], score_cutoff=0
                 )
+                # Get all of the scores
+                rows_filtered = [r for r in rows if ", ".join([r["address"], r["posttown"]]) == best_match[0]]
             else:
                 best_match = process.extractOne(address, [r["address"] for r in rows], score_cutoff=0)
-            # Get all of the scores
-            rows_filtered = [r for r in rows if r["address"] == best_match[0]]
+                # Get all of the scores
+                rows_filtered = [r for r in rows if r["address"] == best_match[0]]
 
             if rows_filtered:
                 return rows_filtered
diff --git a/etl/customers/orbit/archetypes.py b/etl/customers/orbit/archetypes.py
index e0f5e995..73665bcb 100644
--- a/etl/customers/orbit/archetypes.py
+++ b/etl/customers/orbit/archetypes.py
@@ -89,6 +89,13 @@ def main():
 
         address1 = address.split(",")[0]
 
+        asset_type_map = {
+            "HOUSE": "House",
+            "BUNGALOWS": "Bungalow",
+            "FLATS": "Flat",
+            "MAISONETTES": "Maisonette",
+        }
+
         searcher = SearchEpc(
             address1=address1,
             postcode=home["Address - Postcode"],
@@ -96,7 +103,7 @@ def main():
             os_api_key="",
             full_address=address,
         )
-        searcher.ordnance_survey_client.property_type = None
+        searcher.ordnance_survey_client.property_type = asset_type_map[home["Asset Type"]]
         searcher.ordnance_survey_client.built_form = None
 
         searcher.find_property(skip_os=True)

From e2e9721605c1e68dbf8cc788a3624dc0258bffe7 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 29 Aug 2024 11:13:30 +0100
Subject: [PATCH 144/182] set up template for heating recommendation testing

---
 etl/customers/orbit/archetypes.py             | 234 ++++++++++++++----
 .../test_data/heating_recommendations_data.py |  18 +-
 .../tests/test_heating_recommendations.py     |  48 +++-
 3 files changed, 247 insertions(+), 53 deletions(-)

diff --git a/etl/customers/orbit/archetypes.py b/etl/customers/orbit/archetypes.py
index 73665bcb..988da74f 100644
--- a/etl/customers/orbit/archetypes.py
+++ b/etl/customers/orbit/archetypes.py
@@ -21,7 +21,7 @@ def clean_colnames(df):
     return df
 
 
-def main():
+def lesney_farms():
     """
     Some rough and ready analysis to get a view of what the achetypes could be, ahead of a meeting with Wates
     on the 28th Aug 2024
@@ -150,16 +150,25 @@ def main():
     ].drop_duplicates()
 
     system_build_data_comparison = system_builds.merge(
-        epc_data[["Asset Reference", "walls-description", "roof-description", "current-energy-rating"]],
+        epc_data[
+            ["Asset Reference", "walls-description", "roof-description", "current-energy-rating", "lodgement-date",
+             "current-energy-efficiency"]],
         left_on='Asset Reference',
         right_on='Asset Reference',
         how="left"
     )
 
-    system_build_data_comparison["PRE CALCULATED EPC"].value_counts()
-    system_build_data_comparison["current-energy-rating"].value_counts()
+    # Apply patches
+    patches = {
+        25847: {"Property Type": "Semi Detached House"},
+    }
 
-    epc_cs_system_builds = system_build_data_comparison[system_build_data_comparison["current-energy-rating"] == "C"]
+    for asset_ref, patch in patches.items():
+        for k, v in patch.items():
+            system_build_data_comparison.loc[
+                system_build_data_comparison["Asset Reference"] == asset_ref,
+                k
+            ] = v
 
     archetype_columns = [
         ["Asset Type", "Property Type", "Wall Type", "Location"],
@@ -194,53 +203,34 @@ def main():
     )
 
     counts = archetyped_data["archetype ID"].value_counts()
-    # Archetype 0: Semi D, Uninsulated system built, Pre calculated EPC D, flat insulated roof, (Lesney-0)
+    # Archetype 0: Semi D, As built system built, Pre calculated EPC D, flat insulated roof, (Lesney-0)
     # Archetype 1: Semi D, Externally insulated system built, Pre calculated EPC D, flat insulated roof (Lesney-1)
-    # Archetype 5: Semi D, System built with unknown insulation, Pre calculated EPC D, flat roof insulated (Lesney-2)
+    # Archetype 4: Semi D, System built with unknown insulation, Pre calculated EPC D, flat roof insulated (Lesney-2)
     # Archetype 3: Semi D, Externally insulated system built, Pre calculated EPC D, flat roof uninsulated (assumed) (
     # Lesney-3)
-    # 0     21
-    # 1     10
-    # 5     10
-    # 3      3
-    # 2      1
-    # 4      1
-    # 6      1
-    # 7      1
-    # 8      1
-    # 9      1
-    # 10     1
-    # 11     1
+    # 0    21
+    # 1    11
+    # 4    11
+    # 3     3
+    # 2     1
+    # 5     1
+    # 6     1
+    # 7     1
+    # 8     1
+    # 9     1
 
     # This archetype is the same as 0, apart from the pre calculate EPC being an E. The registry says this is a D
     # This has been added to additonal units
     eg1 = archetyped_data[archetyped_data["archetype ID"] == 2]
 
-    # This archetype is the same as 3, apart from it having limited flat roof insulation.
-    # TODO: The insulation status of this property should be confirmed
-    eg2 = archetyped_data[archetyped_data["archetype ID"] == 4]
-    eg2["roof-description"]
-    z = epc_data[epc_data["Asset Reference"] == eg2["Asset Reference"].values[0]]
+    # Semi D, System built with unknown insulation, Pre calculated EPC D, flat roof insulated
+    # This looks like it would fit either in archetype
+    eg2 = archetyped_data[archetyped_data["archetype ID"] == 5]
 
-    # This is the one mid-terrace - the EPC data indicates that this is Semi-detached
-    # Otherwise this is archetype 5
-    # this should be semi-detached
     eg3 = archetyped_data[archetyped_data["archetype ID"] == 6]
-    eg3_epc_data = epc_data[epc_data["Asset Reference"] == eg3["Asset Reference"].values[0]]
 
-    # This warrants its own archetype
-    # Semi D, System built with unknown insulation, Pre calculated EPC D, flat uninsulated roof
-    eg4 = archetyped_data[archetyped_data["archetype ID"] == 7]
-
-    # This property stands out due to the mixed cavity and system built wall, but besides that it's similar to
-    # archetype 0
-    # The latest EPC agrees that this is a mixed wall type but the EPC suggests solid and cavity, with an assumed
-    # insulated cavity, as built
-    eg5 = archetyped_data[archetyped_data["archetype ID"] == 8]
-
-    # Archetypes 9, 10, 11 are all similar, Semi D, Uninsulated system built, with pitched lofts with up to 200mm
+    # Archetypes 7, 8, 9 are all similar, Semi D, Uninsulated system built, with pitched lofts with up to 200mm
     # insulation in the lofts
-    eg6 = archetyped_data[archetyped_data["archetype ID"] == 9]
 
     # It's just the three units
     # They're all labelled as
@@ -266,6 +256,164 @@ def main():
         }
     ]
 
-    patches = {
-        25847: {"Property Type": "Semi Detached House", "archetype ID": 5},
-    }
+    # These are As Built, System Built
+    system_built_streets = (
+        archetyped_data["Address"].str.split(",").str[0].str.split(" ").str[1].unique()
+    )
+
+    all_assets_w_epcs = all_assets.merge(epc_data, on="Asset Reference", how="left")
+
+    # Grab all of the properties on this street that aren't system built
+    streets_not_system_builds = all_assets_w_epcs[
+        all_assets_w_epcs["Address"].str.split(",").str[0].str.split(" ").str[1].isin(system_built_streets) &
+        ~all_assets_w_epcs["Wall Type"].str.contains("SystemBuilt")
+        ]
+
+    system_builds = archetyped_data[
+        archetyped_data["Wall Type"].str.contains("SystemBuilt")
+    ][["Asset Reference", "Address", "Wall Type", "walls-description"]].sort_values("Address")
+
+    birling_street_system_builds = system_builds[system_builds["Address"].str.contains("Birling")]
+    halstead_street_system_builds = system_builds[system_builds["Address"].str.contains("Halstead")]
+    brasted_street_system_builds = system_builds[system_builds["Address"].str.contains("Brasted")]
+    frinstead_street_system_builds = system_builds[
+        system_builds["Address"].str.contains("Frinstead") | system_builds["Address"].str.contains("Frinsted")
+        ]
+
+    pd.set_option('display.max_rows', 500)
+    pd.set_option('display.max_columns', 500)
+    pd.set_option('display.width', 1000)
+    streets_not_system_builds[["Asset Reference", "Address", "Wall Type", "walls-description"]]
+
+    system_builds[system_builds["Address"].str.contains("Birling")]
+
+    # Possible System Builds
+
+    # Create the proposed sample
+    # lesney-0
+    archetyped_data["lodgement-date"] = pd.to_datetime(archetyped_data["lodgement-date"])
+
+    lesney_0 = archetyped_data[archetyped_data["archetype ID"] == 0].copy()
+    # Get the oldest EPC per postcode
+    lesney_0 = lesney_0.sort_values(["Address - Postcode", "lodgement-date"])
+    lesney_0[["Address", "Address - Postcode", "lodgement-date"]]
+
+    lesney_1 = archetyped_data[archetyped_data["archetype ID"] == 1].copy()
+    lesney_1 = lesney_1.sort_values(["Address - Postcode", "lodgement-date"])
+    lesney_1[["Address", "Address - Postcode", "lodgement-date"]]
+
+    lesney_2 = archetyped_data[archetyped_data["archetype ID"] == 4].copy()
+    lesney_2 = lesney_2.sort_values(["Address - Postcode", "lodgement-date"])
+    lesney_2[["Address", "Address - Postcode", "lodgement-date"]]
+
+    lesney_3 = archetyped_data[archetyped_data["archetype ID"] == 3].copy()
+    lesney_3 = lesney_3.sort_values(["Address - Postcode", "lodgement-date"])
+    lesney_3[["Address", "Address - Postcode", "lodgement-date", "roof-description"]]
+
+    # Get the pitched roof properties, which are lesney-4
+    lesney_4 = archetyped_data[archetyped_data["archetype ID"].isin([7, 8, 9])].copy()
+    lesney_4 = lesney_4.sort_values(["Address - Postcode", "lodgement-date"])
+    lesney_4[["Address", "Address - Postcode", "lodgement-date", "roof-description"]]
+
+    assigned_archetypes = archetyped_data[
+        ["Asset Reference", "archetype ID", "Address"] + chosen_combination +
+        ["lodgement-date", "current-energy-rating", "current-energy-efficiency", "walls-description"]
+        ].copy()
+    # Map the archetype ID to their string representation
+    assigned_archetypes["archetype ID"] = assigned_archetypes["archetype ID"].replace(
+        {
+            0: "Lesney-0",
+            1: "Lesney-1",
+            4: "Lesney-2",
+            3: "Lesney-3",
+            7: "Lesney-4",
+            8: "Lesney-4",
+            9: "Lesney-4",
+            2: "Lesney-0",
+            5: "Lesney-2",
+            6: "Lesney-0",
+        }
+    )
+
+    assigned_archetypes["Asset Reference"] = assigned_archetypes["Asset Reference"].astype(int)
+
+    assigned_archetypes.to_csv(
+        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Orbit - Wates/assigned_archetypes.csv", index=False
+    )
+
+
+def culworth_court():
+    """
+    Some rough works on Cuthwork Court
+
+    They're looking at an ASHP/GSHP
+
+    :return:
+    """
+
+    asset_list = pd.read_excel(
+        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Orbit - Wates/001 - EPC CULWORTH COURT.xlsx",
+        sheet_name="EPC C",
+        header=1
+    )
+    asset_list = clean_colnames(asset_list)
+
+    # Let's get the EPC data
+    # Get the EPC data
+    epc_data = []
+    for _, home in tqdm(asset_list.iterrows(), total=len(asset_list)):
+
+        address = home["Address"]
+        # Spelling error
+        if "Frinstead" in address:
+            address = address.replace("Frinstead", "Frinsted")
+
+        address1 = address.split(",")[0]
+
+        asset_type_map = {
+            "HOUSE": "House",
+            "BUNGALOWS": "Bungalow",
+            "FLATS": "Flat",
+            "MAISONETTES": "Maisonette",
+        }
+
+        searcher = SearchEpc(
+            address1=address1,
+            postcode=home["Address - Postcode"],
+            auth_token=EPC_AUTH_TOKEN,
+            os_api_key="",
+            full_address=address,
+        )
+        searcher.ordnance_survey_client.property_type = asset_type_map[home["Asset Type"]]
+        searcher.ordnance_survey_client.built_form = None
+
+        searcher.find_property(skip_os=True)
+        if searcher.newest_epc is None:
+            raise Exception("Couldn't find")
+
+        epc_data.append(
+            {
+                "Asset Reference": home["Asset Reference"],
+                **searcher.newest_epc.copy()
+            }
+        )
+    epc_data = pd.DataFrame(epc_data)
+
+    asset_list = asset_list.merge(epc_data, on="Asset Reference", how="left")
+    asset_list["floor-level"] = np.where(
+        asset_list["floor-level"] == "NODATA!",
+        "",
+        asset_list["floor-level"]
+    )
+
+    asset_list["built-form"] = np.where(
+        asset_list["built-form"] == "Enclosed End-Terrace",
+        "End-Terrace",
+        asset_list["built-form"]
+    )
+
+    archetype_combinations = asset_list[
+        ["Asset Type", "Property Type", "built-form", "floor-level"]
+    ].drop_duplicates()
+
+    z = asset_list[asset_list["built-form"] == "Enclosed End-Terrace"]
diff --git a/recommendations/tests/test_data/heating_recommendations_data.py b/recommendations/tests/test_data/heating_recommendations_data.py
index 0656e917..8bc43efb 100644
--- a/recommendations/tests/test_data/heating_recommendations_data.py
+++ b/recommendations/tests/test_data/heating_recommendations_data.py
@@ -55,11 +55,17 @@ testing_examples = [
             'fixed-lighting-outlets-count': 10.0, 'low-energy-fixed-light-count': 7.0, 'uprn': 100110195416.0,
             'uprn-source': 'Address Matched'
         },
-        "kwh": {
-
-        },
-        "recommendation_descripptions": [
-
-        ]
+        "heating_recommendation_descriptions": [
+            "Install an air source heat pump, and upgrade heating controls to Smart Thermostats, room sensors and "
+            "smart radiator valves (time & temperature zone control). The cost includes the £7500 boiler upgrade "
+            "scheme grant",
+        ],
+        "heating_controls_recommendation_descriptions": [
+            "Upgrade heating controls to Smart Thermostats, room sensors and smart radiator valves (time & "
+            "temperature zone control)"
+        ],
+        "notes": "This property has a boiler, radiators & mains gas with good efficiency so the only recommendation"
+                 "we expect here is for an air source heat pump. The heating controls are a programmer, room thermostat"
+                 "and TRVs and so we should expect a TTZC recommendation"
     }
 ]
diff --git a/recommendations/tests/test_heating_recommendations.py b/recommendations/tests/test_heating_recommendations.py
index 76927702..35373729 100644
--- a/recommendations/tests/test_heating_recommendations.py
+++ b/recommendations/tests/test_heating_recommendations.py
@@ -1,3 +1,4 @@
+from datetime import datetime
 import pandas as pd
 import msgpack
 from utils.s3 import read_dataframe_from_s3_parquet, read_from_s3
@@ -29,7 +30,18 @@ class TestHeatingRecommendations:
 
     @pytest.fixture
     def kwh_client(self):
-        return KwhData(bucket="retrofit-data-dev", read_consumption_data=True)
+        client = KwhData(bucket="retrofit-data-dev", read_consumption_data=False)
+        # We fix this pricing table for these tests
+        client.retail_price_comparison = pd.DataFrame(
+            [
+                {
+                    "Date": datetime.today().strftime("%Y-%m-%d"),
+                    'Average standard variable tariff (Large legacy suppliers)': 1
+                }
+            ]
+        )
+        client.retail_price_comparison["Date"] = pd.to_datetime(client.retail_price_comparison["Date"])
+        return client
 
     @pytest.mark.parametrize(
         "test_case",
@@ -60,8 +72,21 @@ class TestHeatingRecommendations:
                 "energy_assessment_is_newer": False
             }
         )
-        # TODO: Implement me
-        kwh_predictions = test_case["kwhs"]
+
+        # For these tests, this can be fixed
+        kwh_predictions = {
+            "heating_kwh_predictions": pd.DataFrame(
+                [
+                    {"id": p.uprn, "predictions": 12000}
+                ]
+            ),
+            "hotwater_kwh_predictions": pd.DataFrame(
+                [
+                    {"id": p.uprn, "predictions": 3000}
+                ]
+            ),
+        }
+
         p.set_features(cleaned=cleaned, kwh_client=kwh_client, kwh_predictions=kwh_predictions)
 
         recommender = HeatingRecommender(property_instance=p)
@@ -71,4 +96,19 @@ class TestHeatingRecommendations:
 
         recommender.recommend(has_cavity_or_loft_recommendations=False)
 
-        # TODO: We check results against expected behaviour
+        assert len(recommender.heating_recommendations) == len(test_case["heating_recommendation_descriptions"])
+        assert (
+            len(recommender.heating_control_recommendations) ==
+            len(test_case["heating_controls_recommendation_descriptions"])
+        )
+
+        # Check the exact descriptions
+        assert (
+            {x["description"] for x in recommender.heating_recommendations} ==
+            set(test_case["heating_recommendation_descriptions"])
+        )
+
+        assert (
+            {x["description"] for x in recommender.heating_control_recommendations} ==
+            set(test_case["heating_controls_recommendation_descriptions"])
+        )

From 8004d2f1263bec4d83e2344224f76ace5bb4fd95 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sat, 31 Aug 2024 15:18:05 +0100
Subject: [PATCH 145/182] adding heating unit tests

---
 backend/apis/GoogleSolarApi.py                |  12 +-
 backend/app/plan/schemas.py                   |   4 +-
 etl/customers/orbit/archetypes.py             |  85 +++++------
 .../orbit/funding_example_portfolio.py        | 141 ++++++++++++++++++
 recommendations/HeatingRecommender.py         |  10 +-
 recommendations/SolarPvRecommendations.py     |  18 ++-
 .../test_data/heating_recommendations_data.py | 135 +++++++++++++++--
 7 files changed, 336 insertions(+), 69 deletions(-)
 create mode 100644 etl/customers/orbit/funding_example_portfolio.py

diff --git a/backend/apis/GoogleSolarApi.py b/backend/apis/GoogleSolarApi.py
index e930fcff..41ec7c11 100644
--- a/backend/apis/GoogleSolarApi.py
+++ b/backend/apis/GoogleSolarApi.py
@@ -148,7 +148,7 @@ class GoogleSolarApi:
         # Extract key data from the insights response
         self.roof_segments = self.insights_data["solarPotential"].get('roofSegmentStats', [])
         # Automatically exclude north-facing segments
-        self.exclude_north_facing_segments()
+        self.exclude_north_facing_segments(property_instance=property_instance)
         # If a property is semi-detached, it's possible for us to include segments from an attached unit
         if (property_instance.data["built-form"] == "Semi-Detached") and (
             property_instance.data["extension-count"] == 0
@@ -291,6 +291,8 @@ class GoogleSolarApi:
                 )
 
             roi_summary = pd.DataFrame(roi_summary)
+            if roi_summary.empty:
+                continue
 
             weighted_ratio = np.average(
                 roi_summary["ratio"].values, weights=roi_summary["generated_dc_energy"].values
@@ -309,7 +311,7 @@ class GoogleSolarApi:
                 }
             )
 
-        panel_performance = pd.DataFrame([panel_performance])
+        panel_performance = pd.DataFrame(panel_performance)
 
         if panel_performance.empty:
             self.panel_performance = pd.DataFrame(
@@ -487,7 +489,7 @@ class GoogleSolarApi:
 
         self.panel_performance = panel_performance
 
-    def exclude_north_facing_segments(self):
+    def exclude_north_facing_segments(self, property_instance):
         """
         Filter out any north-facing roof segments from the roof_segments attribute.
 
@@ -498,7 +500,9 @@ class GoogleSolarApi:
         for segment_index, segment in enumerate(self.roof_segments):
             segment["segmentIndex"] = segment_index
             # Check if the segment is north-facing
-            if self.NORTH_FACING_AZIMUTH_RANGE[0] <= segment['azimuthDegrees'] <= self.NORTH_FACING_AZIMUTH_RANGE[1]:
+            if (
+                self.NORTH_FACING_AZIMUTH_RANGE[0] <= segment['azimuthDegrees'] <= self.NORTH_FACING_AZIMUTH_RANGE[1]
+            ) and not property_instance.roof["is_flat"]:
                 continue
 
             filtered_segments.append(segment)
diff --git a/backend/app/plan/schemas.py b/backend/app/plan/schemas.py
index 63ca7834..04a1eb89 100644
--- a/backend/app/plan/schemas.py
+++ b/backend/app/plan/schemas.py
@@ -35,7 +35,9 @@ class PlanTriggerRequest(BaseModel):
         "air_source_heat_pump",
         "internal_wall_insulation",
         "external_wall_insulation",
-        "secondary_heating"
+        "secondary_heating",
+        "boiler_upgrade",
+        "high_heat_retention_storage_heater",
     }
 
     _allowed_goals = {"Increasing EPC"}
diff --git a/etl/customers/orbit/archetypes.py b/etl/customers/orbit/archetypes.py
index 988da74f..cee18267 100644
--- a/etl/customers/orbit/archetypes.py
+++ b/etl/customers/orbit/archetypes.py
@@ -77,47 +77,48 @@ def lesney_farms():
         29291,  # No EPC for 225 Slade Green Road, Erith, Kent, DA8 2JW
     ]
     # Get the EPC data
-    epc_data = []
-    for _, home in tqdm(all_assets.iterrows(), total=len(all_assets)):
-        if home["Asset Reference"] in known_no_epc:
-            continue
-
-        address = home["Address"]
-        # Spelling error
-        if "Frinstead" in address:
-            address = address.replace("Frinstead", "Frinsted")
-
-        address1 = address.split(",")[0]
-
-        asset_type_map = {
-            "HOUSE": "House",
-            "BUNGALOWS": "Bungalow",
-            "FLATS": "Flat",
-            "MAISONETTES": "Maisonette",
-        }
-
-        searcher = SearchEpc(
-            address1=address1,
-            postcode=home["Address - Postcode"],
-            auth_token=EPC_AUTH_TOKEN,
-            os_api_key="",
-            full_address=address,
-        )
-        searcher.ordnance_survey_client.property_type = asset_type_map[home["Asset Type"]]
-        searcher.ordnance_survey_client.built_form = None
-
-        searcher.find_property(skip_os=True)
-        if searcher.newest_epc is None:
-            raise Exception("Couldn't find")
-
-        epc_data.append(
-            {
-                "Asset Reference": home["Asset Reference"],
-                **searcher.newest_epc.copy()
-            }
-        )
-
-    epc_data = pd.DataFrame(epc_data)
+    # epc_data = []
+    # for _, home in tqdm(all_assets.iterrows(), total=len(all_assets)):
+    #     if home["Asset Reference"] in known_no_epc:
+    #         continue
+    #
+    #     address = home["Address"]
+    #     # Spelling error
+    #     if "Frinstead" in address:
+    #         address = address.replace("Frinstead", "Frinsted")
+    #
+    #     address1 = address.split(",")[0]
+    #
+    #     asset_type_map = {
+    #         "HOUSE": "House",
+    #         "BUNGALOWS": "Bungalow",
+    #         "FLATS": "Flat",
+    #         "MAISONETTES": "Maisonette",
+    #     }
+    #
+    #     searcher = SearchEpc(
+    #         address1=address1,
+    #         postcode=home["Address - Postcode"],
+    #         auth_token=EPC_AUTH_TOKEN,
+    #         os_api_key="",
+    #         full_address=address,
+    #     )
+    #     searcher.ordnance_survey_client.property_type = asset_type_map[home["Asset Type"]]
+    #     searcher.ordnance_survey_client.built_form = None
+    #
+    #     searcher.find_property(skip_os=True)
+    #     if searcher.newest_epc is None:
+    #         raise Exception("Couldn't find")
+    #
+    #     epc_data.append(
+    #         {
+    #             "Asset Reference": home["Asset Reference"],
+    #             **searcher.newest_epc.copy()
+    #         }
+    #     )
+    #
+    # epc_data = pd.DataFrame(epc_data)
+    epc_data = pd.read_csv("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Orbit - Wates/Bexley EPC data.csv", )
     # epc_data.to_csv(
     #     "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Orbit - Wates/Bexley EPC data.csv", index=False
     # )
@@ -316,7 +317,7 @@ def lesney_farms():
     lesney_4[["Address", "Address - Postcode", "lodgement-date", "roof-description"]]
 
     assigned_archetypes = archetyped_data[
-        ["Asset Reference", "archetype ID", "Address"] + chosen_combination +
+        ["Asset Reference", "archetype ID", "Address", "Address - Postcode"] + chosen_combination +
         ["lodgement-date", "current-energy-rating", "current-energy-efficiency", "walls-description"]
         ].copy()
     # Map the archetype ID to their string representation
diff --git a/etl/customers/orbit/funding_example_portfolio.py b/etl/customers/orbit/funding_example_portfolio.py
new file mode 100644
index 00000000..cf0e151f
--- /dev/null
+++ b/etl/customers/orbit/funding_example_portfolio.py
@@ -0,0 +1,141 @@
+import pandas as pd
+
+from utils.s3 import save_csv_to_s3
+
+USER_ID = 8
+PORTFOLIO_ID = 100
+
+
+def app():
+    """
+    Save a four-property pilot asset list, plus one fixed solar PV recommendation per property, via
+    save_csv_to_s3 and print a request body for each of the following scenarios:
+    1) EWI
+    2) EWI + Solar
+    3) EWI + Solar + ASHP
+    """
+
+    asset_list = [
+        # This is an example of a low D - SAP score is 60
+        {
+            "address": "37, Birling Road",
+            "postcode": "DA8 3JQ",
+            "uprn": 100020225444
+        },
+        {
+            "address": "16, Brasted Road",
+            "postcode": "DA8 3HU",
+            "uprn": 100020225805
+        },
+        {
+            "address": "25, Birling Road",
+            "postcode": "DA8 3JQ",
+            "uprn": 100020225432,
+        },
+        {
+            "address": "4, Halstead Road",
+            "postcode": "DA8 3HX",
+            "uprn": 100020229555
+        }
+    ]
+    asset_list = pd.DataFrame(asset_list)
+
+    filename = f"{USER_ID}/{PORTFOLIO_ID}/pilot.csv"
+    save_csv_to_s3(
+        dataframe=asset_list,
+        bucket_name="retrofit-plan-inputs-dev",
+        file_name=filename
+    )
+
+    non_invasive_recs = []
+    for _, al in asset_list.iterrows():
+        solar_rec = {
+            "type": "solar_pv",
+            "suitable": True,
+            "array_wattage": 4000,
+            "initial_ac_kwh_per_year": 3800,
+            "cost": 4009,
+            "panneled_roof_area": 20  # Rough estimate for 10 panels, around 1m x 1.8m (accommodate gaps and 30cm edge)
+        }
+
+        non_invasive_recs.append({
+            "uprn": al["uprn"],
+            "recommendations": [solar_rec],
+        })
+
+    # Store non-invasive recommendations in S3
+    non_invasive_recommendations_filename = f"{USER_ID}/{PORTFOLIO_ID}/non_invasive_recommendations.csv"
+    save_csv_to_s3(
+        dataframe=pd.DataFrame(non_invasive_recs),
+        bucket_name="retrofit-plan-inputs-dev",
+        file_name=non_invasive_recommendations_filename
+    )
+
+    body1 = {
+        "portfolio_id": str(PORTFOLIO_ID),
+        "housing_type": "Private",
+        "goal": "Increasing EPC",
+        "goal_value": "A",
+        "trigger_file_path": filename,
+        "already_installed_file_path": "",
+        "patches_file_path": "",
+        "non_invasive_recommendations_file_path": "",  # left empty: this scenario excludes solar_pv
+        "scenario_name": "ECO4 funding - EWI",
+        "multi_plan": True,
+        "exclusions": [
+            "internal_wall_insulation",
+            "roof_insulation", "ventilation", "floor_insulation", "windows", "fireplace", "heating", "hot_water",
+            "lighting", "secondary_heating", "solar_pv"
+        ],
+        "budget": None,
+    }
+    print(body1)
+
+    body2 = {
+        "portfolio_id": str(PORTFOLIO_ID),
+        "housing_type": "Private",
+        "goal": "Increasing EPC",
+        "goal_value": "A",
+        "trigger_file_path": filename,
+        "already_installed_file_path": "",
+        "patches_file_path": "",
+        "non_invasive_recommendations_file_path": non_invasive_recommendations_filename,
+        "scenario_name": "ECO4 funding - EWI + Solar",
+        "multi_plan": True,
+        "exclusions": [
+            "internal_wall_insulation",
+            "roof_insulation",
+            "ventilation",
+            "floor_insulation",
+            "windows",
+            "fireplace",
+            "heating",
+            "hot_water",
+            "lighting",
+            "secondary_heating",
+            "boiler_upgrade",
+            "high_heat_retention_storage_heater",
+        ],
+        "budget": None,
+    }
+    print(body2)
+
+    body3 = {
+        "portfolio_id": str(PORTFOLIO_ID),
+        "housing_type": "Private",
+        "goal": "Increasing EPC",
+        "goal_value": "A",
+        "trigger_file_path": filename,
+        "already_installed_file_path": "",
+        "patches_file_path": "",
+        "non_invasive_recommendations_file_path": non_invasive_recommendations_filename,
+        "scenario_name": "ECO4 funding - EWI + Solar + ASHP",
+        "multi_plan": True,
+        "exclusions": [
+            "internal_wall_insulation",
+            "roof_insulation", "ventilation", "floor_insulation", "windows", "fireplace", "hot_water",
+            "lighting", "secondary_heating",
+        ],
+        "budget": None,
+    }
+    print(body3)
diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py
index d8e597e7..edac68b5 100644
--- a/recommendations/HeatingRecommender.py
+++ b/recommendations/HeatingRecommender.py
@@ -72,7 +72,10 @@ class HeatingRecommender:
         # This first iteration of the recommender will provide very basic recommendation
         # We recommend heating controls based on the main heating system
 
-        if self.is_high_heat_retention_valid() and not ashp_only_heating_recommendation:
+        if (self.is_high_heat_retention_valid() and
+            (not ashp_only_heating_recommendation) and
+            ("boiler_upgrade" not in exclusions)
+        ):
             # Recommend high heat retention storage heaters
             # TODO: We need to allow for the possibility that the property aleady has storage heaters, but just
             #       needs the controls
@@ -106,7 +109,10 @@ class HeatingRecommender:
             electic_heating_has_mains or
             has_gas_heaters or
             portable_heaters_has_mains
-        ) and not ashp_only_heating_recommendation):
+        ) and
+            (not ashp_only_heating_recommendation) and
+            ("boiler_upgrade" not in exclusions)
+        ):
             # This indicates that the home previously did not have a boiler in place and so would require
             # an overhaul to the system - right now, this is all reasons, apart from if there is an existing boiler
             system_change = not has_boiler
diff --git a/recommendations/SolarPvRecommendations.py b/recommendations/SolarPvRecommendations.py
index 9456519a..d0d555c9 100644
--- a/recommendations/SolarPvRecommendations.py
+++ b/recommendations/SolarPvRecommendations.py
@@ -99,7 +99,11 @@ class SolarPvRecommendations:
         best_configurations = panel_performance.head(1).reset_index(drop=True)
 
         for rank, recommendation_config in best_configurations.iterrows():
-            roof_coverage_percent = round(recommendation_config["panneled_roof_area"] / total_roof_area * 100)
+            # Use panneled_roof_area from the recommendation_config when present; calculating it otherwise is a TODO
+            if recommendation_config.get("panneled_roof_area", None):
+                roof_coverage_percent = round(recommendation_config["panneled_roof_area"] / total_roof_area * 100)
+            else:
+                raise Exception("IMPLEMENT ME")
             # Spread the cost to the individual units - adding a 20% contingency
             total_cost = recommendation_config["total_cost"] / n_units
             kw = np.floor(recommendation_config["array_wattage"] / 100) / 10
@@ -162,9 +166,12 @@ class SolarPvRecommendations:
 
         if non_invasive_recommendation.get("array_wattage") is not None:
 
-            roof_area = esimtate_pitched_roof_area(
-                floor_area=self.property.insulation_floor_area, floor_height=self.property.data["floor-height"]
-            )
+            if self.property.roof["is_flat"]:
+                roof_area = self.property.insulation_floor_area
+            else:
+                roof_area = esimtate_pitched_roof_area(
+                    floor_area=self.property.insulation_floor_area, floor_height=self.property.data["floor-height"]
+                )
             solar_configurations = pd.DataFrame(
                 [
                     {
@@ -175,6 +182,7 @@ class SolarPvRecommendations:
                 ]
             )
         else:
+            # TODO: There may be some instances where we don't want to use the solar API so we should cover for them
             panel_performance = self.property.solar_panel_configuration["panel_performance"]
             roof_area = self.property.roof_area
             solar_configurations = panel_performance.head(3).reset_index(drop=True)
@@ -182,6 +190,8 @@ class SolarPvRecommendations:
         # We combine each of these configurations with estimates with and without a battery
         for rank, recommendation_config in solar_configurations.iterrows():
             roof_coverage_percent = round(recommendation_config["panneled_roof_area"] / roof_area * 100)
+            # We round up to the nearest 10
+            roof_coverage_percent = np.ceil(roof_coverage_percent / 10) * 10
             for has_battery in [False, True]:
                 cost_result = self.costs.solar_pv(
                     wattage=recommendation_config["array_wattage"],
diff --git a/recommendations/tests/test_data/heating_recommendations_data.py b/recommendations/tests/test_data/heating_recommendations_data.py
index 8bc43efb..7f8c4682 100644
--- a/recommendations/tests/test_data/heating_recommendations_data.py
+++ b/recommendations/tests/test_data/heating_recommendations_data.py
@@ -1,19 +1,3 @@
-# import random
-# from pathlib import Path
-# import inspect
-# import pandas as pd
-#
-# # this can be used to get example data to build the test cases
-# src_file_path = inspect.getfile(lambda: None)
-# EPC_DIRECTORY = Path(src_file_path).parent / "local_data" / "all-domestic-certificates"
-# epc_directories = [entry for entry in EPC_DIRECTORY.iterdir() if entry.is_dir()]
-# directory = random.sample(epc_directories, 1)[0]
-# data = pd.read_csv(directory / "certificates.csv", low_memory=False)
-# # Rename the columns to the same format as the api returns
-# data.columns = [c.replace("_", "-").lower() for c in data.columns]
-#
-# eg = data.sample(1).to_dict("records")[0]
-
 testing_examples = [
     {
         "epc": {
@@ -67,5 +51,124 @@ testing_examples = [
         "notes": "This property has a boiler, radiators & mains gas with good efficiency so the only recommendation"
                  "we expect here is for an air source heat pump. The heating controls are a programmer, room thermostat"
                  "and TRVs and so we should expect a TTZC recommendation"
+    },
+    {
+        "epc": {
+            'lmk-key': '153995620832008100717310934068296', 'address1': 'Apartment 13 The Quays',
+            'address2': 'Burscough', 'address3': None, 'postcode': 'L40 5TW',
+            'building-reference-number': 2604281568, 'current-energy-rating': 'C', 'potential-energy-rating': 'B',
+            'current-energy-efficiency': 69, 'potential-energy-efficiency': 84, 'property-type': 'Flat',
+            'built-form': 'Detached', 'inspection-date': '2008-10-06', 'local-authority': 'E07000127',
+            'constituency': 'E14001033', 'county': 'Lancashire', 'lodgement-date': '2008-10-07',
+            'transaction-type': 'marketed sale', 'environment-impact-current': 78,
+            'environment-impact-potential': 78, 'energy-consumption-current': 195,
+            'energy-consumption-potential': 192.0, 'co2-emissions-current': 1.7,
+            'co2-emiss-curr-per-floor-area': 29, 'co2-emissions-potential': 1.7, 'lighting-cost-current': 35,
+            'lighting-cost-potential': 38, 'heating-cost-current': 108, 'heating-cost-potential': 89,
+            'hot-water-cost-current': 256, 'hot-water-cost-potential': 104, 'total-floor-area': 57.2,
+            'energy-tariff': 'Single', 'mains-gas-flag': 'N', 'floor-level': '1st', 'flat-top-storey': 'Y',
+            'flat-storey-count': 2.0, 'main-heating-controls': 2603.0, 'multi-glaze-proportion': 100.0,
+            'glazed-type': 'double glazing installed during or after 2002', 'glazed-area': 'Normal',
+            'extension-count': 0.0, 'number-habitable-rooms': 3.0, 'number-heated-rooms': 3.0,
+            'low-energy-lighting': 77.0, 'number-open-fireplaces': 0.0,
+            'hotwater-description': 'Electric immersion, standard tariff', 'hot-water-energy-eff': 'Very Poor',
+            'hot-water-env-eff': 'Poor', 'floor-description': '(other premises below)', 'floor-energy-eff': None,
+            'floor-env-eff': None, 'windows-description': 'Fully double glazed', 'windows-energy-eff': 'Good',
+            'windows-env-eff': 'Good', 'walls-description': 'Cavity wall, as built, insulated (assumed)',
+            'walls-energy-eff': 'Good', 'walls-env-eff': 'Good',
+            'secondheat-description': 'Portable electric heaters', 'sheating-energy-eff': None,
+            'sheating-env-eff': None, 'roof-description': '(another dwelling above)', 'roof-energy-eff': None,
+            'roof-env-eff': None, 'mainheat-description': 'Room heaters, electric',
+            'mainheat-energy-eff': 'Very Poor', 'mainheat-env-eff': 'Poor',
+            'mainheatcont-description': 'Programmer and appliance thermostats', 'mainheatc-energy-eff': 'Good',
+            'mainheatc-env-eff': 'Good', 'lighting-description': 'Low energy lighting in 77% of fixed outlets',
+            'lighting-energy-eff': 'Very Good', 'lighting-env-eff': 'Very Good',
+            'main-fuel': 'electricity - this is for backwards compatibility only and should not be used',
+            'wind-turbine-count': 0.0, 'heat-loss-corridor': 'heated corridor', 'unheated-corridor-length': None,
+            'floor-height': 2.3, 'photo-supply': 0.0, 'solar-water-heating-flag': 'N',
+            'mechanical-ventilation': 'natural', 'address': 'Apartment 13 The Quays, Burscough',
+            'local-authority-label': 'West Lancashire', 'constituency-label': 'West Lancashire',
+            'posttown': 'ORMSKIRK', 'construction-age-band': 'England and Wales: 2003-2006',
+            'lodgement-datetime': '2008-10-07 17:31:09', 'tenure': 'owner-occupied',
+            'fixed-lighting-outlets-count': None, 'low-energy-fixed-light-count': None, 'uprn': 10012342725.0,
+            'uprn-source': 'Address Matched', 'used': None
+        },
+        "heating_recommendation_descriptions": [
+            "Install high heat retention electric storage heaters and upgrade heating controls to High Heat Retention "
+            "Storage Heater Controls"
+        ],
+        "heating_controls_recommendation_descriptions": [],
+        "notes": "This property has electric room heaters and is off gas so a boiler recommendation is not appropriate."
+                 "We would expect a high heat retention storage recommendation. The property is a flat and therefore"
+                 "we don't expect an air source heat pump recommendation. We also wouldn't expect a specific heating"
+                 "control recommendation here"
+    },
+    {
+        'lmk-key': '751851300152012022010205497220090', 'address1': '21, Fullers Close', 'address2': 'Kelvedon',
+        'address3': None, 'postcode': 'CO5 9JX', 'building-reference-number': 8075968, 'current-energy-rating': 'D',
+        'potential-energy-rating': 'D', 'current-energy-efficiency': 55, 'potential-energy-efficiency': 56,
+        'property-type_x': 'Bungalow', 'built-form_x': 'Detached', 'inspection-date': '2012-02-20',
+        'local-authority': 'E07000067', 'constituency': 'E14001045', 'county': 'Essex', 'lodgement-date': '2012-02-20',
+        'transaction-type': 'non marketed sale', 'environment-impact-current': 39, 'environment-impact-potential': 39,
+        'energy-consumption-current': 475, 'energy-consumption-potential': 472.0, 'co2-emissions-current': 5.4,
+        'co2-emiss-curr-per-floor-area': 84, 'co2-emissions-potential': 5.4, 'lighting-cost-current': 53.0,
+        'lighting-cost-potential': 40.0, 'heating-cost-current': 674.0, 'heating-cost-potential': 678.0,
+        'hot-water-cost-current': 110.0, 'hot-water-cost-potential': 110.0, 'total-floor-area': 64.45,
+        'energy-tariff': 'dual', 'mains-gas-flag': 'N', 'floor-level': 'NODATA!', 'flat-top-storey': None,
+        'flat-storey-count': None, 'main-heating-controls': '2402', 'multi-glaze-proportion': 100.0,
+        'glazed-type': 'double glazing installed before 2002', 'glazed-area': 'Normal', 'extension-count': 0.0,
+        'number-habitable-rooms': 3.0, 'number-heated-rooms': 3.0, 'low-energy-lighting': 67.0,
+        'number-open-fireplaces': 0.0, 'hotwater-description': 'Electric immersion, off-peak',
+        'hot-water-energy-eff': 'Average', 'hot-water-env-eff': 'Very Poor',
+        'floor-description': 'Suspended, no insulation (assumed)', 'floor-energy-eff': None, 'floor-env-eff': None,
+        'windows-description': 'Fully double glazed', 'windows-energy-eff': 'Average', 'windows-env-eff': 'Average',
+        'walls-description': 'Cavity wall, as built, insulated (assumed)', 'walls-energy-eff': 'Good',
+        'walls-env-eff': 'Good', 'secondheat-description': 'Room heaters, electric', 'sheating-energy-eff': None,
+        'sheating-env-eff': None, 'roof-description': 'Pitched, 300+ mm loft insulation',
+        'roof-energy-eff': 'Very Good',
+        'roof-env-eff': 'Very Good', 'mainheat-description': 'Electric storage heaters', 'mainheat-energy-eff': 'Poor',
+        'mainheat-env-eff': 'Very Poor', 'mainheatcont-description': 'Automatic charge control',
+        'mainheatc-energy-eff': 'Average', 'mainheatc-env-eff': 'Average',
+        'lighting-description': 'Low energy lighting in 67% of fixed outlets', 'lighting-energy-eff': 'Good',
+        'lighting-env-eff': 'Good', 'main-fuel': 'electricity (not community)', 'wind-turbine-count': 0.0,
+        'heat-loss-corridor': 'NO DATA!', 'unheated-corridor-length': None, 'floor-height': 2.38, 'photo-supply': 0.0,
+        'solar-water-heating-flag': None, 'mechanical-ventilation': 'natural', 'address': '21, Fullers Close, Kelvedon',
+        'local-authority-label': 'Braintree', 'constituency-label': 'Witham', 'posttown': 'COLCHESTER',
+        'construction-age-band': 'England and Wales: 1983-1990', 'lodgement-datetime': '2012-02-20 10:20:54',
+        'tenure': 'owner-occupied', 'fixed-lighting-outlets-count': 6.0, 'low-energy-fixed-light-count': 4.0,
+        'uprn': 100090311351.0, 'uprn-source': 'Address Matched', 'property-type_y': None, 'built-form_y': None,
+        'used': None
     }
+
 ]
+
+import random
+from pathlib import Path
+import inspect
+import pandas as pd
+
+# Dev helper (runs on import): samples a local EPC record whose main heating isn't covered by testing_examples yet
+src_file_path = inspect.getfile(lambda: None)
+EPC_DIRECTORY = Path(src_file_path).parent / "local_data" / "all-domestic-certificates"
+epc_directories = [entry for entry in EPC_DIRECTORY.iterdir() if entry.is_dir()]
+directory = random.sample(epc_directories, 1)[0]
+data = pd.read_csv(directory / "certificates.csv", low_memory=False)
+# Rename the columns to the same format as the api returns
+data.columns = [c.replace("_", "-").lower() for c in data.columns]
+
+used_examples = pd.DataFrame(
+    [
+        {
+            "mainheat-description": x["epc"]["mainheat-description"],
+            "mainheat-energy-eff": x["epc"]["mainheat-energy-eff"],
+            "property-type": x["epc"]["property-type"],
+            "built-form": x["epc"]["built-form"],
+            "used": True
+        } for x in testing_examples
+    ]
+)
+
+data = data.merge(used_examples, how="left", on=["mainheat-description", "mainheat-energy-eff"])
+data = data[pd.isnull(data["used"])]
+
+eg = data.sample(1).to_dict("records")[0]

From a5a0fa3574499fa7e3c2b040330ff3347a6c65af Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sat, 31 Aug 2024 15:29:52 +0100
Subject: [PATCH 146/182] adding heating recommendation unit tests

---
 .../test_data/heating_recommendations_data.py | 127 +++++++++++++-----
 .../tests/test_heating_recommendations.py     |  10 ++
 2 files changed, 101 insertions(+), 36 deletions(-)

diff --git a/recommendations/tests/test_data/heating_recommendations_data.py b/recommendations/tests/test_data/heating_recommendations_data.py
index 7f8c4682..b5e7c42f 100644
--- a/recommendations/tests/test_data/heating_recommendations_data.py
+++ b/recommendations/tests/test_data/heating_recommendations_data.py
@@ -104,42 +104,95 @@ testing_examples = [
                  "control recommendation here"
     },
     {
-        'lmk-key': '751851300152012022010205497220090', 'address1': '21, Fullers Close', 'address2': 'Kelvedon',
-        'address3': None, 'postcode': 'CO5 9JX', 'building-reference-number': 8075968, 'current-energy-rating': 'D',
-        'potential-energy-rating': 'D', 'current-energy-efficiency': 55, 'potential-energy-efficiency': 56,
-        'property-type_x': 'Bungalow', 'built-form_x': 'Detached', 'inspection-date': '2012-02-20',
-        'local-authority': 'E07000067', 'constituency': 'E14001045', 'county': 'Essex', 'lodgement-date': '2012-02-20',
-        'transaction-type': 'non marketed sale', 'environment-impact-current': 39, 'environment-impact-potential': 39,
-        'energy-consumption-current': 475, 'energy-consumption-potential': 472.0, 'co2-emissions-current': 5.4,
-        'co2-emiss-curr-per-floor-area': 84, 'co2-emissions-potential': 5.4, 'lighting-cost-current': 53.0,
-        'lighting-cost-potential': 40.0, 'heating-cost-current': 674.0, 'heating-cost-potential': 678.0,
-        'hot-water-cost-current': 110.0, 'hot-water-cost-potential': 110.0, 'total-floor-area': 64.45,
-        'energy-tariff': 'dual', 'mains-gas-flag': 'N', 'floor-level': 'NODATA!', 'flat-top-storey': None,
-        'flat-storey-count': None, 'main-heating-controls': '2402', 'multi-glaze-proportion': 100.0,
-        'glazed-type': 'double glazing installed before 2002', 'glazed-area': 'Normal', 'extension-count': 0.0,
-        'number-habitable-rooms': 3.0, 'number-heated-rooms': 3.0, 'low-energy-lighting': 67.0,
-        'number-open-fireplaces': 0.0, 'hotwater-description': 'Electric immersion, off-peak',
-        'hot-water-energy-eff': 'Average', 'hot-water-env-eff': 'Very Poor',
-        'floor-description': 'Suspended, no insulation (assumed)', 'floor-energy-eff': None, 'floor-env-eff': None,
-        'windows-description': 'Fully double glazed', 'windows-energy-eff': 'Average', 'windows-env-eff': 'Average',
-        'walls-description': 'Cavity wall, as built, insulated (assumed)', 'walls-energy-eff': 'Good',
-        'walls-env-eff': 'Good', 'secondheat-description': 'Room heaters, electric', 'sheating-energy-eff': None,
-        'sheating-env-eff': None, 'roof-description': 'Pitched, 300+ mm loft insulation',
-        'roof-energy-eff': 'Very Good',
-        'roof-env-eff': 'Very Good', 'mainheat-description': 'Electric storage heaters', 'mainheat-energy-eff': 'Poor',
-        'mainheat-env-eff': 'Very Poor', 'mainheatcont-description': 'Automatic charge control',
-        'mainheatc-energy-eff': 'Average', 'mainheatc-env-eff': 'Average',
-        'lighting-description': 'Low energy lighting in 67% of fixed outlets', 'lighting-energy-eff': 'Good',
-        'lighting-env-eff': 'Good', 'main-fuel': 'electricity (not community)', 'wind-turbine-count': 0.0,
-        'heat-loss-corridor': 'NO DATA!', 'unheated-corridor-length': None, 'floor-height': 2.38, 'photo-supply': 0.0,
-        'solar-water-heating-flag': None, 'mechanical-ventilation': 'natural', 'address': '21, Fullers Close, Kelvedon',
-        'local-authority-label': 'Braintree', 'constituency-label': 'Witham', 'posttown': 'COLCHESTER',
-        'construction-age-band': 'England and Wales: 1983-1990', 'lodgement-datetime': '2012-02-20 10:20:54',
-        'tenure': 'owner-occupied', 'fixed-lighting-outlets-count': 6.0, 'low-energy-fixed-light-count': 4.0,
-        'uprn': 100090311351.0, 'uprn-source': 'Address Matched', 'property-type_y': None, 'built-form_y': None,
-        'used': None
+        "epc": {
+            'lmk-key': '751851300152012022010205497220090', 'address1': '21, Fullers Close', 'address2': 'Kelvedon',
+            'address3': None, 'postcode': 'CO5 9JX', 'building-reference-number': 8075968, 'current-energy-rating': 'D',
+            'potential-energy-rating': 'D', 'current-energy-efficiency': 55, 'potential-energy-efficiency': 56,
+            'property-type': 'Bungalow', 'built-form': 'Detached', 'inspection-date': '2012-02-20',
+            'local-authority': 'E07000067', 'constituency': 'E14001045', 'county': 'Essex',
+            'lodgement-date': '2012-02-20',
+            'transaction-type': 'non marketed sale', 'environment-impact-current': 39,
+            'environment-impact-potential': 39,
+            'energy-consumption-current': 475, 'energy-consumption-potential': 472.0, 'co2-emissions-current': 5.4,
+            'co2-emiss-curr-per-floor-area': 84, 'co2-emissions-potential': 5.4, 'lighting-cost-current': 53.0,
+            'lighting-cost-potential': 40.0, 'heating-cost-current': 674.0, 'heating-cost-potential': 678.0,
+            'hot-water-cost-current': 110.0, 'hot-water-cost-potential': 110.0, 'total-floor-area': 64.45,
+            'energy-tariff': 'dual', 'mains-gas-flag': 'N', 'floor-level': 'NODATA!', 'flat-top-storey': None,
+            'flat-storey-count': None, 'main-heating-controls': '2402', 'multi-glaze-proportion': 100.0,
+            'glazed-type': 'double glazing installed before 2002', 'glazed-area': 'Normal', 'extension-count': 0.0,
+            'number-habitable-rooms': 3.0, 'number-heated-rooms': 3.0, 'low-energy-lighting': 67.0,
+            'number-open-fireplaces': 0.0, 'hotwater-description': 'Electric immersion, off-peak',
+            'hot-water-energy-eff': 'Average', 'hot-water-env-eff': 'Very Poor',
+            'floor-description': 'Suspended, no insulation (assumed)', 'floor-energy-eff': None, 'floor-env-eff': None,
+            'windows-description': 'Fully double glazed', 'windows-energy-eff': 'Average', 'windows-env-eff': 'Average',
+            'walls-description': 'Cavity wall, as built, insulated (assumed)', 'walls-energy-eff': 'Good',
+            'walls-env-eff': 'Good', 'secondheat-description': 'Room heaters, electric', 'sheating-energy-eff': None,
+            'sheating-env-eff': None, 'roof-description': 'Pitched, 300+ mm loft insulation',
+            'roof-energy-eff': 'Very Good',
+            'roof-env-eff': 'Very Good', 'mainheat-description': 'Electric storage heaters',
+            'mainheat-energy-eff': 'Poor',
+            'mainheat-env-eff': 'Very Poor', 'mainheatcont-description': 'Automatic charge control',
+            'mainheatc-energy-eff': 'Average', 'mainheatc-env-eff': 'Average',
+            'lighting-description': 'Low energy lighting in 67% of fixed outlets', 'lighting-energy-eff': 'Good',
+            'lighting-env-eff': 'Good', 'main-fuel': 'electricity (not community)', 'wind-turbine-count': 0.0,
+            'heat-loss-corridor': 'NO DATA!', 'unheated-corridor-length': None, 'floor-height': 2.38,
+            'photo-supply': 0.0,
+            'solar-water-heating-flag': None, 'mechanical-ventilation': 'natural',
+            'address': '21, Fullers Close, Kelvedon',
+            'local-authority-label': 'Braintree', 'constituency-label': 'Witham', 'posttown': 'COLCHESTER',
+            'construction-age-band': 'England and Wales: 1983-1990', 'lodgement-datetime': '2012-02-20 10:20:54',
+            'tenure': 'owner-occupied', 'fixed-lighting-outlets-count': 6.0, 'low-energy-fixed-light-count': 4.0,
+            'uprn': 100090311351.0, 'uprn-source': 'Address Matched', 'property-type_y': None, 'built-form_y': None,
+            'used': None
+        },
+        "heating_recommendation_descriptions": [],
+        "heating_controls_recommendation_descriptions": [],
+        "notes": "This test has electric storage heaters with automatic charge control - this case should be researched"
+                 "and checked that a high heat retention storage recommendation is actually sensible. If it's not, "
+                 "we should adjust accordingly or perhaps have just a control recommendation"
+    },
+    {
+        "epc": {
+            'lmk-key': '1356416458532015082116515621278108', 'address1': '19a, St. Stephens Road', 'address2': None,
+            'address3': None, 'postcode': 'TW3 2BH', 'building-reference-number': 5821158378,
+            'current-energy-rating': 'E', 'potential-energy-rating': 'C', 'current-energy-efficiency': 54,
+            'potential-energy-efficiency': 76, 'property-type': 'Maisonette', 'built-form': 'Semi-Detached',
+            'inspection-date': '2015-08-21', 'local-authority': 'E09000018', 'constituency': 'E14000593',
+            'county': 'Greater London Authority', 'lodgement-date': '2015-08-21', 'transaction-type': 'marketed sale',
+            'environment-impact-current': 48, 'environment-impact-potential': 78, 'energy-consumption-current': 383,
+            'energy-consumption-potential': 155, 'co2-emissions-current': 3.4, 'co2-emiss-curr-per-floor-area': 68,
+            'co2-emissions-potential': 1.4, 'lighting-cost-current': 52, 'lighting-cost-potential': 34,
+            'heating-cost-current': 560, 'heating-cost-potential': 255, 'hot-water-cost-current': 166,
+            'hot-water-cost-potential': 102, 'total-floor-area': 51.0, 'energy-tariff': 'Single', 'mains-gas-flag': 'Y',
+            'floor-level': '1st', 'flat-top-storey': 'Y', 'flat-storey-count': None, 'main-heating-controls': '2104',
+            'multi-glaze-proportion': 100.0, 'glazed-type': 'double glazing, unknown install date',
+            'glazed-area': 'Normal', 'extension-count': 0.0, 'number-habitable-rooms': 3.0, 'number-heated-rooms': 3.0,
+            'low-energy-lighting': 50.0, 'number-open-fireplaces': 0.0, 'hotwater-description': 'From main system',
+            'hot-water-energy-eff': 'Average', 'hot-water-env-eff': 'Average',
+            'floor-description': '(another dwelling below)', 'floor-energy-eff': 'NO DATA!', 'floor-env-eff': None,
+            'windows-description': 'Fully double glazed', 'windows-energy-eff': 'Average', 'windows-env-eff': 'Average',
+            'walls-description': 'Solid brick, as built, no insulation (assumed)', 'walls-energy-eff': 'Very Poor',
+            'walls-env-eff': 'Very Poor', 'secondheat-description': 'Room heaters, mains gas',
+            'sheating-energy-eff': None, 'sheating-env-eff': None,
+            'roof-description': 'Pitched, 100 mm loft insulation',
+            'roof-energy-eff': 'Average', 'roof-env-eff': 'Average',
+            'mainheat-description': 'Boiler and radiators, mains gas', 'mainheat-energy-eff': 'Good',
+            'mainheat-env-eff': 'Good', 'mainheatcont-description': 'Programmer and room thermostat',
+            'mainheatc-energy-eff': 'Average', 'mainheatc-env-eff': 'Average',
+            'lighting-description': 'Low energy lighting in 50% of fixed outlets', 'lighting-energy-eff': 'Good',
+            'lighting-env-eff': 'Good', 'main-fuel': 'mains gas (not community)', 'wind-turbine-count': 0.0,
+            'heat-loss-corridor': 'no corridor', 'unheated-corridor-length': None, 'floor-height': None,
+            'photo-supply': None, 'solar-water-heating-flag': 'N', 'mechanical-ventilation': 'natural',
+            'address': '19a, St. Stephens Road', 'local-authority-label': 'Hounslow',
+            'constituency-label': 'Brentford and Isleworth', 'posttown': 'HOUNSLOW',
+            'construction-age-band': 'England and Wales: 1930-1949', 'lodgement-datetime': '2015-08-21 16:51:56',
+            'tenure': 'owner-occupied', 'fixed-lighting-outlets-count': None, 'low-energy-fixed-light-count': None,
+            'uprn': 100021560521.0, 'uprn-source': 'Address Matched', 'used': None
+        },
+        "heating_recommendation_descriptions": [],
+        "heating_controls_recommendation_descriptions": [],
+        "notes": ""
     }
-
 ]
 
 import random
@@ -168,7 +221,9 @@ used_examples = pd.DataFrame(
     ]
 )
 
-data = data.merge(used_examples, how="left", on=["mainheat-description", "mainheat-energy-eff"])
+data = data.merge(
+    used_examples, how="left", on=["mainheat-description", "mainheat-energy-eff", "built-form", "property-type"]
+)
 data = data[pd.isnull(data["used"])]
 
 eg = data.sample(1).to_dict("records")[0]
diff --git a/recommendations/tests/test_heating_recommendations.py b/recommendations/tests/test_heating_recommendations.py
index 35373729..968583e4 100644
--- a/recommendations/tests/test_heating_recommendations.py
+++ b/recommendations/tests/test_heating_recommendations.py
@@ -54,6 +54,16 @@ class TestHeatingRecommendations:
         :return:
         """
 
+        if test_case["epc"]["uprn"] == 100090311351:
+            raise Exception(
+                "This test has electric storage heaters with automatic charge control - this case should be researched"
+                "and checked that a high heat retention storage recommendation is actually sensible. If it's not, "
+                "we should adjust accordingly or perhaps have just a control recommendation"
+            )
+
+        if test_case["epc"]["uprn"] == 100021560521:
+            raise Exception("Finish this test - could do so while on the train")
+
         epc_records = {"original_epc": test_case["epc"].copy(), "full_sap_epc": {}, "old_data": []}
 
         epc_record = EPCRecord(

From c5d7867ff4edda01c9c4d86793180c6806b7280f Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Sat, 31 Aug 2024 20:59:13 +0100
Subject: [PATCH 147/182] added more heating tests

---
 .../test_data/heating_recommendations_data.py | 110 +++++++++++++++++-
 1 file changed, 105 insertions(+), 5 deletions(-)

diff --git a/recommendations/tests/test_data/heating_recommendations_data.py b/recommendations/tests/test_data/heating_recommendations_data.py
index b5e7c42f..cbc8ca65 100644
--- a/recommendations/tests/test_data/heating_recommendations_data.py
+++ b/recommendations/tests/test_data/heating_recommendations_data.py
@@ -91,7 +91,7 @@ testing_examples = [
             'posttown': 'ORMSKIRK', 'construction-age-band': 'England and Wales: 2003-2006',
             'lodgement-datetime': '2008-10-07 17:31:09', 'tenure': 'owner-occupied',
             'fixed-lighting-outlets-count': None, 'low-energy-fixed-light-count': None, 'uprn': 10012342725.0,
-            'uprn-source': 'Address Matched', 'used': None
+            'uprn-source': 'Address Matched',
         },
         "heating_recommendation_descriptions": [
             "Install high heat retention electric storage heaters and upgrade heating controls to High Heat Retention "
@@ -143,7 +143,6 @@ testing_examples = [
             'construction-age-band': 'England and Wales: 1983-1990', 'lodgement-datetime': '2012-02-20 10:20:54',
             'tenure': 'owner-occupied', 'fixed-lighting-outlets-count': 6.0, 'low-energy-fixed-light-count': 4.0,
             'uprn': 100090311351.0, 'uprn-source': 'Address Matched', 'property-type_y': None, 'built-form_y': None,
-            'used': None
         },
         "heating_recommendation_descriptions": [],
         "heating_controls_recommendation_descriptions": [],
@@ -181,13 +180,112 @@ testing_examples = [
             'mainheatc-energy-eff': 'Average', 'mainheatc-env-eff': 'Average',
             'lighting-description': 'Low energy lighting in 50% of fixed outlets', 'lighting-energy-eff': 'Good',
             'lighting-env-eff': 'Good', 'main-fuel': 'mains gas (not community)', 'wind-turbine-count': 0.0,
-            'heat-loss-corridor': 'no corridor', 'unheated-corridor-length': None, 'floor-height': None,
+            'heat-loss-corridor': 'no corridor', 'unheated-corridor-length': None, 'floor-height': 2.5,
             'photo-supply': None, 'solar-water-heating-flag': 'N', 'mechanical-ventilation': 'natural',
             'address': '19a, St. Stephens Road', 'local-authority-label': 'Hounslow',
             'constituency-label': 'Brentford and Isleworth', 'posttown': 'HOUNSLOW',
             'construction-age-band': 'England and Wales: 1930-1949', 'lodgement-datetime': '2015-08-21 16:51:56',
             'tenure': 'owner-occupied', 'fixed-lighting-outlets-count': None, 'low-energy-fixed-light-count': None,
-            'uprn': 100021560521.0, 'uprn-source': 'Address Matched', 'used': None
+            'uprn': 100021560521.0, 'uprn-source': 'Address Matched',
+        },
+        "heating_recommendation_descriptions": [],
+        "heating_controls_recommendation_descriptions": [],
+        "notes": ""
+    },
+    {
+        "epc": {
+            'lmk-key': '1164410099442014062611405027442168', 'address1': '31, Brightside Road', 'address2': None,
+            'address3': None, 'postcode': 'SE13 6EP', 'building-reference-number': 5481394278,
+            'current-energy-rating': 'E', 'potential-energy-rating': 'C', 'current-energy-efficiency': 48,
+            'potential-energy-efficiency': 79, 'property-type': 'House', 'built-form': 'Mid-Terrace',
+            'inspection-date': '2014-06-26', 'local-authority': 'E09000023', 'constituency': 'E14000789',
+            'county': 'Greater London Authority', 'lodgement-date': '2014-06-26',
+            'transaction-type': 'assessment for green deal', 'environment-impact-current': 44,
+            'environment-impact-potential': 77, 'energy-consumption-current': 334,
+            'energy-consumption-potential': 121.0, 'co2-emissions-current': 5.1, 'co2-emiss-curr-per-floor-area': 64,
+            'co2-emissions-potential': 1.9, 'lighting-cost-current': 70.0, 'lighting-cost-potential': 49.0,
+            'heating-cost-current': 964.0, 'heating-cost-potential': 571.0, 'hot-water-cost-current': 107.0,
+            'hot-water-cost-potential': 72.0, 'total-floor-area': 80.0, 'energy-tariff': 'Single',
+            'mains-gas-flag': 'Y', 'floor-level': 'NODATA!', 'flat-top-storey': None, 'flat-storey-count': None,
+            'main-heating-controls': '2102', 'multi-glaze-proportion': 100.0,
+            'glazed-type': 'double glazing installed before 2002', 'glazed-area': 'Normal', 'extension-count': 1.0,
+            'number-habitable-rooms': 3.0, 'number-heated-rooms': 3.0, 'low-energy-lighting': 56.0,
+            'number-open-fireplaces': 0.0, 'hotwater-description': 'From main system', 'hot-water-energy-eff': 'Good',
+            'hot-water-env-eff': 'Good', 'floor-description': 'Suspended, no insulation (assumed)',
+            'floor-energy-eff': None, 'floor-env-eff': None, 'windows-description': 'Fully double glazed',
+            'windows-energy-eff': 'Average', 'windows-env-eff': 'Average',
+            'walls-description': 'Solid brick, as built, no insulation (assumed)', 'walls-energy-eff': 'Very Poor',
+            'walls-env-eff': 'Very Poor', 'secondheat-description': 'Room heaters, mains gas',
+            'sheating-energy-eff': None, 'sheating-env-eff': None,
+            'roof-description': 'Pitched, no insulation (assumed)',
+            'roof-energy-eff': 'Very Poor', 'roof-env-eff': 'Very Poor',
+            'mainheat-description': 'Boiler and radiators, mains gas', 'mainheat-energy-eff': 'Good',
+            'mainheat-env-eff': 'Good', 'mainheatcont-description': 'Programmer, no room thermostat',
+            'mainheatc-energy-eff': 'Very Poor', 'mainheatc-env-eff': 'Very Poor',
+            'lighting-description': 'Low energy lighting in 56% of fixed outlets', 'lighting-energy-eff': 'Good',
+            'lighting-env-eff': 'Good', 'main-fuel': 'mains gas (not community)', 'wind-turbine-count': 0.0,
+            'heat-loss-corridor': 'NO DATA!', 'unheated-corridor-length': None, 'floor-height': 2.5,
+            'photo-supply': 0.0,
+            'solar-water-heating-flag': None, 'mechanical-ventilation': 'natural', 'address': '31, Brightside Road',
+            'local-authority-label': 'Lewisham', 'constituency-label': 'Lewisham, Deptford', 'posttown': 'LONDON',
+            'construction-age-band': 'England and Wales: before 1900', 'lodgement-datetime': '2014-06-26 11:40:50',
+            'tenure': 'owner-occupied', 'fixed-lighting-outlets-count': 9.0, 'low-energy-fixed-light-count': 5.0,
+            'uprn': 100021936225.0, 'uprn-source': 'Address Matched',
+        },
+        "heating_recommendation_descriptions": [
+            'Install an air source heat pump, and upgrade heating controls to Smart Thermostats, room sensors and '
+            'smart radiator valves (time & temperature zone control). The cost includes the £7500 boiler upgrade '
+            'scheme grant',
+        ],
+        "heating_controls_recommendation_descriptions": [
+            'upgrade heating controls to Room thermostat, programmer and TRVs',
+            'Upgrade heating controls to Smart Thermostats, room sensors and smart radiator valves (time & '
+            'temperature zone control)'
+        ],
+        "notes": "Because this property already has a boiler, we don't recommend HHR. We only have a "
+                 "heating recommendation for an air source heat pump. Because the heating controls are "
+                 "Programmer, no room thermostat, we have a programmer, room thermostat and trvs recommendation"
+                 "for heating controls and for TTZC."
+    },
+    {
+        "epc": {
+            'lmk-key': '1139584119102014052116014126342698', 'address1': '13, Starbuck Street', 'address2': 'Rudry',
+            'address3': None, 'postcode': 'CF83 3DP', 'building-reference-number': 2187913278,
+            'current-energy-rating': 'E', 'potential-energy-rating': 'D', 'current-energy-efficiency': 44,
+            'potential-energy-efficiency': 61, 'property-type': 'Flat', 'built-form': 'Semi-Detached',
+            'inspection-date': '2014-05-21', 'local-authority': 'W06000018', 'constituency': 'W07000076',
+            'county': None,
+            'lodgement-date': '2014-05-21', 'transaction-type': 'rental (private)', 'environment-impact-current': 49,
+            'environment-impact-potential': 64, 'energy-consumption-current': 343,
+            'energy-consumption-potential': 240.0, 'co2-emissions-current': 4.0, 'co2-emiss-curr-per-floor-area': 61,
+            'co2-emissions-potential': 2.8, 'lighting-cost-current': 49.0, 'lighting-cost-potential': 49.0,
+            'heating-cost-current': 752.0, 'heating-cost-potential': 429.0, 'hot-water-cost-current': 281.0,
+            'hot-water-cost-potential': 281.0, 'total-floor-area': 66.0, 'energy-tariff': 'Single',
+            'mains-gas-flag': 'N', 'floor-level': '1st', 'flat-top-storey': 'Y', 'flat-storey-count': None,
+            'main-heating-controls': 2602.0, 'multi-glaze-proportion': 100.0,
+            'glazed-type': 'double glazing installed during or after 2002', 'glazed-area': 'Normal',
+            'extension-count': 0.0, 'number-habitable-rooms': 4.0, 'number-heated-rooms': 4.0,
+            'low-energy-lighting': 86.0, 'number-open-fireplaces': 0.0,
+            'hotwater-description': 'Electric immersion, standard tariff', 'hot-water-energy-eff': 'Very Poor',
+            'hot-water-env-eff': 'Very Poor', 'floor-description': '(other premises below)', 'floor-energy-eff': None,
+            'floor-env-eff': None, 'windows-description': 'Fully double glazed', 'windows-energy-eff': 'Good',
+            'windows-env-eff': 'Good', 'walls-description': 'Cavity wall, as built, no insulation (assumed)',
+            'walls-energy-eff': 'Poor', 'walls-env-eff': 'Poor', 'secondheat-description': 'None',
+            'sheating-energy-eff': None, 'sheating-env-eff': None,
+            'roof-description': 'Pitched, 200 mm loft insulation',
+            'roof-energy-eff': 'Good', 'roof-env-eff': 'Good', 'mainheat-description': 'Room heaters, electric',
+            'mainheat-energy-eff': 'Very Poor', 'mainheat-env-eff': 'Very Poor',
+            'mainheatcont-description': 'Appliance thermostats', 'mainheatc-energy-eff': 'Good',
+            'mainheatc-env-eff': 'Good', 'lighting-description': 'Low energy lighting in 86% of fixed outlets',
+            'lighting-energy-eff': 'Very Good', 'lighting-env-eff': 'Very Good',
+            'main-fuel': 'electricity (not community)', 'wind-turbine-count': 0.0, 'heat-loss-corridor': 'no corridor',
+            'unheated-corridor-length': None, 'floor-height': None, 'photo-supply': 0.0,
+            'solar-water-heating-flag': None,
+            'mechanical-ventilation': 'natural', 'address': '13, Starbuck Street, Rudry',
+            'local-authority-label': 'Caerphilly', 'constituency-label': 'Caerphilly', 'posttown': 'CAERPHILLY',
+            'construction-age-band': 'England and Wales: 1950-1966', 'lodgement-datetime': '2014-05-21 16:01:41',
+            'tenure': 'rental (private)', 'fixed-lighting-outlets-count': 7.0, 'low-energy-fixed-light-count': 6.0,
+            'uprn': 43088770.0, 'uprn-source': 'Address Matched',
         },
         "heating_recommendation_descriptions": [],
         "heating_controls_recommendation_descriptions": [],
@@ -224,6 +322,8 @@ used_examples = pd.DataFrame(
 data = data.merge(
     used_examples, how="left", on=["mainheat-description", "mainheat-energy-eff", "built-form", "property-type"]
 )
-data = data[pd.isnull(data["used"])]
+data = data[pd.isnull(data["used"])].drop(columns=["used"])
 
 eg = data.sample(1).to_dict("records")[0]
+print(eg["mainheat-description"])
+print(eg["mainheat-energy-eff"])

From 9e088ffe51416d2afdb3cd9c7ddef33b986c8e20 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 3 Sep 2024 12:56:25 +0100
Subject: [PATCH 148/182] estimating eco/gbis eligibility for birmingham

---
 etl/customers/bcc_tender/app.py               | 159 ++++++++++++++++++
 recommendations/HeatingRecommender.py         |  94 +++++++----
 .../test_data/heating_recommendations_data.py |  68 +++++++-
 3 files changed, 282 insertions(+), 39 deletions(-)
 create mode 100644 etl/customers/bcc_tender/app.py

diff --git a/etl/customers/bcc_tender/app.py b/etl/customers/bcc_tender/app.py
new file mode 100644
index 00000000..c949eecf
--- /dev/null
+++ b/etl/customers/bcc_tender/app.py
@@ -0,0 +1,159 @@
+"""
+This script prepares some data for the Birmingham City Council tender
+"""
+import pandas as pd
+import numpy as np
+
+epc_data = pd.read_csv("local_data/all-domestic-certificates/domestic-E08000025-Birmingham/certificates.csv")
+
+# Broad assumptions
+# Around 67% of homes in the UK have an EPC; to be conservative with our estimates, we round up to 70%:
+# https://www.ons.gov.uk/peoplepopulationandcommunity/housing/articles/energyefficiencyofhousinginenglandandwales/2023
+# However, we have 322128 homes in Birmingham with an EPC, which is 76% of the total number of homes in Birmingham
+# based on the 2021 census, which put this figure at 423,500 homes
+PROPORTION_OF_HOMES_WITH_AN_EPC = 0.761
+N_HOUSEHOLDS_IN_BIRMINGHAM = 423_500
+N_HOMES_WITHOUT_AN_EPC = 423_500 - 322128
+
+# 55% of households are recipients of benefits in the West Midlands
+# (2021/2022 - https://www.statista.com/statistics/382858/uk-state-benefits-by-region/)
+PROPORTION_OF_HOMES_ON_BENEFITS = 0.55
+
+# https://www.justgroupplc.co.uk/~/media/Files/J/Just-Retirement-Corp/news-doc/2023/six-in-10-homeowners-eligible-for
+# -benefits-failing-to-claim-just-group-annual-insight-report.pdf
+PROPORTION_OF_HOMEOWNERS_CLAIMING_FOR_BENEFITS = 0.106
+
+# Breakdown of properties in council tax bands in the UK, to give us an estimate of the number of properties in A-D
+band_a_proportion = 0.239
+band_b_proportion = 0.195
+band_c_proportion = 0.219
+band_d_proportion = 0.156
+COUNCIL_TAX_BAND_A_TO_D_PROPORTION = band_a_proportion + band_b_proportion + band_c_proportion + band_d_proportion
+
+# Get the newest record, based on lodgment datetime, by uprn
+epc_data["LODGEMENT_DATETIME"] = pd.to_datetime(epc_data["LODGEMENT_DATETIME"], errors="coerce")
+epc_data = epc_data.sort_values(["LODGEMENT_DATETIME"], ascending=False).drop_duplicates("UPRN")
+
+# We want to figure out the number of properties that are eligible for ECO/GBIS funding
+
+social_tenures = ["Rented (social)", "rental (social)"]
+owner_occupied_tenures = ["Owner-occupied", "owner-occupied"]
+prs_tenures = ["Rented (private)", "rental (private)"]
+
+# If social tenure, then as long as the property is EPC D-G, it's eligible
+epc_data["eligibility_type"] = None
+
+# Eligibility 1: ECO4 help to heat group OO - tenure is owner occupied and EPC rating D-G
+epc_data["eligibility_type"] = np.where(
+    (
+        epc_data["TENURE"].isin(owner_occupied_tenures) &
+        epc_data["CURRENT_ENERGY_RATING"].isin(["D", "E", "F", "G"]) &
+        pd.isnull(epc_data["eligibility_type"])
+    ),
+    "eco4_oo_hthg_needs_scaling_on_benefits",
+    epc_data["eligibility_type"]
+)
+
+# Eligibility 2: ECO4 help to heat group PRS - tenure is private rental and EPC rating E-G
+epc_data["eligibility_type"] = np.where(
+    (
+        epc_data["TENURE"].isin(prs_tenures) &
+        epc_data["CURRENT_ENERGY_RATING"].isin(["E", "F", "G"]) &
+        pd.isnull(epc_data["eligibility_type"])
+    ),
+    "eco4_prs_hthg_needs_scaling_on_benefits",
+    epc_data["eligibility_type"]
+)
+
+# Eligibility 3: ECO4 Social housing - tenure is social rented and EPC rating D-G
+epc_data["eligibility_type"] = np.where(
+    (
+        epc_data["TENURE"].isin(social_tenures) &
+        epc_data["CURRENT_ENERGY_RATING"].isin(["D", "E", "F", "G"]) &
+        pd.isnull(epc_data["eligibility_type"])
+    ),
+    "eco4_social_housing",
+    epc_data["eligibility_type"]
+)
+
+# Eligibility 4: GBIS General Eligibility, OO - tenure is owner occupied and EPC rating D-G
+# This is a subset of Eligibility 1. We scale eco4_oo_hthg_needs_scaling based on the % of properties on benefits
+# Any properties left over are deemed not eligible for ECO4, but a % of these will be eligible for GBIS via
+# Eligibility 4: of the properties that fall out of Eligibility 1, a % fall into Eligibility 4, based on the % of
+# units being in council tax bands A-D
+
+# Eligibility 5: GBIS General Eligibility, PRS - tenure is private rental and EPC rating D-G
+# Additionally, some units that fall out of Eligibility 2 will be eligible for GBIS via Eligibility 5, via the same
+# mechanism as Eligibility 4. We handle this later
+epc_data["eligibility_type"] = np.where(
+    (
+        epc_data["TENURE"].isin(prs_tenures) &
+        epc_data["CURRENT_ENERGY_RATING"].isin(["D", "E", "F", "G"]) &
+        pd.isnull(epc_data["eligibility_type"])
+    ),
+    "gbis_prs_ge_needs_scaling_on_council_tax_band",
+    epc_data["eligibility_type"]
+)
+
+# Eligibility 6: GBIS General Eligibility, Social - tenure is social rented and EPC rating D-G, but also the property
+# should be rented out below market rate
+# This is a subset of Eligibility 3 - we likely don't need to do any scaling
+
+n_eco4_oo_hthg_needs_scaling_on_benefits = epc_data[
+    epc_data["eligibility_type"] == "eco4_oo_hthg_needs_scaling_on_benefits"
+    ].shape[0]
+
+n_eco4_prs_hthg_needs_scaling_on_benefits = epc_data[
+    epc_data["eligibility_type"] == "eco4_prs_hthg_needs_scaling_on_benefits"
+    ].shape[0]
+
+n_eco4_social = epc_data[
+    epc_data["eligibility_type"] == "eco4_social_housing"
+    ].shape[0]
+
+n_gbis_prs_ge_needs_scaling_on_council_tax_band = epc_data[
+    epc_data["eligibility_type"] == "gbis_prs_ge_needs_scaling_on_council_tax_band"
+    ].shape[0]
+
+n_eligibility_1 = np.floor(n_eco4_oo_hthg_needs_scaling_on_benefits * PROPORTION_OF_HOMEOWNERS_CLAIMING_FOR_BENEFITS)
+
+n_eligibility_2 = np.floor(n_eco4_prs_hthg_needs_scaling_on_benefits * PROPORTION_OF_HOMES_ON_BENEFITS)
+
+n_eligiblity_3 = n_eco4_social
+
+# We subtract the number of homes in eligibility 1, from the number of homes under ECO4 OO, HTHG, before scaling on
+# benefits. This gives us the number of homes that were not on benefits. We then scale this number based on the % of
+# homes in council tax bands A-D
+n_eligiblity_4 = np.floor(
+    (n_eco4_oo_hthg_needs_scaling_on_benefits - n_eligibility_1) * COUNCIL_TAX_BAND_A_TO_D_PROPORTION
+)
+
+# We also need to add on homes that fall out of eligibility 2
+n_eligibiltiy_5 = np.floor(
+    np.floor(n_gbis_prs_ge_needs_scaling_on_council_tax_band * COUNCIL_TAX_BAND_A_TO_D_PROPORTION) +
+    np.floor((n_eco4_prs_hthg_needs_scaling_on_benefits - n_eligibility_2) * COUNCIL_TAX_BAND_A_TO_D_PROPORTION)
+)
+
+total_eligible = n_eligibility_1 + n_eligibility_2 + n_eligiblity_3 + n_eligiblity_4 + n_eligibiltiy_5
+
+# We don't scale up the # of homes based on % of homes with an EPC, because
+n_owner_occupied = epc_data[epc_data["TENURE"].isin(owner_occupied_tenures)].shape[0]
+oo_eligibility = (n_eligibility_1 + n_eligiblity_4)
+
+# 68% of owner occupied are eligible
+proportion_of_oo_eligible = oo_eligibility / n_owner_occupied
+# We then use this % on the rest of the homes in Birmingham that do not have an EPC
+oo_eligible_without_an_epc = np.floor(N_HOMES_WITHOUT_AN_EPC * proportion_of_oo_eligible)
+oo_eligibility = oo_eligibility + oo_eligible_without_an_epc
+
+# All private rentals require an EPC
+prs_eligibility = (n_eligibility_2 + n_eligibiltiy_5)
+# Most social housing properties will have an EPC so we don't scale this up
+social_eligibility = n_eligiblity_3
+
+# We scale this up since this number is based on the number of homes in Birmingham with an EPC, and we want to
+# estimate the total number of homes in Birmingham
+total_eligible = oo_eligibility + prs_eligibility + social_eligibility
+
+proportion_of_homes_eligibile = total_eligible / N_HOUSEHOLDS_IN_BIRMINGHAM
+# Approx 58% of homes in Birmingham are eligible for ECO/GBIS funding
diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py
index edac68b5..78dce329 100644
--- a/recommendations/HeatingRecommender.py
+++ b/recommendations/HeatingRecommender.py
@@ -28,7 +28,7 @@ class HeatingRecommender:
             self.property.main_heating["clean_description"] in self.ELECTRIC_HEATING_DESCRIPTIONS
         )
 
-    def is_high_heat_retention_valid(self):
+    def is_high_heat_retention_valid(self, ashp_only_heating_recommendation, exclusions):
         """
         Check conditions if high heat retention storage is valid
         :return:
@@ -40,11 +40,59 @@ class HeatingRecommender:
             self.property.main_heating["clean_description"] in ["No system present, electric heaters assumed"]
         )
 
-        return self.has_electric_heating_description or electric_heating_assumed
+        has_electric = self.has_electric_heating_description or electric_heating_assumed
+
+        return (
+            has_electric and (not ashp_only_heating_recommendation) and ("boiler_upgrade" not in exclusions)
+        )
+
+    def is_boiler_upgrade_suitable(self, exclusions, ashp_only_heating_recommendation):
+        """
+        These are the conditions we apply to recommend a boiler installation
+        :return:
+        """
+
+        # 1) if the property has mains heating with boiler and radiators, we recommend optimal heating controls
+        has_boiler = self.property.main_heating["clean_description"] in ["Boiler and radiators, mains gas"]
+
+        # 2) If the property doesn't have a heating system, but it has access to the mains gas
+        no_heating_has_mains = self.property.main_heating["clean_description"] in [
+            'No system present, electric heaters assumed'
+        ] and self.property.data["mains-gas-flag"]
+
+        # The property is using room heaters (mains gas or electric) and has access to the gas mains
+        has_room_heaters = (
+            self.property.main_heating["clean_description"] in ["Room heaters, mains gas", "Room heaters, electric"] and
+            self.property.data["mains-gas-flag"]
+        )
+
+        # We also check if the property has electric heating, but it has access to the mains gas
+        electic_heating_has_mains = self.has_electric_heating_description and self.property.data["mains-gas-flag"]
+
+        portable_heaters_has_mains = (
+            self.property.main_heating["clean_description"] in ["Portable electric heaters assumed for most rooms"]
+            and
+            self.property.data["mains-gas-flag"]
+        )
+
+        is_valid = (
+            (
+                has_boiler or
+                no_heating_has_mains or
+                electic_heating_has_mains or
+                has_room_heaters or
+                portable_heaters_has_mains
+            ) and
+            (not ashp_only_heating_recommendation) and
+            ("boiler_upgrade" not in exclusions)
+        )
+
+        return is_valid, has_boiler
 
     def recommend(self, has_cavity_or_loft_recommendations, phase=0, exclusions=None):
         """
         Produces heating recommendations
+
         :param has_cavity_or_loft_recommendations: boolean indicating if we have produced a cavity or loft insulation
         recommendation. If there are cavity or loft recommendations, the property would need to complete those measures
         before being able to get the boiler upgrade scheme benefits. The messaging in the front end would be to
@@ -56,6 +104,8 @@ class HeatingRecommender:
         #       the boiler, but instead flushing the system will make it run more efficiently. There is a cost for this
         #       in the Costs class, stored as SYSTEM_FLUSH_COST
 
+        # TODO: Right now, we don't have recommendations for electric boilers - we should probably have one
+
         exclusions = [] if exclusions is None else exclusions
         non_invasive_ashp_recommendation = next(
             (r for r in self.property.non_invasive_recommendations if r["type"] == "air_source_heat_pump"),
@@ -72,47 +122,19 @@ class HeatingRecommender:
         # This first iteration of the recommender will provide very basic recommendation
         # We recommend heating controls based on the main heating system
 
-        if (self.is_high_heat_retention_valid() and
-            (not ashp_only_heating_recommendation) and
-            ("boiler_upgrade" not in exclusions)
-        ):
+        hhr_valid = self.is_high_heat_retention_valid(ashp_only_heating_recommendation, exclusions)
+
+        if hhr_valid:
             # Recommend high heat retention storage heaters
             # TODO: We need to allow for the possibility that the property aleady has storage heaters, but just
             #       needs the controls
             self.recommend_hhr_storage_heaters(phase=phase, system_change=True, heating_controls_only=False)
 
-        # if the property has mains heating with boiler and radiators, we recommend optimal heating controls
-        has_boiler = self.property.main_heating["clean_description"] in ["Boiler and radiators, mains gas"]
-
-        # We also check that the property doesn't have a heating system, but it has access to the mains gas
-        no_heating_has_mains = self.property.main_heating["clean_description"] in [
-            'No system present, electric heaters assumed'
-        ] and self.property.data["mains-gas-flag"]
-
-        has_gas_heaters = (
-            self.property.main_heating["clean_description"] in ["Room heaters, mains gas"] and
-            self.property.data["mains-gas-flag"]
+        gas_boiler_suitable, has_boiler = self.is_boiler_upgrade_suitable(
+            exclusions=exclusions, ashp_only_heating_recommendation=ashp_only_heating_recommendation
         )
 
-        # We also check if the property has electric heating, but it has access to the mains gas
-        electic_heating_has_mains = self.has_electric_heating_description and self.property.data["mains-gas-flag"]
-
-        portable_heaters_has_mains = (
-            self.property.main_heating["clean_description"] in ["Portable electric heaters assumed for most rooms"]
-            and
-            self.property.data["mains-gas-flag"]
-        )
-
-        if ((
-            has_boiler or
-            no_heating_has_mains or
-            electic_heating_has_mains or
-            has_gas_heaters or
-            portable_heaters_has_mains
-        ) and
-            (not ashp_only_heating_recommendation) and
-            ("boiler_upgrade" not in exclusions)
-        ):
+        if gas_boiler_suitable:
             # This indicates that the home previously did not have a boiler in place and so would require
             # an overhaul to the system - right now, this is all reasons, apart from if there is an existing boiler
             system_change = not has_boiler
diff --git a/recommendations/tests/test_data/heating_recommendations_data.py b/recommendations/tests/test_data/heating_recommendations_data.py
index cbc8ca65..f283050b 100644
--- a/recommendations/tests/test_data/heating_recommendations_data.py
+++ b/recommendations/tests/test_data/heating_recommendations_data.py
@@ -279,7 +279,7 @@ testing_examples = [
             'mainheatc-env-eff': 'Good', 'lighting-description': 'Low energy lighting in 86% of fixed outlets',
             'lighting-energy-eff': 'Very Good', 'lighting-env-eff': 'Very Good',
             'main-fuel': 'electricity (not community)', 'wind-turbine-count': 0.0, 'heat-loss-corridor': 'no corridor',
-            'unheated-corridor-length': None, 'floor-height': None, 'photo-supply': 0.0,
+            'unheated-corridor-length': None, 'floor-height': 2.5, 'photo-supply': 0.0,
             'solar-water-heating-flag': None,
             'mechanical-ventilation': 'natural', 'address': '13, Starbuck Street, Rudry',
             'local-authority-label': 'Caerphilly', 'constituency-label': 'Caerphilly', 'posttown': 'CAERPHILLY',
@@ -287,9 +287,67 @@ testing_examples = [
             'tenure': 'rental (private)', 'fixed-lighting-outlets-count': 7.0, 'low-energy-fixed-light-count': 6.0,
             'uprn': 43088770.0, 'uprn-source': 'Address Matched',
         },
-        "heating_recommendation_descriptions": [],
+        "heating_recommendation_descriptions": [
+            'Install high heat retention electric storage heaters and upgrade heating controls to High Heat Retention '
+            'Storage Heater Controls'
+        ],
         "heating_controls_recommendation_descriptions": [],
-        "notes": ""
+        "notes": "This property is a flat so we don't have an ASHP recommendation. It also doesn't have access to the "
+                 "mains and so it can't have a gas boiler. We don't expect any controls recommendations"
+    },
+    {
+        "epc": {
+            'lmk-key': '492646189022010060208143796198410', 'address1': '67, Ridgeway Road', 'address2': None,
+            'address3': None, 'postcode': 'HP5 2EW', 'building-reference-number': 1976846768,
+            'current-energy-rating': 'D', 'potential-energy-rating': 'D', 'current-energy-efficiency': 64,
+            'potential-energy-efficiency': 68, 'property-type': 'Bungalow', 'built-form': 'Detached',
+            'inspection-date': '2010-06-01', 'local-authority': 'E07000005', 'constituency': 'E14000631',
+            'county': 'Buckinghamshire', 'lodgement-date': '2010-06-02', 'transaction-type': 'marketed sale',
+            'environment-impact-current': 67, 'environment-impact-potential': 70, 'energy-consumption-current': 249,
+            'energy-consumption-potential': 231.0, 'co2-emissions-current': 3.5, 'co2-emiss-curr-per-floor-area': 35,
+            'co2-emissions-potential': 3.2, 'lighting-cost-current': 89.0, 'lighting-cost-potential': 51.0,
+            'heating-cost-current': 627.0, 'heating-cost-potential': 603.0, 'hot-water-cost-current': 105.0,
+            'hot-water-cost-potential': 105.0, 'total-floor-area': 76.0, 'energy-tariff': 'Single',
+            'mains-gas-flag': 'Y', 'floor-level': 'NO DATA!', 'flat-top-storey': None, 'flat-storey-count': None,
+            'main-heating-controls': 2104.0, 'multi-glaze-proportion': 100.0,
+            'glazed-type': 'double glazing installed during or after 2002', 'glazed-area': 'Normal',
+            'extension-count': 0.0, 'number-habitable-rooms': 7.0, 'number-heated-rooms': 7.0,
+            'low-energy-lighting': 25.0, 'number-open-fireplaces': 1.0, 'hotwater-description': 'From main system',
+            'hot-water-energy-eff': 'Very Good', 'hot-water-env-eff': 'Very Good',
+            'floor-description': 'Suspended, no insulation (assumed)', 'floor-energy-eff': None, 'floor-env-eff': None,
+            'windows-description': 'Fully double glazed', 'windows-energy-eff': 'Good', 'windows-env-eff': 'Good',
+            'walls-description': 'Cavity wall, filled cavity', 'walls-energy-eff': 'Good', 'walls-env-eff': 'Good',
+            'secondheat-description': 'Room heaters, wood logs', 'sheating-energy-eff': None, 'sheating-env-eff': None,
+            'roof-description': 'Pitched, 150 mm loft insulation', 'roof-energy-eff': 'Good', 'roof-env-eff': 'Good',
+            'mainheat-description': 'Boiler and radiators, mains gas', 'mainheat-energy-eff': 'Very Good',
+            'mainheat-env-eff': 'Very Good', 'mainheatcont-description': 'Programmer and room thermostat',
+            'mainheatc-energy-eff': 'Average', 'mainheatc-env-eff': 'Average',
+            'lighting-description': 'Low energy lighting in 25% of fixed outlets', 'lighting-energy-eff': 'Average',
+            'lighting-env-eff': 'Average',
+            'main-fuel': 'mains gas - this is for backwards compatibility only and should not be used',
+            'wind-turbine-count': 0.0, 'heat-loss-corridor': 'NO DATA!', 'unheated-corridor-length': None,
+            'floor-height': 2.4, 'photo-supply': 0.0, 'solar-water-heating-flag': 'N',
+            'mechanical-ventilation': 'natural', 'address': '67, Ridgeway Road', 'local-authority-label': 'Chiltern',
+            'constituency-label': 'Chesham and Amersham', 'posttown': 'CHESHAM',
+            'construction-age-band': 'England and Wales: 1930-1949', 'lodgement-datetime': '2010-06-02 08:14:37',
+            'tenure': 'owner-occupied', 'fixed-lighting-outlets-count': None, 'low-energy-fixed-light-count': None,
+            'uprn': 100080513604.0, 'uprn-source': 'Address Matched'
+        },
+        "heating_recommendation_descriptions": [
+            'Install an air source heat pump, and upgrade heating controls to Smart Thermostats, room sensors and '
+            'smart radiator valves (time & temperature zone control). The cost includes the £7500 boiler upgrade '
+            'scheme grant'
+        ],
+        "heating_controls_recommendation_descriptions": [
+            'upgrade heating controls to Room thermostat, programmer and TRVs',
+            'Upgrade heating controls to Smart Thermostats, room sensors and smart radiator valves (time & '
+            'temperature zone control)'
+
+        ],
+        "notes": "This has a very efficient boiler and is a detached bungalow, but only has "
+                 "Programmer and room thermostat for heating controls so we'd expect an ASHP heating recommendation"
+                 "as the only option, and heating controls recommendations for programmer, room thermostats and trvs"
+                 "as well as ttzc"
     }
 ]
 
@@ -306,6 +364,7 @@ directory = random.sample(epc_directories, 1)[0]
 data = pd.read_csv(directory / "certificates.csv", low_memory=False)
 # Rename the columns to the same format as the api returns
 data.columns = [c.replace("_", "-").lower() for c in data.columns]
+data["floor-height"] = data["floor-height"].fillna(2.45)
 
 used_examples = pd.DataFrame(
     [
@@ -327,3 +386,6 @@ data = data[pd.isnull(data["used"])].drop(columns=["used"])
 eg = data.sample(1).to_dict("records")[0]
 print(eg["mainheat-description"])
 print(eg["mainheat-energy-eff"])
+print(eg["property-type"])
+print(eg["built-form"])
+print(eg["mainheatcont-description"])

From 6b5246fa21831f5ef53010bd54cfa347ab5156ab Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 3 Sep 2024 17:04:27 +0100
Subject: [PATCH 149/182] finished with BCC tender

---
 etl/customers/bcc_tender/app.py | 49 +++++++++++++++++++++++++--------
 1 file changed, 38 insertions(+), 11 deletions(-)

diff --git a/etl/customers/bcc_tender/app.py b/etl/customers/bcc_tender/app.py
index c949eecf..281cf864 100644
--- a/etl/customers/bcc_tender/app.py
+++ b/etl/customers/bcc_tender/app.py
@@ -115,30 +115,33 @@ n_gbis_prs_ge_needs_scaling_on_council_tax_band = epc_data[
     epc_data["eligibility_type"] == "gbis_prs_ge_needs_scaling_on_council_tax_band"
     ].shape[0]
 
-n_eligibility_1 = np.floor(n_eco4_oo_hthg_needs_scaling_on_benefits * PROPORTION_OF_HOMEOWNERS_CLAIMING_FOR_BENEFITS)
+# We're going to make the broad assumption that all homeowners claiming benefits live in homes in council tax
+# bands A-D. Therefore there are no additional homes in eligibility 4 and 5
 
-n_eligibility_2 = np.floor(n_eco4_prs_hthg_needs_scaling_on_benefits * PROPORTION_OF_HOMES_ON_BENEFITS)
+# n_eligibility_1 = np.floor(n_eco4_oo_hthg_needs_scaling_on_benefits * PROPORTION_OF_HOMEOWNERS_CLAIMING_FOR_BENEFITS)
+n_eligibility_1 = np.floor(n_eco4_oo_hthg_needs_scaling_on_benefits * COUNCIL_TAX_BAND_A_TO_D_PROPORTION)
+
+# n_eligibility_2 = np.floor(n_eco4_prs_hthg_needs_scaling_on_benefits * PROPORTION_OF_HOMES_ON_BENEFITS)
+n_eligibility_2 = np.floor(n_eco4_prs_hthg_needs_scaling_on_benefits * COUNCIL_TAX_BAND_A_TO_D_PROPORTION)
 
 n_eligiblity_3 = n_eco4_social
 
 # We subtract the number of homes in eligiblity 1, from the number of homes under ECO4 OO, HTHG, before scaling on
 # benefits. This gives us the number of homes that were not on benefits. We then scale this number based on the % of
 # homes in council tax bands A-D
-n_eligiblity_4 = np.floor(
-    (n_eco4_oo_hthg_needs_scaling_on_benefits - n_eligibility_1) * COUNCIL_TAX_BAND_A_TO_D_PROPORTION
-)
+# n_eligiblity_4 = np.floor(
+#     (n_eco4_oo_hthg_needs_scaling_on_benefits - n_eligibility_1) * COUNCIL_TAX_BAND_A_TO_D_PROPORTION
+# )
 
 # We also need to add on homes that fall out of eligibility 2
 n_eligibiltiy_5 = np.floor(
-    np.floor(n_gbis_prs_ge_needs_scaling_on_council_tax_band * COUNCIL_TAX_BAND_A_TO_D_PROPORTION) +
-    np.floor((n_eco4_prs_hthg_needs_scaling_on_benefits - n_eligibility_2) * COUNCIL_TAX_BAND_A_TO_D_PROPORTION)
+    np.floor(n_gbis_prs_ge_needs_scaling_on_council_tax_band * COUNCIL_TAX_BAND_A_TO_D_PROPORTION)
+    # np.floor((n_eco4_prs_hthg_needs_scaling_on_benefits - n_eligibility_2) * COUNCIL_TAX_BAND_A_TO_D_PROPORTION)
 )
 
-total_eligible = n_eligibility_1 + n_eligibility_2 + n_eligiblity_3 + n_eligiblity_4 + n_eligibiltiy_5
-
 # We don't scale up the # of homes based on % of homes with an EPC, because
 n_owner_occupied = epc_data[epc_data["TENURE"].isin(owner_occupied_tenures)].shape[0]
-oo_eligibility = (n_eligibility_1 + n_eligiblity_4)
+oo_eligibility = n_eligibility_1
 
 # 68% of owner occupied are eligibiltiy
 proportion_of_oo_eligible = oo_eligibility / n_owner_occupied
@@ -156,4 +159,28 @@ social_eligibility = n_eligiblity_3
 total_eligible = oo_eligibility + prs_eligibility + social_eligibility
 
 proportion_of_homes_eligibile = total_eligible / N_HOUSEHOLDS_IN_BIRMINGHAM
-# Approx 58% of homes in Birmingham are eligible for ECO/GBIS funding
+# Approx 53% of homes in Birmingham are eligible for ECO/GBIS funding
+
+# Approximately 53% of Homes are eligible for some form of ECO4 or GBIS funding, 227k homes
+# This is broken down as follows:
+# - 155k owner occupiers
+# - 33k private rentals
+# - 39k social housing
+
+# We can't seem to identify the properties owned by the council in the company ownership data, because what is the
+# entity that owns the property? Is it the council, or is it a company that is owned by the council? We can't be sure
+# and so since BCC owns 54,000 social housing properties (5k) supported housing
+# [https://www.birmingham.gov.uk/info/50094/housing_options/2686/apply_for_social_housing#:~:text=We%20manage
+# %20around%2054%2C000%20social,a%20member%20of%20your%20household.]
+# and there are 78,410 social housing properties in Birmingham, we can assume that the council owns 54,000 of these
+# and so 69% of the social housing is owned by the Council
+
+# Since we saw that 38,779 of 78,410 social housing looked to be able to benefit from ECO/GBIS funding, we can assume
+# that 69% of these are owned by the council, which is 26,757 properties
+
+# So, with these assumptions in mind:
+# We can commit to [x] per annum based on your 54k council-owned, of which approximately 27k are likely to be eligible
+# for some form of ECO/GBIS funding. We will work directly with Housing associations to address the remaining 12k
+# social properties that may be eligible for funding through ECO/GBIS.
+# We will market directly to the 33k private rentals and 155k owner occupiers that are eligible for funding,
+# and assuming a 5% conversion, will aim to complete work on

From a46c0eed39447030f04c57fffa23d20605e7e870 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 3 Sep 2024 17:48:24 +0100
Subject: [PATCH 150/182] Setting up energy assessments extraction process

---
 backend/app/config.py                     |  3 ++
 backend/app/energy_assessments/router.py  | 65 +++++++++++++++++++++++
 backend/app/energy_assessments/schemas.py |  7 +++
 3 files changed, 75 insertions(+)
 create mode 100644 backend/app/energy_assessments/router.py
 create mode 100644 backend/app/energy_assessments/schemas.py

diff --git a/backend/app/config.py b/backend/app/config.py
index b5ea72fe..9aaa0a52 100644
--- a/backend/app/config.py
+++ b/backend/app/config.py
@@ -33,6 +33,9 @@ class Settings(BaseSettings):
     HEATING_KWH_PREDICTIONS_BUCKET: str
     HOTWATER_KWH_PREDICTIONS_BUCKET: str
 
+    # Other S3 buckets
+    ENERGY_ASSESSMENTS_BUCKET: str
+
     class Config:
         env_file = "backend/.env"
 
diff --git a/backend/app/energy_assessments/router.py b/backend/app/energy_assessments/router.py
new file mode 100644
index 00000000..8a52bdfb
--- /dev/null
+++ b/backend/app/energy_assessments/router.py
@@ -0,0 +1,65 @@
+from fastapi import APIRouter, Depends
+from starlette.responses import Response
+
+from backend.app.db.connection import db_engine
+
+from backend.app.config import get_settings
+from backend.app.dependencies import validate_token
+from backend.app.energy_assessments.schemas import EnergyAssessmentUploadPayload
+
+from sqlalchemy.exc import IntegrityError, OperationalError
+from sqlalchemy.orm import sessionmaker
+
+from utils.logger import setup_logger
+
+logger = setup_logger()
+
+router = APIRouter(
+    prefix="/energy-assessments",
+    tags=["energy-assessments"],
+    dependencies=[Depends(validate_token)],
+    responses={404: {"description": "Not found"}}
+)
+
+
+@router.post("/upload")
+async def upload(body: EnergyAssessmentUploadPayload):
+    """
+    Given a location in S3, this service will retrieve the data in s3 and perform the following:
+    1) Extract the data and store it to the database
+    2) Extract the links to other artefacts collected during the energy assessment, such as EPRs, floor plans and
+       condition reports
+
+    This will allow us to do the following:
+    1) Present the findings of the energy assessment to the client
+    2) Allow the end user to download the artefacts collected during the energy assessment
+
+    Eventually, we will this service to collect the key documents from the service where they're uploaded
+    (e.g. Onedrive) and store them to S3, but for the moment, this is sufficient
+    """
+
+    logger.info("Connecting to db")
+    session = sessionmaker(bind=db_engine)()
+
+    try:
+        logger.info("Uploading energy assessment data")
+    except IntegrityError:
+        logger.error("Database integrity error occurred", exc_info=True)
+        session.rollback()
+        return Response(status_code=500, content="Database integrity error.")
+    except OperationalError:
+        logger.error("Database operational error occurred", exc_info=True)
+        session.rollback()
+        return Response(status_code=500, content="Database operational error.")
+    except ValueError:
+        logger.error("Value error - possibly due to malformed data", exc_info=True)
+        session.rollback()
+        return Response(status_code=400, content="Bad request: malformed data.")
+    except Exception as e:  # General exception handling
+        logger.error(f"An error occurred: {e}")
+        session.rollback()
+        return Response(status_code=500, content="An unexpected error occurred.")
+    finally:
+        session.close()
+
+    return Response(status_code=200)
diff --git a/backend/app/energy_assessments/schemas.py b/backend/app/energy_assessments/schemas.py
new file mode 100644
index 00000000..83a9a44e
--- /dev/null
+++ b/backend/app/energy_assessments/schemas.py
@@ -0,0 +1,7 @@
+from pydantic import BaseModel
+
+
+class EnergyAssessmentUploadPayload(BaseModel):
+    portfolio_id: int
+    # This is the s3 location, where the informaton collected during the energy assessment is stored
+    s3_filepath: str

From 54e29a98dc9ab12f23545b3183327cfc697eb186 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 3 Sep 2024 18:32:44 +0100
Subject: [PATCH 151/182] set up structure for uploading retrofit assessments
 but not going to implement right now

---
 backend/app/energy_assessments/router.py |  2 ++
 etl/xml_survey_extraction/app.py         | 40 ++----------------------
 2 files changed, 4 insertions(+), 38 deletions(-)

diff --git a/backend/app/energy_assessments/router.py b/backend/app/energy_assessments/router.py
index 8a52bdfb..ec49c1c1 100644
--- a/backend/app/energy_assessments/router.py
+++ b/backend/app/energy_assessments/router.py
@@ -36,6 +36,8 @@ async def upload(body: EnergyAssessmentUploadPayload):
 
     Eventually, we will this service to collect the key documents from the service where they're uploaded
     (e.g. Onedrive) and store them to S3, but for the moment, this is sufficient
+
+    # TODO - Holding up on implementing this
     """
 
     logger.info("Connecting to db")
diff --git a/etl/xml_survey_extraction/app.py b/etl/xml_survey_extraction/app.py
index f5394abf..be39d0df 100644
--- a/etl/xml_survey_extraction/app.py
+++ b/etl/xml_survey_extraction/app.py
@@ -13,7 +13,7 @@ logger = setup_logger()
 BUCKET = "retrofit-energy-assessments-dev"
 USER_ID = 8
 SCENARIOS = {
-    86: {
+    101: {
         "project_code": "VDE001",
         "surveyor": "JAFFERSONS ENERGY CONSULTANTS",
         "bodies": [
@@ -64,42 +64,6 @@ SCENARIOS = {
             }
         ]
     },
-    87: {
-        "project_code": "VDE002",
-        "surveyor": "JAFFERSONS ENERGY CONSULTANTS",
-        "bodies": [
-            # Scenario A: Solar PV, AHSP
-            {
-                "portfolio_id": str(87),
-                "housing_type": "Private",
-                "goal": "Increasing EPC",
-                "goal_value": "A",
-                "trigger_file_path": "",
-                "already_installed_file_path": "",
-                "patches_file_path": "",
-                "non_invasive_recommendations_file_path": "",
-                "exclusions": ["floor_insulation", "fireplace"],
-                "budget": None,
-                "scenario_name": "Deep Retrofit",
-                "multi_plan": True,
-            },
-            # Scenario B, floor insulation, PV, AHSP
-            {
-                "portfolio_id": str(87),
-                "housing_type": "Private",
-                "goal": "Increasing EPC",
-                "goal_value": "A",
-                "trigger_file_path": "",
-                "already_installed_file_path": "",
-                "patches_file_path": "",
-                "non_invasive_recommendations_file_path": "",
-                "exclusions": ["fireplace"],
-                "budget": None,
-                "scenario_name": "Whole House Retrofit",
-                "multi_plan": True,
-            }
-        ]
-    }
 }
 
 
@@ -166,7 +130,7 @@ def main():
         # For each property, we download the xmls and extract the data
         database_data = []
         for uprn, xmls in assessments_map.items():
-            
+
             extracted_data = {}
             for xml in xmls:
                 xml_data = read_from_s3(bucket_name=BUCKET, s3_file_name=xml)

From 8a3a3dbb168c6c28534b8e8fc2a6abc03097f621 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 3 Sep 2024 19:05:57 +0100
Subject: [PATCH 152/182] setting up vectis inputs

---
 etl/xml_survey_extraction/app.py | 110 +++++++++++++++++++++++++------
 1 file changed, 89 insertions(+), 21 deletions(-)

diff --git a/etl/xml_survey_extraction/app.py b/etl/xml_survey_extraction/app.py
index be39d0df..b51c01b8 100644
--- a/etl/xml_survey_extraction/app.py
+++ b/etl/xml_survey_extraction/app.py
@@ -12,6 +12,7 @@ logger = setup_logger()
 
 BUCKET = "retrofit-energy-assessments-dev"
 USER_ID = 8
+non_invasive_recommendations_filepath = "{USER_ID}/{PORTFOLIO_ID}/non_invasive_recommendations.csv"
 SCENARIOS = {
     101: {
         "project_code": "VDE001",
@@ -19,7 +20,7 @@ SCENARIOS = {
         "bodies": [
             # Scenario A: Cavity wall insulation
             {
-                "portfolio_id": str(86),
+                "portfolio_id": str(101),
                 "housing_type": "Private",
                 "goal": "Increasing EPC",
                 "goal_value": "A",
@@ -27,14 +28,16 @@ SCENARIOS = {
                 "already_installed_file_path": "",
                 "patches_file_path": "",
                 "non_invasive_recommendations_file_path": "",
-                "exclusions": ["floor_insulation", "fireplace", "solar_pv", "heating", 'lighting'],
+                "inclusions": [
+                    "draught_proofing", "secondary_glazing", "trickle_vents", "low_energy_lighting",
+                ],
                 "budget": None,
-                "scenario_name": "Low Hanging Fruit",
+                "scenario_name": "Quick wins - do now while tenanted",
                 "multi_plan": True,
             },
             # Scenario B: CWI, Solar PV, AHSP
             {
-                "portfolio_id": str(86),
+                "portfolio_id": str(101),
                 "housing_type": "Private",
                 "goal": "Increasing EPC",
                 "goal_value": "A",
@@ -42,30 +45,95 @@ SCENARIOS = {
                 "already_installed_file_path": "",
                 "patches_file_path": "",
                 "non_invasive_recommendations_file_path": "",
-                "exclusions": ["floor_insulation", "fireplace", 'lighting'],
+                "inclusions": [
+                    "draught_proofing",
+                    "secondary_glazing",
+                    "trickle_vents",
+                    "low_energy_lighting",
+                    "suspended_floor_insulation",
+                    "internal_wall_insulation"
+                ],
                 "budget": None,
-                "scenario_name": "Deep Retrofit",
+                "scenario_name": "Do when void",
                 "multi_plan": True,
             },
-            # Scenario C, CWI, floor insulation, PV, AHSP
-            {
-                "portfolio_id": str(86),
-                "housing_type": "Private",
-                "goal": "Increasing EPC",
-                "goal_value": "A",
-                "trigger_file_path": "",
-                "already_installed_file_path": "",
-                "patches_file_path": "",
-                "non_invasive_recommendations_file_path": "",
-                "exclusions": ["fireplace", 'lighting'],
-                "budget": None,
-                "scenario_name": "Whole House Retrofit",
-                "multi_plan": True,
-            }
         ]
     },
 }
 
+# TODO: These non-intrusive recommendations should be detected from the EPRs, the scenarios and the condition report?
+NON_INTRUSITVE_RECOMMENDATIONS = [
+    {
+        # 2 Grove Mansions
+        "uprn": 121016121,
+        "recommendations": [
+            {
+                "type": "draught_proofing",
+                "cost": None,
+                "survey": True
+            },
+            {"type": "secondary_glazing", "cost": None, "survey": True},
+            {"type": "trickle_vents", "cost": None, "survey": True},
+            {"type": "suspended_floor_insulation", "cost": None, "survey": True},
+            {"type": "internal_wall_insulation", "cost": None, "survey": True},
+        ]
+    },
+    {
+        # 8 Grove Mansions
+        "uprn": 10024087855,
+        "recommendations": [
+            {"type": "draught_proofing", "cost": None, "survey": True},
+            {"type": "secondary_glazing", "cost": None, "survey": True},
+            {"type": "trickle_vents", "cost": None, "survey": True},
+            {"type": "low_energy_lighting", "cost": None, "survey": True},
+            {"type": "internal_wall_insulation", "cost": None, "survey": True},
+        ]
+    },
+    {
+        # 9 Grove Mansions
+        "uprn": 121016128,
+        "recommendations": [
+            {"type": "draught_proofing", "cost": None, "survey": True},
+            {"type": "secondary_glazing", "cost": None, "survey": True},
+            {"type": "trickle_vents", "cost": None, "survey": True},
+            {"type": "low_energy_lighting", "cost": None, "survey": True},
+            {"type": "suspended_floor_insulation", "cost": None},
+            {"type": "internal_wall_insulation", "cost": None, "survey": True},
+        ]
+    },
+    {
+        # 5 Grove Mansions
+        "uprn": 121016124,
+        "recommendations": [
+            {"type": "secondary_glazing", "cost": None, "survey": True},
+            {"type": "trickle_vents", "cost": None, "survey": True},
+            {"type": "low_energy_lighting", "cost": None, "survey": True},
+            {"type": "internal_wall_insulation", "cost": None, "survey": True},
+        ]
+    },
+    {
+        # 14 Grove Mansions
+        "uprn": 121016117,
+        "recommendations": [
+            {"type": "draught_proofing", "cost": None, "survey": True},
+            {"type": "secondary_glazing", "cost": None, "survey": True},
+            {"type": "trickle_vents", "cost": None, "survey": True},
+            {"type": "low_energy_lighting", "cost": None, "survey": True},
+            {"type": "internal_wall_insulation", "cost": None, "survey": True},
+        ]
+    },
+    {
+        # 19 Grove Mansions
+        "uprn": 121016117,
+        "recommendations": [
+            {"type": "low_energy_lighting", "cost": None, "survey": True},
+            {"type": "secondary_glazing", "cost": None, "survey": True},
+            {"type": "internal_wall_insulation", "cost": None, "survey": True},
+            {"type": "room_roof_insulation", "cost": None, "survey": True},
+        ]
+    },
+]
+
 
 def main():
     """

From 3eed20ff05c1d29444f5eb83298eeb2851735cd4 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Tue, 3 Sep 2024 19:13:15 +0100
Subject: [PATCH 153/182] not in preparation of uploading energy assessment
 data

---
 etl/xml_survey_extraction/app.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/etl/xml_survey_extraction/app.py b/etl/xml_survey_extraction/app.py
index b51c01b8..5c09b7bf 100644
--- a/etl/xml_survey_extraction/app.py
+++ b/etl/xml_survey_extraction/app.py
@@ -62,6 +62,8 @@ SCENARIOS = {
 }
 
 # TODO: These non-intrusive recommendations should be detected from the EPRs, the scenarios and the condition report?
+#       For recommendations like trickle vents, we can deduce this from the condition report, depending on the
+#       ventilation of the room and the presence of trickle vents.
 NON_INTRUSITVE_RECOMMENDATIONS = [
     {
         # 2 Grove Mansions

From 92564be655263f74a8ea9c5cc5d0309edce265ee Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 4 Sep 2024 10:26:39 +0100
Subject: [PATCH 154/182] migrating xml extraction to new router

---
 backend/app/energy_assessments/router.py  | 32 +++++++++++++++++++++--
 backend/app/energy_assessments/schemas.py |  7 +++--
 etl/xml_survey_extraction/app.py          |  2 +-
 3 files changed, 36 insertions(+), 5 deletions(-)

diff --git a/backend/app/energy_assessments/router.py b/backend/app/energy_assessments/router.py
index ec49c1c1..c4e0308b 100644
--- a/backend/app/energy_assessments/router.py
+++ b/backend/app/energy_assessments/router.py
@@ -10,6 +10,16 @@ from backend.app.energy_assessments.schemas import EnergyAssessmentUploadPayload
 from sqlalchemy.exc import IntegrityError, OperationalError
 from sqlalchemy.orm import sessionmaker
 
+from backend.app.db.functions.energy_assessment_functions import bulk_insert_energy_assessments
+from sqlalchemy.orm import sessionmaker
+from backend.app.db.connection import db_engine
+from utils.s3 import read_from_s3, list_files_and_subfolders_in_s3_folder, list_xmls_in_s3_folder, save_csv_to_s3
+from utils.logger import setup_logger
+from etl.xml_survey_extraction.XmlParser import XmlParser
+import os
+import pandas as pd
+from io import BytesIO
+
 from utils.logger import setup_logger
 
 logger = setup_logger()
@@ -37,14 +47,32 @@ async def upload(body: EnergyAssessmentUploadPayload):
     Eventually, we will this service to collect the key documents from the service where they're uploaded
     (e.g. Onedrive) and store them to S3, but for the moment, this is sufficient
 
-    # TODO - Holding up on implementing this
     """
 
     logger.info("Connecting to db")
     session = sessionmaker(bind=db_engine)()
 
     try:
-        logger.info("Uploading energy assessment data")
+        logger.info("Extracting energy assessment data")
+        energy_assessments = list_files_and_subfolders_in_s3_folder(
+            bucket_name=get_settings().ENERGY_ASSESSMENTS_BUCKET,
+            folder_name=f"{body.surveyor}/{body.project_code}/"
+        )
+
+        logger.info(
+            f"Found {len(energy_assessments)} energy assessments for {body.surveyor} and {body.project_code}"
+        )
+        assessments_map = {}
+        for assessment in energy_assessments:
+            uploaded_xmls = list_xmls_in_s3_folder(
+                bucket_name=get_settings().ENERGY_ASSESSMENTS_BUCKET,
+                folder_name=os.path.join(assessment, "docs & plans")
+            )
+            uprn = int(assessment.rstrip("/").split("/")[-1])
+            assessments_map[uprn] = uploaded_xmls
+
+        logger.info(f"Exatracted XMLS for the energy assessments")
+
     except IntegrityError:
         logger.error("Database integrity error occurred", exc_info=True)
         session.rollback()
diff --git a/backend/app/energy_assessments/schemas.py b/backend/app/energy_assessments/schemas.py
index 83a9a44e..cfee76ff 100644
--- a/backend/app/energy_assessments/schemas.py
+++ b/backend/app/energy_assessments/schemas.py
@@ -3,5 +3,8 @@ from pydantic import BaseModel
 
 class EnergyAssessmentUploadPayload(BaseModel):
     portfolio_id: int
-    # This is the s3 location, where the informaton collected during the energy assessment is stored
-    s3_filepath: str
+    # This is the energy assessment company/individual that conducted the energy assessment, where the data is uploaded
+    # against
+    surveyor: str
+    # is a code, like VEC001, which is used to identify the project and also where the data is uploaded against
+    project_code: str
diff --git a/etl/xml_survey_extraction/app.py b/etl/xml_survey_extraction/app.py
index 5c09b7bf..ffe6274c 100644
--- a/etl/xml_survey_extraction/app.py
+++ b/etl/xml_survey_extraction/app.py
@@ -15,7 +15,7 @@ USER_ID = 8
 non_invasive_recommendations_filepath = "{USER_ID}/{PORTFOLIO_ID}/non_invasive_recommendations.csv"
 SCENARIOS = {
     101: {
-        "project_code": "VDE001",
+        "project_code": "VEC001",
         "surveyor": "JAFFERSONS ENERGY CONSULTANTS",
         "bodies": [
             # Scenario A: Cavity wall insulation

From 64b423ad2e8f810e75048e3c447b9e820352c5e4 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 4 Sep 2024 11:02:56 +0100
Subject: [PATCH 155/182] pulling out other file types from s3 during energy
 assessment upload

---
 backend/app/energy_assessments/router.py | 43 ++++++++++++++++++++++--
 1 file changed, 41 insertions(+), 2 deletions(-)

diff --git a/backend/app/energy_assessments/router.py b/backend/app/energy_assessments/router.py
index c4e0308b..21c4e4c1 100644
--- a/backend/app/energy_assessments/router.py
+++ b/backend/app/energy_assessments/router.py
@@ -13,7 +13,10 @@ from sqlalchemy.orm import sessionmaker
 from backend.app.db.functions.energy_assessment_functions import bulk_insert_energy_assessments
 from sqlalchemy.orm import sessionmaker
 from backend.app.db.connection import db_engine
-from utils.s3 import read_from_s3, list_files_and_subfolders_in_s3_folder, list_xmls_in_s3_folder, save_csv_to_s3
+from utils.s3 import (
+    read_from_s3, list_files_and_subfolders_in_s3_folder, list_xmls_in_s3_folder, save_csv_to_s3,
+    list_files_in_s3_folder
+)
 from utils.logger import setup_logger
 from etl.xml_survey_extraction.XmlParser import XmlParser
 import os
@@ -68,8 +71,44 @@ async def upload(body: EnergyAssessmentUploadPayload):
                 bucket_name=get_settings().ENERGY_ASSESSMENTS_BUCKET,
                 folder_name=os.path.join(assessment, "docs & plans")
             )
+
+            energy_assessment_files = list_files_in_s3_folder(
+                bucket_name=get_settings().ENERGY_ASSESSMENTS_BUCKET,
+                folder_name=os.path.join(assessment, "docs & plans")
+            )
+            # Remove xmls from the list of files
+            energy_assessment_files = [file for file in energy_assessment_files if file not in uploaded_xmls]
+            # We now split this into the different types of files
+            # EPR
+            eprs = [
+                file for file in energy_assessment_files if "EPR.pdf" in file.split("/")[-1].replace(" ", "").lower()
+            ]
+            # Condition report
+            condition_reports = [
+                file for file in energy_assessment_files if "cr.pdf" in file.split("/")[-1].replace(" ", "").lower()
+            ]
+            # Evidence report
+            evidence_reports = [
+                file for file in energy_assessment_files
+                if "evidence.pdf" in file.split("/")[-1].replace(" ", "").lower()
+            ]
+            # Summary report
+            summary_reports = [
+                file for file in energy_assessment_files
+                if "sn.pdf" in file.split("/")[-1].replace(" ", "").lower()
+            ]
+            # Floor plans - these are just the jpgs
+            floor_plans = [file for file in energy_assessment_files if file.endswith(".jpg")]
+
             uprn = int(assessment.rstrip("/").split("/")[-1])
-            assessments_map[uprn] = uploaded_xmls
+            assessments_map[uprn] = {
+                "xmls": uploaded_xmls,
+                "eprs": eprs,
+                "condition_reports": condition_reports,
+                "evidence_reports": evidence_reports,
+                "summary_reports": summary_reports,
+                "floor_plans": floor_plans
+            }
 
         logger.info(f"Exatracted XMLS for the energy assessments")
 

From 32cdd70b71157aedcfc4d6ab8568b7acb3c38ddb Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 4 Sep 2024 11:33:48 +0100
Subject: [PATCH 156/182] debugged get_property_type from xml extractor

---
 backend/app/energy_assessments/router.py | 58 +++++++++++++++++++++---
 etl/xml_survey_extraction/XmlParser.py   | 51 ++++++++++++++++-----
 2 files changed, 91 insertions(+), 18 deletions(-)

diff --git a/backend/app/energy_assessments/router.py b/backend/app/energy_assessments/router.py
index 21c4e4c1..1e4b44f0 100644
--- a/backend/app/energy_assessments/router.py
+++ b/backend/app/energy_assessments/router.py
@@ -103,14 +103,60 @@ async def upload(body: EnergyAssessmentUploadPayload):
             uprn = int(assessment.rstrip("/").split("/")[-1])
             assessments_map[uprn] = {
                 "xmls": uploaded_xmls,
-                "eprs": eprs,
-                "condition_reports": condition_reports,
-                "evidence_reports": evidence_reports,
-                "summary_reports": summary_reports,
-                "floor_plans": floor_plans
+                "EPR": eprs,
+                "Condition Report": condition_reports,
+                "Evidence Report": evidence_reports,
+                "Summary Information": summary_reports,
+                "Floor PLan": floor_plans
             }
 
-        logger.info(f"Exatracted XMLS for the energy assessments")
+        logger.info("Extracted energy assessment data and storing file locations to database")
+        xml_data_to_store = []
+        energy_assessment_documents = []
+        for uprn, files in assessments_map.items():
+
+            # Create the rows of data to insert into the energy assessment documents
+            property_ea_docs = []
+            for doc_type, doc_files in files.items():
+                if doc_type == "xmls":
+                    continue
+                property_ea_docs.append(
+                    {
+                        "uprn": uprn,
+                        "document_type": doc_type,
+                        "document_location": doc_files
+                    }
+                )
+            energy_assessment_documents.extend(property_ea_docs)
+
+            xmls = files["xmls"]
+            extracted_data = {}
+            for xml in xmls:
+                xml_data = read_from_s3(bucket_name=get_settings().ENERGY_ASSESSMENTS_BUCKET, s3_file_name=xml)
+                xml_data_io = BytesIO(xml_data)
+                xml_parser = XmlParser(
+                    file=xml_data_io,
+                    filekey=os.path.join(f"s3://{get_settings().ENERGY_ASSESSMENTS_BUCKET}", xml),
+                    uprn=uprn,
+                    surveyor_company=body.surveyor,
+                )
+                xml_parser.run()
+                if xml_parser.is_lig:
+                    logger.info(f"Extracted data from {xml}")
+                extracted_epc = xml_parser.epc
+                extracted_additional_data = xml_parser.additional_data
+
+                data_to_update = {
+                    **extracted_epc, **extracted_additional_data
+                }
+
+                # We need to update the keys to match the database schema - i.e. we should replace all hyphens with
+                # underscores
+                data_to_update = {k.replace("-", "_"): v for k, v in data_to_update.items()}
+
+                extracted_data.update(data_to_update)
+
+            xml_data_to_store.append(extracted_data)
 
     except IntegrityError:
         logger.error("Database integrity error occurred", exc_info=True)
diff --git a/etl/xml_survey_extraction/XmlParser.py b/etl/xml_survey_extraction/XmlParser.py
index 0bc3d56b..c39e8f95 100644
--- a/etl/xml_survey_extraction/XmlParser.py
+++ b/etl/xml_survey_extraction/XmlParser.py
@@ -9,6 +9,7 @@ from etl.xml_survey_extraction.pcdb import heating_data
 PROPERTY_TYPE_LOOKUP = {
     "0": "House",
     "House": "House",
+    "2": "Flat"
 }
 
 
@@ -471,6 +472,13 @@ class XmlParser:
         if not property_type:
             property_type = self.xml.getElementsByTagName('PropertyType1')
 
+        if len(property_type) > 1:
+            property_types = {PROPERTY_TYPE_LOOKUP[p.firstChild.nodeValue] for p in property_type}
+            if len(property_types) > 1:
+                raise ValueError("Multiple property types found")
+
+            return property_types.pop()
+
         return PROPERTY_TYPE_LOOKUP[property_type[0].firstChild.nodeValue]
 
     def get_sap(self):
@@ -683,6 +691,30 @@ class XmlParser:
 
         self.perimeter = self.heat_loss_perimeter + self.party_wall_length
 
+    @staticmethod
+    def _parse_windows_content(window, glazing_type_lookup, orientation_lookup):
+
+        # There may not be a pvc frame
+        pvc_frame = window.getElementsByTagName("PVC-Frame")
+        pvc_frame = pvc_frame[0].firstChild.nodeValue if pvc_frame else None
+
+        # There may not be a glazing gap for single glazed windows
+        glazing_gap = window.getElementsByTagName("Glazing-Gap")
+        glazing_gap = glazing_gap[0].firstChild.nodeValue if glazing_gap else None
+
+        parsed = {
+            "window_location": window.getElementsByTagName("Window-Location")[0].firstChild.nodeValue,
+            "window_area": window.getElementsByTagName("Window-Area")[0].firstChild.nodeValue,
+            "window_type": window.getElementsByTagName("Window-Type")[0].firstChild.nodeValue,
+            "glazing_type": glazing_type_lookup[
+                window.getElementsByTagName("Glazing-Type")[0].firstChild.nodeValue
+            ],
+            "pvc_frame": pvc_frame,
+            "glazing_gap": glazing_gap,
+            "orientation": orientation_lookup[window.getElementsByTagName("Orientation")[0].firstChild.nodeValue]
+        }
+        return parsed
+
     def get_windows(self):
         """
         Extracts data about the windows in the property, including the number of windows and the window type.
@@ -692,7 +724,8 @@ class XmlParser:
         sap_windows = self.xml.getElementsByTagName("SAP-Windows")[0].getElementsByTagName("SAP-Window")
 
         glazing_type_lookup = {
-            "3": "double glazing, unknown install date"
+            "3": "double glazing, unknown install date",
+            "5": "Single glazing",
         }
 
         orientation_lookup = {
@@ -707,15 +740,9 @@ class XmlParser:
         }
 
         self.windows = [
-            {
-                "window_location": window.getElementsByTagName("Window-Location")[0].firstChild.nodeValue,
-                "window_area": window.getElementsByTagName("Window-Area")[0].firstChild.nodeValue,
-                "window_type": window.getElementsByTagName("Window-Type")[0].firstChild.nodeValue,
-                "glazing_type": glazing_type_lookup[
-                    window.getElementsByTagName("Glazing-Type")[0].firstChild.nodeValue
-                ],
-                "pvc_frame": window.getElementsByTagName("PVC-Frame")[0].firstChild.nodeValue,
-                "glazing_gap": window.getElementsByTagName("Glazing-Gap")[0].firstChild.nodeValue,
-                "orientation": orientation_lookup[window.getElementsByTagName("Orientation")[0].firstChild.nodeValue]
-            } for window in sap_windows
+            self._parse_windows_content(
+                window=window,
+                glazing_type_lookup=glazing_type_lookup,
+                orientation_lookup=orientation_lookup
+            ) for window in sap_windows
         ]

From 50fa3f7ad2c3923b2944b85c70d552fce3af9fcf Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 4 Sep 2024 12:08:46 +0100
Subject: [PATCH 157/182] handling parsing of fields in flats

---
 etl/xml_survey_extraction/XmlParser.py | 54 +++++++++++++++++++++-----
 1 file changed, 45 insertions(+), 9 deletions(-)

diff --git a/etl/xml_survey_extraction/XmlParser.py b/etl/xml_survey_extraction/XmlParser.py
index c39e8f95..2ea8659a 100644
--- a/etl/xml_survey_extraction/XmlParser.py
+++ b/etl/xml_survey_extraction/XmlParser.py
@@ -180,6 +180,39 @@ class XmlParser:
         # Put together all of the additional data we capture
         self.extract_additional_data()
 
+    def _parse_heat_loss_corridor(self):
+        hlc_lookup = {"2": "unheated corridor", "Unheated": "unheated corridor"}
+        if self.is_lig:
+            heat_loss_corridor = self.get_node_value('Heat-Loss-Corridor')
+        else:
+            # For some reason, this tag is spelt incorrectly in the rdsap xml
+            heat_loss_corridor = self.get_node_value('FlatCoridor')
+        return hlc_lookup[heat_loss_corridor]
+
+    def _parse_heat_loss_corridor_length(self):
+        if self.is_lig:
+            return self.get_node_value('Unheated-Corridor-Length')
+        return self.get_node_value('FlatShelteredWallLength')
+
+    def _parse_flat_storey_count(self):
+        # in the EPR the tag is Storeys
+        if self.is_lig:
+            storeys = None
+        else:
+            storeys = self.get_node_value('Storeys')
+        return storeys
+
+    def _parse_flat_top_storey(self):
+        if self.is_lig:
+            return self.get_node_value('Top-Storey')
+        return None
+
+    def _parse_floor_level(self):
+        if self.is_lig:
+            flat_details = self.xml.getElementsByTagName('SAP-Flat-Details')[0]
+            return flat_details.getElementsByTagName("Level")[0].firstChild.nodeValue
+        return None
+
     def extract_epc(self):
 
         if self.floor_dimensions is None:
@@ -191,15 +224,18 @@ class XmlParser:
         property_type = self.get_property_type()
 
         if property_type == "Flat":
-            raise NotImplementedError(
-                "Need to handle: heat-loss-corridor, unheated-corridor-length, flat-storey-count, flat-top-storey, "
-                "floor-level"
-            )
-        heat_loss_corridor = "NO DATA!"
-        unheated_corridor_length = ""
-        flat_storey_count = ""
-        flat_top_storey = ""
-        floor_level = "NO DATA!"
+            heat_loss_corridor = self._parse_heat_loss_corridor()
+            unheated_corridor_length = self._parse_heat_loss_corridor_length()
+            flat_storey_count = self._parse_flat_storey_count()
+            flat_top_storey = self._parse_flat_top_storey()
+            floor_level = self._parse_floor_level()
+
+        else:
+            heat_loss_corridor = "NO DATA!"
+            unheated_corridor_length = ""
+            flat_storey_count = ""
+            flat_top_storey = ""
+            floor_level = "NO DATA!"
 
         floor_height = np.mean([
             float(x['room_height']) for x in self.floor_dimensions if

From a5bd856bad27cd36d1af44f449279263c9a4c8d3 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 4 Sep 2024 14:02:01 +0100
Subject: [PATCH 158/182] debugging xml extraction for grove mansions

---
 etl/xml_survey_extraction/XmlParser.py | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/etl/xml_survey_extraction/XmlParser.py b/etl/xml_survey_extraction/XmlParser.py
index 2ea8659a..60e32a58 100644
--- a/etl/xml_survey_extraction/XmlParser.py
+++ b/etl/xml_survey_extraction/XmlParser.py
@@ -107,6 +107,7 @@ class XmlParser:
 
     BUILT_FORM_MAP = {
         "1": "Detached",
+        "4": "Mid-Terrace",
     }
 
     GLAZED_AREA_MAP = {
@@ -122,7 +123,8 @@ class XmlParser:
     }
 
     TENURE_MAP = {
-        '1': "Owner-occupied"
+        '1': "Owner-occupied",
+        "2": "Rented (social)"
     }
 
     TARIFF_MAP = {
@@ -421,9 +423,17 @@ class XmlParser:
         }
 
         cylinder_insulation_type = {
+            None: "",
             "1": "Foam",
         }
 
+        cylinder_insulation_thickness = int(
+            self.get_node_value('Cylinder-Insulation-Thickness')
+        ) if self.get_node_value('Cylinder-Insulation-Thickness') else None
+
+        cylinder_thermostat = boolean_lookup[self.get_node_value('Cylinder-Thermostat')] \
+            if self.get_node_value('Cylinder-Thermostat') else None
+
         self.additional_data = {
             "file_location": self.filekey,
             "surveyor_name": self.surveyor_name,
@@ -445,8 +455,8 @@ class XmlParser:
             "percent_draftproofed": int(self.get_node_value('Percent-Draughtproofed')),
             "has_hot_water_cylinder": boolean_lookup[self.get_node_value('Has-Hot-Water-Cylinder')],
             "cylinder_insulation_type": cylinder_insulation_type[self.get_node_value('Cylinder-Insulation-Type')],
-            "cylinder_insulation_thickness": int(self.get_node_value('Cylinder-Insulation-Thickness')),
-            "cylinder_thermostat": boolean_lookup[self.get_node_value('Cylinder-Thermostat')],
+            "cylinder_insulation_thickness": cylinder_insulation_thickness,
+            "cylinder_thermostat": cylinder_thermostat,
             "main_dwelling_ground_floor_area": float(main_dwelling_ground_floor_area),
             "number_of_windows": int(number_of_windows),
             "windows_area": float(windows_area),

From cdb1bebddc5435a6cfddcc6bd373aca8e8270e27 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 4 Sep 2024 14:14:20 +0100
Subject: [PATCH 159/182] debugging xml parser

---
 etl/xml_survey_extraction/XmlParser.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/etl/xml_survey_extraction/XmlParser.py b/etl/xml_survey_extraction/XmlParser.py
index 60e32a58..ffe191a4 100644
--- a/etl/xml_survey_extraction/XmlParser.py
+++ b/etl/xml_survey_extraction/XmlParser.py
@@ -107,6 +107,7 @@ class XmlParser:
 
     BUILT_FORM_MAP = {
         "1": "Detached",
+        "3": "End-Terrace",
         "4": "Mid-Terrace",
     }
 
@@ -123,8 +124,9 @@ class XmlParser:
     }
 
     TENURE_MAP = {
-        '1': "Owner-occupied",
-        "2": "Rented (social)"
+        "1": "Owner-occupied",
+        "2": "Rented (social)",
+        "3": "Rented (private)",
     }
 
     TARIFF_MAP = {

From d8e337e55dde723f2f29766716b0b4e3233c066a Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 4 Sep 2024 15:58:08 +0100
Subject: [PATCH 160/182] adding other energy assessment tables

---
 backend/app/db/models/energy_assessments.py | 27 +++++++-
 backend/app/energy_assessments/router.py    | 73 ++++++++++++++-------
 2 files changed, 77 insertions(+), 23 deletions(-)

diff --git a/backend/app/db/models/energy_assessments.py b/backend/app/db/models/energy_assessments.py
index 3928f9fa..a5667a0a 100644
--- a/backend/app/db/models/energy_assessments.py
+++ b/backend/app/db/models/energy_assessments.py
@@ -1,4 +1,4 @@
-from sqlalchemy import Column, Integer, BigInteger, Text, Float, DateTime, Boolean, Date
+from sqlalchemy import Column, Integer, BigInteger, Text, Float, DateTime, Boolean, Date, ForeignKey
 from sqlalchemy.ext.declarative import declarative_base
 
 Base = declarative_base()
@@ -163,3 +163,28 @@ class EnergyAssessment(Base):
     @staticmethod
     def empty_response():
         return {"epc": {}, "condition": {}}
+
+
+class EnergyAssessmentScenarios(Base):
+    __tablename__ = 'energy_assessment_scenarios'
+    id = Column(BigInteger, primary_key=True, autoincrement=True)
+    scenario_name = Column(Text, nullable=False)
+    energy_assessment_id = Column(BigInteger, ForeignKey('energy_assessments.id'), nullable=False)
+
+
+class EnergyAssessmentDocuments(Base):
+    __tablename__ = 'energy_assessment_documents'
+    id = Column(BigInteger, primary_key=True, autoincrement=True)
+    uprn = Column(BigInteger, nullable=False)
+    energy_assessment_id = Column(BigInteger, ForeignKey('energy_assessments.id'), nullable=False)
+    document_type = Column(Text, nullable=False)  # You can handle this using an enum if needed
+    document_location = Column(Text, nullable=False)
+    uploaded_at = Column(DateTime(timezone=True), nullable=False)
+    scenario_id = Column(BigInteger, ForeignKey('energy_assessment_scenarios.id'), nullable=True)
+
+    @staticmethod
+    def empty_response():
+        return {
+            "id": None, "uprn": None, "document_type": None, "document_location": None, "uploaded_at": None,
+            "scenario_id": None
+        }
diff --git a/backend/app/energy_assessments/router.py b/backend/app/energy_assessments/router.py
index 1e4b44f0..f0577c25 100644
--- a/backend/app/energy_assessments/router.py
+++ b/backend/app/energy_assessments/router.py
@@ -1,28 +1,24 @@
+import os
+from io import BytesIO
+
 from fastapi import APIRouter, Depends
 from starlette.responses import Response
 
-from backend.app.db.connection import db_engine
-
 from backend.app.config import get_settings
 from backend.app.dependencies import validate_token
 from backend.app.energy_assessments.schemas import EnergyAssessmentUploadPayload
 
+from sqlalchemy.orm import sessionmaker
 from sqlalchemy.exc import IntegrityError, OperationalError
-from sqlalchemy.orm import sessionmaker
-
-from backend.app.db.functions.energy_assessment_functions import bulk_insert_energy_assessments
-from sqlalchemy.orm import sessionmaker
 from backend.app.db.connection import db_engine
+from backend.app.db.functions.energy_assessment_functions import bulk_insert_energy_assessments
+
+from etl.xml_survey_extraction.XmlParser import XmlParser
+
 from utils.s3 import (
     read_from_s3, list_files_and_subfolders_in_s3_folder, list_xmls_in_s3_folder, save_csv_to_s3,
     list_files_in_s3_folder
 )
-from utils.logger import setup_logger
-from etl.xml_survey_extraction.XmlParser import XmlParser
-import os
-import pandas as pd
-from io import BytesIO
-
 from utils.logger import setup_logger
 
 logger = setup_logger()
@@ -81,7 +77,7 @@ async def upload(body: EnergyAssessmentUploadPayload):
             # We now split this into the different types of files
             # EPR
             eprs = [
-                file for file in energy_assessment_files if "EPR.pdf" in file.split("/")[-1].replace(" ", "").lower()
+                file for file in energy_assessment_files if "epr.pdf" in file.split("/")[-1].replace(" ", "").lower()
             ]
             # Condition report
             condition_reports = [
@@ -100,6 +96,31 @@ async def upload(body: EnergyAssessmentUploadPayload):
             # Floor plans - these are just the jpgs
             floor_plans = [file for file in energy_assessment_files if file.endswith(".jpg")]
 
+            # We now retrieve scenarios
+            scenario_folders = list_files_and_subfolders_in_s3_folder(
+                bucket_name=get_settings().ENERGY_ASSESSMENTS_BUCKET,
+                folder_name=assessment
+            )
+
+            # filter folders that contain the word scenario
+            scenario_folders = [
+                folder for folder in scenario_folders if "scenario" in folder.rstrip("/").split("/")[-1].lower()
+            ]
+            scenario_site_notes = []
+            scenario_draft_epcs = []
+            for sf in scenario_folders:
+                scenario_files = list_files_in_s3_folder(
+                    bucket_name=get_settings().ENERGY_ASSESSMENTS_BUCKET,
+                    folder_name=sf
+                )
+                notes = [
+                    file for file in scenario_files if "sitenotes" in file.split("/")[-1].replace(" ", "").lower()
+                ]
+                # This should be the leftovers
+                draft_epc = [file for file in scenario_files if file not in notes]
+                scenario_site_notes.extend(notes)
+                scenario_draft_epcs.extend(draft_epc)
+
             uprn = int(assessment.rstrip("/").split("/")[-1])
             assessments_map[uprn] = {
                 "xmls": uploaded_xmls,
@@ -107,26 +128,29 @@ async def upload(body: EnergyAssessmentUploadPayload):
                 "Condition Report": condition_reports,
                 "Evidence Report": evidence_reports,
                 "Summary Information": summary_reports,
-                "Floor PLan": floor_plans
+                "Floor Plan": floor_plans,
+                "Scenario Site Notes": scenario_site_notes,
+                "Scenario Draft EPC": scenario_draft_epcs
             }
 
         logger.info("Extracted energy assessment data and storing file locations to database")
         xml_data_to_store = []
         energy_assessment_documents = []
         for uprn, files in assessments_map.items():
-
             # Create the rows of data to insert into the energy assessment documents
             property_ea_docs = []
             for doc_type, doc_files in files.items():
                 if doc_type == "xmls":
                     continue
-                property_ea_docs.append(
-                    {
-                        "uprn": uprn,
-                        "document_type": doc_type,
-                        "document_location": doc_files
-                    }
-                )
+
+                for doc in doc_files:
+                    property_ea_docs.append(
+                        {
+                            "uprn": uprn,
+                            "document_type": doc_type,
+                            "document_location": doc
+                        }
+                    )
             energy_assessment_documents.extend(property_ea_docs)
 
             xmls = files["xmls"]
@@ -158,6 +182,11 @@ async def upload(body: EnergyAssessmentUploadPayload):
 
             xml_data_to_store.append(extracted_data)
 
+        logger.info("Storing energy assessment xml data to database")
+        bulk_insert_energy_assessments(session, xml_data_to_store)
+
+        # TODO: Store energy_assessment_documents
+
     except IntegrityError:
         logger.error("Database integrity error occurred", exc_info=True)
         session.rollback()

From 9bac1e713271933cd6a71f20c618b3325c01590a Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 4 Sep 2024 16:40:51 +0100
Subject: [PATCH 161/182] created basic sqlalchemy functions

---
 .../functions/energy_assessment_functions.py  | 78 ++++++++++++++++++-
 backend/app/energy_assessments/router.py      | 45 +++++++++--
 2 files changed, 114 insertions(+), 9 deletions(-)

diff --git a/backend/app/db/functions/energy_assessment_functions.py b/backend/app/db/functions/energy_assessment_functions.py
index b223d2f5..e810c168 100644
--- a/backend/app/db/functions/energy_assessment_functions.py
+++ b/backend/app/db/functions/energy_assessment_functions.py
@@ -1,7 +1,9 @@
-from backend.app.db.models.energy_assessments import EnergyAssessment
+from backend.app.db.models.energy_assessments import (
+    EnergyAssessment, EnergyAssessmentScenarios, EnergyAssessmentDocuments
+)
 from sqlalchemy.orm import Session
 from sqlalchemy.exc import IntegrityError
-from typing import Optional
+from typing import Optional, List
 from sqlalchemy import desc
 
 
@@ -60,3 +62,75 @@ def get_latest_assessment_by_uprn(session: Session, uprn: int) -> Optional[Energ
     except Exception as e:
         print(f"An error occurred: {e}")
         return None
+
+
+def create_energy_assessment_scenario(session: Session, data_list: List[dict], energy_assessment_id: int):
+    """
+    This function creates the necessary energy assessment scenarios if they don't already exist.
+
+    :param session: The SQLAlchemy session.
+    :param data_list: A list of dictionaries containing document data with scenario information.
+    :param energy_assessment_id: The ID of the energy assessment.
+    """
+    try:
+        # Extract unique scenario names from the data
+        scenario_names = {item['scenario_id'] for item in data_list if item['scenario_id'] is not None}
+
+        for scenario_name in scenario_names:
+            # Check if the scenario already exists in the database
+            existing_scenario = session.query(EnergyAssessmentScenarios).filter_by(scenario_name=scenario_name).first()
+
+            if not existing_scenario:
+                # Create a new scenario
+                new_scenario = EnergyAssessmentScenarios(
+                    scenario_name=scenario_name, energy_assessment_id=energy_assessment_id
+                )
+                session.add(new_scenario)
+
+        # Commit all scenario creations
+        session.commit()
+        print("Scenarios created successfully.")
+
+    except IntegrityError as e:
+        session.rollback()
+        print(f"Error occurred: {e}")
+
+
+def create_scenario_documents(session: Session, data_list: List[dict]):
+    """
+    This function creates documents in the energy_assessment_documents table, linking them to scenarios if applicable.
+    For usage in the energy assessment upload router
+
+    :param session: The SQLAlchemy session.
+    :param data_list: A list of dictionaries containing document data.
+    """
+    try:
+        for data in data_list:
+            scenario_name = data.get('scenario_id')
+
+            if scenario_name:
+                # Get the scenario ID from the scenario name
+                scenario = session.query(EnergyAssessmentScenarios).filter_by(scenario_name=scenario_name).first()
+
+                if scenario:
+                    data['scenario_id'] = scenario.id
+                else:
+                    print(f"Scenario '{scenario_name}' not found. Skipping document.")
+
+            # Create the new document
+            new_document = EnergyAssessmentDocuments(
+                uprn=data['uprn'],
+                document_type=data['document_type'],
+                document_location=data['document_location'],
+                scenario_id=data['scenario_id']  # Might be None
+            )
+
+            session.add(new_document)
+
+        # Commit all document insertions
+        session.commit()
+        print("Documents created successfully.")
+
+    except IntegrityError as e:
+        session.rollback()
+        print(f"Error occurred: {e}")
diff --git a/backend/app/energy_assessments/router.py b/backend/app/energy_assessments/router.py
index f0577c25..1c55f005 100644
--- a/backend/app/energy_assessments/router.py
+++ b/backend/app/energy_assessments/router.py
@@ -106,8 +106,7 @@ async def upload(body: EnergyAssessmentUploadPayload):
             scenario_folders = [
                 folder for folder in scenario_folders if "scenario" in folder.rstrip("/").split("/")[-1].lower()
             ]
-            scenario_site_notes = []
-            scenario_draft_epcs = []
+            scenario_documents = []
             for sf in scenario_folders:
                 scenario_files = list_files_in_s3_folder(
                     bucket_name=get_settings().ENERGY_ASSESSMENTS_BUCKET,
@@ -118,8 +117,13 @@ async def upload(body: EnergyAssessmentUploadPayload):
                 ]
                 # This should be the leftovers
                 draft_epc = [file for file in scenario_files if file not in notes]
-                scenario_site_notes.extend(notes)
-                scenario_draft_epcs.extend(draft_epc)
+                scenario_documents.append(
+                    {
+                        "identifier": sf.rstrip("/").split("/")[-1],
+                        "Scenario Site Notes": notes,
+                        "Scenario Draft EPC": draft_epc
+                    }
+                )
 
             uprn = int(assessment.rstrip("/").split("/")[-1])
             assessments_map[uprn] = {
@@ -129,8 +133,7 @@ async def upload(body: EnergyAssessmentUploadPayload):
                 "Evidence Report": evidence_reports,
                 "Summary Information": summary_reports,
                 "Floor Plan": floor_plans,
-                "Scenario Site Notes": scenario_site_notes,
-                "Scenario Draft EPC": scenario_draft_epcs
+                "scenario_documents": scenario_documents
             }
 
         logger.info("Extracted energy assessment data and storing file locations to database")
@@ -143,12 +146,40 @@ async def upload(body: EnergyAssessmentUploadPayload):
                 if doc_type == "xmls":
                     continue
 
+                if doc_type == "scenario_documents":
+                    for doc in doc_files:
+                        # This scenario id is put in as a placeholder means of associating the scenario documents with
+                        # the correct scenario
+                        scenario_id = doc["identifier"]
+                        for sn in doc["Scenario Site Notes"]:
+                            property_ea_docs.append(
+                                {
+                                    "uprn": uprn,
+                                    "document_type": "Scenario Site Notes",
+                                    "document_location": sn,
+                                    "scenario_id": scenario_id
+                                }
+                            )
+
+                        for d_epc in doc["Scenario Draft EPC"]:
+                            property_ea_docs.append(
+                                {
+                                    "uprn": uprn,
+                                    "document_type": "Scenario Draft EPC",
+                                    "document_location": d_epc,
+                                    "scenario_id": scenario_id
+                                }
+                            )
+
+                    continue
+
                 for doc in doc_files:
                     property_ea_docs.append(
                         {
                             "uprn": uprn,
                             "document_type": doc_type,
-                            "document_location": doc
+                            "document_location": doc,
+                            "scenario_id": None
                         }
                     )
             energy_assessment_documents.extend(property_ea_docs)

From 984abe72922d3f2a22b82c2aeb586534d4bcb35e Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Wed, 4 Sep 2024 19:39:31 +0100
Subject: [PATCH 162/182] Working upload of documents to backend

---
 .../functions/energy_assessment_functions.py  | 118 +++++++++++-------
 backend/app/db/models/energy_assessments.py   |  23 +++-
 backend/app/energy_assessments/router.py      |  39 +++++-
 3 files changed, 126 insertions(+), 54 deletions(-)

diff --git a/backend/app/db/functions/energy_assessment_functions.py b/backend/app/db/functions/energy_assessment_functions.py
index e810c168..ca2f721c 100644
--- a/backend/app/db/functions/energy_assessment_functions.py
+++ b/backend/app/db/functions/energy_assessment_functions.py
@@ -1,19 +1,26 @@
 from backend.app.db.models.energy_assessments import (
-    EnergyAssessment, EnergyAssessmentScenarios, EnergyAssessmentDocuments
+    EnergyAssessment, EnergyAssessmentScenarios, EnergyAssessmentDocuments, DocumentTypeEnum
 )
 from sqlalchemy.orm import Session
 from sqlalchemy.exc import IntegrityError
-from typing import Optional, List
+from typing import Optional, List, Dict
 from sqlalchemy import desc
+from utils.logger import setup_logger
+
+logger = setup_logger()
 
 
-def bulk_insert_energy_assessments(session: Session, data_list):
+def bulk_insert_energy_assessments(session: Session, data_list: List[dict]) -> Dict[int, int]:
     """
-    This function inserts or updates multiple energy assessment records into the database.
+    This function inserts or updates multiple energy assessment records into the database and returns a mapping of
+    uprn to energy_assessment_id.
 
     :param session: The SQLAlchemy session.
     :param data_list: A list of dictionaries containing energy assessment data.
+    :return: A dictionary mapping each uprn to its corresponding energy_assessment_id.
     """
+    uprn_to_assessment_id = {}
+
     try:
         for data in data_list:
             uprn = data.get('uprn')
@@ -30,19 +37,30 @@ def bulk_insert_energy_assessments(session: Session, data_list):
                 for key, value in data.items():
                     setattr(existing_record, key, value)
                 session.add(existing_record)
+
+                # Map the uprn to the existing record's ID
+                uprn_to_assessment_id[uprn] = existing_record.id
             else:
                 # Insert a new record
                 new_assessment = EnergyAssessment(**data)
                 session.add(new_assessment)
 
+                # Flush the session to get the newly created ID before commit
+                session.flush()
+
+                # Map the uprn to the new record's ID
+                uprn_to_assessment_id[uprn] = new_assessment.id
+
         # Commit the transaction
         session.commit()
-        print("All records inserted or updated successfully.")
+        logger.info("All records inserted or updated successfully.")
 
     except IntegrityError as e:
         # Rollback the session in case of error
         session.rollback()
-        print(f"Error occurred: {e}")
+        logger.info(f"Error occurred: {e}")
+
+    return uprn_to_assessment_id
 
 
 def get_latest_assessment_by_uprn(session: Session, uprn: int) -> Optional[EnergyAssessment]:
@@ -60,77 +78,81 @@ def get_latest_assessment_by_uprn(session: Session, uprn: int) -> Optional[Energ
 
         return latest_assessment.to_dict() if latest_assessment else EnergyAssessment.empty_response()
     except Exception as e:
-        print(f"An error occurred: {e}")
+        logger.info(f"An error occurred: {e}")
         return None
 
 
-def create_energy_assessment_scenario(session: Session, data_list: List[dict], energy_assessment_id: int):
+def create_scenarios_for_documents(session: Session, document_list: List[dict], uprn_to_assessment_id: dict):
     """
-    This function creates the necessary energy assessment scenarios if they don't already exist.
+    Creates scenarios for documents by UPRN and links them to the energy assessments.
 
     :param session: The SQLAlchemy session.
-    :param data_list: A list of dictionaries containing document data with scenario information.
-    :param energy_assessment_id: The ID of the energy assessment.
+    :param document_list: A list of dictionaries containing document data.
+    :param uprn_to_assessment_id: A dictionary mapping UPRN to energy_assessment_id.
     """
     try:
-        # Extract unique scenario names from the data
-        scenario_names = {item['scenario_id'] for item in data_list if item['scenario_id'] is not None}
+        for document in document_list:
+            uprn = document.get('uprn')
+            scenario_name = document.get('scenario_id')
 
-        for scenario_name in scenario_names:
-            # Check if the scenario already exists in the database
-            existing_scenario = session.query(EnergyAssessmentScenarios).filter_by(scenario_name=scenario_name).first()
+            if scenario_name:
+                # Get the associated energy_assessment_id for the UPRN
+                energy_assessment_id = uprn_to_assessment_id.get(uprn)
 
-            if not existing_scenario:
-                # Create a new scenario
-                new_scenario = EnergyAssessmentScenarios(
-                    scenario_name=scenario_name, energy_assessment_id=energy_assessment_id
-                )
-                session.add(new_scenario)
+                # Check if the scenario already exists
+                existing_scenario = session.query(EnergyAssessmentScenarios).filter_by(
+                    scenario_name=scenario_name,
+                    energy_assessment_id=energy_assessment_id
+                ).first()
 
-        # Commit all scenario creations
+                if not existing_scenario:
+                    # Create the scenario
+                    new_scenario = EnergyAssessmentScenarios(
+                        scenario_name=scenario_name,
+                        energy_assessment_id=energy_assessment_id
+                    )
+                    session.add(new_scenario)
+                    session.flush()  # Get the new scenario ID
+
+                    # Update document with new scenario ID
+                    document['scenario_id'] = new_scenario.id
+                else:
+                    # If the scenario already exists, just use its ID
+                    document['scenario_id'] = existing_scenario.id
+
+        # Commit the scenarios
         session.commit()
-        print("Scenarios created successfully.")
+        logger.info("Scenarios created successfully.")
 
     except IntegrityError as e:
         session.rollback()
-        print(f"Error occurred: {e}")
+        logger.info(f"Error occurred: {e}")
 
 
-def create_scenario_documents(session: Session, data_list: List[dict]):
+def create_documents(session: Session, document_list: List[dict]):
     """
-    This function creates documents in the energy_assessment_documents table, linking them to scenarios if applicable.
-    For usage in the energy assessment upload router
+    Inserts documents into the energy_assessment_documents table, linking them to scenarios and assessments.
 
     :param session: The SQLAlchemy session.
-    :param data_list: A list of dictionaries containing document data.
+    :param document_list: A list of dictionaries containing document data.
     """
     try:
-        for data in data_list:
-            scenario_name = data.get('scenario_id')
-
-            if scenario_name:
-                # Get the scenario ID from the scenario name
-                scenario = session.query(EnergyAssessmentScenarios).filter_by(scenario_name=scenario_name).first()
-
-                if scenario:
-                    data['scenario_id'] = scenario.id
-                else:
-                    print(f"Scenario '{scenario_name}' not found. Skipping document.")
-
-            # Create the new document
+        for document in document_list:
+            # Ensure the document_type is cast to Enum
             new_document = EnergyAssessmentDocuments(
-                uprn=data['uprn'],
-                document_type=data['document_type'],
-                document_location=data['document_location'],
-                scenario_id=data['scenario_id']  # Might be None
+                uprn=document['uprn'],
+                document_type=DocumentTypeEnum(document['document_type']).value,
+                document_location=document['document_location'],
+                energy_assessment_id=document['energy_assessment_id'],
+                scenario_id=document.get('scenario_id')  # Might be None if no scenario
             )
 
             session.add(new_document)
 
         # Commit all document insertions
         session.commit()
-        print("Documents created successfully.")
+        logger.info("Documents created successfully.")
 
     except IntegrityError as e:
         session.rollback()
-        print(f"Error occurred: {e}")
+        logger.info(f"Error occurred: {e}")
diff --git a/backend/app/db/models/energy_assessments.py b/backend/app/db/models/energy_assessments.py
index a5667a0a..46912c9b 100644
--- a/backend/app/db/models/energy_assessments.py
+++ b/backend/app/db/models/energy_assessments.py
@@ -1,5 +1,8 @@
 from sqlalchemy import Column, Integer, BigInteger, Text, Float, DateTime, Boolean, Date, ForeignKey
 from sqlalchemy.ext.declarative import declarative_base
+from sqlalchemy.dialects.postgresql import ENUM as PgEnum
+import enum
+from datetime import datetime
 
 Base = declarative_base()
 
@@ -172,19 +175,33 @@ class EnergyAssessmentScenarios(Base):
     energy_assessment_id = Column(BigInteger, ForeignKey('energy_assessments.id'), nullable=False)
 
 
+class DocumentTypeEnum(enum.Enum):
+    EPR = "EPR"
+    ConditionReport = "Condition Report"
+    EvidenceReport = "Evidence Report"
+    SummaryInformation = "Summary Information"
+    FloorPlan = "Floor Plan"
+    ScenarioDraftEPC = "Scenario Draft EPC"
+    ScenarioSiteNotes = "Scenario Site Notes"
+
+
 class EnergyAssessmentDocuments(Base):
     __tablename__ = 'energy_assessment_documents'
     id = Column(BigInteger, primary_key=True, autoincrement=True)
     uprn = Column(BigInteger, nullable=False)
     energy_assessment_id = Column(BigInteger, ForeignKey('energy_assessments.id'), nullable=False)
-    document_type = Column(Text, nullable=False)  # You can handle this using an enum if needed
+    document_type = Column(PgEnum(DocumentTypeEnum, name="document_type", create_type=False), nullable=False)
     document_location = Column(Text, nullable=False)
-    uploaded_at = Column(DateTime(timezone=True), nullable=False)
+    uploaded_at = Column(DateTime(timezone=True), nullable=False, default=datetime.utcnow)
     scenario_id = Column(BigInteger, ForeignKey('energy_assessment_scenarios.id'), nullable=True)
 
     @staticmethod
     def empty_response():
         return {
-            "id": None, "uprn": None, "document_type": None, "document_location": None, "uploaded_at": None,
+            "id": None,
+            "uprn": None,
+            "document_type": None,
+            "document_location": None,
+            "uploaded_at": None,
             "scenario_id": None
         }
diff --git a/backend/app/energy_assessments/router.py b/backend/app/energy_assessments/router.py
index 1c55f005..0f5fcf1b 100644
--- a/backend/app/energy_assessments/router.py
+++ b/backend/app/energy_assessments/router.py
@@ -1,5 +1,6 @@
 import os
 from io import BytesIO
+from typing import List
 
 from fastapi import APIRouter, Depends
 from starlette.responses import Response
@@ -11,7 +12,9 @@ from backend.app.energy_assessments.schemas import EnergyAssessmentUploadPayload
 from sqlalchemy.orm import sessionmaker
 from sqlalchemy.exc import IntegrityError, OperationalError
 from backend.app.db.connection import db_engine
-from backend.app.db.functions.energy_assessment_functions import bulk_insert_energy_assessments
+from backend.app.db.functions.energy_assessment_functions import (
+    bulk_insert_energy_assessments, create_scenarios_for_documents, create_documents
+)
 
 from etl.xml_survey_extraction.XmlParser import XmlParser
 
@@ -23,6 +26,29 @@ from utils.logger import setup_logger
 
 logger = setup_logger()
 
+
+def insert_energy_assessment_documents(document_list: List[dict], uprn_to_assessment_id: dict):
+    """
+    Inserts or updates energy assessment documents, assigning the correct energy_assessment_id.
+
+    :param document_list: A list of dictionaries containing document data.
+    :param uprn_to_assessment_id: A dictionary mapping UPRN to energy_assessment_id.
+    """
+    for document in document_list:
+        uprn = document['uprn']
+        # Assign the energy_assessment_id based on uprn
+        energy_assessment_id = uprn_to_assessment_id.get(uprn)
+
+        if not energy_assessment_id:
+            logger.info(f"No energy_assessment_id found for UPRN: {uprn}. Skipping document.")
+            continue
+
+        # Attach energy_assessment_id to each document
+        document['energy_assessment_id'] = energy_assessment_id
+
+    logger.info("Energy Assessment IDs assigned to documents.")
+
+
 router = APIRouter(
     prefix="/energy-assessments",
     tags=["energy-assessments"],
@@ -214,9 +240,16 @@ async def upload(body: EnergyAssessmentUploadPayload):
             xml_data_to_store.append(extracted_data)
 
         logger.info("Storing energy assessment xml data to database")
-        bulk_insert_energy_assessments(session, xml_data_to_store)
+        uprn_to_assessment_id = bulk_insert_energy_assessments(session, xml_data_to_store)
 
-        # TODO: Store energy_assessment_documents
+        # Insert energy assessment id into the documents data
+        insert_energy_assessment_documents(energy_assessment_documents, uprn_to_assessment_id)
+
+        create_scenarios_for_documents(session, energy_assessment_documents, uprn_to_assessment_id)
+
+        create_documents(session, energy_assessment_documents)
+
+        session.close()
 
     except IntegrityError:
         logger.error("Database integrity error occurred", exc_info=True)

From 85eaeccad8dd53ff055419596ecac68d01ec346b Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 5 Sep 2024 12:16:25 +0100
Subject: [PATCH 163/182] adding measure map so we can specify certain
 measures

---
 backend/app/plan/router.py             |  11 +-
 backend/app/plan/schemas.py            |  79 +++++++---
 etl/customers/bcc_tender/app.py        |  25 ++++
 etl/customers/vectis/outputs.py        | 196 +++++++++++++++++++++++++
 recommendations/Recommendations.py     |  57 +++++--
 recommendations/WallRecommendations.py |  15 +-
 6 files changed, 341 insertions(+), 42 deletions(-)
 create mode 100644 etl/customers/vectis/outputs.py

diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index e773e303..929ce7fa 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -620,6 +620,13 @@ async def trigger_plan(body: PlanTriggerRequest):
         if individual_units:
             # Model the solar potential at the property level
             for unit in tqdm(individual_units):
+
+                # TODO: Tidy up this code
+                # We don't need to do this if we have global inclusions that don't include solar
+                if body.inclusions:
+                    if "solar_pv" not in body.inclusions:
+                        continue
+
                 property_instance = [p for p in input_properties if p.id == unit["property_id"]][0]
                 # At this level, we check if the property is suitable for solar and if now, skip
                 if not property_instance.is_solar_pv_valid():
@@ -668,7 +675,9 @@ async def trigger_plan(body: PlanTriggerRequest):
         recommendations_scoring_data = []
         representative_recommendations = {}
         for p in tqdm(input_properties):
-            recommender = Recommendations(property_instance=p, materials=materials, exclusions=body.exclusions)
+            recommender = Recommendations(
+                property_instance=p, materials=materials, exclusions=body.exclusions, inclusions=body.inclusions
+            )
             property_recommendations, property_representative_recommendations = recommender.recommend()
 
             if not property_recommendations:
diff --git a/backend/app/plan/schemas.py b/backend/app/plan/schemas.py
index 04a1eb89..5487caad 100644
--- a/backend/app/plan/schemas.py
+++ b/backend/app/plan/schemas.py
@@ -1,6 +1,53 @@
 from pydantic import BaseModel, conlist, validator
 from typing import Optional
 
+TYPICAL_MEASURE_TYPES = [
+    "wall_insulation",
+    "roof_insulation",
+    "ventilation",
+    "floor_insulation",
+    "windows",
+    "fireplace",
+    "heating",
+    "hot_water",
+    "low_energy_lighting",
+    "secondary_heating",
+    "solar_pv"
+]
+
+SPECIFIC_MEASURES = [
+    # Fine-grained measures; each entry needs its own trailing comma (adjacent string literals concatenate)
+    # Walls
+    "internal_wall_insulation",
+    "external_wall_insulation",
+    "cavity_wall_insulation",
+    # Roof
+    "loft_insulation",
+    "flat_roof_insulation",
+    "room_roof_insulation",
+    # Floor
+    "suspended_floor_insulation",
+    "solid_floor_insulation",
+    # Heating
+    "boiler_upgrade",
+    "high_heat_retention_storage_heater",
+    "air_source_heat_pump",
+
+    # Specific measures that will typically come from an energy assessment
+    "trickle_vents",
+    "draught_proofing",
+    "mixed_glazing",  # This covers partial double glazing and secondary glazing
+]
+
+# This allows us to extend high level categories for measures such as "wall_insulation" to the specific measures
+# such as "external_wall_insulation", "internal_wall_insulation", "cavity_wall_insulation"
+MEASURE_MAP = {
+    "wall_insulation": ["internal_wall_insulation", "external_wall_insulation", "cavity_wall_insulation"],
+    "roof_insulation": ["loft_insulation", "flat_roof_insulation", "room_roof_insulation"],
+    "floor_insulation": ["suspended_floor_insulation", "solid_floor_insulation"],
+    "heating": ["boiler_upgrade", "high_heat_retention_storage_heater", "air_source_heat_pump"],
+}
+
 
 class PlanTriggerRequest(BaseModel):
     budget: Optional[float] = None
@@ -13,33 +60,13 @@ class PlanTriggerRequest(BaseModel):
     patches_file_path: Optional[str] = None
     non_invasive_recommendations_file_path: Optional[str] = None
     exclusions: Optional[conlist(str, min_items=1)] = None
+    inclusions: Optional[conlist(str, min_items=1)] = None
+
     scenario_name: Optional[str] = ""
     # If true, will allow us to create multiple plans for the same portfolio, whereas if this is false, if this property
     # exists in the portfolio, it will be ignored
     multi_plan: Optional[bool] = False
 
-    # Pre-defined list of possibilities for exclusions
-    _allowed_exclusions = {
-        # Measure classes
-        "wall_insulation",
-        "ventilation",
-        "roof_insulation",
-        "floor_insulation",
-        "windows",
-        "fireplace",
-        "heating",
-        "hot_water",
-        "lighting",
-        "solar_pv",
-        # Specific measures
-        "air_source_heat_pump",
-        "internal_wall_insulation",
-        "external_wall_insulation",
-        "secondary_heating",
-        "boiler_upgrade",
-        "high_heat_retention_storage_heater",
-    }
-
     _allowed_goals = {"Increasing EPC"}
 
     _allowed_housing_types = {"Social", "Private"}
@@ -47,10 +74,16 @@ class PlanTriggerRequest(BaseModel):
     # Validator to ensure exclusions are within the pre-defined possibilities
     @validator('exclusions', each_item=True)
     def check_exclusions(cls, v):
-        if v not in cls._allowed_exclusions:
+        if v not in TYPICAL_MEASURE_TYPES + SPECIFIC_MEASURES:
             raise ValueError(f"{v} is not an allowed exclusion")
         return v
 
+    @validator('inclusions', each_item=True)
+    def check_inclusions(cls, v):
+        if v not in TYPICAL_MEASURE_TYPES + SPECIFIC_MEASURES:
+            raise ValueError(f"{v} is not an allowed inclusion")
+        return v
+
     # Validator to ensure that the goal is within the pre-defined possibilities
     @validator('goal')
     def check_goal(cls, v):
diff --git a/etl/customers/bcc_tender/app.py b/etl/customers/bcc_tender/app.py
index 281cf864..8cdc6e13 100644
--- a/etl/customers/bcc_tender/app.py
+++ b/etl/customers/bcc_tender/app.py
@@ -95,6 +95,31 @@ epc_data["eligibility_type"] = np.where(
     epc_data["eligibility_type"]
 )
 
+# Example EPCs to analyse
+analysis_epcs = epc_data[~pd.isnull(epc_data["eligibility_type"])].copy()
+# Keep just columns we need
+analysis_epcs = analysis_epcs[
+    [
+        "UPRN", "TENURE", "CURRENT_ENERGY_RATING", "WALLS_DESCRIPTION", "ROOF_DESCRIPTION",
+        "CONSTRUCTION_AGE_BAND", "TOTAL_FLOOR_AREA", "PROPERTY_TYPE", "BUILT_FORM", "MAINHEAT_DESCRIPTION",
+        "eligibility_type",
+    ]
+]
+analysis_epcs["grouped_epc_band"] = np.where(
+    analysis_epcs["CURRENT_ENERGY_RATING"].isin(["D"]),
+    "EPC D",
+    "EPC E-G"
+)
+analysis_epcs.to_csv("/Users/khalimconn-kowlessar/Documents/hestia/Customers/bcc tender/analysis_epcs.csv", index=False)
+
+# Create aggregations and we store this information
+agg_cols = ["CURRENT_ENERGY_RATING", "CONSTRUCTION_AGE_BAND", "PROPERTY_TYPE", "BUILT_FORM", "grouped_epc_band"]
+agg_cols = ["WALLS_DESCRIPTION", "ROOF_DESCRIPTION", "MAINHEAT_DESCRIPTION"]
+for col in agg_cols:
+    agg_df = analysis_epcs.groupby([col]).size().reset_index(name="Number of Properties")
+    agg_df["Percentage of Properties"] = 100 * agg_df["Number of Properties"] / agg_df["Number of Properties"].sum()
+    agg_df.to_csv(f"/Users/khalimconn-kowlessar/Documents/hestia/Customers/bcc tender/{col}.csv", index=False)
+
 # Eligibiilty 6: GBIS General Eligibility, Social - tenure is social rented and EPC rating D-G, but also the property
 # should be rented out below market rate
 # This is a subset of Eligibility 3 - we likely don't need to do any scaling
diff --git a/etl/customers/vectis/outputs.py b/etl/customers/vectis/outputs.py
new file mode 100644
index 00000000..c6d0905f
--- /dev/null
+++ b/etl/customers/vectis/outputs.py
@@ -0,0 +1,196 @@
+import pandas as pd
+from utils.s3 import save_csv_to_s3
+
+
+def app():
+    # This is the payload to be used to extract the energy assessment data from s3 and upload it to the database,
+    # as well as produce links to each of the uploaded documents.
+
+    portfolio_id = 101
+
+    body = {
+        "portfolio_id": portfolio_id,
+        "surveyor": "JAFFERSONS ENERGY CONSULTANTS",
+        "project_code": "VEC001",
+    }
+
+    # These are the recommendations based on the on-site survey of the property.
+    non_intrusive_recommendations = [
+        {
+            # 2 Grove Mansions
+            "uprn": 121016121,
+            "recommendations": [
+                {
+                    "type": "draught_proofing",
+                    "cost": 123,
+                    "survey": True,
+                    "sap_points": 1
+                },
+                {
+                    "type": "mixed_glazing", "cost": 12345, "survey": True,
+                    "description": "Install double glazing to north facing windows and secondary glazing to the "
+                                   "remaining windows at the front of the building",
+                    "sap_points": 3
+                },
+                {"type": "trickle_vents", "cost": 500, "survey": True},
+                {"type": "suspended_floor_insulation", "cost": None, "survey": True, "sap_points": 2},
+                {"type": "internal_wall_insulation", "cost": None, "survey": True, "sap_points": 5},
+            ]
+        },
+        {
+            # 8 Grove Mansions
+            "uprn": 10024087855,
+            "recommendations": [
+                {"type": "draught_proofing", "cost": 123, "survey": True, "sap_points": 2},
+                {
+                    "type": "mixed_glazing", "cost": 12345, "survey": True,
+                    "description": "Install double glazing to north facing windows and secondary glazing to the "
+                                   "remaining windows at the front of the building",
+                    "sap_points": 4
+                },
+                {"type": "trickle_vents", "cost": 500, "survey": True},
+                {"type": "low_energy_lighting", "cost": None, "survey": True, "sap_points": 0},
+                {"type": "internal_wall_insulation", "cost": None, "survey": True, 'sap_points': 5},
+            ]
+        },
+        {
+            # 9 Grove Mansions
+            "uprn": 121016128,
+            "recommendations": [
+                {"type": "draught_proofing", "cost": 123, "survey": True, "sap_points": 1},
+                {
+                    "type": "mixed_glazing", "cost": 12345, "survey": True,
+                    "description": "Install double glazing to north facing windows and secondary glazing to the "
+                                   "remaining windows at the front of the building",
+                    "sap_points": 3
+                },
+                {"type": "trickle_vents", "cost": 500, "survey": True},
+                {"type": "low_energy_lighting", "cost": None, "survey": True, "sap_points": 1},
+                {"type": "suspended_floor_insulation", "cost": None, "sap_points": 1},
+                {"type": "internal_wall_insulation", "cost": None, "survey": True, "sap_points": 6},
+            ]
+        },
+        {
+            # 5 Grove Mansions
+            "uprn": 121016124,
+            "recommendations": [
+                {
+                    "type": "mixed_glazing", "cost": 12345, "survey": True,
+                    "description": "Install double glazing to north facing windows and secondary glazing to the "
+                                   "remaining windows at the front of the building",
+                    "sap_points": 5
+                },
+                {"type": "trickle_vents", "cost": 500, "survey": True},
+                {"type": "low_energy_lighting", "cost": None, "survey": True, "sap_points": 2},
+                {"type": "internal_wall_insulation", "cost": None, "survey": True, "sap_points": 8},
+            ]
+        },
+        {
+            # 14 Grove Mansions
+            "uprn": 121016117,
+            "recommendations": [
+                {"type": "draught_proofing", "cost": 123, "survey": True, "sap_points": 1},
+                {
+                    "type": "mixed_glazing", "cost": 12345, "survey": True,
+                    "description": "Install double glazing to north facing windows and secondary glazing to the "
+                                   "remaining windows at the front of the building",
+                    "sap_points": 4
+                },
+                {"type": "trickle_vents", "cost": 500, "survey": True},
+                {"type": "low_energy_lighting", "cost": None, "survey": True, "sap_points": 1},
+                {"type": "internal_wall_insulation", "cost": None, "survey": True, "sap_points": 6},
+            ]
+        },
+        {
+            # 19 Grove Mansions
+            "uprn": 10024087902,
+            "recommendations": [
+                {"type": "low_energy_lighting", "cost": None, "survey": True, "sap_points": 0},
+                {"type": "internal_wall_insulation", "cost": None, "survey": True, "sap_points": 2},
+                {"type": "room_roof_insulation", "cost": None, "survey": True, "sap_points": 16},
+            ]
+        },
+    ]
+
+    asset_list = [
+        {
+            "uprn": 121016121, "address": "", "postcode": ""
+        },
+        {
+            "uprn": 10024087855, "address": "", "postcode": ""
+        },
+        {
+            "uprn": 121016128, "address": "", "postcode": ""
+        },
+        {
+            "uprn": 121016124, "address": "", "postcode": ""
+        },
+        {
+            "uprn": 121016117, "address": "", "postcode": ""
+        },
+        {
+            "uprn": 10024087902, "address": "", "postcode": ""
+        },
+    ]
+    asset_list = pd.DataFrame(asset_list)
+
+    filename = f"{8}/{portfolio_id}/asset_list.csv"
+    save_csv_to_s3(
+        dataframe=asset_list,
+        bucket_name="retrofit-plan-inputs-dev",
+        file_name=filename
+    )
+
+    # TODO Create asset list
+    # TODO: Store asset list & non_intrusive_recommendations
+    # Store non-invasive recommendations in S3
+    non_invasive_recommendations_filename = f"{8}/{portfolio_id}/non_invasive_recommendations.json"
+    save_csv_to_s3(
+        dataframe=pd.DataFrame(non_intrusive_recommendations),
+        bucket_name="retrofit-plan-inputs-dev",
+        file_name=non_invasive_recommendations_filename
+    )
+
+    # This is the first scenario which includes the first batch of recommendations
+    body1 = {
+        "portfolio_id": str(portfolio_id),
+        "housing_type": "Private",
+        "goal": "Increasing EPC",
+        "goal_value": "A",
+        "trigger_file_path": filename,
+        "already_installed_file_path": "",
+        "patches_file_path": "",
+        "non_invasive_recommendations_file_path": non_invasive_recommendations_filename,
+        "inclusions": [
+            "draught_proofing", "mixed_glazing", "trickle_vents", "low_energy_lighting",
+        ],
+        "budget": None,
+        "scenario_name": "Quick wins - do now while tenanted",
+        "multi_plan": True,
+    }
+
+    # This is the second scenario which includes the second batch of recommendations
+    body2 = {
+        "portfolio_id": str(portfolio_id),
+        "housing_type": "Private",
+        "goal": "Increasing EPC",
+        "goal_value": "A",
+        "trigger_file_path": filename,
+        "already_installed_file_path": "",
+        "patches_file_path": "",
+        "non_invasive_recommendations_file_path": non_invasive_recommendations_filename,
+        "inclusions": [
+            "draught_proofing",
+            "mixed_glazing",
+            "trickle_vents",
+            "low_energy_lighting",
+            "suspended_floor_insulation",
+            "internal_wall_insulation"
+        ],
+        "budget": None,
+        "scenario_name": "Do when void",
+        "multi_plan": True,
+    }
+
+    print(body1)
+    print(body2)
diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py
index 4f75b30b..a5b9d454 100644
--- a/recommendations/Recommendations.py
+++ b/recommendations/Recommendations.py
@@ -17,6 +17,7 @@ from recommendations.SecondaryHeating import SecondaryHeating
 from backend.ml_models.AnnualBillSavings import AnnualBillSavings
 from backend.apis.GoogleSolarApi import GoogleSolarApi
 import backend.app.assumptions as assumptions
+from backend.app.plan.schemas import TYPICAL_MEASURE_TYPES, SPECIFIC_MEASURES, MEASURE_MAP
 
 ASHP_COP = 3
 STARTING_DUMMY_ID_VALUE = -9999
@@ -32,15 +33,24 @@ class Recommendations:
         property_instance: Property,
         materials: List,
         exclusions: List[str] = None,
+        inclusions: List[str] = None,
     ):
         """
         :param property_instance: Instance of the Property class, for the home associated to property_id
         :param materials: List of materials to be used in the recommendations
+        :param exclusions: List of specific measures or measure types to exclude from recommendations. Defaulted to
+                            None, meaning no exclusions to be applied
+        :param inclusions: List of specific measures or measure types to include. Defaulted to None, meaning all
+                           measures are included
         """
 
         self.property_instance = property_instance
         self.materials = materials
         self.exclusions = exclusions if exclusions else []
+        self.inclusions = inclusions if inclusions else []
+
+        self.all_typical_measures = TYPICAL_MEASURE_TYPES
+        self.all_specific_measures = SPECIFIC_MEASURES
 
         self.floor_recommender = FloorRecommendations(property_instance=property_instance, materials=materials)
         self.wall_recomender = WallRecommendations(property_instance=property_instance, materials=materials)
@@ -56,6 +66,24 @@ class Recommendations:
         self.hotwater_recommender = HotwaterRecommendations(property_instance=property_instance)
         self.secondary_heating_recommender = SecondaryHeating(property_instance=property_instance)
 
+    def find_included_measures(self):
+        """
+        Determines the set of measures to be included in recommendations
+        """
+
+        inclusions_full = [MEASURE_MAP[x] if x in MEASURE_MAP else x for x in self.inclusions]
+        exclusions_full = [MEASURE_MAP[x] if x in MEASURE_MAP else x for x in self.exclusions]
+
+        # MEASURE_MAP values are lists, so flatten the nested entries built above into plain measure names
+        inclusions_full = [m for x in inclusions_full for m in (x if isinstance(x, list) else [x])]
+        exclusions_full = [m for x in exclusions_full for m in (x if isinstance(x, list) else [x])]
+
+        if inclusions_full and not exclusions_full:
+            return inclusions_full
+        if exclusions_full and not inclusions_full:
+            return [m for m in self.all_specific_measures if m not in exclusions_full]
+        return self.all_specific_measures  # both or neither supplied; never None, callers test `in measures`
+
     def recommend(self):
 
         """
@@ -68,15 +96,20 @@ class Recommendations:
 
         property_recommendations = []
         phase = 0
+        measures = self.find_included_measures()
 
         # Building Fabric
-        if "wall_insulation" not in self.exclusions:
-            self.wall_recomender.recommend(phase=phase, exclusions=self.exclusions)
+        if (
+            ("wall_insulation" in measures) or
+            ("internal_wall_insulation" in measures) or
+            ("external_wall_insulation" in measures)
+        ):
+            self.wall_recomender.recommend(phase=phase, measures=measures)
             if self.wall_recomender.recommendations:
                 property_recommendations.append(self.wall_recomender.recommendations)
                 phase += 1
 
-        if "roof_insulation" not in self.exclusions:
+        if "roof_insulation" in measures:
             self.roof_recommender.recommend(phase=phase)
             if self.roof_recommender.recommendations:
                 property_recommendations.append(self.roof_recommender.recommendations)
@@ -90,32 +123,32 @@ class Recommendations:
         # real impact on the SAP score. Therefore, we don't need to include phasing for ventilation. If we
         # have any
         # wall or roof recommendations, we will ensure that ventilation is included in the simulation
-        if "ventilation" not in self.exclusions:
+        if "ventilation" in measures:
             if self.wall_recomender.recommendations or self.roof_recommender.recommendations:
                 self.ventilation_recomender.recommend()
                 if self.ventilation_recomender.recommendation:
                     property_recommendations.append(self.ventilation_recomender.recommendation)
 
-        if "floor_insulation" not in self.exclusions:
+        if "floor_insulation" in measures:
             self.floor_recommender.recommend(phase=phase)
             if self.floor_recommender.recommendations:
                 property_recommendations.append(self.floor_recommender.recommendations)
                 phase += 1
 
-        if "windows" not in self.exclusions:
+        if "windows" in measures:
             self.windows_recommender.recommend(phase=phase)
             if self.windows_recommender.recommendation:
                 property_recommendations.append(self.windows_recommender.recommendation)
                 phase += 1
 
-        if "fireplace" not in self.exclusions:
+        if "fireplace" in measures:
             self.fireplace_recommender.recommend(phase=phase)
             if self.fireplace_recommender.recommendation:
                 property_recommendations.append(self.fireplace_recommender.recommendation)
                 phase += 1
 
         # Heating and Electical systems
-        if "heating" not in self.exclusions:
+        if "heating" in measures:
 
             cavity_or_loft_recommendations = [
                 r for r in self.wall_recomender.recommendations + self.roof_recommender.recommendations
@@ -167,26 +200,26 @@ class Recommendations:
                 phase += amount_to_increment
 
         # Hot water
-        if "hot_water" not in self.exclusions:
+        if "hot_water" in measures:
             self.hotwater_recommender.recommend(phase=phase)
             if self.hotwater_recommender.recommendations:
                 property_recommendations.append(self.hotwater_recommender.recommendations)
                 phase += 1
 
-        if "lighting" not in self.exclusions:
+        if "low_energy_lighting" in measures:
             self.lighting_recommender.recommend(phase=phase)
             if self.lighting_recommender.recommendation:
                 property_recommendations.append(self.lighting_recommender.recommendation)
                 phase += 1
 
-        if "secondary_heating" not in self.exclusions:
+        if "secondary_heating" in measures:
             self.secondary_heating_recommender.recommend(phase=phase)
             if self.secondary_heating_recommender.recommendation:
                 property_recommendations.append(self.secondary_heating_recommender.recommendation)
                 phase += 1
 
         # Renewables
-        if "solar_pv" not in self.exclusions:
+        if "solar_pv" in measures:
             self.solar_recommender.recommend(phase=phase)
             if self.solar_recommender.recommendation:
                 property_recommendations.append(self.solar_recommender.recommendation)
diff --git a/recommendations/WallRecommendations.py b/recommendations/WallRecommendations.py
index b73f187c..43727517 100644
--- a/recommendations/WallRecommendations.py
+++ b/recommendations/WallRecommendations.py
@@ -190,7 +190,7 @@ class WallRecommendations(Definitions):
 
         return ewi_recommendations
 
-    def recommend(self, phase=0, exclusions=None):
+    def recommend(self, phase=0, measures=None):
         # if building built after 1990 + we're able to identify U-value +
         # U-value less than 0.18 and if in or close to a conversation area,
         # recommend internal wall insulation as a possible measure
@@ -268,7 +268,7 @@ class WallRecommendations(Definitions):
 
         # Remaining wall types are treated with IWI or EWI
         if (u_value >= self.BUILDING_REGULATIONS_PART_L_MAX_U_VALUE) and self.is_suitable_for_solid_insulation():
-            self.find_insulation(u_value, phase, exclusions=exclusions)
+            self.find_insulation(u_value, phase, measures=measures)
             return
 
         # If the u-value is within regulations, we don't do anything
@@ -558,7 +558,7 @@ class WallRecommendations(Definitions):
 
         return recommendations
 
-    def find_insulation(self, u_value, phase, exclusions=None):
+    def find_insulation(self, u_value, phase, measures=None):
         """
         This function contains the logic for finding potential insulation measures for a property, depending
         on the parts available and whether the property can have external wall insulation installed
@@ -570,10 +570,13 @@ class WallRecommendations(Definitions):
         # we separate the logic for for recommending them, therefore we don't
         # consider diminishing returns between the two as they are considered to be separate measures
 
-        exclusions = [] if exclusions is None else exclusions
+        if measures is None:
+            ewi_valid = self.ewi_valid()
+        else:
+            ewi_valid = self.ewi_valid() and "external_wall_insulation" in measures
 
         ewi_recommendations = []
-        if self.ewi_valid() and "external_wall_insulation" not in exclusions:
+        if ewi_valid:
             ewi_recommendations = self._find_insulation(
                 u_value=u_value,
                 insulation_materials=pd.DataFrame(
@@ -584,7 +587,7 @@ class WallRecommendations(Definitions):
             )
 
         iwi_recommendations = []
-        if "internal_wall_insulation" not in exclusions:
+        if "internal_wall_insulation" in measures:
             iwi_recommendations = self._find_insulation(
                 u_value=u_value,
                 insulation_materials=pd.DataFrame(self.internal_wall_insulation_materials),

From efbcd0f0b8a2ac357a04ffe621694d63bdca9481 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 5 Sep 2024 13:36:27 +0100
Subject: [PATCH 164/182] adding measures into individual measures classes wip

---
 backend/app/plan/schemas.py                   |  5 ++-
 recommendations/Recommendations.py            | 35 +++++++++----------
 recommendations/RoofRecommendations.py        | 17 +++++----
 recommendations/VentilationRecommendations.py |  5 ++-
 recommendations/WallRecommendations.py        | 18 +++++-----
 5 files changed, 43 insertions(+), 37 deletions(-)

diff --git a/backend/app/plan/schemas.py b/backend/app/plan/schemas.py
index 5487caad..2968babf 100644
--- a/backend/app/plan/schemas.py
+++ b/backend/app/plan/schemas.py
@@ -37,12 +37,15 @@ SPECIFIC_MEASURES = [
     "trickle_vents",
     "draught_proofing",
     "mixed_glazing",  # This covers partial double glazing and secondary glazing
+    "cavity_extract_and_refill",
 ]
 
 # This allows us to extend high level categories for measures such as "wall_insulation" to the specific measures
 # such as "external_wall_insulation", "internal_wall_insulation", "cavity_wall_insulation"
 MEASURE_MAP = {
-    "wall_insulation": ["internal_wall_insulation", "external_wall_insulation", "cavity_wall_insulation"],
+    "wall_insulation": [
+        "internal_wall_insulation", "external_wall_insulation", "cavity_wall_insulation", "cavity_extract_and_refill"
+    ],
     "roof_insulation": ["loft_insulation", "flat_roof_insulation", "room_roof_insulation"],
     "floor_insulation": ["suspended_floor_insulation", "solid_floor_insulation"],
     "heating": ["boiler_upgrade", "high_heat_retention_storage_heater", "air_source_heat_pump"],
diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py
index a5b9d454..34586e01 100644
--- a/recommendations/Recommendations.py
+++ b/recommendations/Recommendations.py
@@ -71,6 +71,9 @@ class Recommendations:
         Determines the set of measures to be included in recommendations
         """
 
+        if self.property_instance.non_invasive_recommendations:
+            raise Exception("IMPLEMENT ME")
+
         inclusions_full = [MEASURE_MAP[x] if x in MEASURE_MAP else x for x in self.inclusions]
         exclusions_full = [MEASURE_MAP[x] if x in MEASURE_MAP else x for x in self.exclusions]
 
@@ -96,24 +99,19 @@ class Recommendations:
 
         property_recommendations = []
         phase = 0
+        # TODO: We should form measures from non-intrusive recommendations too
         measures = self.find_included_measures()
 
         # Building Fabric
-        if (
-            ("wall_insulation" in measures) or
-            ("internal_wall_insulation" in measures) or
-            ("external_wall_insulation" in measures)
-        ):
-            self.wall_recomender.recommend(phase=phase, measures=measures)
-            if self.wall_recomender.recommendations:
-                property_recommendations.append(self.wall_recomender.recommendations)
-                phase += 1
+        self.wall_recomender.recommend(phase=phase, measures=measures)
+        if self.wall_recomender.recommendations:
+            property_recommendations.append(self.wall_recomender.recommendations)
+            phase += 1
 
-        if "roof_insulation" in measures:
-            self.roof_recommender.recommend(phase=phase)
-            if self.roof_recommender.recommendations:
-                property_recommendations.append(self.roof_recommender.recommendations)
-                phase += 1
+        self.roof_recommender.recommend(phase=phase, measures=measures)
+        if self.roof_recommender.recommendations:
+            property_recommendations.append(self.roof_recommender.recommendations)
+            phase += 1
 
         # Ventilation recommendations
         # We only produce a ventilation recommendation if the property is recommended to have wall or roof
@@ -123,11 +121,10 @@ class Recommendations:
         # real impact on the SAP score. Therefore, we don't need to include phasing for ventilation. If we
         # have any
         # wall or roof recommendations, we will ensure that ventilation is included in the simulation
-        if "ventilation" in measures:
-            if self.wall_recomender.recommendations or self.roof_recommender.recommendations:
-                self.ventilation_recomender.recommend()
-                if self.ventilation_recomender.recommendation:
-                    property_recommendations.append(self.ventilation_recomender.recommendation)
+        if self.wall_recomender.recommendations or self.roof_recommender.recommendations:
+            self.ventilation_recomender.recommend(measures=measures)
+            if self.ventilation_recomender.recommendation:
+                property_recommendations.append(self.ventilation_recomender.recommendation)
 
         if "floor_insulation" in measures:
             self.floor_recommender.recommend(phase=phase)
diff --git a/recommendations/RoofRecommendations.py b/recommendations/RoofRecommendations.py
index 5075928e..8878b465 100644
--- a/recommendations/RoofRecommendations.py
+++ b/recommendations/RoofRecommendations.py
@@ -1,6 +1,7 @@
 import math
 import pandas as pd
 from backend.Property import Property
+from backend.app.plan.schemas import MEASURE_MAP
 from typing import List
 from datatypes.enums import QuantityUnits
 from recommendations.recommendation_utils import (
@@ -108,11 +109,13 @@ class RoofRecommendations:
 
         return full_insulated_room_roof or room_roof_insulated_at_rafters
 
-    def recommend(self, phase):
+    def recommend(self, phase, measures=None):
 
         if self.property.roof["has_dwelling_above"]:
             return
 
+        measures = MEASURE_MAP["roof_insulation"] if measures is None else measures
+
         u_value = self.property.roof["thermal_transmittance"]
 
         # We check if the roof is already insulated and if so, we exit
@@ -153,19 +156,19 @@ class RoofRecommendations:
 
         self.estimated_u_value = u_value
         if (u_value <= self.BUILDING_REGULATIONS_PART_L_MAX_U_VALUE) and (
-            "loft_insulation" not in self.property.non_invasive_recommendations
+            "loft_insulation" not in measures
         ):
             # The Roof is already compliant
             return
 
-        if self.property.roof["is_pitched"] or self.property.roof["is_flat"]:
-            insulation_thickness = (
-                0 if "loft_insulation" not in self.property.non_invasive_recommendations else self.insulation_thickness
-            )
+        if (self.property.roof["is_pitched"] and "loft_insulation" in measures) or (
+            self.property.roof["is_flat"] and "flat_roof_insulation" in measures
+        ):
+            insulation_thickness = 0 if "loft_insulation" not in measures else self.insulation_thickness
             self.recommend_roof_insulation(u_value, insulation_thickness, self.property.roof, phase)
             return
 
-        if self.property.roof["is_roof_room"]:
+        if self.property.roof["is_roof_room"] and ("room_roof_insulation" in measures):
             self.recommend_room_roof_insulation(u_value, phase)
             return
 
diff --git a/recommendations/VentilationRecommendations.py b/recommendations/VentilationRecommendations.py
index 1120654a..4f88b953 100644
--- a/recommendations/VentilationRecommendations.py
+++ b/recommendations/VentilationRecommendations.py
@@ -29,7 +29,7 @@ class VentilationRecommendations(Definitions):
     def identify_ventilation(self):
         self.has_ventilaion = self.property.data["mechanical-ventilation"] in self.VENTILATION_DESCRIPTIONS
 
-    def recommend(self):
+    def recommend(self, measures=None):
         """
         If there is no ventilation, we recommend installing ventilation
 
@@ -37,6 +37,9 @@ class VentilationRecommendations(Definitions):
         ventilation if there is natural ventilation
         :return:
         """
+        measures = ["ventilation"] if measures is None else measures
+        if "ventilation" not in measures:
+            return
 
         self.identify_ventilation()
         if self.has_ventilaion:
diff --git a/recommendations/WallRecommendations.py b/recommendations/WallRecommendations.py
index 43727517..18e269ab 100644
--- a/recommendations/WallRecommendations.py
+++ b/recommendations/WallRecommendations.py
@@ -5,6 +5,7 @@ import pandas as pd
 
 from datatypes.enums import QuantityUnits
 from backend.Property import Property
+from backend.app.plan.schemas import MEASURE_MAP
 from BaseUtility import Definitions
 from etl.epc_clean.epc_attributes.WallAttributes import WallAttributes
 from recommendations.recommendation_utils import (
@@ -195,6 +196,10 @@ class WallRecommendations(Definitions):
         # U-value less than 0.18 and if in or close to a conversation area,
         # recommend internal wall insulation as a possible measure
 
+        measures = MEASURE_MAP["wall_insulation"] if measures is None else measures
+        if not measures:
+            return
+
         u_value = self.property.walls["thermal_transmittance"]
         u_value = None if pd.isnull(u_value) else u_value
 
@@ -235,7 +240,7 @@ class WallRecommendations(Definitions):
                 and (u_value >= self.BUILDING_REGULATIONS_PART_L_MAX_U_VALUE)
             ):
                 # Recommend insulation
-                self.find_insulation(u_value, phase)
+                self.find_insulation(u_value, phase, measures)
                 return
 
             # We can't detect it's a cavity wall, but it was built after 1990 so likely built with insulation already
@@ -259,7 +264,7 @@ class WallRecommendations(Definitions):
 
         self.estimated_u_value = u_value
 
-        if is_cavity_wall or "cavity_extract_and_refill" in self.property.non_invasive_recommendations:
+        if (is_cavity_wall and "cavity_wall_insulation" in measures) or "cavity_extract_and_refill" in measures:
             if u_value >= self.BUILDING_REGULATIONS_PART_L_MAX_U_VALUE:
                 # Test filling cavity
                 self.find_cavity_insulation(u_value, insulation_thickness, phase)
@@ -558,7 +563,7 @@ class WallRecommendations(Definitions):
 
         return recommendations
 
-    def find_insulation(self, u_value, phase, measures=None):
+    def find_insulation(self, u_value, phase, measures):
         """
         This function contains the logic for finding potential insulation measures for a property, depending
         on the parts available and whether the property can have external wall insulation installed
@@ -570,13 +575,8 @@ class WallRecommendations(Definitions):
         # we separate the logic for for recommending them, therefore we don't
         # consider diminishing returns between the two as they are considered to be separate measures
 
-        if measures is None:
-            ewi_valid = self.ewi_valid()
-        else:
-            ewi_valid = self.ewi_valid() and "external_wall_insulation" in measures
-
         ewi_recommendations = []
-        if ewi_valid:
+        if self.ewi_valid() and "external_wall_insulation" in measures:
             ewi_recommendations = self._find_insulation(
                 u_value=u_value,
                 insulation_materials=pd.DataFrame(

From b04cf15ad2257baab8ed2f756f21a5021a6d1894 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 5 Sep 2024 13:43:35 +0100
Subject: [PATCH 165/182] adding measures to floor class

---
 recommendations/FloorRecommendations.py | 13 ++++++++++---
 recommendations/Recommendations.py      |  2 +-
 2 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/recommendations/FloorRecommendations.py b/recommendations/FloorRecommendations.py
index c63d45c2..0e6c46ad 100644
--- a/recommendations/FloorRecommendations.py
+++ b/recommendations/FloorRecommendations.py
@@ -5,6 +5,7 @@ import pandas as pd
 
 from BaseUtility import Definitions
 from datatypes.enums import QuantityUnits
+from backend.app.plan.schemas import MEASURE_MAP
 from backend.Property import Property
 from recommendations.recommendation_utils import (
     r_value_per_mm_to_u_value, calculate_u_value_uplift, is_diminishing_returns, update_lowest_selected_u_value,
@@ -70,7 +71,13 @@ class FloorRecommendations(Definitions):
         # TODO: To be completed
         self.exposed_floor_non_insulation_materials = []
 
-    def recommend(self, phase=0):
+    def recommend(self, phase=0, measures=None):
+
+        measures = MEASURE_MAP["floor_insulation"] if measures is None else measures
+
+        if not measures:
+            return
+
         u_value = self.property.floor["thermal_transmittance"]
         property_type = self.property.data["property-type"]
         floor_area = self.property.insulation_floor_area
@@ -124,7 +131,7 @@ class FloorRecommendations(Definitions):
             self.property.floor["is_suspended"] or
             self.property.floor["is_to_unheated_space"] or
             self.property.floor["is_to_external_air"]
-        ):
+        ) and "suspended_floor_insulation" in measures:
             # Given the U-value, we recommend underfloor insulation
             self.recommend_floor_insulation(
                 phase=phase,
@@ -134,7 +141,7 @@ class FloorRecommendations(Definitions):
             )
             return
 
-        if self.property.floor["is_solid"]:
+        if self.property.floor["is_solid"] and "solid_floor_insulation" in measures:
             # Given the U-value, we recommend solid floor insulation options which are usually solid foam
             self.recommend_floor_insulation(
                 u_value=u_value,
diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py
index 34586e01..e81e7998 100644
--- a/recommendations/Recommendations.py
+++ b/recommendations/Recommendations.py
@@ -127,7 +127,7 @@ class Recommendations:
                 property_recommendations.append(self.ventilation_recomender.recommendation)
 
         if "floor_insulation" in measures:
-            self.floor_recommender.recommend(phase=phase)
+            self.floor_recommender.recommend(phase=phase, measures=measures)
             if self.floor_recommender.recommendations:
                 property_recommendations.append(self.floor_recommender.recommendations)
                 phase += 1

From fffdaa0392c56e703cfbd11833afba992f123253 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 5 Sep 2024 13:46:57 +0100
Subject: [PATCH 166/182] removed exposed floor insulation

---
 recommendations/FloorRecommendations.py | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/recommendations/FloorRecommendations.py b/recommendations/FloorRecommendations.py
index 0e6c46ad..a1f63f96 100644
--- a/recommendations/FloorRecommendations.py
+++ b/recommendations/FloorRecommendations.py
@@ -64,13 +64,6 @@ class FloorRecommendations(Definitions):
             ]
         ]
 
-        self.exposed_floor_insulation_materials = [
-            part for part in materials if part["type"] == "exposed_floor_insulation"
-        ]
-
-        # TODO: To be completed
-        self.exposed_floor_non_insulation_materials = []
-
     def recommend(self, phase=0, measures=None):
 
         measures = MEASURE_MAP["floor_insulation"] if measures is None else measures

From 0eb81b6e7cedfaa91c675e03cc8a90b1240ad086 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 5 Sep 2024 13:51:13 +0100
Subject: [PATCH 167/182] added measures recs into heating

---
 backend/Property.py                           |   4 +-
 recommendations/HeatingRecommender.py         |  23 ++--
 recommendations/Recommendations.py            | 100 +++++++++---------
 recommendations/VentilationRecommendations.py |   5 +-
 4 files changed, 66 insertions(+), 66 deletions(-)

diff --git a/backend/Property.py b/backend/Property.py
index 19e5cb2e..3cc8350d 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -1231,12 +1231,12 @@ class Property:
             else:
                 raise Exception("Investiage me")
 
-    def is_ashp_valid(self, exclusions):
+    def is_ashp_valid(self, measures):
 
         if "air_source_heat_pump" in self.non_invasive_recommendations:
             return True
 
-        if "air_source_heat_pump" in exclusions:
+        if "air_source_heat_pump" not in measures:
             return False
 
         suitable_property_type = self.data["property-type"] in ["House", "Bungalow"]
diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py
index 78dce329..ac4f3000 100644
--- a/recommendations/HeatingRecommender.py
+++ b/recommendations/HeatingRecommender.py
@@ -1,6 +1,7 @@
 from recommendations.Costs import Costs, BOILER_UPGRADE_SCHEME_ASHP_VALUE
 from recommendations.recommendation_utils import check_simulation_difference, override_costs
 from backend.Property import Property
+from backend.app.plan.schemas import MEASURE_MAP
 from etl.epc_clean.epc_attributes.MainheatAttributes import MainHeatAttributes
 from etl.epc_clean.epc_attributes.HotWaterAttributes import HotWaterAttributes
 from etl.epc_clean.epc_attributes.MainFuelAttributes import MainFuelAttributes
@@ -28,7 +29,7 @@ class HeatingRecommender:
             self.property.main_heating["clean_description"] in self.ELECTRIC_HEATING_DESCRIPTIONS
         )
 
-    def is_high_heat_retention_valid(self, ashp_only_heating_recommendation, exclusions):
+    def is_high_heat_retention_valid(self, ashp_only_heating_recommendation, measures):
         """
         Check conditions if high heat retention storage is valid
         :return:
@@ -43,10 +44,11 @@ class HeatingRecommender:
         has_electric = self.has_electric_heating_description or electric_heating_assumed
 
         return (
-            has_electric and (not ashp_only_heating_recommendation) and ("boiler_upgrade" not in exclusions)
+            has_electric and (not ashp_only_heating_recommendation) and
+            ("high_heat_retention_storage_heater" in measures)
         )
 
-    def is_boiler_upgrade_suitable(self, exclusions, ashp_only_heating_recommendation):
+    def is_boiler_upgrade_suitable(self, measures, ashp_only_heating_recommendation):
         """
         These are the conditions we apply to recommend a boiler installation
         :return:
@@ -84,12 +86,12 @@ class HeatingRecommender:
                 portable_heaters_has_mains
             ) and
             (not ashp_only_heating_recommendation) and
-            ("boiler_upgrade" not in exclusions)
+            ("boiler_upgrade" in measures)
         )
 
         return is_valid, has_boiler
 
-    def recommend(self, has_cavity_or_loft_recommendations, phase=0, exclusions=None):
+    def recommend(self, has_cavity_or_loft_recommendations, phase=0, measures=None):
         """
         Produces heating recommendations
 
@@ -97,16 +99,17 @@ class HeatingRecommender:
         recommendation. If there are cavity or loft recommendations, the property would need to complete those measures
         before being able to get the boiler upgrade scheme benefits. The messaging in the front end would be to
         :param phase: indicates the phase of the retrofit programme
-        :param exclusions: A list of exclusions for the recommendations
+        :param measures: A list of measures for the recommendations
         """
 
+        measures = MEASURE_MAP["heating"] if measures is None else measures
+
         # TODO: We could have a system flush recommendation for an existing boiler, where there is no need to replace
         #       the boiler, but instead flushing the system will make it run more efficiently. There is a cost for this
         #       in the Costs class, stored as SYSTEM_FLUSH_COST
 
         # TODO: Right now, we don't have recommendations for electric boilers - we should probably have one
 
-        exclusions = [] if exclusions is None else exclusions
         non_invasive_ashp_recommendation = next(
             (r for r in self.property.non_invasive_recommendations if r["type"] == "air_source_heat_pump"),
             {"suitable": True}
@@ -122,7 +125,7 @@ class HeatingRecommender:
         # This first iteration of the recommender will provide very basic recommendation
         # We recommend heating controls based on the main heating system
 
-        hhr_valid = self.is_high_heat_retention_valid(ashp_only_heating_recommendation, exclusions)
+        hhr_valid = self.is_high_heat_retention_valid(ashp_only_heating_recommendation, measures)
 
         if hhr_valid:
             # Recommend high heat retention storage heaters
@@ -131,7 +134,7 @@ class HeatingRecommender:
             self.recommend_hhr_storage_heaters(phase=phase, system_change=True, heating_controls_only=False)
 
         gas_boiler_suitable, has_boiler = self.is_boiler_upgrade_suitable(
-            exclusions=exclusions, ashp_only_heating_recommendation=ashp_only_heating_recommendation
+            measures=measures, ashp_only_heating_recommendation=ashp_only_heating_recommendation
         )
 
         if gas_boiler_suitable:
@@ -153,7 +156,7 @@ class HeatingRecommender:
         # In the future, we'll allow overrides, so that non-intrusive surveys can contradict these conditions
         # and either allow or prevent the recommendation of an air source heat pump
 
-        if self.property.is_ashp_valid(exclusions=exclusions) and non_invasive_ashp_recommendation["suitable"]:
+        if self.property.is_ashp_valid(measures=measures) and non_invasive_ashp_recommendation["suitable"]:
             self.recommend_air_source_heat_pump(
                 phase=phase,
                 has_cavity_or_loft_recommendations=has_cavity_or_loft_recommendations,
diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py
index e81e7998..4bcc2a40 100644
--- a/recommendations/Recommendations.py
+++ b/recommendations/Recommendations.py
@@ -121,8 +121,11 @@ class Recommendations:
         # real impact on the SAP score. Therefore, we don't need to include phasing for ventilation. If we
         # have any
         # wall or roof recommendations, we will ensure that ventilation is included in the simulation
-        if self.wall_recomender.recommendations or self.roof_recommender.recommendations:
-            self.ventilation_recomender.recommend(measures=measures)
+        if (
+            (self.wall_recomender.recommendations or self.roof_recommender.recommendations) and
+            ("ventilation" in measures)
+        ):
+            self.ventilation_recomender.recommend()
             if self.ventilation_recomender.recommendation:
                 property_recommendations.append(self.ventilation_recomender.recommendation)
 
@@ -144,57 +147,54 @@ class Recommendations:
                 property_recommendations.append(self.fireplace_recommender.recommendation)
                 phase += 1
 
-        # Heating and Electical systems
-        if "heating" in measures:
+        cavity_or_loft_recommendations = [
+            r for r in self.wall_recomender.recommendations + self.roof_recommender.recommendations
+            if r["type"] in ["cavity_wall_insulation", "loft_insulation"]
+        ]
+        has_cavity_or_loft_recommendations = len(cavity_or_loft_recommendations) > 0
 
-            cavity_or_loft_recommendations = [
-                r for r in self.wall_recomender.recommendations + self.roof_recommender.recommendations
-                if r["type"] in ["cavity_wall_insulation", "loft_insulation"]
-            ]
-            has_cavity_or_loft_recommendations = len(cavity_or_loft_recommendations) > 0
+        self.heating_recommender.recommend(
+            phase=phase,
+            measures=measures,
+            has_cavity_or_loft_recommendations=has_cavity_or_loft_recommendations,
+        )
+        if (
+            self.heating_recommender.heating_recommendations or
+            self.heating_recommender.heating_control_recommendations
+        ):
 
-            self.heating_recommender.recommend(
-                phase=phase,
-                has_cavity_or_loft_recommendations=has_cavity_or_loft_recommendations,
-                exclusions=self.exclusions
-            )
-            if (
-                self.heating_recommender.heating_recommendations or
-                self.heating_recommender.heating_control_recommendations
-            ):
-
-                # We split into first and second phase recommendations
-                first_phase_recommendations = [
-                    r for r in (
-                        self.heating_recommender.heating_recommendations +
-                        self.heating_recommender.heating_control_recommendations
-                    )
-                    if r["phase"] == phase
-                ]
-                second_phase_recommendations = [
-                    r for r in (
-                        self.heating_recommender.heating_recommendations +
-                        self.heating_recommender.heating_control_recommendations
-                    )
-                    if r["phase"] == phase + 1
-                ]
-
-                if first_phase_recommendations:
-                    property_recommendations.append(first_phase_recommendations)
-
-                if second_phase_recommendations:
-                    property_recommendations.append(second_phase_recommendations)
-
-                # We check if we have distinct heating and heating controls recommendations
-                # If so, we increment by 2 (one of the heating system, one for the heating controls)
-                # otherwise we incremenet by 1
-                max_used_phase = max(
-                    [rec["phase"] for rec in
-                     self.heating_recommender.heating_recommendations +
-                     self.heating_recommender.heating_control_recommendations]
+            # We split into first and second phase recommendations
+            first_phase_recommendations = [
+                r for r in (
+                    self.heating_recommender.heating_recommendations +
+                    self.heating_recommender.heating_control_recommendations
                 )
-                amount_to_increment = max_used_phase - phase + 1
-                phase += amount_to_increment
+                if r["phase"] == phase
+            ]
+            second_phase_recommendations = [
+                r for r in (
+                    self.heating_recommender.heating_recommendations +
+                    self.heating_recommender.heating_control_recommendations
+                )
+                if r["phase"] == phase + 1
+            ]
+
+            if first_phase_recommendations:
+                property_recommendations.append(first_phase_recommendations)
+
+            if second_phase_recommendations:
+                property_recommendations.append(second_phase_recommendations)
+
+            # We check if we have distinct heating and heating controls recommendations
+            # If so, we increment by 2 (one for the heating system, one for the heating controls)
+            # otherwise we increment by 1
+            max_used_phase = max(
+                [rec["phase"] for rec in
+                 self.heating_recommender.heating_recommendations +
+                 self.heating_recommender.heating_control_recommendations]
+            )
+            amount_to_increment = max_used_phase - phase + 1
+            phase += amount_to_increment
 
         # Hot water
         if "hot_water" in measures:
diff --git a/recommendations/VentilationRecommendations.py b/recommendations/VentilationRecommendations.py
index 4f88b953..1120654a 100644
--- a/recommendations/VentilationRecommendations.py
+++ b/recommendations/VentilationRecommendations.py
@@ -29,7 +29,7 @@ class VentilationRecommendations(Definitions):
     def identify_ventilation(self):
         self.has_ventilaion = self.property.data["mechanical-ventilation"] in self.VENTILATION_DESCRIPTIONS
 
-    def recommend(self, measures=None):
+    def recommend(self):
         """
         If there is no ventilation, we recommend installing ventilation
 
@@ -37,9 +37,6 @@ class VentilationRecommendations(Definitions):
         ventilation if there is natural ventilation
         :return:
         """
-        measures = ["ventilation"] if measures is None else measures
-        if "ventilation" not in measures:
-            return
 
         self.identify_ventilation()
         if self.has_ventilaion:

From 85b92bfecdc8cfe61583b07a002e1f41eb7ed0a6 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 5 Sep 2024 14:00:31 +0100
Subject: [PATCH 168/182] remove non-invasive recommendations from wall
 measures

---
 recommendations/Recommendations.py     |  4 +++-
 recommendations/WallRecommendations.py | 12 +++++++-----
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py
index 4bcc2a40..d48cf6ed 100644
--- a/recommendations/Recommendations.py
+++ b/recommendations/Recommendations.py
@@ -74,6 +74,9 @@ class Recommendations:
         if self.property_instance.non_invasive_recommendations:
             raise Exception("IMPLEMENT ME")
 
+        # Generally, inclusions is a global option and will overrule specific property non-invasive recommendations.
+        # This is done so that we can use inclusions to specify scenarios.
+
         inclusions_full = [MEASURE_MAP[x] if x in MEASURE_MAP else x for x in self.inclusions]
         exclusions_full = [MEASURE_MAP[x] if x in MEASURE_MAP else x for x in self.exclusions]
 
@@ -99,7 +102,6 @@ class Recommendations:
 
         property_recommendations = []
         phase = 0
-        # TODO: We should form measures form non-intrusive recommendations too
         measures = self.find_included_measures()
 
         # Building Fabric
diff --git a/recommendations/WallRecommendations.py b/recommendations/WallRecommendations.py
index 18e269ab..d78d74a4 100644
--- a/recommendations/WallRecommendations.py
+++ b/recommendations/WallRecommendations.py
@@ -163,7 +163,7 @@ class WallRecommendations(Definitions):
         )
 
         # Test filling cavity
-        self.find_cavity_insulation(u_value, insulation_thickness, phase)
+        self.find_cavity_insulation(u_value, insulation_thickness, phase, measures)
 
         return self.recommendations
 
@@ -212,7 +212,7 @@ class WallRecommendations(Definitions):
             or self.property.walls["is_filled_cavity"]
         ) and (
             "cavity_extract_and_refill"
-            not in self.property.non_invasive_recommendations
+            not in measures
         ):
             return
 
@@ -267,7 +267,7 @@ class WallRecommendations(Definitions):
         if (is_cavity_wall and "cavity_wall_insulation" in measures) or "cavity_extract_and_refill" in measures:
             if u_value >= self.BUILDING_REGULATIONS_PART_L_MAX_U_VALUE:
                 # Test filling cavity
-                self.find_cavity_insulation(u_value, insulation_thickness, phase)
+                self.find_cavity_insulation(u_value, insulation_thickness, phase, measures)
 
             return
 
@@ -279,7 +279,7 @@ class WallRecommendations(Definitions):
         # If the u-value is within regulations, we don't do anything
         return
 
-    def find_cavity_insulation(self, u_value, insulation_thickness, phase):
+    def find_cavity_insulation(self, u_value, insulation_thickness, phase, measures):
         """
         This method tests different materials to fill the cavity wall, determining which
         material will give us the best U-value.
@@ -299,6 +299,8 @@ class WallRecommendations(Definitions):
         :param u_value: u_value of the starting wall
         :param insulation_thickness: describes the insulation level of the wall. If "below average", we have a partially
         filled cavity wall
+        :param phase: The phase of the recommendation
+        :param measures: The measures we're considering
         """
 
         insulation_materials = pd.DataFrame(self.cavity_wall_insulation_materials)
@@ -333,7 +335,7 @@ class WallRecommendations(Definitions):
 
                 is_extraction_and_refill = (
                     "cavity_extract_and_refill"
-                    in self.property.non_invasive_recommendations
+                    in measures
                 )
 
                 cost_result = self.costs.cavity_wall_insulation(

From 3e7a1813201ba79bdaeac6906c0b1665e6968d35 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 5 Sep 2024 14:01:26 +0100
Subject: [PATCH 169/182] remove non-invasive recommendations from roof
 recommendations

---
 recommendations/RoofRecommendations.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/recommendations/RoofRecommendations.py b/recommendations/RoofRecommendations.py
index 8878b465..c37c4d85 100644
--- a/recommendations/RoofRecommendations.py
+++ b/recommendations/RoofRecommendations.py
@@ -79,13 +79,13 @@ class RoofRecommendations:
 
         return self.recommendations
 
-    def is_loft_already_insulated(self):
+    def is_loft_already_insulated(self, measures):
         """
         Check if the loft is already insulated
         """
 
         # If we have a non-invasive recommendation for the loft insulation, we can assume that the loft is not insulated
-        if "loft_insulation" in self.property.non_invasive_recommendations:
+        if "loft_insulation" in measures:
             return False
 
         return (self.insulation_thickness > self.MINIMUM_LOFT_ISULATION_MM) and self.property.roof["is_pitched"]
@@ -123,7 +123,7 @@ class RoofRecommendations:
         # Building regulations part L recommend installing at least 270mm of insulation, however generally we
         # experience diminishing returns in terms of SAP once we go beyond around 150mm of insulation
         # This only holds true for pitched roofs.
-        if self.is_loft_already_insulated():
+        if self.is_loft_already_insulated(measures):
             return
 
         if (self.insulation_thickness >= self.MINIMUM_FLAT_ROOF_ISULATION_MM) and self.property.roof["is_flat"]:

From a6bd5f8ff24d3e0d72c6be53b6a5557474e33ffc Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 5 Sep 2024 14:03:06 +0100
Subject: [PATCH 170/182] remove references to non-intrusive recommendations

---
 recommendations/HeatingRecommender.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py
index ac4f3000..dc433806 100644
--- a/recommendations/HeatingRecommender.py
+++ b/recommendations/HeatingRecommender.py
@@ -110,6 +110,7 @@ class HeatingRecommender:
 
         # TODO: Right now, we don't have recommendations for electric boilers - we should probably have one
 
+        # if we have a non-invasive ashp recommendation, we get the configuration directly from the property instance
         non_invasive_ashp_recommendation = next(
             (r for r in self.property.non_invasive_recommendations if r["type"] == "air_source_heat_pump"),
             {"suitable": True}

From 00f3a175faafb8b39d7dd9e5dcd631bc926c6c94 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 5 Sep 2024 14:10:57 +0100
Subject: [PATCH 171/182] Added trickle vents recommendation

---
 recommendations/Recommendations.py            |  4 ++
 recommendations/VentilationRecommendations.py | 39 +++++++++++++++++++
 2 files changed, 43 insertions(+)

diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py
index d48cf6ed..d910ddfe 100644
--- a/recommendations/Recommendations.py
+++ b/recommendations/Recommendations.py
@@ -131,6 +131,10 @@ class Recommendations:
             if self.ventilation_recomender.recommendation:
                 property_recommendations.append(self.ventilation_recomender.recommendation)
 
+        if "trickle_vents" in measures:
+            # This is a recommendatin that typically comes from an energy assessment
+            property_recommendations.append(self.ventilation_recomender.recommend_trickle_vents())
+
         if "floor_insulation" in measures:
             self.floor_recommender.recommend(phase=phase, measures=measures)
             if self.floor_recommender.recommendations:
diff --git a/recommendations/VentilationRecommendations.py b/recommendations/VentilationRecommendations.py
index 1120654a..31119168 100644
--- a/recommendations/VentilationRecommendations.py
+++ b/recommendations/VentilationRecommendations.py
@@ -81,3 +81,42 @@ class VentilationRecommendations(Definitions):
                 "labour_days": labour_days  # Assume 8 hour day
             }
         ]
+
+    def recommend_trickle_vents(self):
+        """
+        This is not something that we can identify completely non-invasively, however a recommendation which may come
+        about as a result of an energy assessment is the installation of trickle vents. This function handles that
+        """
+
+        trickle_vents_recommendation_config = [
+            r for r in self.property.non_invasive_recommendations if r["type"] == "trickle_vents"
+        ][0]
+
+        description = (
+            "Install trickle vents to windows without them" if
+            not trickle_vents_recommendation_config.get("description")
+            else trickle_vents_recommendation_config["description"]
+        )
+
+        # We recommend installing two mechanical ventilation systems
+        self.recommendation = [
+            {
+                "phase": None,
+                "parts": [],
+                "type": "trickle_vents",
+                "description": description,
+                "starting_u_value": None,
+                "new_u_value": None,
+                "already_installed": False,
+                "sap_points": trickle_vents_recommendation_config["sap_points"],
+                "heat_demand": 0,
+                "kwh_savings": 0,
+                "co2_equivalent_savings": 0,
+                "energy_cost_savings": 0,
+                "total": trickle_vents_recommendation_config["cost"],
+                # Labour defaults to 8 hours (one working day) unless the survey config overrides it
+                "labour_hours": trickle_vents_recommendation_config.get("labour_hours", 8),
+                "labour_days": trickle_vents_recommendation_config.get("labour_days", 1),  # Assume 8 hour day
+                "survey": True
+            }
+        ]

From 8e8961a89d2dd4e576a792f5d76642aec88830a8 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 5 Sep 2024 14:20:23 +0100
Subject: [PATCH 172/182] Created draught proofing recommendations

---
 .../DraughtProofingRecommendations.py         | 49 +++++++++++++++++++
 recommendations/Recommendations.py            |  8 +++
 2 files changed, 57 insertions(+)
 create mode 100644 recommendations/DraughtProofingRecommendations.py

diff --git a/recommendations/DraughtProofingRecommendations.py b/recommendations/DraughtProofingRecommendations.py
new file mode 100644
index 00000000..fd6d4ee9
--- /dev/null
+++ b/recommendations/DraughtProofingRecommendations.py
@@ -0,0 +1,49 @@
+from backend.Property import Property
+
+
+class DraughtProofingRecommendations:
+
+    def __init__(self, property_instance: Property):
+        self.property = property_instance
+
+        self.recommendation = []
+
+    def recommend(self):
+        """
+        In some cases the need for draught proofing can be deduced from the EPC recommendations, but this initial
+        implementation only reads the survey's non-invasive "draught_proofing" entry and stores the result on
+        self.recommendation. An IndexError is raised if the property has no such entry.
+        """
+
+        draught_proofing_recommendation_config = [
+            r for r in self.property.non_invasive_recommendations if r["type"] == "draught_proofing"
+        ][0]
+
+        description = (
+            "Draught proof doors and windows to improve energy efficiency" if
+            not draught_proofing_recommendation_config.get("description")
+            else draught_proofing_recommendation_config["description"]
+        )
+
+        # Build the single draught proofing recommendation from the survey configuration
+        self.recommendation = [
+            {
+                "phase": None,
+                "parts": [],
+                "type": "draught_proofing",
+                "description": description,
+                "starting_u_value": None,
+                "new_u_value": None,
+                "already_installed": False,
+                "sap_points": draught_proofing_recommendation_config["sap_points"],
+                "heat_demand": 0,
+                "kwh_savings": 0,
+                "co2_equivalent_savings": 0,
+                "energy_cost_savings": 0,
+                "total": draught_proofing_recommendation_config["cost"],
+                # Labour defaults to 8 hours (one working day) unless the survey config overrides it
+                "labour_hours": draught_proofing_recommendation_config.get("labour_hours", 8),
+                "labour_days": draught_proofing_recommendation_config.get("labour_days", 1),  # Assume 8 hour day
+                "survey": True
+            }
+        ]
diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py
index d910ddfe..070036af 100644
--- a/recommendations/Recommendations.py
+++ b/recommendations/Recommendations.py
@@ -14,6 +14,7 @@ from recommendations.WindowsRecommendations import WindowsRecommendations
 from recommendations.HeatingRecommender import HeatingRecommender
 from recommendations.HotwaterRecommendations import HotwaterRecommendations
 from recommendations.SecondaryHeating import SecondaryHeating
+from recommendations.DraughtProofingRecommendations import DraughtProofingRecommendations
 from backend.ml_models.AnnualBillSavings import AnnualBillSavings
 from backend.apis.GoogleSolarApi import GoogleSolarApi
 import backend.app.assumptions as assumptions
@@ -58,6 +59,7 @@ class Recommendations:
         self.ventilation_recomender = VentilationRecommendations(
             property_instance=property_instance, materials=materials
         )
+        self.draught_proofing_recommender = DraughtProofingRecommendations(property_instance=property_instance)
         self.fireplace_recommender = FireplaceRecommendations(property_instance=property_instance)
         self.lighting_recommender = LightingRecommendations(property_instance=property_instance, materials=materials)
         self.windows_recommender = WindowsRecommendations(property_instance=property_instance, materials=materials)
@@ -135,6 +137,12 @@ class Recommendations:
             # This is a recommendatin that typically comes from an energy assessment
             property_recommendations.append(self.ventilation_recomender.recommend_trickle_vents())
 
+        if "draught_proofing" in measures:
+            # This is a recommendation that in some instances we can recommend, by deducing it from the SAP
+            # recommendations, however we will implement this later
+            self.draught_proofing_recommender.recommend()
+            property_recommendations.append(self.draught_proofing_recommender.recommendation)
+
         if "floor_insulation" in measures:
             self.floor_recommender.recommend(phase=phase, measures=measures)
             if self.floor_recommender.recommendations:

From 607b150070167d86cc75f0c2bcb396882f5cbb35 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 5 Sep 2024 14:54:10 +0100
Subject: [PATCH 173/182] added mixed glazing recommendation

---
 recommendations/Recommendations.py            |  4 ++
 recommendations/VentilationRecommendations.py |  3 +-
 recommendations/WindowsRecommendations.py     | 61 ++++++++++++++++++-
 3 files changed, 65 insertions(+), 3 deletions(-)

diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py
index 070036af..609fd245 100644
--- a/recommendations/Recommendations.py
+++ b/recommendations/Recommendations.py
@@ -155,6 +155,10 @@ class Recommendations:
                 property_recommendations.append(self.windows_recommender.recommendation)
                 phase += 1
 
+        if "mixed_glazing" in measures:
+            # This is a recommendation that comes exclusively from an energy assessment
+            property_recommendations.append(self.windows_recommender.recommend_mixed_glazing())
+
         if "fireplace" in measures:
             self.fireplace_recommender.recommend(phase=phase)
             if self.fireplace_recommender.recommendation:
diff --git a/recommendations/VentilationRecommendations.py b/recommendations/VentilationRecommendations.py
index 31119168..e3b66226 100644
--- a/recommendations/VentilationRecommendations.py
+++ b/recommendations/VentilationRecommendations.py
@@ -98,8 +98,7 @@ class VentilationRecommendations(Definitions):
             else trickle_vents_recommendation_config["description"]
         )
 
-        # We recommend installing two mechanical ventilation systems
-        self.recommendation = [
+        return [
             {
                 "phase": None,
                 "parts": [],
diff --git a/recommendations/WindowsRecommendations.py b/recommendations/WindowsRecommendations.py
index 3826a470..c74c2fac 100644
--- a/recommendations/WindowsRecommendations.py
+++ b/recommendations/WindowsRecommendations.py
@@ -3,8 +3,9 @@ from typing import List
 import numpy as np
 
 from backend.Property import Property
+from etl.epc_clean.epc_attributes.WindowAttributes import WindowAttributes
 from recommendations.Costs import Costs
-from recommendations.recommendation_utils import override_costs
+from recommendations.recommendation_utils import override_costs, check_simulation_difference
 
 
 class WindowsRecommendations:
@@ -128,3 +129,61 @@ class WindowsRecommendations:
                 }
             }
         ]
+
+    def recommend_mixed_glazing(self):
+        """
+        This function will recommend mixed glazing to the property. This is a more specific recommendation than
+        the general windows recommendation, but is almost certain to arise from a survey
+        :return:
+        """
+
+        mixed_glazing_recommendation_config = [
+            r for r in self.property.non_invasive_recommendations if r["type"] == "mixed_glazing"
+        ][0]
+
+        description = (
+            "Install a combination of secondary and double glazing to single glazed windows" if
+            not mixed_glazing_recommendation_config.get("description")
+            else mixed_glazing_recommendation_config["description"]
+        )
+
+        windows_ending_config = WindowAttributes("Multiple glazing throughout").process()
+
+        windows_simulation_config = check_simulation_difference(
+            new_config=windows_ending_config, old_config=self.property.windows, prefix="windows_"
+        )
+
+        windows_simulation_config = {
+            **windows_simulation_config,
+            "windows_energy_eff": "Average",
+            "glazed_type_ending": "secondary glazing"
+        }
+
+        return [
+            {
+                "phase": None,
+                "parts": [],
+                "type": "mixed_glazing",
+                "description": description,
+                "starting_u_value": None,
+                "new_u_value": None,
+                "already_installed": False,
+                "sap_points": mixed_glazing_recommendation_config["sap_points"],
+                "heat_demand": None,  # We will predict this
+                "kwh_savings": None,  # We will predict this
+                "co2_equivalent_savings": None,  # We will predict this
+                "energy_cost_savings": None,  # We will predict this
+                "total": mixed_glazing_recommendation_config["cost"],
+                # Default to a rough estimate of one 8 hour working day when the config has no labour figures
+                "labour_hours": mixed_glazing_recommendation_config.get("labour_hours", 8),
+                "labour_days": mixed_glazing_recommendation_config.get("labour_days", 1),  # Assume 8 hour day
+                "survey": mixed_glazing_recommendation_config["survey"],
+                "simulation_config": windows_simulation_config,
+                "description_simulation": {
+                    "multi-glaze-proportion": 100,
+                    "windows-energy-eff": "Average",
+                    "windows-description": "Multiple glazing throughout",
+                    "glazed-type": "secondary glazing",
+                },
+            }
+        ]

From 304e19fca4ad0260ec3941d875157f88c465b5f0 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 5 Sep 2024 15:18:17 +0100
Subject: [PATCH 174/182] Added non-intrusive rec to leds

---
 backend/Property.py                        |  4 +++-
 recommendations/LightingRecommendations.py | 19 +++++++++++++++++--
 recommendations/Recommendations.py         |  3 ---
 3 files changed, 20 insertions(+), 6 deletions(-)

diff --git a/backend/Property.py b/backend/Property.py
index 3cc8350d..5507876a 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -1279,9 +1279,11 @@ class Property:
         """
 
         exclusions = [] if exclusions is None else exclusions
+        if "air_source_heat_pump" in exclusions:
+            return self.current_energy_consumption
 
         # If the property currently has an ASHP, we don't gain from any efficiency improvements
-        if not self.is_ashp_valid(exclusions=exclusions):
+        if not self.is_ashp_valid(measures=["air_source_heat_pump"]):
             return self.current_energy_consumption
 
         # If the property currently has an electric boiler, it will still benefit from the ASHP efficiency gain
diff --git a/recommendations/LightingRecommendations.py b/recommendations/LightingRecommendations.py
index 1186b0a9..b9456f8d 100644
--- a/recommendations/LightingRecommendations.py
+++ b/recommendations/LightingRecommendations.py
@@ -66,6 +66,11 @@ class LightingRecommendations:
         if self.property.lighting["low_energy_proportion"] == 100:
             return
 
+        leds_recommendation_config = next(
+            (r for r in self.property.non_invasive_recommendations if r["type"] == "low_energy_lighting"),
+            {}
+        )
+
         number_lighting_outlets = self.property.number_lighting_outlets
 
         # Number non lel outlets
@@ -79,6 +84,9 @@ class LightingRecommendations:
             return
 
         # Get the cost of the fittings
+        if leds_recommendation_config.get("cost"):
+            raise NotImplementedError("Costs from for low energy lighting have not been implemented")
+
         cost_result = self.costs.low_energy_lighting(
             number_of_lights=number_non_lel_outlets,
             number_current_lel_lights=number_lighting_outlets - number_non_lel_outlets,
@@ -97,6 +105,12 @@ class LightingRecommendations:
             cost_result = override_costs(cost_result)
             description = "Low energy lighting has already been installed, no further action required"
 
+        if leds_recommendation_config.get("sap_points") is not None:
+            # This could be zero points
+            sap_points = leds_recommendation_config["sap_points"]
+        else:
+            sap_points = round(2 * (number_non_lel_outlets / number_lighting_outlets), 2)
+
         self.recommendation = [
             {
                 "phase": phase,
@@ -108,13 +122,14 @@ class LightingRecommendations:
                 "already_installed": already_installed,
                 # For SAP points, we use the fact that lighting is usually worth 2 points and we scale this to
                 # the proportion of lights that will be set to low energy
-                "sap_points": round(2 * (number_non_lel_outlets / number_lighting_outlets), 2),
+                "sap_points": sap_points,
                 "kwh_savings": heat_demand_change,
                 "co2_equivalent_savings": carbon_change,
                 "description_simulation": {
                     "lighting-energy-eff": "Very Good",
                     "lighting-description": "Low energy lighting in all fixed outlets",
                 },
-                **cost_result
+                **cost_result,
+                "survey": leds_recommendation_config.get("survey", False)
             }
         ]
diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py
index 609fd245..184f0240 100644
--- a/recommendations/Recommendations.py
+++ b/recommendations/Recommendations.py
@@ -73,9 +73,6 @@ class Recommendations:
         Determines the set of measures to be included in recommendations
         """
 
-        if self.property_instance.non_invasive_recommendations:
-            raise Exception("IMPLEMENT ME")
-
         # Generally, inclusions is a global option and will overrule specific property non-invasive recommendations.
         # This is done so that we can use inclusions to specify scenarios.
 

From 33482b65042b467001a22fef330bea53c8dd9779 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 5 Sep 2024 16:11:10 +0100
Subject: [PATCH 175/182] adding non-invasive recommendations for iwi

---
 recommendations/WallRecommendations.py | 28 ++++++++++++++++++++++++--
 1 file changed, 26 insertions(+), 2 deletions(-)

diff --git a/recommendations/WallRecommendations.py b/recommendations/WallRecommendations.py
index d78d74a4..a0c71860 100644
--- a/recommendations/WallRecommendations.py
+++ b/recommendations/WallRecommendations.py
@@ -454,6 +454,16 @@ class WallRecommendations(Definitions):
 
         lowest_selected_u_value = None
         recommendations = []
+
+        iwi_non_invasive_recommendations = next(
+            (r for r in self.property.non_invasive_recommendations if r["type"] == "internal_wall_insulation"), {}
+        )
+        ewi_non_invasive_recommendations = next(
+            (r for r in self.property.non_invasive_recommendations if r["type"] == "external_wall_insulation"), {}
+        )
+        if ewi_non_invasive_recommendations:
+            raise NotImplementedError("Implement ewi non-invasive recommendations")
+
         for _, insulation_material_group in insulation_materials.groupby("description"):
 
             for _, material in insulation_material_group.iterrows():
@@ -486,6 +496,15 @@ class WallRecommendations(Definitions):
                     )
 
                     if material["type"] == "internal_wall_insulation":
+
+                        if iwi_non_invasive_recommendations.get("cost") is not None:
+                            raise NotImplementedError(
+                                "Not handled passing costs from non-invasive recommendations for iwi"
+                            )
+
+                        sap_points = iwi_non_invasive_recommendations.get("sap_points", None)
+                        survey = iwi_non_invasive_recommendations.get("survey", False)
+
                         cost_result = self.costs.internal_wall_insulation(
                             wall_area=self.property.insulation_wall_area,
                             material=material.to_dict(),
@@ -503,6 +522,10 @@ class WallRecommendations(Definitions):
                         )
 
                     elif material["type"] == "external_wall_insulation":
+
+                        sap_points = ewi_non_invasive_recommendations.get("sap_points", None)
+                        survey = ewi_non_invasive_recommendations.get("survey", False)
+
                         cost_result = self.costs.external_wall_insulation(
                             wall_area=self.property.insulation_wall_area,
                             material=material.to_dict(),
@@ -553,13 +576,14 @@ class WallRecommendations(Definitions):
                             "starting_u_value": u_value,
                             "new_u_value": new_u_value,
                             "already_installed": already_installed,
-                            "sap_points": None,
+                            "sap_points": sap_points,
                             "simulation_config": simulation_config,
                             "description_simulation": {
                                 "walls-description": new_description,
                                 "walls-energy-eff": simulation_config["walls_energy_eff_ending"]
                             },
-                            **cost_result
+                            **cost_result,
+                            "survey": survey
                         }
                     )
 

From 743c9bdbdfb9c411d8ebc917b930186f45b6db9d Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 5 Sep 2024 16:27:11 +0100
Subject: [PATCH 176/182] added non-intrusive rir recommendations

---
 recommendations/RoofRecommendations.py | 27 ++++++++++++++++++++------
 1 file changed, 21 insertions(+), 6 deletions(-)

diff --git a/recommendations/RoofRecommendations.py b/recommendations/RoofRecommendations.py
index c37c4d85..fe027371 100644
--- a/recommendations/RoofRecommendations.py
+++ b/recommendations/RoofRecommendations.py
@@ -155,20 +155,24 @@ class RoofRecommendations:
         )
 
         self.estimated_u_value = u_value
-        if (u_value <= self.BUILDING_REGULATIONS_PART_L_MAX_U_VALUE) and (
+        if (u_value <= self.BUILDING_REGULATIONS_PART_L_MAX_U_VALUE) or (
             "loft_insulation" not in measures
         ):
             # The Roof is already compliant
             return
 
         if (self.property.roof["is_pitched"] and "loft_insulation" in measures) or (
-            self.property.roof["is_flat"] and "flat_roof_insulation"
+            self.property.roof["is_flat"] and "flat_roof_insulation" in measures
         ):
             insulation_thickness = 0 if "loft_insulation" not in measures else self.insulation_thickness
             self.recommend_roof_insulation(u_value, insulation_thickness, self.property.roof, phase)
             return
 
-        if self.property.roof["is_roof_room"] and ("room_roof_insulation" in measures):
+        # There are cases where the property might have a room roof as the second roof, but we have a recommendation for
+        # it, so we allow this override
+        if self.property.roof["is_roof_room"] and ("room_roof_insulation" in measures) or (
+            "room_roof_insulation" in [x["type"] for x in self.property.non_invasive_recommendations]
+        ):
             self.recommend_room_roof_insulation(u_value, phase)
             return
 
@@ -421,6 +425,10 @@ class RoofRecommendations:
             }
         ]
 
+        rir_non_invasive_recommendation = next(
+            (x for x in self.property.non_invasive_recommendations if x["type"] == "room_roof_insulation"), {}
+        )
+
         # lowest_selected_u_value = None
         recommendations = []
         for material in roof_roof_insulation_materials:
@@ -445,7 +453,13 @@ class RoofRecommendations:
                 # if new_u_value <= self.BUILDING_REGULATIONS_PART_L_MAX_U_VALUE:
                 # lowest_selected_u_value = update_lowest_selected_u_value(lowest_selected_u_value, new_u_value)
 
-                estimated_cost = cost_per_unit * self.property.insulation_floor_area
+                estimated_cost = (
+                    cost_per_unit * self.property.insulation_floor_area if
+                    rir_non_invasive_recommendation.get("cost") is None else
+                    rir_non_invasive_recommendation.get("cost")
+                )
+
+                sap_points = rir_non_invasive_recommendation.get("sap_points", None)
 
                 # Could also be Roof room(s), ceiling insulated
                 new_descriptin = "Pitched, insulated at rafters"
@@ -483,14 +497,15 @@ class RoofRecommendations:
                         "description": "Insulate room in roof at rafters and re-decorate",
                         "starting_u_value": u_value,
                         "new_u_value": None,
-                        "sap_points": None,
+                        "sap_points": sap_points,
                         "simulation_config": simulation_config,
                         "description_simulation": {
                             "roof-description": new_descriptin,
                             "roof-energy-eff": new_efficiency
                         },
                         **cost_result,
-                        "already_installed": already_installed
+                        "already_installed": already_installed,
+                        "survey": rir_non_invasive_recommendation.get("survey", None)
                     }
                 )
 

From c450d1564e16b5ec492f734975df18ef0e026d96 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 5 Sep 2024 16:33:34 +0100
Subject: [PATCH 177/182] Made trickle vents worth zero sap points

---
 recommendations/VentilationRecommendations.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/recommendations/VentilationRecommendations.py b/recommendations/VentilationRecommendations.py
index e3b66226..163cdb8a 100644
--- a/recommendations/VentilationRecommendations.py
+++ b/recommendations/VentilationRecommendations.py
@@ -107,7 +107,7 @@ class VentilationRecommendations(Definitions):
                 "starting_u_value": None,
                 "new_u_value": None,
                 "already_installed": False,
-                "sap_points": trickle_vents_recommendation_config["sap_points"],
+                "sap_points": 0,
                 "heat_demand": 0,
                 "kwh_savings": 0,
                 "co2_equivalent_savings": 0,

From 7c902c6c823a502c3906414869ed8f52333436e7 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 5 Sep 2024 16:37:22 +0100
Subject: [PATCH 178/182] handling division by zero sap points

---
 recommendations/Recommendations.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py
index 184f0240..fa02c2c8 100644
--- a/recommendations/Recommendations.py
+++ b/recommendations/Recommendations.py
@@ -242,13 +242,13 @@ class Recommendations:
 
         # We also need to create the representative recommendations for each recommendation type
         property_representative_recommendations = self.create_representative_recommendations(
-            property_recommendations, non_invasive_recommendations=self.property_instance.non_invasive_recommendations
+            property_recommendations,
         )
 
         return property_recommendations, property_representative_recommendations
 
     @staticmethod
-    def create_representative_recommendations(property_recommendations, non_invasive_recommendations):
+    def create_representative_recommendations(property_recommendations):
         """
         This method will create a representative recommendation for each recommendation type
         In order to create a representative recommendation, we choose the recommendation that has:
@@ -302,7 +302,10 @@ class Recommendations:
                 elif not has_u_value and has_sap_points:
                     # Sort the options by the cost per SAP point improvement - the lower the better
                     for rec in recommendations:
-                        rec["efficiency"] = rec["total"] / rec["sap_points"]
+                        if rec["sap_points"] == 0:
+                            rec["efficiency"] = 0
+                        else:
+                            rec["efficiency"] = rec["total"] / rec["sap_points"]
                 elif has_rank:
                     # Sort the options by rank - the lower the better
                     for rec in recommendations:
@@ -581,7 +584,7 @@ class Recommendations:
                 "heating_cop": mapped["cop"], "hotwater_cop": 1
             }
 
-        mapped_hotwater = DESCRIPTIONS_TO_FUEL_TYPES[hotwater_description]
+        mapped_hotwater = assumptions.DESCRIPTIONS_TO_FUEL_TYPES[hotwater_description]
 
         return {
             "heating_fuel_type": heating_fuel, "hotwater_fuel_type": mapped_hotwater["fuel"],

From 388b6673c7915ac8024b6c96702138f12f3ded17 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 5 Sep 2024 16:42:01 +0100
Subject: [PATCH 179/182] added phase to non-invasive recs

---
 backend/Property.py                               | 2 +-
 recommendations/DraughtProofingRecommendations.py | 4 ++--
 recommendations/Recommendations.py                | 6 ++++--
 recommendations/WindowsRecommendations.py         | 4 ++--
 4 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/backend/Property.py b/backend/Property.py
index 5507876a..fe848c26 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -365,7 +365,7 @@ class Property:
             for rec in property_recommendations_by_phase:
                 # We simulate the impact of the recommendation at this current phase, and all of the prior phases
 
-                if rec["type"] == "mechanical_ventilation":
+                if rec["type"] in ["mechanical_ventilation", "trickle_vents", "draught_proofing"]:
                     continue
 
                 scoring_dict = self.create_recommendation_scoring_data(
diff --git a/recommendations/DraughtProofingRecommendations.py b/recommendations/DraughtProofingRecommendations.py
index fd6d4ee9..9ce9aa1b 100644
--- a/recommendations/DraughtProofingRecommendations.py
+++ b/recommendations/DraughtProofingRecommendations.py
@@ -8,7 +8,7 @@ class DraughtProofingRecommendations:
 
         self.recommendation = []
 
-    def recommend(self):
+    def recommend(self, phase):
         """
         In some cases, we can identify the need for draught proofing from the EPC recommendations, however the initial
         implementation of this class will just assume that we are picking up a non-invasive recommendation from the
@@ -28,7 +28,7 @@ class DraughtProofingRecommendations:
         # We recommend installing two mechanical ventilation systems
         self.recommendation = [
             {
-                "phase": None,
+                "phase": phase,
                 "parts": [],
                 "type": "draught_proofing",
                 "description": description,
diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py
index fa02c2c8..45169b30 100644
--- a/recommendations/Recommendations.py
+++ b/recommendations/Recommendations.py
@@ -137,8 +137,9 @@ class Recommendations:
         if "draught_proofing" in measures:
             # This is a recommendation that in some instances we can recommend, by deducing it from the SAP
             # recommendations, however we will implement this later
-            self.draught_proofing_recommender.recommend()
+            self.draught_proofing_recommender.recommend(phase=phase)
             property_recommendations.append(self.draught_proofing_recommender.recommendation)
+            phase += 1
 
         if "floor_insulation" in measures:
             self.floor_recommender.recommend(phase=phase, measures=measures)
@@ -154,7 +155,8 @@ class Recommendations:
 
         if "mixed_glazing" in measures:
             # This is a recommendation that comes exclusively from an energy assessment
-            property_recommendations.append(self.windows_recommender.recommend_mixed_glazing())
+            property_recommendations.append(self.windows_recommender.recommend_mixed_glazing(phase=phase))
+            phase += 1
 
         if "fireplace" in measures:
             self.fireplace_recommender.recommend(phase=phase)
diff --git a/recommendations/WindowsRecommendations.py b/recommendations/WindowsRecommendations.py
index c74c2fac..137b721e 100644
--- a/recommendations/WindowsRecommendations.py
+++ b/recommendations/WindowsRecommendations.py
@@ -130,7 +130,7 @@ class WindowsRecommendations:
             }
         ]
 
-    def recommend_mixed_glazing(self):
+    def recommend_mixed_glazing(self, phase):
         """
         This function will recommend mixed glazing to the property. This is a more specific recommendation than
         the general windows recommendation, but is almost certain to arise from a survey
@@ -161,7 +161,7 @@ class WindowsRecommendations:
 
         return [
             {
-                "phase": None,
+                "phase": phase,
                 "parts": [],
                 "type": "mixed_glazing",
                 "description": description,

From b26adff63c76681c4d3c2977ba779a540c9295ec Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 5 Sep 2024 17:12:10 +0100
Subject: [PATCH 180/182] Adding simulation code for new recommendations

---
 backend/Property.py                       | 25 +++++------------------
 recommendations/Recommendations.py        |  8 +++-----
 recommendations/WindowsRecommendations.py |  5 +++--
 3 files changed, 11 insertions(+), 27 deletions(-)

diff --git a/backend/Property.py b/backend/Property.py
index fe848c26..c4e2c6bc 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -480,7 +480,6 @@ class Property:
         """
 
         output = recommendation_record.copy()
-        non_invasive_recommendations = [] if non_invasive_recommendations is None else non_invasive_recommendations
 
         for col in [
             "walls_insulation_thickness",
@@ -537,28 +536,14 @@ class Property:
                 "heating", "hot_water_tank_insulation", "heating_control", "secondary_heating",
                 "internal_wall_insulation", "external_wall_insulation", "cavity_wall_insulation",
                 "cylinder_thermostat", "loft_insulation", "room_roof_insulation", "flat_roof_insulation",
-                "solid_floor_insulation", "suspended_floor_insulation",
+                "solid_floor_insulation", "suspended_floor_insulation", "mixed_glazing"
             ]:
                 # We update the data, as defined in the recommendaton
-                if output["walls_insulation_thickness_ending"] is None:
-                    output["walls_insulation_thickness_ending"] = "none"
+                for prefix in ["walls", "roof", "floor"]:
+                    if output[f"{prefix}_insulation_thickness_ending"] is None:
+                        output[f"{prefix}_insulation_thickness_ending"] = "none"
 
-                if output["walls_thermal_transmittance_ending"] is None:
-                    raise ValueError("We should not have a None value for the u value")
-
-                if output["roof_insulation_thickness_ending"] is None:
-                    output["roof_insulation_thickness_ending"] = "none"
-
-                if output["roof_thermal_transmittance_ending"] is None:
-                    raise ValueError("We should not have a None value for the u value")
-
-                if output["floor_thermal_transmittance_ending"] is None:
-                    raise ValueError("We should not have a None value for the u value")
-
-                if output["floor_insulation_thickness_ending"] is None:
-                    output["floor_insulation_thickness_ending"] = "none"
-
-                simulation_config = recommendation["simulation_config"]
+                simulation_config = recommendation["simulation_config"].copy()
                 # If any entries in simulation_config are None, we will set them to "Unknown" which is the cleaning
                 # value
                 for key, value in simulation_config.items():
diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py
index 45169b30..44c6b4f2 100644
--- a/recommendations/Recommendations.py
+++ b/recommendations/Recommendations.py
@@ -267,12 +267,10 @@ class Recommendations:
 
             # If the property was initially surveyed as filled, but the cavity was only partially filled, we don't
             # want to include the cavity wall insulation recommendation in the defaults
-            # if (recommendations_by_type[0].get("type") == "cavity_wall_insulation") and (
-            #     "cavity_surveyed_as_filled_is_partial" in non_invasive_recommendations
-            # ):
-            #     continue
 
-            if recommendations_by_type[0].get("type") == "mechanical_ventilation":
+            if recommendations_by_type[0].get("type") in [
+                "mechanical_ventilation", "trickle_vents", "draught_proofing"
+            ]:
                 continue
 
             has_u_value = recommendations_by_type[0].get("new_u_value") is not None
diff --git a/recommendations/WindowsRecommendations.py b/recommendations/WindowsRecommendations.py
index 137b721e..d7643393 100644
--- a/recommendations/WindowsRecommendations.py
+++ b/recommendations/WindowsRecommendations.py
@@ -155,8 +155,9 @@ class WindowsRecommendations:
 
         windows_simulation_config = {
             **windows_simulation_config,
-            "windows_energy_eff": "Average",
-            "glazed_type_ending": "secondary glazing"
+            "windows_energy_eff_ending": "Average",
+            "glazed_type_ending": "secondary glazing",
+            "multi_glaze_proportion_ending": 100,
         }
 
         return [

From ed4440f946a8e71fb5f4f5581deda7dc19ba7600 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 5 Sep 2024 18:13:04 +0100
Subject: [PATCH 181/182] energy assessment structure working - needs to make
 sure we use led sap points

---
 backend/Property.py                           |  2 +-
 .../DraughtProofingRecommendations.py         | 17 +++++---
 recommendations/Recommendations.py            | 40 +++++++++----------
 recommendations/VentilationRecommendations.py | 11 +++--
 recommendations/WindowsRecommendations.py     | 10 +++--
 5 files changed, 44 insertions(+), 36 deletions(-)

diff --git a/backend/Property.py b/backend/Property.py
index c4e2c6bc..704e4f0a 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -561,7 +561,7 @@ class Property:
                 "loft_insulation", "room_roof_insulation", "flat_roof_insulation",
                 "solid_floor_insulation", "suspended_floor_insulation",
                 "windows_glazing", "solar_pv", "heating", "hot_water_tank_insulation",
-                "heating_control", "secondary_heating", "cylinder_thermostat"
+                "heating_control", "secondary_heating", "cylinder_thermostat", "mixed_glazing"
             ]:
                 raise NotImplementedError(
                     "Implement me, given type %s" % recommendation["type"]
diff --git a/recommendations/DraughtProofingRecommendations.py b/recommendations/DraughtProofingRecommendations.py
index 9ce9aa1b..197d80cc 100644
--- a/recommendations/DraughtProofingRecommendations.py
+++ b/recommendations/DraughtProofingRecommendations.py
@@ -8,16 +8,23 @@ class DraughtProofingRecommendations:
 
         self.recommendation = []
 
-    def recommend(self, phase):
+    def recommend(self):
         """
         In some cases, we can identify the need for draught proofing from the EPC recommendations, however the initial
         implementation of this class will just assume that we are picking up a non-invasive recommendation from the
         survey
         """
 
-        draught_proofing_recommendation_config = [
-            r for r in self.property.non_invasive_recommendations if r["type"] == "draught_proofing"
-        ][0]
+        # For the moment, draught proofing doesn't have a phase impact
+
+        draught_proofing_recommendation_config = next(
+            (r for r in self.property.non_invasive_recommendations if
+             r["type"] == "draught_proofing"),
+            {}
+        )
+
+        if not draught_proofing_recommendation_config:
+            return
 
         description = (
             "Draught proof doors and windows to improve energy efficiency" if
@@ -28,7 +35,7 @@ class DraughtProofingRecommendations:
         # We recommend installing two mechanical ventilation systems
         self.recommendation = [
             {
-                "phase": phase,
+                "phase": None,
                 "parts": [],
                 "type": "draught_proofing",
                 "description": description,
diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py
index 44c6b4f2..ce8225a1 100644
--- a/recommendations/Recommendations.py
+++ b/recommendations/Recommendations.py
@@ -132,14 +132,16 @@ class Recommendations:
 
         if "trickle_vents" in measures:
             # This is a recommendatin that typically comes from an energy assessment
-            property_recommendations.append(self.ventilation_recomender.recommend_trickle_vents())
+            trickle_vents_rec = self.ventilation_recomender.recommend_trickle_vents()
+            if trickle_vents_rec:
+                property_recommendations.append(trickle_vents_rec)
 
         if "draught_proofing" in measures:
             # This is a recommendation that in some instances we can recommend, by deducing it from the SAP
             # recommendations, however we will implement this later
-            self.draught_proofing_recommender.recommend(phase=phase)
-            property_recommendations.append(self.draught_proofing_recommender.recommendation)
-            phase += 1
+            self.draught_proofing_recommender.recommend()
+            if self.draught_proofing_recommender.recommendation:
+                property_recommendations.append(self.draught_proofing_recommender.recommendation)
 
         if "floor_insulation" in measures:
             self.floor_recommender.recommend(phase=phase, measures=measures)
@@ -155,8 +157,10 @@ class Recommendations:
 
         if "mixed_glazing" in measures:
             # This is a recommendation that comes exclusively from an energy assessment
-            property_recommendations.append(self.windows_recommender.recommend_mixed_glazing(phase=phase))
-            phase += 1
+            mixed_glazing_rec = self.windows_recommender.recommend_mixed_glazing(phase=phase)
+            if mixed_glazing_rec:
+                property_recommendations.append(mixed_glazing_rec)
+                phase += 1
 
         if "fireplace" in measures:
             self.fireplace_recommender.recommend(phase=phase)
@@ -442,8 +446,9 @@ class Recommendations:
         impact_summary = []
         for recommendations_by_type in property_recommendations:
             for rec in recommendations_by_type:
-                if rec["type"] == "mechanical_ventilation":
-                    # We don't have a percieved sap impact of mechanical ventilation
+                if rec["type"] in ["mechanical_ventilation", "trickle_vents", "draught_proofing"]:
+                    # We don't have a perceived sap impact of mechanical ventilation or trickle vents, and we don't
+                    # have the capacity to score draught proofing
                     continue
 
                 phase_energy_efficiency_metrics = {
@@ -530,7 +535,9 @@ class Recommendations:
                     )
 
                 # Insert this information into the recommendation
-                rec["sap_points"] = property_phase_impact["sap"]
+                if rec.get("survey", False):
+                    rec["sap_points"] = property_phase_impact["sap"]
+
                 rec["co2_equivalent_savings"] = property_phase_impact["carbon"]
                 rec["heat_demand"] = property_phase_impact["heat_demand"]
 
@@ -704,18 +711,6 @@ class Recommendations:
             pd.isnull(kwh_impact_table["hotwater_fuel_type"]).sum()):
             raise Exception("Fuel type is missing")
 
-        # kwh_impact_table["heating_fuel_type"] = np.where(
-        #     kwh_impact_table["id"] == STARTING_DUMMY_ID_VALUE,
-        #     property_instance.heating_energy_source,
-        #     kwh_impact_table["heating_fuel_type"]
-        # )
-        #
-        # kwh_impact_table["hotwater_fuel_type"] = np.where(
-        #     kwh_impact_table["id"] == STARTING_DUMMY_ID_VALUE,
-        #     property_instance.hot_water_energy_source,
-        #     kwh_impact_table["hotwater_fuel_type"]
-        # )
-
         # We now calculate the fuel cost
         for k in ["heating", "hotwater"]:
             kwh_impact_table[f"{k}_cost"] = kwh_impact_table.apply(
@@ -727,7 +722,8 @@ class Recommendations:
         # We now deduce if any of the recommendations result in a change of fuel type
         for recs in property_recommendations:
             for rec in recs:
-                if rec["type"] == "mechanical_ventilation":
+                if rec["type"] in ["mechanical_ventilation", "trickle_vents", "draught_proofing"]:
+                    # We cannot score the impact on draught proofing
                     continue
 
                 rec_impact = kwh_impact_table[kwh_impact_table["recommendation_id"] == rec["recommendation_id"]]
diff --git a/recommendations/VentilationRecommendations.py b/recommendations/VentilationRecommendations.py
index 163cdb8a..34439827 100644
--- a/recommendations/VentilationRecommendations.py
+++ b/recommendations/VentilationRecommendations.py
@@ -88,12 +88,15 @@ class VentilationRecommendations(Definitions):
         about as a result of an energy assessment is the installation of trickle vents. This function handles that
         """
 
-        trickle_vents_recommendation_config = [
-            r for r in self.property.non_invasive_recommendations if r["type"] == "trickle_vents"
-        ][0]
+        trickle_vents_recommendation_config = next(
+            (r for r in self.property.non_invasive_recommendations if r["type"] == "trickle_vents"), {}
+        )
+
+        if not trickle_vents_recommendation_config:
+            return
 
         description = (
-            "Install trickle vents to windows without them" if
+            "Install trickle vents on your windows" if
             not trickle_vents_recommendation_config.get("description")
             else trickle_vents_recommendation_config["description"]
         )
diff --git a/recommendations/WindowsRecommendations.py b/recommendations/WindowsRecommendations.py
index d7643393..ae7f7057 100644
--- a/recommendations/WindowsRecommendations.py
+++ b/recommendations/WindowsRecommendations.py
@@ -137,9 +137,11 @@ class WindowsRecommendations:
         :return:
         """
 
-        mixed_glazing_recommendation_config = [
-            r for r in self.property.non_invasive_recommendations if r["type"] == "mixed_glazing"
-        ][0]
+        mixed_glazing_recommendation_config = next(
+            (r for r in self.property.non_invasive_recommendations if r["type"] == "mixed_glazing"), {}
+        )
+        if not mixed_glazing_recommendation_config:
+            return
 
         description = (
             "Install a combination of secondary and double glazing to single glazed windows" if
@@ -147,7 +149,7 @@ class WindowsRecommendations:
             else mixed_glazing_recommendation_config["description"]
         )
 
-        windows_ending_config = WindowAttributes("Multiple glazing throughout").process()
+        windows_ending_config = WindowAttributes("Full secondary glazing").process()
 
         windows_simulation_config = check_simulation_difference(
             new_config=windows_ending_config, old_config=self.property.windows, prefix="windows_"

From e434b665d54a0b29acd53eb90443fc29ab75aeb2 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 9 Sep 2024 12:43:50 +0100
Subject: [PATCH 182/182] allowing retrofit energy assessments bucket to be a
 presignable bucket

---
 infrastructure/terraform/main.tf   | 2 +-
 recommendations/Recommendations.py | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/infrastructure/terraform/main.tf b/infrastructure/terraform/main.tf
index 972722bb..9c2b7d47 100644
--- a/infrastructure/terraform/main.tf
+++ b/infrastructure/terraform/main.tf
@@ -176,7 +176,7 @@ module "retrofit_hotwater_kwh_predictions" {
 }
 
 module "retrofit_energy_assessments" {
-  source          = "./modules/s3"
+  source          = "./modules/s3_presignable_bucket"
   bucketname      = "retrofit-energy-assessments-${var.stage}"
   allowed_origins = var.allowed_origins
 }
diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py
index ce8225a1..45498a8a 100644
--- a/recommendations/Recommendations.py
+++ b/recommendations/Recommendations.py
@@ -534,8 +534,8 @@ class Recommendations:
                         property_phase_impact["carbon"], rec["co2_equivalent_savings"]
                     )
 
-                # Insert this information into the recommendation
-                if rec.get("survey", False):
+                # Insert this information into the recommendation.
+                if not rec.get("survey", False):
                     rec["sap_points"] = property_phase_impact["sap"]
 
                 rec["co2_equivalent_savings"] = property_phase_impact["carbon"]