fixed merge conflict with refactor

2026-06-08 11:17:27 +00:00 · 2024-01-16 16:57:45 +00:00 · 2024-01-16 16:57:45 +00:00 · 90ba851cc0
commit 90ba851cc0
parent b82fa4cef7 8904467788
57 changed files with 5589 additions and 419 deletions
--- a/.gitignore
+++ b/.gitignore
@ -265,4 +265,7 @@ model_data/simulation_system/predictions/
 .idea/misc.iml
 adhoc
-adhoc/*
+adhoc/*
 etl-router-venv/
 refactor_datasets/
--- a/backend/DbClient.py
+++ b/backend/DbClient.py
@ -0,0 +1,7 @@
 class DbClient:
    """
    This class handles interaction with the database
    """

    def __init__(self):
        """
        Create the client. No state is initialised here yet.
        """
--- a/backend/OrdnanceSurvey.py
+++ b/backend/OrdnanceSurvey.py
@ -0,0 +1,105 @@
 from functools import lru_cache
 import urllib.parse
 import requests
 from utils.logger import setup_logger
 logger = setup_logger()
 class OrdnanceSuveyClient:
    """
    Client for the Ordnance Survey Places API, used to look up a property by address and postcode and to
    translate the best match into the address, postcode, property type and built form we store.
    """

    # Seconds to wait for the places api before giving up, so a stalled request cannot hang the caller
    REQUEST_TIMEOUT_SECONDS = 10

    def __init__(self, address, postcode, api_key):
        """
        This class is tasked with interaction with the ordnance survey API.
        :param address: The address for the property to search for
        :param postcode: The postcode for the property to search for
        :param api_key: The Ordnance Survey API key, sent with each places api request
        """
        self.address = address
        self.postcode = postcode
        self.full_address = ", ".join([self.address, self.postcode])
        self.api_key = api_key
        self.results = None
        self.most_relevant_result = None
        self.property_type = None
        self.built_form = None
        # This will be postcode and address, as returned by the ordnance survey
        self.address_os = None
        self.postcode_os = None
        # Status dictionary of the lookup already performed by this instance, so repeated calls to
        # get_places_api do not hit the api again
        self._places_api_status = None

    def set_places_address(self):
        """
        Given a response from the places api, this function will set the address and postcode of the property
        :raises ValueError: if no best match has been stored yet (get_places_api has not found a result)
        """
        if self.most_relevant_result is None:
            raise ValueError("No results found - run get_places_api first")
        self.address_os = self.most_relevant_result["ADDRESS"]
        self.postcode_os = self.most_relevant_result["POSTCODE"]
        # We strip out the postcode from the address as this is already stored separately
        self.address_os = self.address_os.replace(self.postcode_os, "").strip()
        # Remove trailing comma
        self.address_os = self.address_os.rstrip(",").strip()
        # Convert to title case
        self.address_os = self.address_os.title()
        # Make sure postcode is upper case
        self.postcode_os = self.postcode_os.upper()

    def get_places_api(self):
        """
        This method is tasked with getting the places api from the Ordnance Survey.
        On a successful response with at least one result, the best match is stored on the instance and the
        property type, built form, address and postcode are derived from it. The outcome is remembered per
        instance, so calling this method again returns the stored status without another request.
        :return: dictionary with a single "status" key holding the HTTP status code of the response
        :raises ValueError: if no API key was provided
        """
        if self._places_api_status is not None:
            return self._places_api_status
        if not self.api_key:
            raise ValueError("Ordnance Survey API key not specified")
        encoded_address_query = urllib.parse.quote(self.full_address)
        url = (f"https://api.os.uk/search/places/v1/find?query={encoded_address_query}&key="
               f"{self.api_key}")
        response = requests.get(url, timeout=self.REQUEST_TIMEOUT_SECONDS)
        if response.status_code == 200:
            # A successful response is not guaranteed to carry any matches, so treat a missing or empty
            # "results" entry as "nothing found" rather than failing on the lookup
            results = response.json().get("results") or []
            self.results = results
            if results:
                # Extract some details about the best match
                self.most_relevant_result = results[0]["DPA"]
                self.parse_classification_code(self.most_relevant_result["CLASSIFICATION_CODE"])
                self.set_places_address()
            else:
                logger.info("Could not find any results for the provided address and postcode")
        else:
            logger.warning("Ordnance Survey places request failed with status %s", response.status_code)
        self._places_api_status = {"status": response.status_code}
        return self._places_api_status

    def parse_classification_code(self, classification_code: str):
        """
        This function will convert the classification code, returned by the OS places api, to a property type that is
        compatible with the EPC database.
        The various classifications can be found here:
        https://osdatahub.os.uk/docs/places/technicalSpecification
        Under LPI Output, CLASSIFICATION_CODE is described, and a link is provided to the full table of classifications
        For these purposes, we do not need the full classification as this includes non-residential properties. We only
        parse the ones of interest to us
        :param classification_code: the CLASSIFICATION_CODE of the best match; unknown codes map to empty strings
        :return:
        """
        value_map = {
            # In the OS api, "RD" is a "Dwelling" however this is not valid property type in the EPC database
            'RD': {},
            'RD02': {'property_type': 'House', 'built_form': 'Detached'},
            'RD03': {'property_type': 'House', 'built_form': 'Semi-Detached'},
            'RD04': {'property_type': 'House', 'built_form': 'Mid-Terrace'},
            'RD06': {'property_type': 'Flat'},
        }
        mapped = value_map.get(classification_code, {})
        self.property_type = mapped.get("property_type", "")
        self.built_form = mapped.get("built_form", "")
--- a/backend/Property.py
+++ b/backend/Property.py
@ -9,18 +9,17 @@ from etl.epc.DataProcessor import EPCDataProcessor
 from etl.epc.Dataset import TrainingDataset
 from etl.epc.settings import LATEST_FIELD, MANDATORY_FIXED_FEATURES, POTENTIAL_COLUMNS, EFFICIENCY_FEATURES, BUILT_FORM_REMAP
 from etl.epc_clean.epc_attributes.all_cleaners import all_cleaner_map
 from etl.solar.SolarPhotoSupply import SolarPhotoSupply
 from utils.logger import setup_logger
 from utils.s3 import read_dataframe_from_s3_parquet
 from epc_api.client import EpcClient
 from BaseUtility import Definitions
 from recommendations.rdsap_tables import england_wales_age_band_lookup, FLOOR_LEVEL_MAP
 from recommendations.recommendation_utils import (
-    estimate_perimeter, get_wall_type, estimate_external_wall_area, esimtate_pitched_roof_area
+    estimate_perimeter, get_wall_type, estimate_external_wall_area, esimtate_pitched_roof_area, estimate_windows
 )
 ENVIRONMENT = os.environ.get('ENVIRONMENT', 'dev')
 EPC_AUTH_TOKEN = os.environ.get('EPC_AUTH_TOKEN')
 DATA_BUCKET = os.environ.get('DATA_BUCKET', 'retrofit-data-dev' if ENVIRONMENT == 'dev' else None)
 logger = setup_logger()
@ -51,13 +50,14 @@ class Property(Definitions):
    spatial = None
-    def __init__(self, id, postcode, address1, epc_record, data=None):
+    def __init__(self, id, postcode, address, epc_record, data=None):
        self.epc_record = epc_record
        self.id = id
        self.address = address
        self.postcode = postcode
        self.address1 = address1
        self.data = {k.replace("_", "-"): v for k,v in epc_record.get("prepared_epc").items()}
        self.old_data = epc_record.get("old_data")
        self.property_dimensions = None
@ -112,6 +112,9 @@ class Property(Definitions):
        self.insulation_floor_area = None
        self.number_lighting_outlets = epc_record.prepared_epc.get("fixed_lighting_outlets_count")
        self.floor_level = None
        self.number_of_windows = None
        self.solar_pv_roof_area = None
        self.solar_pv_percentage = None
        self.current_adjusted_energy = None
        self.expected_adjusted_energy = None
@ -177,81 +180,51 @@ class Property(Definitions):
            recommendation_record["walls_insulation_thickness_ending"] = "above average"
            recommendation_record["walls_energy_eff_ending"] = "Good"
        else:
-            if recommendation_record["walls_thermal_transmittance_ending"] is None:
+            wind_turbine_count = int(wind_turbine_count)
                raise ValueError("We should not have a None value for the u value")
-            if recommendation_record["walls_insulation_thickness_ending"] is None:
+        self.wind_turbine = {
-                recommendation_record["walls_insulation_thickness_ending"] = "none"
+            "wind_turbine": wind_turbine_count,
        }
-        # Update description to indicate it's insulate
+    def set_count_variables(self):
        if recommendation["type"] in ["solid_floor_insulation", "suspended_floor_insulation", "exposed_floor_insulation"]:
            if len(recommendation["parts"]) > 1:
                raise NotImplementedError("Have more than 1 floor insulation part - handle this case")
-            recommendation_record["floor_thermal_transmittance_ending"] = recommendation["new_u_value"]
+        """
-            # We don't really see above average for this in the training data
+        For EPC fields that are just counts, we'll set them here
-            recommendation_record["floor_insulation_thickness_ending"] = "average"
+        These are fields that are integers but may contain additional values such as "" so we can't do a direct
-            recommendation_record["floor_energy_eff_ending"] = "Good"
+        conversion straight to an integer
-        else:
+        :return:
-            if recommendation_record["floor_thermal_transmittance_ending"] is None:
+        """
                raise ValueError("We should not have a None value for the u value")
-            if recommendation_record["floor_insulation_thickness_ending"] is None:
+        fields = {
-                recommendation_record["floor_insulation_thickness_ending"] = "none"
+            "number_of_open_fireplaces": "number-open-fireplaces",
            "number_of_extensions": "extension-count",
            "number_of_storeys": "flat-storey-count",
            "number_of_rooms": "number-habitable-rooms",
        }
-        if recommendation["type"] in ["loft_insulation", "room_roof_insulation", "flat_roof_insulation"]:
+        null_attributes = ["number_of_storeys", "number_of_rooms"]
            recommendation_record["roof_thermal_transmittance_ending"] = recommendation["new_u_value"]
-            parts = recommendation["parts"]
+        for attribute, epc_field in fields.items():
-            if len(parts) != 1:
+            value = self.data["extension-count"]
-                raise ValueError("More than one part for roof insulation - investiage me")
+            if value == "" or value in self.DATA_ANOMALY_MATCHES:
                if attribute in null_attributes:
                    value = None
                else:
                    value = 0
            else:
                value = int(value)
-            # This is based on the values we have in the training data
+            setattr(self, attribute, value)
            valid_numeric_values = [
                12, 25, 50, 75, 100, 150, 200, 250, 270, 300, 350, 400
            ]
-            proposed_depth = int(parts[0]["depth"])
+    def get_components(self, cleaned, photo_supply_lookup, floor_area_decile_thresholds):
            if proposed_depth not in valid_numeric_values:
                # Take the nearest value for scoring
                proposed_depth = min(valid_numeric_values, key=lambda x: abs(x - proposed_depth))
            recommendation_record["roof_insulation_thickness_ending"] = str(proposed_depth)
            recommendation_record["roof_energy_eff_ending"] = "Very Good"
        else:
            # Fill missing roof u-values - this fill is not based on recommended upgrades
            if recommendation_record["roof_thermal_transmittance_ending"] is None:
                raise ValueError("We should not have a None value for the u value")
            if recommendation_record["roof_insulation_thickness_ending"] is None:
                recommendation_record["roof_insulation_thickness_ending"] = "none"
        if recommendation["type"] == "mechanical_ventilation":
            recommendation_record["mechanical_ventilation_ending"] = 'mechanical, extract only'
        if recommendation["type"] == "sealing_open_fireplace":
            recommendation_record["number_open_fireplaces_ending"] = 0
        if recommendation["type"] == "low_energy_lighting":
            recommendation_record["low_energy_lighting_ending"] = 100
            recommendation_record["lighting_energy_eff_starting"] = "Very Good"
        if recommendation["type"] not in [
            "mechanical_ventilation", "sealing_open_fireplace", "low_energy_lighting",
            "internal_wall_insulation", "external_wall_insulation", "cavity_wall_insulation",
            "loft_insulation", "room_roof_insulation", "flat_roof_insulation",
            "solid_floor_insulation", "suspended_floor_insulation", "exposed_floor_insulation"
        ]:
            raise NotImplementedError("Implement me")
        return recommendation_record
    def get_components(self, cleaned):
        """
        Given the cleaning that has been performed, we'll use this to identify the property
        components, from roof to walls to windows, heating and hot water
        :param cleaned: This is the dictionary of components found in cleaner.cleaned
        :param photo_supply_lookup: This is the lookup table for the photo supply, used to estimate the percentage
                                    of the roof that is suitable for solar panels
        :param floor_area_decile_thresholds: This is the decile thresholds for the floor area, used in estimating the
                                             solar pv roof area
        :return:
        """
@ -301,6 +274,10 @@ class Property(Definitions):
        self.set_wall_type()
        self.set_floor_type()
        self.set_floor_level()
        self.set_windows_count()
        self.set_solar_panel_area(
            photo_supply_lookup=photo_supply_lookup, floor_area_decile_thresholds=floor_area_decile_thresholds
        )
    def set_spatial(self, spatial: pd.DataFrame):
        """
@ -368,7 +345,7 @@ class Property(Definitions):
        """
        Utility function for usage in the lambda, for preparing the _rating fields
        """
-        return rating_lookup[field].value if field not in cls.DATA_ANOMALY_MATCHES else None
+        return rating_lookup[field].value if (field not in cls.DATA_ANOMALY_MATCHES) and (field is not None) else None
    def get_property_details_epc(self, portfolio_id: int, rating_lookup):
@ -409,6 +386,7 @@ class Property(Definitions):
            "primary_energy_consumption": self.energy["primary_energy_consumption"],
            "co2_emissions": self.energy["co2_emissions"],
            "adjusted_energy_consumption": self.current_adjusted_energy,
            "estimated": self.data.get("estimated", False)
        }
        return property_details_epc
@ -664,7 +642,7 @@ class Property(Definitions):
        :return:
        """
-        if self.data["fixed-lighting-outlets-count"] == "":
+        if self.data["fixed-lighting-outlets-count"] in [None, ""]:
            # We check old EPCs and the full SAP EPC
@ -693,3 +671,52 @@ class Property(Definitions):
        """
        self.current_adjusted_energy = current_adjusted_energy
        self.expected_adjusted_energy = expected_adjusted_energy
    def set_windows_count(self):
        """
        Estimate the number of windows in the property and store it on ``number_of_windows``.
        The estimate is delegated to estimate_windows, fed with the property type, built form and extension
        count from the EPC data plus the construction age band, floor area and room count held on the instance
        :return:
        """
        epc = self.data
        # Gathered in the same order as the keyword arguments are documented, then passed through unchanged
        window_inputs = {
            "property_type": epc["property-type"],
            "built_form": epc["built-form"],
            "construction_age_band": self.construction_age_band,
            "floor_area": self.floor_area,
            "number_habitable_rooms": self.number_of_rooms,
            "extension_count": float(epc["extension-count"]),
        }
        self.number_of_windows = estimate_windows(**window_inputs)
    def set_solar_panel_area(self, photo_supply_lookup, floor_area_decile_thresholds):
        """
        Sets the approximate area of the solar panels, along with the fraction of the roof they cover.
        The fraction is the mean of the "photo_supply_median" values of the lookup rows matched for this
        property, divided by 100. It is applied to the insulation floor area for flat roofs and to the
        pitched roof area otherwise.
        :param photo_supply_lookup: lookup table passed through to SolarPhotoSupply.filter_photo_supply_lookup
        :param floor_area_decile_thresholds: decile thresholds passed through to the same lookup filter
        :raises ValueError: if the roof area needed for this roof type has not been set yet
        :return:
        """
        if (self.insulation_floor_area is None) and (self.pitched_roof_area is None):
            raise ValueError(
                "Need to set insulation floor area and pitched roof area before setting solar pv roof area"
            )
        # Only one of the two areas is used, depending on the roof type. Check that specific one up front so
        # a missing value is reported clearly instead of failing later when multiplying None
        is_flat = self.roof["is_flat"]
        roof_area = self.insulation_floor_area if is_flat else self.pitched_roof_area
        if roof_area is None:
            required_area = "insulation floor area" if is_flat else "pitched roof area"
            raise ValueError(f"Need to set {required_area} before setting solar pv roof area")
        photo_supply_matched = SolarPhotoSupply.filter_photo_supply_lookup(
            photo_supply_lookup=photo_supply_lookup,
            floor_area_decile_thresholds=floor_area_decile_thresholds,
            tenure=self.data["tenure"],
            built_form=self.data["built-form"],
            property_type=self.data["property-type"],
            construction_age_band=self.construction_age_band,
            is_flat=is_flat,
            is_pitched=self.roof["is_pitched"],
            is_roof_room=self.roof["is_roof_room"],
            floor_area=self.floor_area
        )
        # The lookup values are divided by 100 to turn them into a fraction of the roof
        percentage_of_roof = photo_supply_matched["photo_supply_median"].mean() / 100
        self.solar_pv_roof_area = roof_area * percentage_of_roof
        self.solar_pv_percentage = percentage_of_roof
--- a/backend/SearchEpc.py
+++ b/backend/SearchEpc.py
@ -1,12 +1,114 @@
 import os
 import time
 import re
 import usaddress
 import pandas as pd
 import numpy as np
 from epc_api.client import EpcClient
 from backend.OrdnanceSurvey import OrdnanceSuveyClient
 from BaseUtility import Definitions
 from utils.logger import setup_logger
 from typing import List
 from fuzzywuzzy import process
 logger = setup_logger()
 # Lookup from EPC field name to the type label recorded for that field.
 # NOTE(review): the labels look like pandas dtype names (e.g. "Int64"), presumably for casting EPC columns -
 # confirm against the code that consumes this table. Commented-out entries are fields with no label assigned.
 vartypes = {
    'low-energy-fixed-light-count': "Int64",
    # 'address': 'str',
    # 'uprn-source': 'str',
    'floor-height': 'float',
    'heating-cost-potential': 'float',
    'unheated-corridor-length': 'float',
    'hot-water-cost-potential': 'float',
    'construction-age-band': 'str',
    'potential-energy-rating': 'str',
    'mainheat-energy-eff': 'str',
    'windows-env-eff': 'str',
    'lighting-energy-eff': 'str',
    'environment-impact-potential': "Int64",
    'glazed-type': 'str',
    'heating-cost-current': 'float',
    'address3': 'str',
    'mainheatcont-description': 'str',
    'sheating-energy-eff': 'str',
    'property-type': 'str',
    'local-authority-label': 'str',
    'fixed-lighting-outlets-count': "Int64",
    'energy-tariff': 'str',
    'mechanical-ventilation': 'str',
    # NOTE(review): labelled 'str' while every other "*-cost-*" field in this table is 'float' - confirm
    # whether this is intentional
    'hot-water-cost-current': 'str',
    'county': 'str',
    'postcode': 'str',
    'solar-water-heating-flag': 'str',
    'constituency': 'str',
    'co2-emissions-potential': 'float',
    'number-heated-rooms': 'float',
    'floor-description': 'str',
    'energy-consumption-potential': 'float',
    'local-authority': 'str',
    'built-form': 'str',
    'number-open-fireplaces': "Int64",
    'windows-description': 'str',
    'glazed-area': 'str',
    # 'inspection-date': str,
    'mains-gas-flag': 'str',
    'co2-emiss-curr-per-floor-area': 'float',
    'address1': 'str',
    'heat-loss-corridor': 'str',
    'flat-storey-count': "Int64",
    'constituency-label': 'str',
    'roof-energy-eff': 'str',
    'total-floor-area': 'float',
    'building-reference-number': 'str',
    'environment-impact-current': 'float',
    'co2-emissions-current': 'float',
    'roof-description': 'str',
    'floor-energy-eff': 'str',
    'number-habitable-rooms': 'float',
    'address2': 'str',
    'hot-water-env-eff': 'str',
    'posttown': 'str',
    'mainheatc-energy-eff': 'str',
    'main-fuel': 'str',
    'lighting-env-eff': 'str',
    'windows-energy-eff': 'str',
    'floor-env-eff': 'str',
    'sheating-env-eff': 'str',
    'lighting-description': 'str',
    'roof-env-eff': 'str',
    'walls-energy-eff': 'str',
    'photo-supply': 'float',
    'lighting-cost-potential': 'float',
    'mainheat-env-eff': 'str',
    'multi-glaze-proportion': 'float',
    'main-heating-controls': 'str',
    # 'lodgement-datetime',
    'flat-top-storey': 'str',
    'current-energy-rating': 'str',
    'secondheat-description': 'str',
    'walls-env-eff': 'str',
    'transaction-type': 'str',
    # 'uprn': "Int64",
    'current-energy-efficiency': 'float',
    'energy-consumption-current': 'float',
    'mainheat-description': 'str',
    'lighting-cost-current': 'float',
    # 'lodgement-date',
    'extension-count': "Int64",
    'mainheatc-env-eff': 'str',
    'lmk-key': 'str',
    'wind-turbine-count': "Int64",
    'tenure': 'str',
    'floor-level': 'str',
    'potential-energy-efficiency': "Int64",
    'hot-water-energy-eff': 'str',
    'low-energy-lighting': 'float',
    'walls-description': 'str',
    'hotwater-description': 'str'
 }
 class SearchEpc:
    """
@ -38,53 +140,127 @@ class SearchEpc:
        self,
        address1: str,
        postcode: str,
-        address2: str = None,
+        auth_token: str,
-        address3: str = None,
+        os_api_key: str,
-        address4: str = None,
+        full_address: str | None = None,
-        max_retries: int = None
+        max_retries: int = None,
        uprn: [int, None] = None,
        size=None,
        property_type=None,
    ):
        """
        Address lines 1 and postcode are mandatory fields. The other address lines are optional
        but can be used to find the epc for the home, if address1 and postcode are insufficient
        :param address1: string, propery's address line 1
        :param postcode: string, propery's postcode
-        :param address2: string, optional, propery's address line 2
+        :param full_address: string, optional parameter, the full address of the property
-        :param address3: string, optional, propery's address line 3
+        :param max_retries: int, optional, number of retries to make when searching the api
-        :param address4: string, optional, propery's address line 4
+        :param uprn: int, optional, the uprn of the property
        :param size: int, optional, the number of results to return. If not provided, defaults to 25 which is the api's
                        default
        :param property_type: str, optional, the property type of the property, if known before hand
        """
        self.address1 = address1
        self.postcode = postcode
-        self.address2 = address2
+        self.full_address = full_address
-        self.address3 = address3
+        self.uprn = uprn
-        self.address4 = address4
+        self.house_number = self.get_house_number(self.address1)
        self.numeric_house_number = self.extract_numeric_housenumber_part(self.house_number)
        self.max_retries = max_retries if max_retries is not None else self.MAX_RETRIES
-        self.client = EpcClient(auth_token=os.getenv("EPC_AUTH_TOKEN"))
+        self.client = EpcClient(auth_token=auth_token)
        self.ordnance_survey_client = OrdnanceSuveyClient(
            address=self.address1, postcode=self.postcode, api_key=os_api_key
        )
        self.data = None
        self.newest_epc = None
        self.older_epcs = None
        self.full_sap_epc = None
-    def search(self):
+        # These are the address and postcode values, which we store in the database
        self.address_clean = None
        self.postcode_clean = None
        self.size = size if size is not None else 25
        self.property_type = property_type
    @classmethod
    def get_house_number(cls, address: str) -> str | None:
        """
        Extract the house number from an address.
        The usaddress parser is tried first; when it finds no "AddressNumber" token, a regular expression
        fallback looks for a number after 'Flat' / 'Apartment', or a number at the start of the address
        :param address: the address to parse
        :return: the house number as a string with commas removed, or None if no number could be found
        """
        tagged_tokens = usaddress.parse(address)
        number_tokens = [token for token, label in tagged_tokens if label == "AddressNumber"]
        if number_tokens:
            # Only the first number is used; strip any commas attached to the token
            return number_tokens[0].replace(",", "")
        # usaddress isn't optimal for addresses with prefixes such as 'Flat', so fall back to a custom pattern:
        # 'Flat' or 'Apartment' followed by a number, or just a number at the beginning
        fallback = re.search(r'(?i)(?:flat|apartment)\s*(\d+)|^\s*(\d+)', address)
        if fallback is None:
            return None
        # Exactly one of the two alternatives matched - return whichever group captured the digits
        return next(group for group in fallback.groups() if group is not None)
    @staticmethod
    def extract_numeric_housenumber_part(house_number: str | None) -> int | None:
        # Regular expression to find the first occurrence of one or more digits
        if house_number is None:
            return None
        match = re.search(r'\d+', house_number)
        if match:
            return int(match.group())
        else:
            return None
    def get_epc(self, params=None, size=None):
        # Get the EPC data with retries
        size = size if size is not None else self.size
        if params is None:
            if self.uprn:
                params = {"uprn": self.uprn}
            else:
                params = {"address": self.address1, "postcode": self.postcode}
        for retry in range(self.max_retries):
            try:
-                response = self.client.domestic.search(
+
-                    params={"address": self.address1, "postcode": self.postcode}
+                if "uprn" in params:
-                )
+                    # We use the direct call method inside, since we need to implement uprn as a valid
                    # parameter for the search function
                    url = os.path.join(self.client.domestic.host, "search")
                    response = self.client.domestic.call(method="get", url=url, params=params)
                else:
                    response = self.client.domestic.search(params=params, size=size)
                if response:
                    self.data = response
                    return self.SUCCESS
                if retry > 0:
-                    print("Failed previous attempt but retry successful")
+                    logger.info("Failed previous attempt but retry successful")
                # If we got nothing, final try
                if not response:
                    # TODO: Make a call to OS uprn service and get the address' uprn, just in case there is an
                    #       issue with how we are searching the api
                    return {
                        "status": 204,
                        "message": "no data",
@ -127,7 +303,6 @@ class SearchEpc:
        if len(uprns) == 1:
            return rows
        logger.error("Multiple UPRNS found - we should use an alternate method of searching - TODO")
        if property_type is not None:
            # We can do a filter on the property type
            rows_filtered = [r for r in rows if r["property-type"] == property_type]
@ -147,7 +322,24 @@ class SearchEpc:
            return rows
-    def retrieve(self, property_type=None, address=None):
+    @staticmethod
    def format_address(newest_epc):
        """
        Format address and postcode for storage in the database
        """
        postcode = newest_epc["postcode"]
        address = newest_epc["address"]
        # Format them
        address = address.replace(postcode, "").strip()
        address = address.rstrip(",").strip()
        address = address.title()
        postcode = postcode.upper()
        return address, postcode
    def extract_epc_data(self, address=None):
        """
        Given a successful search, this method will format the data and return it
@ -163,7 +355,7 @@ class SearchEpc:
        # Firstly, we should only have 1 uprn so if we have multiple, we'll need to filter down the
        # property further
-        rows = self.filter_rows(rows, property_type=property_type, address=None)
+        rows = self.filter_rows(rows, property_type=self.property_type, address=None)
        rows = self.filter_rows(rows, property_type=None, address=address)
        # We now check for a full sap epc:
@ -173,7 +365,26 @@ class SearchEpc:
        # Finally, we identify the newest epc and the rest, and then return
        newest_epc, older_epcs = self.filter_newest_epc(list_of_epcs=rows)
-        return newest_epc, older_epcs, full_sap_epc
+        # Retrieve postcode and address
        address_epc, postcode_epc = self.format_address(newest_epc=newest_epc)
        # Get the uprn from the newest record for this home
        uprns = {r["uprn"] for r in rows if r["uprn"]}
        # We can sometimes have no uprn for a property
        if (len(uprns) == 0) and len(rows) > 0:
            logger.warning("Found data but missing uprn")
        elif len(uprns) != 1:
            # There is a possibility that we have multiple UPRNs for a single property, which is an error
            addresses = {r["address"] for r in rows}
            if len(addresses) == 1:
                # Take the uprn from the most recent
                uprns = {newest_epc["uprn"]}
            else:
                raise ValueError("Multiple UPRNs found - investigate me")
        uprn = uprns.pop() if uprns else None
        return newest_epc, older_epcs, full_sap_epc, address_epc, postcode_epc, uprn
    @staticmethod
    def filter_newest_epc(list_of_epcs: List):
@ -186,8 +397,334 @@ class SearchEpc:
            return {}, []
        if len(newest_response) != 1:
-            raise Exception("More than one result found for this address - investigate me")
+            # It is possible (but rare, and likely an error on EPC lodgement) that we have multiple EPCs that
            # were lodged at the exact same time. In this case, we will take the first one
            newest_response = [newest_response[0]]
        older_epcs = [epc for epc in list_of_epcs if epc["lmk-key"] != newest_response[0]["lmk-key"]]
        return newest_response[0], older_epcs
    @staticmethod
    def _get_epc_mode(col: str, epc_data: pd.DataFrame):
        """
        Simple method to extract the mode value from the EPC data
        :param col: name of the column to take the mode of
        :param epc_data: pandas dataframe of epc data
        """
        mode_value = epc_data[[col]].mode(dropna=True)
        if len(mode_value) != 1:
            raise NotImplementedError("TODO: Handle multiple modes")
        mode_value = mode_value.iloc[0][col]
        return mode_value
    def fetch_nearby_epcs(
        self, initial_postcode: str,
        lmks_to_drop: list[str] | None = None,
        built_form: str = "",
        property_type: str = ""
    ):
        """
        Fetch EPC records for homes near a postcode, widening the search until usable data is found.
        The EPC API is queried with the postcode (and the property type, when one is given). If the
        request does not return a 200 status, or nothing is left after filtering, the last character of
        the postcode is removed and the query is retried. This continues until a non-empty result is
        obtained or the postcode is exhausted.
        For each successful response the rows are:
        - stripped of any 'lmk-key' listed in lmks_to_drop,
        - reduced to the most recently lodged record per 'uprn',
        - given a "weight" column: 1 for every row when this instance has no numeric house number, when
          no row has a house number distance, or when the postcode has been trimmed; otherwise
          1 / sqrt(house number distance + 1), with missing weights filled with the mean weight,
        - filtered on 'property-type' and, except for the edge cases handled below, on 'built-form'.
        :param initial_postcode: The initial full postcode for the EPC data query.
        :param lmks_to_drop: List of 'lmk-key' values to be excluded from the EPC data.
        :param built_form: The 'built-form' of the target property; when empty or null it is estimated
                           from the nearby EPCs.
        :param property_type: The 'property-type' of the target property; when empty it is estimated
                              from the nearby EPCs. Must be a key of property_type_api_map otherwise.
        :return: pandas dataframe of the filtered nearby EPC rows, including the "weight" column
        :raises KeyError: if a non-empty property_type is not a key of property_type_api_map
        :raises Exception: if no data is found after trimming the postcode down to nothing
        """
        # Maps the property type labels used by callers onto the values sent to the EPC API
        property_type_api_map = {
            "Bungalow": "bungalow",
            "Flat": "flat",
            "House": "house",
            "Maisonette": "maisonette",
            "Park home": "park home",
        }
        postcode = initial_postcode
        while postcode:
            # Fetch data from EPC API
            params = {"postcode": postcode}
            if property_type:
                params["property-type"] = property_type_api_map[property_type]
            # We cap the request at 100 homes of the relevant type, so not to pull in too many irrelevant homes
            epc_response = self.get_epc(params=params, size=100)
            if epc_response["status"] == 200:
                # NOTE(review): self.data is presumably populated by get_epc - confirm
                epc_data = pd.DataFrame(self.data["rows"])
                if lmks_to_drop is not None:
                    epc_data = epc_data[~epc_data["lmk-key"].isin(lmks_to_drop)]
                if not epc_data.empty:
                    # Further processing of the EPC data
                    epc_data['lodgement-datetime'] = pd.to_datetime(epc_data['lodgement-datetime'], format='mixed')
                    # Keep only the most recently lodged EPC for each uprn
                    # NOTE(review): rows that share an empty uprn value would be collapsed into a single
                    # row here - confirm that is intended
                    epc_data = epc_data.sort_values("lodgement-datetime", ascending=False).groupby("uprn").head(1)
                    epc_data["house_number"] = epc_data["address"].apply(lambda add1: self.get_house_number(add1))
                    epc_data["numeric_house_number"] = epc_data["house_number"].apply(
                        lambda house_num: self.extract_numeric_housenumber_part(house_num)
                    )
                    if self.numeric_house_number is None:
                        # If we don't have a house number, we treat all weights as equal
                        epc_data["weight"] = 1
                    else:
                        epc_data["house_number_distance"] = abs(
                            epc_data["numeric_house_number"] - self.numeric_house_number
                        )
                        # Earlier weighting scheme, kept commented out for reference:
                        # # We add 1, just in case we have a 0 weight (e.g. comparing house number 7a to 7b, or 9A to 9)
                        # epc_data["weight"] = 1 / (epc_data["house_number_distance"] + 1)
                        # # If we have a home without a house number, fill that weight with average
                        # epc_data["weight"] = epc_data["weight"].fillna(epc_data["weight"].mean())
                        # # Finally, we might not have any house numbers whatsoever so everything could be
                        # # missing, so we fill with 1
                        # epc_data["weight"] = epc_data["weight"].fillna(1)
                        # TODO: Testing
                        # If the postcode is different from the initial postcode, it doesn't make sense to have
                        # any weightings
                        if all(pd.isnull(epc_data["house_number_distance"])) or (postcode != initial_postcode):
                            epc_data["weight"] = 1
                        else:
                            # The +1 avoids dividing by zero when a neighbour has the same numeric house number
                            epc_data["weight"] = 1 / np.sqrt(epc_data["house_number_distance"] + 1)
                            # Homes without a house number get the average weight
                            epc_data["weight"] = epc_data["weight"].fillna(epc_data["weight"].mean())
                    # Use the supplied property type, or the highest-weighted one amongst the neighbours
                    estimation_property_type = self._estimate_str(
                        key="property-type", estimation_data=epc_data
                    ) if property_type == "" else property_type
                    # Highest-weighted built form amongst neighbours of that property type
                    epc_built_form = self._estimate_str(
                        key="built-form",
                        estimation_data=epc_data[epc_data["property-type"] == estimation_property_type]
                    )
                    # A home given as semi-detached, surrounded by terraced homes, is treated as end-terraced
                    if built_form == "Semi-Detached" and epc_built_form in ["End-Terraced", "Mid-Terraced"]:
                        estimation_built_form = "End-Terraced"
                    elif (built_form == "") or (pd.isnull(built_form)):
                        estimation_built_form = epc_built_form
                    else:
                        estimation_built_form = built_form
                    # We handle some edge cases experienced with maisonettes - if built form is detached or
                    # semi-detached, just filter on maisonette
                    # We also add some additional logic for Park homes, because they are far less common than other
                    # property types
                    is_maisonette_with_bad_built_form = (estimation_property_type == "Maisonette") & (
                        estimation_built_form in ["Detached", "Semi-Detached"]
                    )
                    is_park_home_without_built_form = (estimation_property_type == "Park home") & (
                        sum(epc_data["built-form"] == estimation_built_form) == 0
                    )
                    has_missing_built_form = not estimation_built_form
                    if is_maisonette_with_bad_built_form or is_park_home_without_built_form or has_missing_built_form:
                        # Built form is unreliable or unavailable, so filter on property type alone
                        epc_data = epc_data[epc_data["property-type"] == estimation_property_type]
                    else:
                        epc_data = epc_data[
                            (epc_data["built-form"] == estimation_built_form) & (
                                epc_data["property-type"] == estimation_property_type)
                            ]
                    if not epc_data.empty:
                        return epc_data  # Return the filtered data if it's not empty
            # Shorten the postcode by one character for the next iteration (and drop any trailing space
            # left behind by the trim)
            postcode = postcode[:-1].rstrip()
        # If loop finishes without a valid response, raise an exception
        raise Exception("Unable to find postcode data after trimming - investigate me")
    def estimate_epc(self, property_type, built_form, lmks_to_drop=None):
        """
        Build an estimated EPC for a property that has none, from the EPCs of nearby homes.
        Nearby EPCs are fetched with fetch_nearby_epcs and every attribute listed in vartypes is
        estimated from them with a weighted method chosen by the attribute's datatype.
        Note - do we have postcodes with just a single address? We would need to use a different approach
        to find the closest homes
        :param property_type:   This is the property type of the property we are estimating, that can be retrieved from
                                the ordnance survey api
        :param built_form:      This is the built form of the property we are estimating, that can be retrieved from
                                the ordnance survey api
        :param lmks_to_drop:    This is a list of LMK keys that should be dropped from the estimation process. This
                                is used as an override for testing, to drop EPCs for the property we are testing
        :return: dictionary holding the estimated EPC, flagged with "estimated": True
        :raises NotImplementedError: if an attribute has a datatype with no estimation method
        """
        # Only similar homes (same property type / built form) are used for the estimation
        neighbours = self.fetch_nearby_epcs(
            initial_postcode=self.postcode,
            lmks_to_drop=lmks_to_drop,
            built_form=built_form,
            property_type=property_type
        )
        estimated_epc = {}
        for key, vartype in vartypes.items():
            # Normalise missing values and empty strings to None
            neighbours[key] = np.where(pd.isnull(neighbours[key]), None, neighbours[key])
            neighbours[key] = np.where(neighbours[key] == "", None, neighbours[key])
            sample = neighbours[[key, "weight", "lodgement-datetime"]].copy()
            # Drop rows without a value, and rows holding a known data anomaly
            usable = ~pd.isnull(sample[key]) & ~sample[key].isin(Definitions.DATA_ANOMALY_MATCHES)
            sample = sample[usable]
            is_integer = vartype == "Int64"
            # Integers go via float, to avoid "invalid literal for int() with base 10: '1.0'"
            values = sample[key].astype(float) if is_integer else sample[key]
            sample[key] = values.astype(vartype)
            if len(sample) == 0:
                estimated_epc[key] = None
                continue
            if is_integer:
                estimator = self._estimate_int
            elif vartype == "float":
                estimator = self._estimate_float
            elif vartype == "str":
                estimator = self._estimate_str
            else:
                raise NotImplementedError("estimation method not implemented for type")
            estimated_epc[key] = estimator(sample, key)
        # Insert an estimated lodgement datetime, with a weighted average, and the matching date
        lodged_at = self.calculate_weighted_lodgement_datetime(epc_data=neighbours)
        estimated_epc["lodgement-datetime"] = lodged_at
        estimated_epc["lodgement-date"] = lodged_at.strftime("%Y-%m-%d")
        # Identify the property, and indicate that this epc was estimated
        estimated_epc.update({
            "postcode": self.postcode,
            "uprn": self.uprn,
            "address": self.full_address,
            "estimated": True,
        })
        return estimated_epc
    @staticmethod
    def calculate_weighted_lodgement_datetime(epc_data):
        numeric_dates = pd.to_datetime(epc_data['lodgement-datetime']).view('int64')
        # Calculate the weighted sum of dates
        weighted_sum = (numeric_dates * epc_data['weight']).sum()
        # Calculate the sum of weights
        total_weights = epc_data['weight'].sum()
        # Calculate the weighted mean in numeric format
        weighted_mean_numeric = weighted_sum / total_weights
        # Convert the numeric weighted mean back to datetime
        weighted_mean_datetime = pd.to_datetime(weighted_mean_numeric)
        return weighted_mean_datetime
    @staticmethod
    def _estimate_int(estimation_data, key):
        return round(np.average(a=estimation_data[key], weights=estimation_data["weight"]))
    @staticmethod
    def _estimate_float(estimation_data, key):
        return round(np.average(a=estimation_data[key], weights=estimation_data["weight"]), 2)
    @staticmethod
    def _estimate_str(estimation_data, key):
        agg = estimation_data.groupby(key)["weight"].sum().reset_index()
        agg = agg[agg["weight"] == agg["weight"].max()]
        if agg.shape[0] != 1:
            # If we have multiple modes, we take the more recent data on average
            recent_grouped = estimation_data[
                estimation_data[key].isin(agg[key].values)
            ].groupby(key)["lodgement-datetime"].mean()
            newest_group = recent_grouped.idxmax()
            return newest_group
        return agg[key].values[0]
    def find_property(self, skip_os=False):
        """
        This method will attempt to identify a property. It will, at first, use the EPC api to try and
        find the EPC for the property and the associated UPRN. If this fails, it will use the Ordnance Survey API to
        find the UPRN of the address.
        Because no result may have been provided by the EPC api because of formatting issues with the address,
        if the ordnance survey api is used and the uprn retrieved, the EPC api is queried again with the UPRN, just
        as a final check to see if there is any EPC data.
        If there is no EPC data, the epc data will be estimated based on the surrounding properties
        """
        # Step 1: use the epc api to find the property and uprn
        response = self.get_epc()
        if response["status"] == 200:
            (
                self.newest_epc, self.older_epcs, self.full_sap_epc, self.address_clean, self.postcode_clean, self.uprn
            ) = self.extract_epc_data(address=self.full_address)
            return
        # Step 2: If we don't have an EPC, we use the ordnance survey api to find the uprn
        if skip_os:
            if self.ordnance_survey_client.property_type is not None:
                # We can try and estimate
                estimated_epc = self.estimate_epc(
                    property_type=self.ordnance_survey_client.property_type,
                    built_form=self.ordnance_survey_client.built_form
                )
                self.newest_epc = estimated_epc
                self.older_epcs = []
                self.full_sap_epc = {}
                # Finally, set a standardised address 1 and postcode
                self.address_clean = self.ordnance_survey_client.address_os
                self.postcode_clean = self.ordnance_survey_client.postcode_os
            return
        os_response = self.ordnance_survey_client.get_places_api()
        if os_response["status"] != 200:
            # Investigate this if it happens
            raise Exception("Unable to find property - investigate me")
        # Step 3: Now that we have a urpn, do another check against the epc api, this time searching with the uprn
        self.uprn = self.ordnance_survey_client.most_relevant_result["UPRN"]
        response = self.get_epc()
        if response["status"] == 200:
            (
                self.newest_epc, self.older_epcs, self.full_sap_epc, self.address_clean, self.postcode_clean, self.uprn
            ) = self.extract_epc_data()
            return
        # Step 4: If we still don't have an EPC, we estimate the EPC data
        self.full_address = self.ordnance_survey_client.most_relevant_result["ADDRESS"]
        estimated_epc = self.estimate_epc(
            property_type=self.ordnance_survey_client.property_type,
            built_form=self.ordnance_survey_client.built_form
        )
        self.newest_epc = estimated_epc
        self.older_epcs = []
        self.full_sap_epc = {}
        # Finally, set a standardised address 1 and postcode
        self.address_clean = self.ordnance_survey_client.address_os
        self.postcode_clean = self.ordnance_survey_client.postcode_os
        return
--- a/backend/app/config.py
+++ b/backend/app/config.py
@ -13,6 +13,7 @@ class Settings(BaseSettings):
    HEAT_PREDICTIONS_BUCKET: str
    PLAN_TRIGGER_BUCKET: str
    EPC_AUTH_TOKEN: str
    ORDNANCE_SURVEY_API_KEY: str
    DB_HOST: str
    DB_PASSWORD: str
    DB_USERNAME: str
--- a/backend/app/db/functions/property_functions.py
+++ b/backend/app/db/functions/property_functions.py
@ -11,7 +11,7 @@ from backend.app.db.models.portfolio import (
 from sqlalchemy.orm.exc import NoResultFound
-def create_property(session: Session, portfolio_id: int, address: str, postcode: str) -> (int, bool):
+def create_property(session: Session, portfolio_id: int, address: str, postcode: str, uprn: str) -> (int, bool):
    """
    This function will create a record for the property in the database if it does not exist.
    If it does exist, it will just update the updated_at field.
@ -25,7 +25,7 @@ def create_property(session: Session, portfolio_id: int, address: str, postcode:
    try:
        # Attempt to fetch the existing property
        existing_property = session.query(PropertyModel).filter_by(
-            address=address, postcode=postcode, portfolio_id=portfolio_id
+            uprn=uprn, portfolio_id=portfolio_id
        ).one()
        # Update the 'updated_at' field
@ -43,6 +43,7 @@ def create_property(session: Session, portfolio_id: int, address: str, postcode:
            address=address,
            postcode=postcode,
            portfolio_id=portfolio_id,
            uprn=uprn,
            creation_status=PropertyCreationStatus.LOADING,
            status=PortfolioStatus.ASSESSMENT.value,
            has_pre_condition_report=False,
--- a/backend/app/db/models/materials.py
+++ b/backend/app/db/models/materials.py
@ -19,7 +19,6 @@ class MaterialType(enum.Enum):
    flat_roof_insulation = "flat_roof_insulation"
    room_roof_insulation = "room_roof_insulation"
    windows_glazing = "windows_glazing"
    iwi_wall_demolition = "iwi_wall_demolition"
    iwi_vapour_barrier = "iwi_vapour_barrier"
--- a/backend/app/db/models/portfolio.py
+++ b/backend/app/db/models/portfolio.py
@ -153,6 +153,7 @@ class PropertyDetailsEpcModel(Base):
    primary_energy_consumption = Column(Float)
    co2_emissions = Column(Float)
    adjusted_energy_consumption = Column(Float)
    estimated = Column(Boolean, default=False)
 class PropertyDetailsSpatial(Base):
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@ -4,6 +4,7 @@ import numpy as np
 import pandas as pd
 from epc_api.client import EpcClient
 from etl.epc.Record import EPCRecord
 from backend.SearchEpc import SearchEpc
 from fastapi import APIRouter, Depends
 from sqlalchemy.exc import IntegrityError, OperationalError
 from sqlalchemy.orm import sessionmaker
@ -30,6 +31,8 @@ from backend.ml_models.api import ModelApi
 from backend.Property import Property
 from etl.epc.DataProcessor import EPCDataProcessor
 from etl.epc.settings import COLUMNS_TO_MERGE_ON
 from etl.solar.SolarPhotoSupply import SolarPhotoSupply
 from recommendations.optimiser.CostOptimiser import CostOptimiser
 from recommendations.optimiser.GainOptimiser import GainOptimiser
 from recommendations.optimiser.optimiser_functions import prepare_input_measures
@ -43,54 +46,6 @@ logger = setup_logger()
 BATCH_SIZE = 5
 class DummyDownloader:
    """
    Looks up the EPC records for a single address, keeping the newest record, the older records and
    any full SAP ("new dwelling") record.
    """
    def __init__(self, postcode, address1, id, epc_client):
        """
        :param postcode: postcode of the property to search for
        :param address1: address of the property to search for
        :param id: identifier for this download
        :param epc_client: client for the EPC api, exposing domestic.search(params=...)
        """
        self.id = id
        self.postcode = postcode
        self.address1 = address1
        self.data = None
        self.old_data = None
        # Initialised here so that both attributes can be read before a search, and when the EPC
        # has no UPRN
        self.full_sap_epc = []
        self.uprn = None
        self.epc_client = epc_client
    def search_address_epc(self):
        """
        This method searches for an address in the EPC database and stores the newest result in
        self.data. Older records are stored in self.old_data and a full SAP EPC, when there is one,
        in self.full_sap_epc. Nothing is done if data has already been retrieved.
        :raises IndexError: if the search returns no rows
        :raises Exception: if more than one record shares the newest lodgement datetime
        """
        if self.data:
            return
        # This will fail if a property does not have an EPC - this has been documented as a case to handle
        response = self.epc_client.domestic.search(params={"address": self.address1, "postcode": self.postcode})
        rows = response["rows"]
        # Check if we have a full sap EPC
        full_sap_epcs = [r for r in rows if r["transaction-type"] == "new dwelling"]
        self.full_sap_epc = full_sap_epcs[0] if full_sap_epcs else full_sap_epcs
        if len(rows) > 1:
            # Computed once, rather than once per row
            newest_lodgement = max(r["lodgement-datetime"] for r in rows)
            newest_response = [r for r in rows if r["lodgement-datetime"] == newest_lodgement]
            if len(newest_response) > 1:
                raise Exception("More than one result found for this address - investigate me")
            # We'll keep old EPCs in case it contains information, not present on the newest one
            self.old_data = [epc for epc in rows if epc["lmk-key"] != newest_response[0]["lmk-key"]]
            response["rows"] = newest_response
        self.data = response["rows"][0]
        # For the moment, if we don't have a UPRN, we don't do anything about it, however we'll handle this in
        # the future by using the Ordnance Survey places API
        if not self.data["uprn"]:
            logger.warning("We do not have a UPRN for this property")
        else:
            self.uprn = int(self.data["uprn"])
 router = APIRouter(
    prefix="/plan",
    tags=["plan"],
@ -103,37 +58,34 @@ router = APIRouter(
@router.post("/trigger")
 async def trigger_plan(body: PlanTriggerRequest):
    logger.info("Connecting to db")
-    # session = sessionmaker(bind=db_engine)()
+    session = sessionmaker(bind=db_engine)()
    created_at = datetime.now().isoformat()
    try:
        session.begin()
        logger.info("Getting the inputs")
        Body = {'portfolio_id': '56', 'housing_type': 'Social', 'goal': 'Increase EPC', 'goal_value': 'A', 'trigger_file_path': '8/56/windows_portfolio_inputs.csv'}
        body = PlanTriggerRequest(**Body)
        epc_client = EpcClient(auth_token=get_settings().EPC_AUTH_TOKEN)
        plan_input = read_csv_from_s3(bucket_name=get_settings().PLAN_TRIGGER_BUCKET, filepath=body.trigger_file_path)
        uprn_filenames = read_dataframe_from_s3_parquet(
            bucket_name=get_settings().DATA_BUCKET, file_key="spatial/filename_meta.parquet"
        )
        cleaning_data = read_parquet_from_s3(
            bucket_name=get_settings().DATA_BUCKET, file_key="sap_change_model/cleaning_dataset.parquet",
        )
        input_properties = []
        for config in plan_input:
            # We validate each record in the file. If the record is NOT valid, we need to handle this accordingly
-            # TODO: implment validation. We should also standardise postcode and address in some fashion as
+
-            #       a postcode of abcdef would be considered different to ABCDEF
+            epc_searcher = SearchEpc(
                address1=config["address"],
                postcode=config["postcode"],
                auth_token=get_settings().EPC_AUTH_TOKEN,
                os_api_key=get_settings().ORDNANCE_SURVEY_API_KEY
            )
            epc_searcher.find_property()
            # Create a record in db
            property_id, is_new = create_property(
-                session, portfolio_id=body.portfolio_id, address=config['address'], postcode=config['postcode']
+                session, body.portfolio_id, epc_searcher.address_clean, epc_searcher.postcode_clean, epc_searcher.uprn
            )
            # if a new record was not created, we don't produce recommendations
            if not is_new:
                continue
            # TODO: Need to add heat demand target
            create_property_targets(
                session,
@ -143,29 +95,21 @@ async def trigger_plan(body: PlanTriggerRequest):
                heat_demand_target=None
            )
            epc_downloader = DummyDownloader(id=0, epc_client=epc_client, postcode=config['postcode'], address1=config['address'])
            epc_downloader.search_address_epc()
            epc_records ={
-                'original_epc': epc_downloader.data.copy(),
+                'original_epc': epc_searcher.newest_epc,
-                'full_sap_epc': epc_downloader.full_sap_epc.copy() if epc_downloader.full_sap_epc else [],
+                'full_sap_epc': epc_searcher.full_sap_epc,
-                'old_data': epc_downloader.old_data.copy() if epc_downloader.old_data else []
+                'old_data': epc_searcher.old_data,
            }
            prepared_epc = EPCRecord(epc_records=epc_records, run_mode="newdata", cleaning_data=cleaning_data) # This uses all the epc records to clean the data
-            
+          
-            p = Property(
+            input_properties.append(
                Property(
                    id=property_id,
                    address1=config['address'],
                    postcode=config['postcode'],
                    epc_record=prepared_epc,
                )
            logger.info("Getting spatial data")
            p.get_spatial_data(uprn_filenames)
            input_properties.append(
                p
            )
@ -180,10 +124,19 @@ async def trigger_plan(body: PlanTriggerRequest):
        materials = get_materials(session)
        cleaned = get_cleaned()
-        logger.info("Getting components and epc recommendations")
+        uprn_filenames = read_dataframe_from_s3_parquet(
            bucket_name=get_settings().DATA_BUCKET, file_key="spatial/filename_meta.parquet"
        )
        cleaning_data = read_dataframe_from_s3_parquet(
            bucket_name=get_settings().DATA_BUCKET, file_key="sap_change_model/cleaning_dataset.parquet",
        )
        photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket=get_settings().DATA_BUCKET)
-        # TODO: Move this to a class. We probably want a Recommender class which takes the injects the optimisers
+        logger.info("Getting spatial data")
-        #      in as a dependency and then the optimisers can take the input measures in as part of the setup() method
+        for p in input_properties:
            p.get_spatial_data(uprn_filenames)
        logger.info("Getting components and epc recommendations")
        recommendations = {}
        recommendations_scoring_data = []
@ -192,7 +145,7 @@ async def trigger_plan(body: PlanTriggerRequest):
        for p in input_properties:
            # Property recommendations
-            p.get_components(cleaned)
+            p.get_components(cleaned, photo_supply_lookup, floor_area_decile_thresholds)
            recommender = Recommendations(property_instance=p, materials=materials)
            property_recommendations = recommender.recommend()
--- a/backend/app/plan/utils.py
+++ b/backend/app/plan/utils.py
@ -175,11 +175,34 @@ def create_recommendation_scoring_data(
        scoring_dict["LOW_ENERGY_LIGHTING_ENDING"] = 100
        scoring_dict["LIGHTING_ENERGY_EFF_STARTING"] = "Very Good"
    if recommendation["type"] == "windows_glazing":
        scoring_dict["MULTI_GLAZE_PROPORTION_ENDING"] = 100
        scoring_dict["WINDOWS_ENERGY_EFF_ENDING"] = "Average"
        is_secondary_glazing = recommendation["is_secondary_glazing"]
        if scoring_dict["glazing_type_ENDING"] == "multiple":
            pass
        elif scoring_dict["glazing_type_ENDING"] == "single":
            scoring_dict["glazing_type_ENDING"] = "secondary" if is_secondary_glazing else "double"
        elif scoring_dict["glazing_type_ENDING"] == "double":
            scoring_dict["glazing_type_ENDING"] = "multiple" if is_secondary_glazing else "double"
        elif scoring_dict["glazing_type_ENDING"] == "secondary":
            scoring_dict["glazing_type_ENDING"] = "secondary" if is_secondary_glazing else "multiple"
        elif scoring_dict["glazing_type_ENDING"] in ["triple", "high performance"]:
            scoring_dict["glazing_type_ENDING"] = "multiple"
        else:
            raise ValueError("Invalid glazing type - implement me")
    if recommendation["type"] == "solar_pv":
        scoring_dict["PHOTO_SUPPLY_ENDING"] = recommendation["photo_supply"]
    if recommendation["type"] not in [
        "mechanical_ventilation", "sealing_open_fireplace", "low_energy_lighting",
        "internal_wall_insulation", "external_wall_insulation", "cavity_wall_insulation",
        "loft_insulation", "room_roof_insulation", "flat_roof_insulation",
-        "solid_floor_insulation", "suspended_floor_insulation", "exposed_floor_insulation"
+        "solid_floor_insulation", "suspended_floor_insulation", "exposed_floor_insulation",
        "windows_glazing", "solar_pv"
    ]:
        raise NotImplementedError("Implement me")
--- a/backend/app/utils.py
+++ b/backend/app/utils.py
@ -121,19 +121,6 @@ def epc_to_sap_lower_bound(epc: str):
        raise ValueError("EPC rating should be between A and G")
 def read_parquet_from_s3(bucket_name, file_key):
    """
    Read a Parquet file stored in S3 into a pandas DataFrame.
    :param bucket_name: name of the S3 bucket holding the file
    :param file_key: key of the Parquet file within the bucket
    :return: pandas dataframe with the contents of the file
    """
    # Download the whole object into memory, then parse the bytes as Parquet
    s3_object = boto3.client('s3').get_object(Bucket=bucket_name, Key=file_key)
    parquet_bytes = s3_object["Body"].read()
    return pd.read_parquet(BytesIO(parquet_bytes))
 def save_dataframe_to_s3_parquet(df, bucket_name, file_key):
    """
    Save a pandas DataFrame to S3 as a Parquet file.
--- a/backend/ml_models/Valuation.py
+++ b/backend/ml_models/Valuation.py
@ -19,7 +19,9 @@ class PropertyValuation:
        100070505235: 344000,  # Based on Zoopla's estimation of 131 School road, which is also semi-detached
        100070513306: 182000,  # Based on Zoopla's estimation of 61 Simmons Drive
        100071306896: 77000,  # Based on Flat 2 of 44 Wedgewood Road on Zoopla
-        100021192109: 650000  # Based on Zoopla
+        100021192109: 650000,  # Based on Zoopla
        766249482: 358000,  # Based on Zoopla estimate for 19 Spring Lane, 3 bedroom semi-detached
        100120703802: 277000,  # Based on Zoopla
    }
    # We base our valuation uplifts on a number of sources
@ -93,7 +95,13 @@ class PropertyValuation:
        value = cls.UPRN_VALUE_LOOKUP.get(property_instance.uprn)
        if not value:
-            raise ValueError("Have not implemented valuation for this property")
+            return {
                "current_value": None,
                "lower_bound_increased_value": None,
                "upper_bound_increased_value": None,
                "average_increased_value": None,
                "average_increase": None
            }
        current_epc = property_instance.data["current-energy-rating"]
        # We get the spectrum of ratings between the current and target EPC
@ -119,4 +127,5 @@ class PropertyValuation:
            "lower_bound_increased_value": value * (1 + min_increase),
            "upper_bound_increased_value": value * (1 + max_increase),
            "average_increased_value": value * (1 + avg_increase),
            "average_increase": value * (1 + avg_increase) - value
        }
--- a/backend/ml_models/api.py
+++ b/backend/ml_models/api.py
@ -2,8 +2,7 @@ import pandas as pd
 import requests
 from requests.exceptions import RequestException
 from utils.logger import setup_logger
-from utils.s3 import save_dataframe_to_s3_parquet
+from utils.s3 import save_dataframe_to_s3_parquet, read_dataframe_from_s3_parquet
 from backend.app.utils import read_parquet_from_s3
 logger = setup_logger()
@ -125,7 +124,7 @@ class ModelApi:
            # Retrieve the predictions
            predictions_df = pd.DataFrame(
-                read_parquet_from_s3(
+                read_dataframe_from_s3_parquet(
                    bucket_name=predictions_bucket,
                    file_key=response["storage_filepath"].split(predictions_bucket + "/")[1]
                )
--- a/backend/requirements/base.txt
+++ b/backend/requirements/base.txt
@ -35,4 +35,5 @@ mip==1.15.0
 boto3==1.28.3
 pandas==1.5.3
 pyarrow==12.0.1
-textblob
+textblob
 usaddress==0.5.10
--- a/backend/tests/test_property.py
+++ b/backend/tests/test_property.py
@ -9,6 +9,7 @@ from etl.epc_clean.EpcClean import EpcClean
 mock_epc_response = {
    "rows": [
        {
            "tenure": "rental (social)",
            "lmk-key": 1,
            "uprn": 1,
            "number-habitable-rooms": 5,
@ -17,7 +18,7 @@ mock_epc_response = {
            "inspection-date": "2023-06-01",
            'lodgement-datetime': '2023-06-01 20:29:01',
            "some-other-key": "some-value",
-            "roof-description": "Roof Description",
+            "roof-description": "pitched, no insulation",
            "walls-description": "Walls Description",
            "windows-description": "Windows Description",
            "mainheat-description": "Main Heating Description",
@ -37,7 +38,8 @@ mock_epc_response = {
            "floor-height": 2.5,
            "total-floor-area": 100,
            "construction-age-band": "England and Wales: 1967-1975",
-            "floor-description": "Floor Description"
+            "floor-description": "Floor Description",
            "floor-level": "Ground"
        },
        {
            "lmk-key": 2,
@ -68,7 +70,8 @@ mock_epc_response = {
            "floor-height": 2.5,
            "total-floor-area": 100,
            "construction-age-band": "England and Wales: 1967-1975",
-            "floor-description": "Floor Description"
+            "floor-description": "Floor Description",
            "floor-level": "Ground"
        }
    ]
 }
@ -100,7 +103,8 @@ mock_epc_response_dupe = {
            "floor-height": 2.5,
            "total-floor-area": 100,
            "construction-age-band": "England and Wales: 1967-1975",
-            "floor-description": "Floor Description"
+            "floor-description": "Floor Description",
            "floor-level": "Ground"
        },
        {
            "lmk-key": 2,
@ -128,7 +132,8 @@ mock_epc_response_dupe = {
            "floor-height": 2.5,
            "total-floor-area": 100,
            "construction-age-band": "England and Wales: 1967-1975",
-            "floor-description": "Floor Description"
+            "floor-description": "Floor Description",
            "floor-level": "Ground"
        },
        {
            "lmk-key": 3,
@ -156,36 +161,62 @@ mock_epc_response_dupe = {
            "floor-height": 2.5,
            "total-floor-area": 100,
            "construction-age-band": "England and Wales: 1967-1975",
-            "floor-description": "Floor Description"
+            "floor-description": "Floor Description",
            "floor-level": "Ground"
        }
    ]
 }
 class TestProperty:
    @pytest.fixture(autouse=True)
-    def property_instance(self, mock_epc_client, mock_cleaner):
+    def mock_photo_supply_lookup(self):
-        property_instance = Property(1, "AB12CD", "Test Address", epc_client=mock_epc_client)
+        return pd.DataFrame(
            [
                dict(
                    tenure="rental (social)",
                    built_form="Detached",
                    property_type="House",
                    construction_age_band="England and Wales: 1967-1975",
                    is_flat=False,
                    is_pitched=True,
                    is_roof_room=False,
                    floor_area_decile=2,
                    photo_supply_median=40
                )
            ]
        )
    @pytest.fixture(autouse=True)
    def mock_floor_area_decile_thresholds(self):
        """
        Minimal stand-in for the floor area decile thresholds table: a dataframe with a single
        "floor_area_decile_thresholds" column holding four threshold values
        """
        return pd.DataFrame(
            {"floor_area_decile_thresholds": [0, 10, 30, 50]}
        )
    @pytest.fixture(autouse=True)
    def property_instance(self, mock_cleaner):
        """
        Property under test, constructed with the first row of mock_epc_response passed in as its data.
        mock_cleaner is requested as a dependency but is not used in the body
        """
        property_instance = Property(id=1, postcode="AB12CD", address="Test Address", data=mock_epc_response["rows"][0])
        return property_instance
    @pytest.fixture(autouse=True)
-    def property_instance_dupe_data(self, mock_epc_client_dupe_data):
+    def property_instance_dupe_data(self):
-        property_instance_dupe_data = Property(2, "AB12CD", "Test Address", epc_client=mock_epc_client_dupe_data)
+        property_instance_dupe_data = Property(id=2, postcode="AB12CD", address="Test Address")
        return property_instance_dupe_data
-    @pytest.fixture
+    # @pytest.fixture
-    def mock_epc_client(self):
+    # def mock_epc_client(self):
-        mock_epc_client = Mock(spec=EpcClient(auth_token="mocked_auth_token"))
+    #     mock_epc_client = Mock(spec=EpcClient(auth_token="mocked_auth_token"))
-        mock_epc_client.domestic.search.return_value = mock_epc_response.copy()
+    #     mock_epc_client.domestic.search.return_value = mock_epc_response.copy()
-        mock_epc_client.auth_token = "mocked_auth_token"
+    #     mock_epc_client.auth_token = "mocked_auth_token"
-        return mock_epc_client
+    #     return mock_epc_client
-
+    #
-    @pytest.fixture
+    # @pytest.fixture
-    def mock_epc_client_dupe_data(self):
+    # def mock_epc_client_dupe_data(self):
-        mock_epc_client_dupe_data = Mock(spec=EpcClient(auth_token="mocked_auth_token"))
+    #     mock_epc_client_dupe_data = Mock(spec=EpcClient(auth_token="mocked_auth_token"))
-        mock_epc_client_dupe_data.domestic.search.return_value = mock_epc_response_dupe.copy()
+    #     mock_epc_client_dupe_data.domestic.search.return_value = mock_epc_response_dupe.copy()
-        mock_epc_client_dupe_data.auth_token = "mocked_auth_token"
+    #     mock_epc_client_dupe_data.auth_token = "mocked_auth_token"
-        return mock_epc_client_dupe_data
+    #     return mock_epc_client_dupe_data
    @pytest.fixture
    def mock_cleaner(self):
@ -224,7 +255,11 @@ class TestProperty:
        }
        mock_cleaner.cleaned = {
-            "roof-description": [{"original_description": "Roof Description"}],
+            "roof-description": [
                {"original_description": "Roof Description"},
                {"original_description": "pitched, no insulation", "is_pitched": True, "is_flat": False,
                 "is_roof_room": False}
            ],
            "walls-description": [walls_data],
            "windows-description": [{"original_description": "Windows Description"}],
            "mainheat-description": [{"original_description": "Main Heating Description"}],
@ -235,37 +270,32 @@ class TestProperty:
        }
        return mock_cleaner
-    def test_init(self, mock_epc_client):
+    def test_init(self):
-        inst1 = Property(0, "AB12CD", "Test Address", epc_client=mock_epc_client)
+        inst1 = Property(0, postcode="AB12CD", address="Test Address")
        # Should be mocked auth token
        assert inst1.epc_client.auth_token == "mocked_auth_token"
-        inst2 = Property(3, "AB12CD", "Test Address", epc_client=mock_epc_client)
+        assert inst1.data is None
        assert inst2.epc_client.auth_token
-        inst3 = Property(4, "AB12CD", "Test Address", data={"some": "data"}, epc_client=mock_epc_client)
+        inst2 = Property(3, "AB12CD", "Test Address")
-        assert inst3.data == {"some": "data"}
+        assert inst2.id == 3
-        data = inst3.search_address_epc()
+        inst3 = Property(4, "AB12CD", "Test Address", data={"some": "data", "uprn": 123})
-        assert data is None
+        assert inst3.data == {"some": "data", "uprn": 123}
-    def test_search_address_epc(self, property_instance):
+    def test_get_components(
-        # Call the method to test
+        self, property_instance, mock_cleaner, mock_photo_supply_lookup, mock_floor_area_decile_thresholds
-        property_instance.search_address_epc()
+    ):
-
+        property_instance.get_components(
-        # Verify that the correct data is being returned
+            mock_cleaner.cleaned,
-        assert property_instance.data == mock_epc_response["rows"][0]
+            photo_supply_lookup=mock_photo_supply_lookup,
-
+            floor_area_decile_thresholds=mock_floor_area_decile_thresholds
-    def test_search_address_epc_multiple_results(self, property_instance_dupe_data, mock_epc_client_dupe_data):
+        )
        with pytest.raises(Exception, match="More than one result found for this address - investigate me"):
            property_instance_dupe_data.search_address_epc()
    def test_get_components(self, property_instance, mock_cleaner, mock_epc_client):
        property_instance.search_address_epc()
        property_instance.get_components(mock_cleaner.cleaned)
        # Verify that the components are set correctly
-        assert property_instance.roof == {"original_description": "Roof Description"}
+        assert property_instance.roof == {
            'original_description': 'pitched, no insulation', 'is_pitched': True,
            'is_flat': False, 'is_roof_room': False
        }
        assert property_instance.walls == {
            "original_description": "Walls Description",
            "is_cavity_wall": True,
@ -289,24 +319,15 @@ class TestProperty:
        # Verify that ValueError is raised when EpcClean doesn't contain cleaned data
        with pytest.raises(ValueError, match="Cleaner does not contain cleaned data"):
-            property_instance.get_components(mock_cleaner.cleaned)
+            property_instance.get_components(mock_cleaner.cleaned, pd.DataFrame(), pd.DataFrame())
-    def test_get_components_no_data(self, property_instance, mock_cleaner):
+    def test_get_components_no_attributes(
        self, property_instance, mock_cleaner, mock_photo_supply_lookup, mock_floor_area_decile_thresholds
    ):
        # Modify the mock cleaner to have no attributes for a specific description
        mock_cleaner.cleaned = {
            "roof-description": []
        }
        # Verify that ValueError is raised when no attributes are found
        with pytest.raises(ValueError, match="Property does not contain data"):
            property_instance.get_components(mock_cleaner.cleaned)
    def test_get_components_no_attributes(self, property_instance, mock_cleaner):
        # Modify the mock cleaner to have no attributes for a specific description
        mock_cleaner.cleaned = {
            "roof-description": []
        }
        property_instance.search_address_epc()
        property_instance.data["roof-description"] = "Pitched, no insulation"
        property_instance.walls = {
            "original_description": "Walls Description",
@ -327,14 +348,17 @@ class TestProperty:
        }
        # Assert backup cleaning has been applied
-        property_instance.get_components(mock_cleaner.cleaned)
+        property_instance.get_components(
            mock_cleaner.cleaned, mock_photo_supply_lookup, mock_floor_area_decile_thresholds
        )
        assert property_instance.roof["clean_description"] == "Pitched, no insulation"
        assert property_instance.roof["is_pitched"]
-    def test_get_components_multiple_attributes(self, property_instance, mock_cleaner):
+    def test_get_components_multiple_attributes(
        self, property_instance, mock_cleaner, mock_photo_supply_lookup, mock_floor_area_decile_thresholds
    ):
        # This shouldn't happen - it would mean a cleaning error
        property_instance.search_address_epc()
        property_instance.data["roof-description"] = "Roof Description"
        cleaned = {
            "roof-description": [
@ -345,10 +369,10 @@ class TestProperty:
        # Verify that ValueError is raised when multiple attributes are found
        with pytest.raises(ValueError, match="Either No attributes or multiple found for roof-description"):
-            property_instance.get_components(cleaned)
+            property_instance.get_components(cleaned, mock_photo_supply_lookup, mock_floor_area_decile_thresholds)
-    def test_set_spatial(self, mock_epc_client):
+    def test_set_spatial(self):
-        prop = Property(1, "AB12CD", "Test Address", mock_epc_client)
+        prop = Property(1, postcode="AB12CD", address="Test Address")
        spatial1 = pd.DataFrame([{
            'X_COORDINATE': 411143.0, 'Y_COORDINATE': 281701.0, 'LATITUDE': 52.4331896, 'LONGITUDE': -1.8375238,
@ -362,7 +386,7 @@ class TestProperty:
        assert prop.is_heritage
        assert prop.restricted_measures
-        prop2 = Property(1, "AB12CD", "Test Address", mock_epc_client)
+        prop2 = Property(1, "AB12CD", "Test Address")
        spatial2 = pd.DataFrame([{
            'X_COORDINATE': 411143.0, 'Y_COORDINATE': 281701.0, 'LATITUDE': 52.4331896, 'LONGITUDE': -1.8375238,
@ -376,10 +400,10 @@ class TestProperty:
        assert not prop2.is_heritage
        assert not prop2.restricted_measures
-    def test_set_floor_level(self, mock_epc_client):
+    def test_set_floor_level(self):
        # In this case, we have a flat which looks like it's on the first floor, but it's actually on the ground
        # floor, so we should set floor_level to 0
-        prop = Property(1, "AB12CD", "Test Address", mock_epc_client)
+        prop = Property(1, postcode="AB12CD", address="Test Address")
        prop.data = {'floor-level': '01', 'property-type': 'Flat'}
        prop.floor = {
            'original_description': 'Solid, no insulation (assumed)', 'clean_description': 'Solid, no insulation',
@ -395,7 +419,7 @@ class TestProperty:
        # This property is labelled as being on the ground floor but actually has another property below
        # so we set floor level to 1
-        prop2 = Property(1, "AB12CD", "Test Address", mock_epc_client)
+        prop2 = Property(1, postcode="AB12CD", address="Test Address")
        prop2.data = {'floor-level': 'Ground', 'property-type': 'Flat'}
        prop2.floor = {
            'original_description': '(Another dwelling below)', 'clean_description': 'Solid, no insulation',
@ -410,7 +434,7 @@ class TestProperty:
        assert prop2.floor_level == 1
        # this property is correctly labelled as being on the 2nd floor
-        prop3 = Property(1, "AB12CD", "Test Address", mock_epc_client)
+        prop3 = Property(1, postcode="AB12CD", address="Test Address")
        prop3.data = {'floor-level': '02', 'property-type': 'Flat'}
        prop3.floor = {
            'original_description': '(Another dwelling below)', 'clean_description': 'Solid, no insulation',
@ -425,7 +449,7 @@ class TestProperty:
        assert prop3.floor_level == 2
        # Example of a house
-        prop4 = Property(1, "AB12CD", "Test Address", mock_epc_client)
+        prop4 = Property(1, postcode="AB12CD", address="Test Address")
        prop4.data = {'floor-level': '', 'property-type': 'House'}
        prop4.floor = {
            'original_description': '(Another dwelling below)', 'clean_description': 'Solid, no insulation',
--- a/backend/tests/test_sap_model_prep.py
+++ b/backend/tests/test_sap_model_prep.py
@ -2,13 +2,11 @@ from backend.Property import Property
 from etl.epc.DataProcessor import DataProcessor
 from backend.app.plan.utils import create_recommendation_scoring_data, get_cleaned
 from etl.epc.settings import COLUMNS_TO_MERGE_ON
 from epc_api.client import EpcClient
 import pandas as pd
 import pytest
 import msgpack
 from utils.s3 import read_dataframe_from_s3_parquet, read_from_s3
 from tqdm import tqdm
 # Handy code for selecting testing data
@ -122,7 +120,21 @@ class TestSapModelPrep:
        cleaned = msgpack.unpackb(cleaned, raw=False)
        return cleaned
-    def test_fill_cavity_wall(self, cleaned, cleaning_data):
+    @pytest.fixture
    def photo_supply_lookup(self):
        """
        Loads the photo supply lookup via read_dataframe_from_s3_parquet, using the
        solar_pv_supply/photo_supply_lookup.parquet key in the retrofit-data-dev bucket
        """
        photo_supply_lookup = read_dataframe_from_s3_parquet(
            bucket_name="retrofit-data-dev", file_key="solar_pv_supply/photo_supply_lookup.parquet",
        )
        return photo_supply_lookup
    @pytest.fixture
    def floor_area_decile_thresholds(self):
        """
        Loads the floor area decile thresholds via read_dataframe_from_s3_parquet, using the
        solar_pv_supply/floor_area_decile_thresholds.parquet key in the retrofit-data-dev bucket
        """
        floor_area_decile_thresholds = read_dataframe_from_s3_parquet(
            bucket_name="retrofit-data-dev", file_key="solar_pv_supply/floor_area_decile_thresholds.parquet",
        )
        return floor_area_decile_thresholds
    def test_fill_cavity_wall(self, cleaned, cleaning_data, photo_supply_lookup, floor_area_decile_thresholds):
        """
        We ensure that the process that prepares the data in the engine code results in the same data as
        the model is trained on
@ -288,11 +300,10 @@ class TestSapModelPrep:
        home = Property(
            id=0,
            postcode=starting_epc["postcode"],
-            address1=starting_epc["address1"],
+            address=starting_epc["address1"],
            epc_client=EpcClient(auth_token="notoken"),
            data=starting_epc
        )
-        home.get_components(cleaned)
+        home.get_components(cleaned, photo_supply_lookup, floor_area_decile_thresholds)
        data_processor = DataProcessor(None, newdata=True)
        data_processor.insert_data(pd.DataFrame([home.get_model_data()]))
@ -356,7 +367,7 @@ class TestSapModelPrep:
            assert test_record[c].values[0] == row[c]
-    def test_internal_wall_insulation(self, cleaned, cleaning_data):
+    def test_internal_wall_insulation(self, cleaned, cleaning_data, photo_supply_lookup, floor_area_decile_thresholds):
        starting_epc2 = {
            'low-energy-fixed-light-count': '2', 'address': 'FLAT 12, WAREHOUSE W, 3 WESTERN GATEWAY',
@ -508,11 +519,10 @@ class TestSapModelPrep:
        home2 = Property(
            id=0,
            postcode=starting_epc2["postcode"],
-            address1=starting_epc2["address1"],
+            address=starting_epc2["address1"],
            epc_client=EpcClient(auth_token="notoken"),
            data=starting_epc2
        )
-        home2.get_components(cleaned)
+        home2.get_components(cleaned, photo_supply_lookup, floor_area_decile_thresholds)
        home2.set_number_lighting_outlets(None)
        data_processor2 = DataProcessor(None, newdata=True)
@ -578,7 +588,7 @@ class TestSapModelPrep:
            assert test_record2[c].values[0] == row2[c]
-    def test_ventilation(self, cleaned, cleaning_data):
+    def test_ventilation(self, cleaned, cleaning_data, photo_supply_lookup, floor_area_decile_thresholds):
        starting_epc3 = {
            'low-energy-fixed-light-count': '', 'address': '45 Shepperson Road', 'uprn-source': 'Energy Assessor',
@ -728,11 +738,10 @@ class TestSapModelPrep:
        home3 = Property(
            id=0,
            postcode=starting_epc3["postcode"],
-            address1=starting_epc3["address1"],
+            address=starting_epc3["address1"],
            epc_client=EpcClient(auth_token="notoken"),
            data=starting_epc3
        )
-        home3.get_components(cleaned)
+        home3.get_components(cleaned, photo_supply_lookup, floor_area_decile_thresholds)
        home3.set_number_lighting_outlets(None)
        data_processor3 = DataProcessor(None, newdata=True)
@ -782,7 +791,7 @@ class TestSapModelPrep:
            assert test_record3[c].values[0] == row3[c]
-    def test_fireplaces(self, cleaned, cleaning_data):
+    def test_fireplaces(self, cleaned, cleaning_data, photo_supply_lookup, floor_area_decile_thresholds):
        starting_epc4 = {
            'low-energy-fixed-light-count': '', 'address': '9 Glebe Road, Asfordby Hill',
@ -937,11 +946,10 @@ class TestSapModelPrep:
        home4 = Property(
            id=0,
            postcode=starting_epc4["postcode"],
-            address1=starting_epc4["address1"],
+            address=starting_epc4["address1"],
            epc_client=EpcClient(auth_token="notoken"),
            data=starting_epc4
        )
-        home4.get_components(cleaned)
+        home4.get_components(cleaned, photo_supply_lookup, floor_area_decile_thresholds)
        home4.set_number_lighting_outlets(None)
        data_processor4 = DataProcessor(None, newdata=True)
--- a/etl/costs/app.py
+++ b/etl/costs/app.py
@ -75,6 +75,7 @@ def app():
    ewi_costs = pd.read_excel(DATA_DIRECTORY, sheet_name="external_wall_insulation", header=0)
    lel_costs = pd.read_excel(DATA_DIRECTORY, sheet_name="low_energy_lighting", header=0)
    flat_roof_costs = pd.read_excel(DATA_DIRECTORY, sheet_name="flat_roof_insulation", header=0)
    window_costs = pd.read_excel(DATA_DIRECTORY, sheet_name="window_glazing", header=0)
    # Form a single table to be uploaded
    costs = pd.concat(
--- a/etl/eligibility/Eligibility.py
+++ b/etl/eligibility/Eligibility.py
@ -33,6 +33,7 @@ class Eligibility:
    # If the loft has less than 100mm of insulation, we classify the home as needing loft insulation
    LOFT_INSULATION_THRESHOLD = 100
    HIGH_LOFT_INSULATION_THRESHOLD = 269
    # Because EPCS have different values for tenure, we need to remap them to a common set of values
    tenure_remap = {
@ -104,6 +105,8 @@ class Eligibility:
            self.LOFT_INSULATION_THRESHOLD if loft_thickness_threshold is None else loft_thickness_threshold
        )
        high_loft_thickness_threshold = self.HIGH_LOFT_INSULATION_THRESHOLD
        # We firstly check if the roof is a loft
        is_loft = self.roof["is_pitched"] and (not self.roof["is_roof_room"])
@ -122,7 +125,22 @@ class Eligibility:
            is_flat=self.roof["is_flat"]
        )
-        if insulation_thickness > loft_thickness_threshold:
+        if insulation_thickness <= loft_thickness_threshold:
            self.loft = {
                "suitability": True,
                "thickness": insulation_thickness,
                "reason": None
            }
        if insulation_thickness <= high_loft_thickness_threshold:
            self.loft = {
                "suitability": True,
                "thickness": insulation_thickness,
                "reason": "high loft thickness but below regulation"
            }
            return
        if insulation_thickness > high_loft_thickness_threshold:
            # Insulation is already thick enough
            self.loft = {
                "suitability": False,
@ -131,12 +149,6 @@ class Eligibility:
            }
            return
        self.loft = {
            "suitability": True,
            "thickness": insulation_thickness,
            "reason": None
        }
    def cavity_insulation(self):
        """
@ -152,9 +164,25 @@ class Eligibility:
        is_partial_filled = (
            self.walls["is_as_built"] and self.walls["insulation_thickness"] not in ["below average"]
        )
        # We look for potentially under performing cavities - anything that is assumed, as built and insulated
        is_underperforming = (
            self.walls["is_as_built"] and self.walls["insulation_thickness"] in ["average"] and self.walls["is_assumed"]
        )
        is_unfilled_cavity = is_cavity and is_empty
        is_partial_filled_cavity = is_cavity and is_partial_filled
        is_underperforming_cavity = is_cavity and is_underperforming
        # Check if it has internal or external wall insulation
        has_internal_wall_insulation = self.walls["internal_insulation"]
        has_external_wall_insulation = self.walls["external_insulation"]
        if has_internal_wall_insulation or has_external_wall_insulation:
            self.cavity = {
                "suitability": False,
                "type": "internal or external wall insulation"
            }
            return
        if is_unfilled_cavity:
            self.cavity = {
@ -170,6 +198,13 @@ class Eligibility:
            }
            return
        if is_underperforming_cavity:
            self.cavity = {
                "suitability": True,
                "type": "underperforming"
            }
            return
        self.cavity = {
            "suitability": False,
            "type": "full"
@ -223,6 +258,14 @@ class Eligibility:
        }
    def suspended_floor_insulation(self):
        if "no_data" in self.floor.keys():
            if self.floor["no_data"]:
                self.suspended_floor = {
                    "suitability": False,
                }
                return
        is_suspended = self.floor["is_suspended"]
        is_insulated = self.floor["insulation_thickness"] in ["average", "above average"]
@ -232,6 +275,14 @@ class Eligibility:
        return
    def solid_floor_insulation(self):
        if "no_data" in self.floor.keys():
            if self.floor["no_data"]:
                self.solid_floor = {
                    "suitability": False,
                }
                return
        is_solid = self.floor["is_solid"]
        is_insulated = self.floor["insulation_thickness"] in ["average", "above average"]
@ -305,7 +356,8 @@ class Eligibility:
        """
        current_sap = int(self.epc["current-energy-efficiency"])
-        if current_sap > 54:
+
        if current_sap >= 69:
            self.eco4_warmfront = {
                "eligible": False,
                "message": "sap too high"
@ -319,9 +371,22 @@ class Eligibility:
        is_eligible = self.cavity["suitability"] & self.loft["suitability"]
        if post_retrofit_sap is None:
            if current_sap >= 55:
                message = "Possibly eligible but property currently EPC D"
            else:
                message = "subject to post retrofit sap" if is_eligible else "not eligible"
            # Update the message to flag properties that failed just because of a full cavity.
            # We need to double check that the wall is a cavity, that the loft is suitable and that the
            # sap is within reason
            # We can then estimate the age of the cavity fill
            if not is_eligible and (current_sap < 69) and self.loft["suitability"] and self.walls["is_cavity_wall"]:
                message = "Failed due to full cavity - check cavity age"
            self.eco4_warmfront = {
                "eligible": is_eligible,
-                "message": "subject to post retrofit sap"
+                "message": message
            }
            return
--- a/etl/eligibility/ha_15_32/app.py
+++ b/etl/eligibility/ha_15_32/app.py
@ -11,13 +11,12 @@ import numpy as np
 import msgpack
 from datetime import datetime, timedelta
 from utils.logger import setup_logger
-from utils.s3 import read_from_s3
+from utils.s3 import read_from_s3, read_dataframe_from_s3_parquet
 from dotenv import load_dotenv
 from backend.SearchEpc import SearchEpc
 from backend.Property import Property
 from etl.eligibility.Eligibility import Eligibility
 from etl.epc.DataProcessor import DataProcessor
 from backend.app.utils import read_parquet_from_s3
 from backend.app.plan.utils import create_recommendation_scoring_data
 from etl.epc.settings import COLUMNS_TO_MERGE_ON
 from backend.ml_models.api import ModelApi
@ -247,6 +246,8 @@ def merge_ha_15(asset_list, identified_addresses):
        identified_addresses = identified_addresses.drop_duplicates("merge_key")
    # We pull out raw counts for the survey lists
    # Check asset list for dupes
    asset_list_dupes = asset_list["merge_key"].duplicated()
    if asset_list_dupes.sum():
@ -336,7 +337,10 @@ def merge_ha_15(asset_list, identified_addresses):
    return merged_data, dropped_identified_merge_keys
-def prepare_model_data_row(property_id, modelling_epc, cleaned, cleaning_data, created_at):
+def prepare_model_data_row(
    property_id, modelling_epc, cleaned, cleaning_data, created_at,
    photo_supply_lookup, floor_area_decile_thresholds, old_data=None, full_sap_epc=None,
 ):
    """
    This function prepares the data for modelling, in the same fashion as the recommendation engine
    With up-coming refactoring, this will change
@ -346,15 +350,24 @@ def prepare_model_data_row(property_id, modelling_epc, cleaned, cleaning_data, c
    p = Property(
        id=property_id,
        postcode=modelling_epc["postcode"],
-        address1=modelling_epc["address1"],
+        address=modelling_epc["address1"],
-        epc_client=None,
+        data=modelling_epc,
-        data=modelling_epc
+        old_data=old_data,
        full_sap_epc=full_sap_epc
    )
-    p.get_components(cleaned)
+    p.get_components(cleaned, photo_supply_lookup=photo_supply_lookup,
                     floor_area_decile_thresholds=floor_area_decile_thresholds)
    # THIS IS TEMP AND SHOULDN'T BE HERE
    data_to_clean = p.get_model_data()
    if data_to_clean["NUMBER_HEATED_ROOMS"] in ['', None]:
        data_to_clean["NUMBER_HEATED_ROOMS"] = data_to_clean["NUMBER_HABITABLE_ROOMS"]
        p.data["number-heated-rooms"] = data_to_clean["NUMBER_HABITABLE_ROOMS"]
    # This is temp - this should happen after scoring
    cleaned_property_data = DataProcessor.apply_averages_cleaning(
-        data_to_clean=pd.DataFrame([dict(**p.get_model_data(), LOCAL_AUTHORITY=p.data["local-authority"])]),
+        data_to_clean=pd.DataFrame([dict(**data_to_clean, LOCAL_AUTHORITY=p.data["local-authority"])]),
        cleaning_data=cleaning_data,
        cols_to_merge_on=['PROPERTY_TYPE', 'BUILT_FORM', 'CONSTRUCTION_AGE_BAND', 'LOCAL_AUTHORITY'],
    )
@ -829,6 +842,18 @@ def analyse_ha_32_results(results, ha32, no_house_numbers):
        results_df["warmfront_identified"]
    ]
    # Aggregate counts of ECO4 and GBIS jobs identified
    n_eco = results_df["eco4_eligible"].sum()
    # Gbis is rows where eco4 is not eligible
    n_gbis = results_df[
        (results_df["gbis_eligible"] == True) & (results_df["eco4_eligible"] == False)
        ]["gbis_eligible"].sum()
    pipeline_potential = results_df[
        (results_df["warmfront_identified"] == True) | (results_df["eco4_eligible"] == True) | (
            results_df["gbis_eligible"] == True)
        ]
    success_rate = warmfront_identified["gbis_eligible"].sum() / warmfront_identified.shape[0]
    # For HA32, this is 89%
@ -886,8 +911,16 @@ def analyse_ha_32_results(results, ha32, no_house_numbers):
    new_possibilities = results_df[
        (~results_df["warmfront_identified"]) &
-        (results_df["gbis_eligible"] | results_df["eco4_eligible"]) &
+        (results_df["gbis_eligible"] | results_df["eco4_eligible"])
-        (results_df["tenure"] == "Rented (social)")
+        ].copy()
    new_possibilities_eco = results_df[
        (~results_df["warmfront_identified"]) &
        (results_df["eco4_eligible"] == True)
        ].copy()
    new_possibilities_gbis = results_df[
        (~results_df["warmfront_identified"]) &
        (results_df["eco4_eligible"] == False) & (results_df["gbis_eligible"] == True)
        ].copy()
    future_possibilities_eco = results_df[
@ -947,6 +980,8 @@ def analyse_ha_15_results(results_df, ha15, no_house_numbers):
        results_df["warmfront_identified"]
    ]
    warmfront_identified = warmfront_identified
    n_identified = (warmfront_identified["gbis_eligible"] | warmfront_identified["eco4_eligible"]).sum()
    success_rate = n_identified / warmfront_identified.shape[0]
@ -955,6 +990,11 @@ def analyse_ha_15_results(results_df, ha15, no_house_numbers):
        "eligibility_classification"].value_counts()
    # For HA15 this is 50.3%
    pipeline_potential = results_df[
        (results_df["warmfront_identified"] == True) | (results_df["eco4_eligible"] == True) | (
            results_df["gbis_eligible"] == True)
        ]
    # Of the properties we identify, what is the mix of confidence
    missed = results_df[
@ -973,32 +1013,37 @@ def analyse_ha_15_results(results_df, ha15, no_house_numbers):
        missed["sap"] < 69
        ]
-    sap_low_enough["walls"].value_counts()
+    # Aggregates of no eco and gbis jobs identified
-    z = ha15[ha15["row_id"].isin(sap_too_high["row_id"].values)]
+    n_eco = results_df["eco4_eligible"].sum()
-
+    # Gbis is rows where eco4 is not eligible
-    investigate_1 = ha15[ha15["row_id"].isin(sap_too_high["row_id"])][
+    n_gbis = results_df[
-        ["row_id", "Postcode", "Address Line 1", "Address Line 2", "Address Line 3"]]
+        (results_df["gbis_eligible"] == True) & (results_df["eco4_eligible"] == False)
-
+        ]["gbis_eligible"].sum()
    investigate_2 = ha15[ha15["row_id"].isin(sap_low_enough["row_id"])][
        ["row_id", "Postcode", "Address Line 1", "Address Line 2", "Address Line 3"]]
    missed["message"].value_counts()
    # We now look for properties that we identified, that were not identified by Warmfront
    new_possibilities = results_df[
        (~results_df["warmfront_identified"]) &
-        ((results_df["gbis_eligible"] == True) | (results_df["eco4_eligible"] == True)) &
+        ((results_df["gbis_eligible"] == True) | (results_df["eco4_eligible"] == True))
-        (results_df["tenure"] == "Rented (social)")
+        ].copy()
    new_possibilities_eco = results_df[
        (~results_df["warmfront_identified"]) &
        (results_df["eco4_eligible"] == True)
        ].copy()
    new_possibilities_gbis = results_df[
        (~results_df["warmfront_identified"]) &
        (results_df["eco4_eligible"] == False) & (results_df["gbis_eligible"] == True)
        ].copy()
    # These are future possibilities
-    new_possibilities_eco = results_df[
+    future_possibilities_eco = results_df[
        (~results_df["warmfront_identified"]) &
        (results_df["eco4_eligible_future"] == True) & (~(results_df["gbis_eligible"] | results_df["eco4_eligible"]))
        ].copy()
-    new_possibilities_gbis = results_df[
+    future_possibilities_gbis = results_df[
        (~results_df["warmfront_identified"]) &
        (results_df["gbis_eligible_future"] == True) & (results_df["eco4_eligible_future"] == False) & (
            ~(results_df["gbis_eligible"] | results_df["eco4_eligible"]))
@ -1058,7 +1103,7 @@ def app():
    )
    cleaned = msgpack.unpackb(cleaned, raw=False)
-    cleaning_data = read_parquet_from_s3(
+    cleaning_data = read_dataframe_from_s3_parquet(
        bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet",
    )
--- a/etl/eligibility/ha_15_32/ha16_app.py
+++ b/etl/eligibility/ha_15_32/ha16_app.py
@ -0,0 +1,647 @@
 import os
 import msgpack
 import openpyxl
 from pathlib import Path
 from datetime import datetime
 import pandas as pd
 import numpy as np
 from utils.s3 import read_from_s3
 from utils.logger import setup_logger
 from dotenv import load_dotenv
 from utils.s3 import read_dataframe_from_s3_parquet
 from tqdm import tqdm
 from backend.SearchEpc import SearchEpc
 from etl.eligibility.Eligibility import Eligibility
 from etl.eligibility.ha_15_32.app import prepare_model_data_row
 from etl.epc.DataProcessor import DataProcessor
 from etl.epc.settings import COLUMNS_TO_MERGE_ON
 from backend.ml_models.api import ModelApi
 from etl.solar.SolarPhotoSupply import SolarPhotoSupply
 from recommendations.recommendation_utils import calculate_cavity_age
 from recommendation_utils import convert_thickness_to_numeric
 import re
 # NOTE(review): this module already lives in etl/eligibility/ha_15_32, so appending
 # "etl/eligibility/ha_15_32" to its parent directory looks like a doubled path - confirm
 # where the .env file really is. load_dotenv silently does nothing for a missing file.
 ENV_FILE = Path(__file__).parent / "etl" / "eligibility" / "ha_15_32" / ".env"
 logger = setup_logger()
 # The .env file must be loaded BEFORE any setting is read from the environment, otherwise a
 # token that is only defined in the .env file is read as None.
 load_dotenv(ENV_FILE)
 EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
 # Folder holding the HA16 spreadsheets, relative to the current working directory.
 _HA16_INPUT_DIR = "etl/eligibility/ha_15_32"
 # (wrong, right) spellings of survey street names, applied in this order as literal substring
 # replacements on the lower-cased street name. Order matters: e.g. "cresent" must be fixed
 # before "lichens crescent, fitton hill" can match, and the two specific "fitton hill" entries
 # run before the generic ", fitton hill" removal.
 _HA16_STREET_NAME_CORRECTIONS = (
    ("eccels", "eccles"),
    ("chatley, road", "chatley road"),
    ("vaughen", "Vaughan"),
    ("cresent", "crescent"),
    ("plantation road", "plantation avenue"),
    ("how clough drive", "howclough drive"),
    ("brockhurst lane", "brookhurst lane"),
    ("biirch road", "birch road"),
    ("hadson road", "hodson road"),
    ("harbonne avennue", "narbonne avenue"),
    ("cumberland road, cadishead", "cumberland avenue, cadishead"),
    ("aston field drive", "ashton field drive"),
    ("wedgewood road", "wedgwood road"),
    ("hamilton close", "hamilton avenue"),
    ("lichens crescent, fitton hill", "lichens crescent"),
    ("south croft, fitton hill", "south croft"),
    (", fitton hill", ""),
    ("firtree dr", "fir tree avenue"),
    ("hawthorne road", "hawthorn crescent"),
    ("rein lee avenue", "reins lee avenue"),
    ("westerhill road", "wester hill road"),
    ("st martins road", "saint martins road"),
    ("timperley avenue", "timperley close"),
    ("eastwood road", "eastwood avenue"),
    ("new road", "new street"),
    ("grassmere road", "grasmere road"),
    ("hulton road", "hulton avenue"),
    ("beechfield avenue", "beechfield road"),
    ("princess avenue", "princes avenue"),
    ("edge ford crecent", "edge fold crescent"),
    ("conniston avenue", "coniston avenue"),
    ("blackthorne crescent", "blackthorn crescent"),
    ("wellstock road", "wellstock lane"),
    ("brackley avenue", "brackley street"),
    ("brook avenue swinton", "brook avenue, swinton"),
    ("green avenue swinton", "green avenue, swinton"),
    ("grasmere avenue wardley", "grasmere avenue, wardley"),
    ("mardale avenue wardle", "mardale avenue, wardle"),
    ("carleach grove", "cartleach Grove"),
    ("arbour grove", "arbor Grove"),
 )
 def _read_sheet_with_row_colours(filename):
    """Read the active sheet of a workbook in the HA16 input folder.

    :param filename: Name of the .xlsx file inside the HA16 input folder
    :return: Tuple (header, rows, colours): the values of the first sheet row, the cell values
        of every later row, and for each of those rows the fill colour index of its first cell
        (None when that index is '00000000')
    """
    sheet = openpyxl.load_workbook(f"{_HA16_INPUT_DIR}/{filename}").active
    header = [cell.value for cell in sheet[1]]
    rows = []
    colours = []
    # The first row is assumed to be the header, so data starts on row 2
    for row in sheet.iter_rows(min_row=2, values_only=False):
        rows.append([cell.value for cell in row])
        fill_index = row[0].fill.start_color.index
        colours.append(fill_index if fill_index != '00000000' else None)
    return header, rows, colours
 def _clean_survey_list(survey_list):
    """Normalise street names and correct known data errors in the survey list, in place.

    :param survey_list: Survey dataframe with "Street / Block Name", "Post Code" and "NO." columns
    """
    streets = survey_list["Street / Block Name"]
    streets = streets.str.replace("/", ", ", regex=False).str.lower()
    # Expand the abbreviation "rd" to "road". The old exact comparison against "REEDS RD" ran
    # after lower-casing and so could never match; this substitution already covers that case.
    streets = streets.str.replace(r'\brd\b', 'road', regex=True)
    # Normalise the spacing around commas: "{place} ,{place}" becomes "{place}, {place}"
    streets = streets.str.replace(" , ", ", ", regex=False)
    streets = streets.str.replace(r'\s*,\s*', ', ', regex=True)
    streets = streets.str.strip()
    # Postcode corrections; the second one relies on the tidied (but not yet corrected) street name
    survey_list.loc[survey_list["Post Code"] == "M38 0SA", "Post Code"] = "M38 9SA"
    survey_list.loc[
        (streets == "nelson drive") & (survey_list["Post Code"] == "M44 5JE"), "Post Code"
    ] = "M44 5JF"
    # regex=False keeps these literal on every pandas version (the default changed in pandas 2.0)
    for wrong, right in _HA16_STREET_NAME_CORRECTIONS:
        streets = streets.str.replace(wrong, right, regex=False)
    survey_list["Street / Block Name"] = streets
    # Replacement for clively avenue 66-68
    survey_list.loc[survey_list["NO."] == "66-68", "NO."] = "66"
 def _match_survey_row(asset_list, asset_addresses, row):
    """Find the single asset-list row that corresponds to one survey row.

    Candidates are narrowed step by step until exactly one is left: the street name appears in
    the address, the house number appears in the address, the house number equals HouseNo, and
    finally the postcode matches.

    :param asset_list: Asset dataframe with "Address", "HouseNo" and "Postcode" columns
    :param asset_addresses: Lower-cased asset_list["Address"], computed once by the caller
    :param row: One survey row
    :return: The matching asset-list row
    :raises ValueError: If the filters do not leave exactly one candidate
    """
    house_number = row["NO."]
    if isinstance(house_number, str):
        house_number = house_number.lower()
    house_number = str(house_number)
    street = row["Street / Block Name"].lower()
    # Literal (regex=False) matching: street names and house numbers are not patterns.
    # na=False treats a missing address as "no match" rather than breaking the boolean mask.
    candidates = asset_list[asset_addresses.str.contains(street, regex=False, na=False)]
    candidates = candidates[
        candidates["Address"].str.lower().str.contains(house_number, regex=False, na=False)
    ]
    if len(candidates) != 1:
        # "1" is also contained in "10" or "21", so fall back to the exact house number. The
        # survey number was lower-cased above, so HouseNo is lower-cased for the comparison too.
        candidates = candidates[candidates["HouseNo"].str.lower() == house_number]
    if len(candidates) != 1:
        candidates = candidates[
            candidates["Postcode"].str.lower().str.contains(
                row["Post Code"].lower(), regex=False, na=False
            )
        ]
    if len(candidates) != 1:
        raise ValueError(
            f"Investigate: expected exactly one asset-list match for {row['survey_key']} "
            f"(no. {row['NO.']!r}, street {row['Street / Block Name']!r}, "
            f"postcode {row['Post Code']!r}) but found {len(candidates)}"
        )
    return candidates.iloc[0]
 def load_data():
    """
    Load the HA16 asset list (two workbooks) and the ECO4 survey list, clean the survey
    addresses and match every survey row to exactly one asset-list row.

    :return: Tuple (data, survey_list). data is the asset list left-joined to the matched survey
        rows, with a boolean "warmfront_identified" column that is True for assets that appear
        in the survey list; survey_list is the cleaned survey dataframe with a "survey_key" column
    :raises ValueError: If a survey row cannot be matched to exactly one asset-list row
    """
    # This asset list is spread across two sheets, which we need to combine
    asset_list_filenames = [
        "HESTIA - HA 16 ASSET LIST PART 1 OF 2.xlsx",
        "HESTIA - HA 16 ASSET LIST PART 2 OF 2.xlsx",
    ]
    # Collect the rows and their colours across both workbooks
    rows_data = []
    rows_colors = []
    colnames = []
    for asset_list_filename in asset_list_filenames:
        sheet_colnames, sheet_rows, sheet_colors = _read_sheet_with_row_colours(asset_list_filename)
        colnames.append(sheet_colnames)
        rows_data.extend(sheet_rows)
        rows_colors.extend(sheet_colors)
    # The header of the first workbook is used for the combined frame
    asset_list = pd.DataFrame(rows_data, columns=colnames[0])
    # Remove None columns
    asset_list = asset_list.iloc[:, 0:12]
    asset_list['row_color'] = rows_colors
    # Every colour that is neither the red nor the green code (including None) is labelled yellow
    asset_list["row_colour_name"] = np.where(
        asset_list["row_color"] == "FFFF0000", "red",
        np.where(asset_list["row_color"] == "FF92D050", "green", "yellow")
    )
    # Split up the address on commas, which is useful for matching later
    split_addresses = asset_list['Address'].str.split(',', expand=True)
    split_addresses.columns = ['temp', 'address2', 'address3', 'address4', 'address5']
    asset_list = pd.concat([asset_list, split_addresses], axis=1)
    # There are no commas separating the house number and the first address line, so the house
    # number is the first space-separated token. Only that token is needed, so take it directly
    # rather than expanding into a fixed number of columns.
    asset_list["HouseNo"] = asset_list["temp"].str.split(" ").str[0]
    # We now read in the survey list
    survey_colnames, survey_rows, survey_colors = _read_sheet_with_row_colours(
        "HESTIA- HA 16 ECO4 SURVEY LIST.xlsx"
    )
    survey_list = pd.DataFrame(survey_rows, columns=survey_colnames)
    # For the survey list, we don't need the colours, since there is a column called
    # "INSTALLED OR CANCELLED" which describes the status of the property
    survey_list["row_colour"] = survey_colors
    survey_list["survey_key"] = [f"survey_{i}" for i in range(len(survey_list))]
    _clean_survey_list(survey_list)
    # We now need to merge the survey list onto the asset list
    # It is easier just to do a search on each row, even though it's much slower
    asset_addresses = asset_list["Address"].str.lower()
    matched = []
    for _, row in tqdm(survey_list.iterrows(), total=len(survey_list)):
        asset_row = _match_survey_row(asset_list, asset_addresses, row)
        matched.append(
            {
                "survey_key": row["survey_key"],
                "matched_address": asset_row["Address"],
                "survey_house_no": row["NO."],
                "survey_street_name": row["Street / Block Name"],
                "survey_postcode": row["Post Code"],
                "survey_status": row["INSTALLED OR CANCELLED"]
            }
        )
    matched = pd.DataFrame(matched)
    matched["warmfront_identified"] = True
    # Combine asset list and surveys
    data = asset_list.merge(
        matched, how="left", left_on="Address", right_on="matched_address",
    )
    # Assets without a survey row get False; astype(bool) keeps the column a real boolean mask
    # on pandas versions that no longer downcast after fillna
    data["warmfront_identified"] = data["warmfront_identified"].fillna(False).astype(bool)
    return data, survey_list
 def get_epc_data(data, cleaned, cleaning_data, created_at, photo_supply_lookup, floor_area_decile_thresholds):
    """
    Look up the EPC for every property in data, run the eligibility checks on it, score the
    ECO4-eligible properties with the prediction models and classify the predicted uplift.

    :param data: Asset dataframe; the columns read here are "HouseNo", "Postcode", "Address",
        "Type" and "row_id"
    :param cleaned: Passed through to Eligibility, calculate_cavity_age and prepare_model_data_row
    :param cleaning_data: Passed through to prepare_model_data_row and the DataProcessor cleaning steps
    :param created_at: Passed to prepare_model_data_row and used as the ModelApi timestamp
    :param photo_supply_lookup: Passed through to prepare_model_data_row
    :param floor_area_decile_thresholds: Passed through to prepare_model_data_row
    :return: Tuple (results_df, scoring_data, nodata): one result row per property with an EPC
        (plus "sap_uplift", "post_install_sap" and "eligibility_classification"), the raw scoring
        rows, and the input rows for which no EPC was found
    """
    scoring_data = []
    results = []
    nodata = []
    # Maps the asset list's "Type" value onto the property-type / built-form pair handed to the
    # search client. A "Type" that is not listed here raises KeyError in the loop below.
    property_type_lookup = {
        'Semi Detached Bungalow': {"property-type": "Bungalow", "built-form": "Semi-Detached"},
        'Mid Terraced House': {"property-type": "House", "built-form": "Mid-Terrace"},
        'End Terraced House': {"property-type": "House", "built-form": "End-Terrace"},
        'Low Rise Flat': {"property-type": "Flat", "built-form": "Mid-Terrace"},
        'Semi-Detached House': {"property-type": "House", "built-form": "Semi-Detached"},
        'Detached Bungalow': {"property-type": "Bungalow", "built-form": "Detached"},
        'End Terraced Bungalow': {"property-type": "Bungalow", "built-form": "End-Terrace"},
        'Mid Terraced Bungalow': {"property-type": "Bungalow", "built-form": "Mid-Terrace"},
        'Medium Rise Flat': {"property-type": "Flat", "built-form": "Mid-Terrace"},
        'Detached House': {"property-type": "House", "built-form": "Detached"},
        'Cottage Flat': {"property-type": "Flat", "built-form": "Semi-Detached"},
        'Maisonette Medium Rise': {"property-type": "Flat", "built-form": "Mid-Terrace"},
        'Maisonette Over Shop': {"property-type": "Flat", "built-form": "Mid-Terrace"},
        'End Terraced Town House': {"property-type": "House", "built-form": "End-Terrace"},
        'Flat Over Shop': {"property-type": "Flat", "built-form": "Mid-Terrace"},
        'Mid Terraced Town House': {"property-type": "House", "built-form": "Mid-Terrace"},
    }
    for index, property_meta in tqdm(data.iterrows(), total=len(data)):
        searcher = SearchEpc(
            address1=property_meta["HouseNo"],
            postcode=property_meta["Postcode"],
            auth_token=EPC_AUTH_TOKEN,
            os_api_key=None,
            full_address=property_meta["Address"]
        )
        # The property type and built form are set by hand from the asset list, and the search
        # is run with skip_os=True
        searcher.ordnance_survey_client.property_type = property_type_lookup[property_meta["Type"]]["property-type"]
        searcher.ordnance_survey_client.built_form = property_type_lookup[property_meta["Type"]]["built-form"]
        searcher.find_property(skip_os=True)
        if searcher.newest_epc is None:
            # No EPC found: keep the input row for reporting and move on
            nodata.append(property_meta)
            continue
        if searcher.newest_epc.get("estimated"):
            # We insert the row ID as our proxy for UPRN
            proxy_uprn = int(property_meta["row_id"].split("_")[1])
            searcher.newest_epc["uprn"] = proxy_uprn
        newest_epc = searcher.newest_epc
        older_epcs = searcher.older_epcs
        full_sap_epc = searcher.full_sap_epc
        # We also want to get the penultimate epc
        penultimate_epc, _ = searcher.filter_newest_epc(older_epcs)
        if not penultimate_epc:
            penultimate_epc = newest_epc
        eligibility = Eligibility(epc=newest_epc, cleaned=cleaned)
        eligibility.check_gbis_warmfront()
        eligibility.check_eco4_warmfront()
        # If the newest EPC qualifies for neither scheme, repeat the checks on the penultimate
        # EPC; from here on "eligibility" refers to whichever EPC was checked last
        if (not eligibility.eco4_warmfront["eligible"]) and (not eligibility.gbis_warmfront):
            eligibility = Eligibility(epc=penultimate_epc, cleaned=cleaned)
            eligibility.check_gbis_warmfront()
            eligibility.check_eco4_warmfront()
            # If this is the case, we need to update the older epcs
            # We don't update just to make data cleaning easier
            if penultimate_epc.get("estimated") is None:
                older_epcs = [x for x in searcher.data["rows"] if x["lmk-key"] != penultimate_epc["lmk-key"]]
        # If the property is a cavity wall and it's filled, we produce an estimate for the age of the cavity
        # Loft MUST be suitable
        cavity_age = None
        if (
            eligibility.walls["is_cavity_wall"] and
            eligibility.walls["is_filled_cavity"] and
            eligibility.loft["suitability"] and
            eligibility.eco4_warmfront["message"] == "Failed due to full cavity - check cavity age"
        ):
            # We check the age of the cavity and if it's particularly old, we flag it
            cavity_age = calculate_cavity_age(newest_epc, older_epcs, cleaned)
        # Full checks
        eligibility.check_gbis()
        eligibility.check_eco4()
        # Only ECO4 (warmfront) eligible properties are prepared for model scoring
        if eligibility.eco4_warmfront["eligible"]:
            if eligibility.epc["uprn"] == "":
                # Same row-ID proxy as above when the EPC carries no UPRN
                eligibility.epc["uprn"] = int(property_meta["row_id"].split("_")[1])
            scoring_dictionary = prepare_model_data_row(
                property_id=property_meta["row_id"],
                modelling_epc=eligibility.epc,
                cleaned=cleaned,
                cleaning_data=cleaning_data,
                created_at=created_at,
                old_data=older_epcs,
                full_sap_epc=full_sap_epc,
                photo_supply_lookup=photo_supply_lookup,
                floor_area_decile_thresholds=floor_area_decile_thresholds
            )
            scoring_data.extend(scoring_dictionary)
        results.append(
            {
                "row_id": property_meta["row_id"],
                "uprn": eligibility.epc["uprn"],
                "Address": property_meta["Address"],
                "Postcode": property_meta["Postcode"],
                "property_type": eligibility.epc["property-type"],
                "gbis_eligible": eligibility.gbis_warmfront,
                "eco4_eligible": eligibility.eco4_warmfront["eligible"],
                "eco4_message": eligibility.eco4_warmfront["message"],
                "sap": float(eligibility.epc["current-energy-efficiency"]),
                "gbis_eligible_future": eligibility.gbis["eligible"],
                "gbis_eligible_future_message": eligibility.gbis["message"],
                "eco4_eligible_future": eligibility.eco4["eligible"],
                "eco4_eligible_future_message": eligibility.eco4["message"],
                # Property components
                "roof": eligibility.roof["clean_description"],
                "walls": eligibility.walls["clean_description"],
                "cavity_type": eligibility.cavity["type"],
                "heating": eligibility.epc["mainheat-description"],
                "tenure": eligibility.tenure,
                "date_epc": eligibility.epc["lodgement-date"],
                "loft_thickness": eligibility.roof["insulation_thickness"],
                "cavity_age": cavity_age,
                # The walls and roof dictionaries are unpacked last, so any of their keys that
                # repeat a key above overwrite that value
                **eligibility.walls,
                **eligibility.roof,
            }
        )
    # NOTE(review): if no property was ECO4 eligible, scoring_data is empty and the steps below
    # run on an empty frame - confirm whether that case needs handling
    scoring_df = pd.DataFrame(scoring_data)
    # Perform the same cleaning as in the model - first clean number of room variables though
    scoring_df = DataProcessor.apply_averages_cleaning(
        data_to_clean=scoring_df,
        cleaning_data=cleaning_data,
        cols_to_merge_on=['PROPERTY_TYPE', 'BUILT_FORM', 'CONSTRUCTION_AGE_BAND', 'LOCAL_AUTHORITY'],
        colnames=["NUMBER_HABITABLE_ROOMS", "NUMBER_HEATED_ROOMS"],
    )
    scoring_df = DataProcessor.apply_averages_cleaning(
        data_to_clean=scoring_df,
        cleaning_data=cleaning_data,
        cols_to_merge_on=COLUMNS_TO_MERGE_ON + ["LOCAL_AUTHORITY"],
    ).drop(columns=["LOCAL_AUTHORITY"])
    scoring_df = DataProcessor.clean_missings_after_description_process(
        scoring_df,
        ignore_cols=[c for c in scoring_df.columns if ("thermal_transmittance" in c) or (
            "insulation_thickness" in c) or ("ENERGY_EFF" in c)]
    )
    scoring_df = DataProcessor.clean_efficiency_variables(scoring_df)
    scoring_df["UPRN"] = scoring_df["UPRN"].astype(int)
    # NOTE(review): the portfolio id says "ha33" although this script processes HA16 - confirm
    # this is intended and not a copy-paste leftover
    model_api = ModelApi(portfolio_id="ha33-eligibility", timestamp=created_at)
    all_predictions = model_api.predict_all(
        df=scoring_df,
        bucket="retrofit-data-dev",
        prediction_buckets={
            "sap_change_predictions": "retrofit-sap-predictions-dev",
            "heat_demand_predictions": "retrofit-heat-predictions-dev",
            "carbon_change_predictions": "retrofit-carbon-predictions-dev"
        }
    )
    predictions = all_predictions["sap_change_predictions"].copy()
    results_df = pd.DataFrame(results)
    predictions = predictions.rename(columns={"property_id": "row_id"}).merge(
        results_df[["row_id", "sap"]], how="left", on="row_id"
    )
    # The uplift of each prediction row is its predicted value minus the current SAP; uplifts
    # are then summed per property
    predictions["sap_uplift"] = predictions["predictions"] - predictions["sap"]
    predictions = predictions.groupby("row_id")["sap_uplift"].sum().reset_index()
    results_df = results_df.merge(
        predictions[["sap_uplift", "row_id"]],
        how="left",
        on="row_id"
    )
    results_df["post_install_sap"] = results_df["sap"] + results_df["sap_uplift"]
    eligibility_assessment = []
    for _, row in results_df[results_df["eco4_eligible"] == True].iterrows():
        # The upgrade requirements are dependent on the current SAP.
        # A current SAP of 38 or below is judged against 57 / 55 / 53, anything higher against
        # 71 / 69 / 67 (presumably centred on the band D and band C boundaries - TODO confirm).
        # A missing post_install_sap fails every comparison and ends up as "unlikely".
        if row["sap"] <= 38:
            if row["post_install_sap"] >= 57:
                eligibility_classification = "highest confidence"
            elif row["post_install_sap"] >= 55:
                eligibility_classification = "high confidence"
            elif row["post_install_sap"] >= 53:
                eligibility_classification = "medium confidence"
            else:
                eligibility_classification = "unlikely"
        else:
            if row["post_install_sap"] >= 71:
                eligibility_classification = "highest confidence"
            elif row["post_install_sap"] >= 69:
                eligibility_classification = "high confidence"
            elif row["post_install_sap"] >= 67:
                eligibility_classification = "medium confidence"
            else:
                eligibility_classification = "unlikely"
        eligibility_assessment.append(
            {
                "row_id": row["row_id"],
                "eligibility_classification": eligibility_classification
            }
        )
    eligibility_assessment = pd.DataFrame(eligibility_assessment)
    # Left merge: properties that are not ECO4 eligible get no classification
    results_df = results_df.merge(
        eligibility_assessment, how="left", on="row_id"
    )
    return results_df, scoring_data, nodata
 def analyse_results(results_df, data, survey_list):
    """
    Exploratory comparison of our eligibility results against the properties Warmfront identified.

    Everything computed here is kept in local variables and nothing is returned, so the function
    is only useful when stepped through interactively.

    :param results_df: Per-property results, joined onto data by "row_id"
    :param data: Asset dataframe with "row_id", "survey_key", "warmfront_identified" and
        "row_colour_name" columns
    :param survey_list: Survey dataframe; its first column is used as the funding scheme
    """
    eco_schemes = ["ECO4 A/W", "AFFORDABLE WARMTH"]
    gbis_schemes = ["ECO4 GBIS (ECO+)"]
    full_cavity_message = "Failed due to full cavity - check cavity age"
    # Assemble one frame: asset metadata + our results + the survey's funding scheme
    scheme_column = survey_list.columns[0]
    funding_schemes = survey_list[["survey_key", scheme_column]].rename(
        columns={scheme_column: "funding_scheme"}
    )
    asset_columns = ["row_id", "survey_key", "warmfront_identified", "row_colour_name"]
    analysis_data = data[asset_columns].merge(results_df, how="left", on="row_id")
    analysis_data = analysis_data.merge(funding_schemes, how="left", on="survey_key")
    # Nulls become None before the thickness is converted to a number (pitched, non-flat roof)
    raw_thickness = analysis_data["roof_insulation_thickness"]
    analysis_data["roof_insulation_thickness"] = np.where(pd.isnull(raw_thickness), None, raw_thickness)
    analysis_data["roof_insulation_thickness_numeric"] = analysis_data["roof_insulation_thickness"].apply(
        lambda thickness: convert_thickness_to_numeric(thickness, is_flat=False, is_pitched=True)
    )
    # Reusable row masks
    sold = analysis_data["warmfront_identified"] == True
    not_sold = analysis_data["warmfront_identified"] == False
    eco_yes = analysis_data["eco4_eligible"] == True
    eco_no = analysis_data["eco4_eligible"] == False
    gbis_yes = analysis_data["gbis_eligible"] == True
    loft_up_to_100 = analysis_data["roof_insulation_thickness_numeric"] <= 100
    loft_over_100 = analysis_data["roof_insulation_thickness_numeric"] > 100
    # What Warmfront sold, by funding scheme
    warmfront_sold_eco4 = analysis_data[sold & (analysis_data["funding_scheme"].isin(eco_schemes))]  # 1407
    warmfront_sold_gbis = analysis_data[sold & (analysis_data["funding_scheme"].isin(gbis_schemes))]
    # ECO4-eligible properties Warmfront did not sell, split on loft insulation thickness
    ideal_eco4_warmfront_not_sold = analysis_data[eco_yes & not_sold & loft_up_to_100]
    secondary_eco4_warmfront_not_sold = analysis_data[eco_yes & not_sold & loft_over_100]
    # underperforming cavities: filled cavities with a cavity_age above ten years' worth of days
    underperforming_cavities = analysis_data[
        (analysis_data["eco4_message"] == full_cavity_message)
        & (analysis_data["cavity_age"] > 10 * 365)
        & loft_up_to_100
    ]
    identified_gbis_not_sold = analysis_data[gbis_yes & not_sold & eco_no]
    eco_eligible = analysis_data[eco_yes]
    eco_ineligible = analysis_data[eco_no]
    eco_ineligible["eco4_message"].value_counts()
    # SAP too high:
    is_sap_too_high = eco_ineligible["eco4_message"] == "sap too high"
    sap_too_high = eco_ineligible[is_sap_too_high].copy()
    cavity_wall_descriptions = [
        "Cavity wall, as built, insulated",
        "Cavity wall, as built, no insulation",
        "Cavity wall, as built, partial insulation",
        "Cavity wall, no insulation",
        "Cavity wall, partial insulation"
    ]
    further_possibilities = sap_too_high[sap_too_high["walls"].isin(cavity_wall_descriptions)]
    # NOTE(review): this is the same "sap too high" filter as sap_too_high above - confirm that
    # the full-cavity message was not the one intended here
    filled_cavities = eco_ineligible[is_sap_too_high]
    warmfront_identified = analysis_data[analysis_data["warmfront_identified"]]
    warmfront_identified["walls"].value_counts()
    all_identified_gbis = analysis_data[
        (analysis_data["warmfront_identified"] & analysis_data["funding_scheme"].isin(gbis_schemes))
        | (analysis_data["gbis_eligible"] & analysis_data["eco4_eligible"].isin([False, None]))
    ]
    empty_cavity_desriptions = [
        "Cavity wall, as built, no insulation", "Cavity wall, as built, partial insulation",
        "Cavity wall, no insulation", "Cavity wall, partial insulation"
    ]
    empty_cavities = analysis_data[analysis_data["walls"].isin(empty_cavity_desriptions)]
    remaining_empty = empty_cavities[~empty_cavities["warmfront_identified"]]
    # Of the ECO jobs, what proportion do we get right
    warmfront_identified_eco = warmfront_identified[warmfront_identified["funding_scheme"].isin(eco_schemes)]
    eco_success_rate = warmfront_identified_eco["eco4_eligible"].sum() / len(warmfront_identified_eco)
    warmfront_identified_gbis = warmfront_identified[warmfront_identified["funding_scheme"].isin(gbis_schemes)]
    gbis_success_rate = warmfront_identified_gbis["gbis_eligible"].sum() / len(warmfront_identified_gbis)
    # Additional identified
    additional_identified_eco = analysis_data[eco_yes & not_sold]
    additional_identified_eco["eligibility_classification"].value_counts()
    additional_identified_gbis = len(analysis_data[gbis_yes & eco_no & not_sold])
    # Future
    eco_future_yes = analysis_data["eco4_eligible_future"] == True
    eco_future_no = analysis_data["eco4_eligible_future"] == False
    gbis_future_yes = analysis_data["gbis_eligible_future"] == True
    additional_identified_eco_future = len(analysis_data[eco_future_yes & not_sold])
    additional_identified_gbis_future = len(analysis_data[gbis_future_yes & eco_future_no & not_sold])
 def app():
    """
    Run the HA16 eligibility pipeline: load the asset and survey lists, fetch the supporting
    datasets from S3 and compute the EPC-based eligibility results.

    The results are only held in local variables; nothing is returned or stored.
    """
    bucket = "retrofit-data-dev"
    data, survey_list = load_data()
    # Every asset row gets a synthetic identifier of the form "ha16_<position>"
    data["row_id"] = [f"ha16_{position}" for position in range(len(data))]
    packed_cleaned = read_from_s3(s3_file_name="cleaned_epc_data/cleaned.bson", bucket_name=bucket)
    cleaned = msgpack.unpackb(packed_cleaned, raw=False)
    cleaning_data = read_dataframe_from_s3_parquet(
        bucket_name=bucket,
        file_key="sap_change_model/cleaning_dataset.parquet",
    )
    created_at = datetime.now().isoformat()
    photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket=bucket)
    results_df, scoring_data, nodata = get_epc_data(
        data,
        cleaned,
        cleaning_data,
        created_at,
        photo_supply_lookup,
        floor_area_decile_thresholds,
    )
    # Manual caching of a run, kept for reference (the old file was ha16.pickle).
    # Store:
    # import pickle
    # with open("ha16_10_jan.pickle", "wb") as f:
    #     pickle.dump(
    #         {
    #             "scoring_data": scoring_data,
    #             "results": results_df,
    #             "nodata": nodata
    #         }, f
    #     )
    # Read back:
    # import pickle
    # with open("ha16_10_jan.pickle", "rb") as f:
    #     saved = pickle.load(f)
    # scoring_data = saved["scoring_data"]
    # results_df = saved["results"]
    # nodata = saved["nodata"]
--- a/etl/eligibility/ha_15_32/ha24_app.py
+++ b/etl/eligibility/ha_15_32/ha24_app.py
@ -0,0 +1,524 @@
 import os
 import msgpack
 import openpyxl
 from pathlib import Path
 from datetime import datetime
 import pandas as pd
 import numpy as np
 from utils.s3 import read_from_s3, read_dataframe_from_s3_parquet
 from utils.logger import setup_logger
 from dotenv import load_dotenv
 from tqdm import tqdm
 from backend.SearchEpc import SearchEpc
 from etl.eligibility.Eligibility import Eligibility
 from etl.eligibility.ha_15_32.app import prepare_model_data_row
 from etl.epc.DataProcessor import DataProcessor
 from etl.epc.settings import COLUMNS_TO_MERGE_ON
 from backend.ml_models.api import ModelApi
 from etl.solar.SolarPhotoSupply import SolarPhotoSupply
 from recommendations.recommendation_utils import calculate_cavity_age
 from recommendation_utils import convert_thickness_to_numeric
 # The .env file sits next to this module. Path(__file__).parent is already etl/eligibility/ha_15_32, so
 # appending those directories again (as before) pointed at a path that cannot exist.
 ENV_FILE = Path(__file__).parent / ".env"
 logger = setup_logger()
 # Load the .env file BEFORE reading anything from the environment; previously the token was read first, so a
 # value defined only in .env was never picked up.
 load_dotenv(ENV_FILE)
 EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
 def _read_rows_with_fill_colours(sheet):
    """
    Read every row below the header row of an openpyxl worksheet.

    :param sheet: The worksheet to read; row 1 is assumed to hold the headers and is skipped
    :return: A tuple (rows, colours). rows holds the cell values of each row; colours holds the fill colour index
        of the first cell of each row, or None when that index is '00000000'
    """
    rows = []
    colours = []
    for row in sheet.iter_rows(min_row=2, values_only=False):
        rows.append([cell.value for cell in row])
        # The colour of the first cell is taken as the colour of the whole row
        colour_index = row[0].fill.start_color.index
        colours.append(colour_index if colour_index != '00000000' else None)
    return rows, colours


 def load_data():
    """
    Load the HA 24 asset list and ECO4 survey list spreadsheets and match every survey row to one asset.

    :return: A tuple (data, survey_list). data is the asset list left-joined with the matched survey rows, with a
        "warmfront_identified" column that is True for matched assets and False otherwise. survey_list is the
        tidied survey list with a "survey_key" per row
    :raises ValueError: If a survey row cannot be narrowed down to exactly one asset
    """
    workbook = openpyxl.load_workbook('etl/eligibility/ha_15_32/HESTIA - HA 24 ASSET LIST.xlsx')
    sheet = workbook.active
    sheet_colnames = [cell.value for cell in sheet[1]]
    rows_data, rows_colors = _read_rows_with_fill_colours(sheet)

    asset_list = pd.DataFrame(rows_data, columns=sheet_colnames)
    # Remove None columns. Copy so the column assignments below act on an independent frame
    asset_list = asset_list.iloc[:, 0:10].copy()
    asset_list['row_color'] = rows_colors
    asset_list["row_colour_name"] = np.where(
        asset_list["row_color"] == "FFFF0000", "red",
        np.where(asset_list["row_color"] == "FF92D050", "green", "yellow")
    )
    asset_list["row_colour_code"] = np.where(
        asset_list["row_colour_name"] == "red", "does not meet criteria",
        np.where(asset_list["row_colour_name"] == "green", "identified potential eco", "maybe in the future")
    )

    # The third column is listed as "Address" but it's actually the postcode. We have two Address columns so we
    # change just the third. Assign a fresh list of names rather than mutating columns.values in place, which
    # relies on pandas internals
    column_names = list(asset_list.columns)
    column_names[2] = "Postcode"
    asset_list.columns = column_names

    # Split up the address on commas, which is useful for matching later
    split_addresses = asset_list['Address'].str.split(',', expand=True)
    split_addresses.columns = ['temp', 'address2', 'address3', 'address4', 'address5', 'address6']
    asset_list = pd.concat([asset_list, split_addresses], axis=1)
    # There are no commas separating house number and address 1
    split_addresses2 = asset_list['temp'].str.split(' ', expand=True)
    split_addresses2.columns = ['HouseNo', 'part1', 'part2', "part3", "part4"]
    # We could re-concatenate but we only care about HouseNo for the moment
    asset_list = pd.concat([asset_list, split_addresses2[["HouseNo"]]], axis=1)

    # Read in surveys
    survey_workbook = openpyxl.load_workbook('etl/eligibility/ha_15_32/HESTIA - HA 24 ECO4 SURVEY LIST.xlsx')
    survey_sheet = survey_workbook.active
    survey_rows, survey_colors = _read_rows_with_fill_colours(survey_sheet)
    survey_list = pd.DataFrame(survey_rows, columns=[cell.value for cell in survey_sheet[1]])
    survey_list["row_colour"] = survey_colors

    # Tidy up the street/block name a bit. The corrections are applied in order (some feed into later ones) and
    # as literal text, so the "." in a pattern is never treated as a regex wildcard
    street_name_corrections = (
        ("council house, nidds lane", "nidds lane"),
        ("wirral avenue", "wirrall avenue"),
        ("st ives road", "st. ives crescent"),
        ("sundringham road", "sandringham road"),
        ("milton avenue", "milton road"),
        ("st ives crescent", "st. ives crescent"),
        ("council house, waterbelly lane", "waterbelly lane"),
        # Generally remove "council house, " from the start of the street name
        ("council house, ", ""),
        ("st. leodegars close", "st leodegars close"),
    )
    street_names = survey_list["Street / Block Name"].str.replace("/", ", ", regex=False)
    street_names = street_names.str.lower().str.strip()
    for wrong, right in street_name_corrections:
        street_names = street_names.str.replace(wrong, right, regex=False)
    survey_list["Street / Block Name"] = street_names
    # asset_list[asset_list["Address"].str.lower().str.contains("wirral")]["Address"]

    # Drop all None rows, then key the remaining rows. Copy so the new column is not set on a slice
    survey_list = survey_list[~pd.isnull(survey_list["Street / Block Name"])].copy()
    survey_list["survey_key"] = ["survey_" + str(i) for i in range(0, len(survey_list))]

    matched = []
    for _, row in tqdm(survey_list.iterrows(), total=len(survey_list)):
        house_number = row["NO."]
        if isinstance(house_number, str):
            house_number = house_number.lower()
        # Filter on the first line of the address
        df = asset_list[asset_list["Address"].str.lower().str.contains(row["Street / Block Name"].lower())].copy()
        # df = df[df["Postcode"].str.lower().str.contains(row["Post Code"].lower())]
        df = df[df["Address"].str.lower().str.contains(str(house_number))]
        # Narrow progressively until exactly one asset is left: exact house number first, then postcode
        if df.shape[0] != 1:
            df = df[df["HouseNo"] == str(house_number)]
            if df.shape[0] != 1:
                df = df[df["Postcode"].str.lower().str.contains(row["Post Code"].lower())]
                if df.shape[0] != 1:
                    print(row["Street / Block Name"])
                    print(house_number)
                    print(row["Post Code"].lower())
                    raise ValueError("Investigate")
        matched.append(
            {
                "survey_key": row["survey_key"],
                "matched_address": df["Address"].values[0],
                "survey_house_no": row["NO."],
                "survey_street_name": row["Street / Block Name"],
                "survey_postcode": row["Post Code"],
                "survey_status": row["INSTALLED OR CANCELLED"]
            }
        )
    matched = pd.DataFrame(matched)
    matched["warmfront_identified"] = True

    # Combine asset list and surveys
    data = asset_list.merge(
        matched, how="left", left_on="Address", right_on="matched_address",
    )
    # Assets with no matching survey row come out of the left join as missing; flag them as not identified
    data["warmfront_identified"] = data["warmfront_identified"].fillna(False)
    return data, survey_list
 def _classify_eco4_confidence(current_sap, post_install_sap):
    """
    Grade how likely a property is to reach the SAP score it needs after the install.

    Properties with a current SAP of 38 or below (bands F and G) are measured against a target of 55, everything
    else against a target of 69 (presumably the band D and band C boundaries - confirm against the scheme rules).

    :param current_sap: The property's current SAP score
    :param post_install_sap: The predicted SAP score after the install
    :return: "highest confidence" when the target is cleared by 2 or more points, "high confidence" when it is
        met, "medium confidence" when it is missed by at most 2 points, and "unlikely" otherwise (including when
        the prediction is missing, since every comparison against NaN is False)
    """
    required_sap = 55 if current_sap <= 38 else 69
    if post_install_sap >= required_sap + 2:
        return "highest confidence"
    if post_install_sap >= required_sap:
        return "high confidence"
    if post_install_sap >= required_sap - 2:
        return "medium confidence"
    return "unlikely"


 def get_epc_data(data, cleaned, cleaning_data, created_at, photo_supply_lookup, floor_area_decile_thresholds):
    """
    Look up the EPC for every property, run the eligibility checks and predict the SAP uplift for the properties
    that pass the ECO4 (warmfront) check.

    :param data: One row per property; the "HouseNo", "Postcode", "Address", "Property Type" and "row_id" columns
        are read
    :param cleaned: Cleaned EPC lookup data, passed to Eligibility, calculate_cavity_age and
        prepare_model_data_row
    :param cleaning_data: Dataset passed to prepare_model_data_row and the DataProcessor averages cleaning
    :param created_at: Timestamp for this run, passed to prepare_model_data_row and ModelApi
    :param photo_supply_lookup: Passed through to prepare_model_data_row
    :param floor_area_decile_thresholds: Passed through to prepare_model_data_row
    :return: A tuple (results_df, scoring_data, nodata). results_df has one row per property with an EPC,
        including the predicted uplift and confidence classification; scoring_data is the list of raw model
        input rows (before cleaning); nodata is the list of property rows for which no EPC was found
    """
    scoring_data = []
    results = []
    nodata = []
    # Maps the property type labels used in the asset list onto the values the EPC search expects
    property_type_lookup = {
        "01 HOUSE": "House",
        "02 FLAT": "Flat",
        "03 BUNGALOW": "Bungalow",
        "05 BEDSIT": "Flat",
        "04 MAISONETTE": "Maisonette",
        "01 HOUSE MID": "House",
        "10 PBUNGALOW": "Bungalow",
        "14 SFLAT": "Flat",
        "12 SBEDSIT": "Flat",
        "11 PFLAT": "Flat",
        "13 SBUNGALOW": "Bungalow",
        " 01 HOUSE MID": "House",
        "09 PBEDSIT": "Flat"
    }
    for _, property_meta in tqdm(data.iterrows(), total=len(data)):
        searcher = SearchEpc(
            address1=property_meta["HouseNo"],
            postcode=property_meta["Postcode"],
            auth_token=EPC_AUTH_TOKEN,
            os_api_key=None,
            full_address=property_meta["Address"]
        )
        # The Ordnance Survey lookup is skipped, so the property type is supplied from the asset list instead
        searcher.ordnance_survey_client.property_type = property_type_lookup[property_meta["Property Type"]]
        searcher.find_property(skip_os=True)
        if searcher.newest_epc is None:
            nodata.append(property_meta)
            continue
        newest_epc = searcher.newest_epc
        older_epcs = searcher.older_epcs
        full_sap_epc = searcher.full_sap_epc
        # We also want to get the penultimate epc
        penultimate_epc, _ = searcher.filter_newest_epc(older_epcs)
        if not penultimate_epc:
            penultimate_epc = newest_epc
        eligibility = Eligibility(epc=newest_epc, cleaned=cleaned)
        eligibility.check_gbis_warmfront()
        eligibility.check_eco4_warmfront()
        # If the newest EPC fails both checks, assess the penultimate EPC instead
        if (not eligibility.eco4_warmfront["eligible"]) and (not eligibility.gbis_warmfront):
            eligibility = Eligibility(epc=penultimate_epc, cleaned=cleaned)
            eligibility.check_gbis_warmfront()
            eligibility.check_eco4_warmfront()
            # If this is the case, we need to update the older epcs
            # older_epcs = [
            #     x for x in older_epcs if x["lmk-key"] not in [newest_epc["lmk-key"], penultimate_epc["lmk-key"]]
            # ]
            # If this is the case, we need to update the older epcs
            # We don't update just to make data cleaning easier
            if penultimate_epc.get("estimated") is None:
                older_epcs = [x for x in searcher.data["rows"] if x["lmk-key"] != penultimate_epc["lmk-key"]]
        # Loft MUST be suitable
        cavity_age = None
        if (
            eligibility.walls["is_cavity_wall"] and
            eligibility.walls["is_filled_cavity"] and
            eligibility.loft["suitability"] and
            eligibility.eco4_warmfront["message"] == "Failed due to full cavity - check cavity age"
        ):
            # We check the age of the cavity and if it's particularly old, we flag it
            cavity_age = calculate_cavity_age(newest_epc, older_epcs, cleaned)
        # Full checks
        eligibility.check_gbis()
        eligibility.check_eco4()
        if eligibility.eco4_warmfront["eligible"]:
            # Fall back to the numeric part of the row id when the EPC carries no UPRN
            if eligibility.epc["uprn"] in ["", None]:
                eligibility.epc["uprn"] = int(property_meta["row_id"].split("_")[1])
            scoring_dictionary = prepare_model_data_row(
                property_id=property_meta["row_id"],
                modelling_epc=eligibility.epc,
                cleaned=cleaned,
                cleaning_data=cleaning_data,
                created_at=created_at,
                old_data=older_epcs,
                full_sap_epc=full_sap_epc,
                photo_supply_lookup=photo_supply_lookup,
                floor_area_decile_thresholds=floor_area_decile_thresholds
            )
            scoring_data.extend(scoring_dictionary)
        results.append(
            {
                "row_id": property_meta["row_id"],
                "uprn": eligibility.epc["uprn"],
                "Address": property_meta["Address"],
                "Postcode": property_meta["Postcode"],
                "property_type": eligibility.epc["property-type"],
                "gbis_eligible": eligibility.gbis_warmfront,
                "eco4_eligible": eligibility.eco4_warmfront["eligible"],
                "eco4_message": eligibility.eco4_warmfront["message"],
                "sap": float(eligibility.epc["current-energy-efficiency"]),
                "gbis_eligible_future": eligibility.gbis["eligible"],
                "gbis_eligible_future_message": eligibility.gbis["message"],
                "eco4_eligible_future": eligibility.eco4["eligible"],
                "eco4_eligible_future_message": eligibility.eco4["message"],
                # Property components
                "roof": eligibility.roof["clean_description"],
                "walls": eligibility.walls["clean_description"],
                "cavity_type": eligibility.cavity["type"],
                "heating": eligibility.epc["mainheat-description"],
                "tenure": eligibility.tenure,
                "date_epc": eligibility.epc["lodgement-date"],
                "cavity_age": cavity_age,
                **eligibility.walls,
                **eligibility.roof,
            }
        )
    scoring_df = pd.DataFrame(scoring_data)
    # Perform the same cleaning as in the model - first clean number of room variables though
    scoring_df = DataProcessor.apply_averages_cleaning(
        data_to_clean=scoring_df,
        cleaning_data=cleaning_data,
        cols_to_merge_on=['PROPERTY_TYPE', 'BUILT_FORM', 'CONSTRUCTION_AGE_BAND', 'LOCAL_AUTHORITY'],
        colnames=["NUMBER_HABITABLE_ROOMS", "NUMBER_HEATED_ROOMS"],
    )
    scoring_df = DataProcessor.apply_averages_cleaning(
        data_to_clean=scoring_df,
        cleaning_data=cleaning_data,
        cols_to_merge_on=COLUMNS_TO_MERGE_ON + ["LOCAL_AUTHORITY"],
    ).drop(columns=["LOCAL_AUTHORITY"])
    scoring_df = DataProcessor.clean_missings_after_description_process(
        scoring_df,
        ignore_cols=[c for c in scoring_df.columns if ("thermal_transmittance" in c) or (
            "insulation_thickness" in c) or ("ENERGY_EFF" in c)]
    )
    scoring_df = DataProcessor.clean_efficiency_variables(scoring_df)
    scoring_df["UPRN"] = scoring_df["UPRN"].astype(int)
    model_api = ModelApi(portfolio_id="ha24-eligibility", timestamp=created_at)
    all_predictions = model_api.predict_all(
        df=scoring_df,
        bucket="retrofit-data-dev",
        prediction_buckets={
            "sap_change_predictions": "retrofit-sap-predictions-dev",
            "heat_demand_predictions": "retrofit-heat-predictions-dev",
            "carbon_change_predictions": "retrofit-carbon-predictions-dev"
        }
    )
    predictions = all_predictions["sap_change_predictions"].copy()
    results_df = pd.DataFrame(results)
    predictions = predictions.rename(columns={"property_id": "row_id"}).merge(
        results_df[["row_id", "sap"]], how="left", on="row_id"
    )
    # Each prediction row is turned into an uplift over the current SAP, then summed per property
    predictions["sap_uplift"] = predictions["predictions"] - predictions["sap"]
    predictions = predictions.groupby("row_id")["sap_uplift"].sum().reset_index()
    results_df = results_df.merge(
        predictions[["sap_uplift", "row_id"]],
        how="left",
        on="row_id"
    )
    results_df["post_install_sap"] = results_df["sap"] + results_df["sap_uplift"]
    # The upgrade requirements are dependent on the current SAP: an F or G property has a lower target than the
    # rest (see _classify_eco4_confidence)
    eco4_eligible_rows = results_df[results_df["eco4_eligible"] == True]
    # Explicit columns keep the merge key present even when there is nothing to classify
    eligibility_assessment = pd.DataFrame(
        [
            {
                "row_id": row["row_id"],
                "eligibility_classification": _classify_eco4_confidence(row["sap"], row["post_install_sap"])
            }
            for _, row in eco4_eligible_rows.iterrows()
        ],
        columns=["row_id", "eligibility_classification"]
    )
    results_df = results_df.merge(
        eligibility_assessment, how="left", on="row_id"
    )
    return results_df, scoring_data, nodata
 def analyse_results(results_df, data, survey_list):
    """
    Exploratory comparison of our eligibility results against the properties Warmfront identified.

    Every table, rate and count built here is a local only - nothing is returned or stored - so this is meant to
    be stepped through interactively.

    :param results_df: Per-property results, joined onto data by "row_id"
    :param data: Asset list; the "row_id", "survey_key" and "warmfront_identified" columns are used
    :param survey_list: Survey list; its first column is taken as the funding scheme and joined by "survey_key"
    """
    def _pitched_roof_thickness(description):
        return convert_thickness_to_numeric(description, is_flat=False, is_pitched=True)

    analysis_data = data[["row_id", "survey_key", "warmfront_identified"]].merge(
        results_df, how="left", on="row_id"
    )
    scheme_column = survey_list.columns[0]
    funding_lookup = survey_list[["survey_key", scheme_column]].rename(columns={scheme_column: "funding_scheme"})
    analysis_data = analysis_data.merge(funding_lookup, how="left", on="survey_key")

    # NEW
    # Normalise missing thicknesses to None before converting them to numbers
    analysis_data["roof_insulation_thickness"] = np.where(
        pd.isnull(analysis_data["roof_insulation_thickness"]), None, analysis_data["roof_insulation_thickness"]
    )
    analysis_data["roof_insulation_thickness_numeric"] = analysis_data["roof_insulation_thickness"].apply(
        _pitched_roof_thickness
    )

    flagged = analysis_data["warmfront_identified"] == True
    funding = analysis_data["funding_scheme"]
    warmfront_sold_eco4 = analysis_data[flagged & funding.isin(["ECO4 A/W", "AFFORDABLE WARMTH"])]
    warmfront_sold_gbis = analysis_data[flagged & funding.isin(["ECO4 GBIS (ECO+)"])]

    # 1407
    eco4_now = analysis_data["eco4_eligible"] == True
    not_flagged = analysis_data["warmfront_identified"] == False
    thin_roof_insulation = analysis_data["roof_insulation_thickness_numeric"] <= 100
    additional_eco4_warmfront_not_sold = analysis_data[eco4_now & not_flagged & thin_roof_insulation]

    gbis_now = analysis_data["gbis_eligible"] == True
    counted_as_eco4 = analysis_data["row_id"].isin(additional_eco4_warmfront_not_sold["row_id"].values)
    additional_gbis_warmfront_not_sold = analysis_data[gbis_now & not_flagged & ~counted_as_eco4]
    additional_gbis_warmfront_not_sold["walls"].value_counts()
    analysis_data["walls"].value_counts()
    # END NEW

    # These masks use the raw columns (no == True / == False comparison)
    raw_flag = analysis_data["warmfront_identified"]
    all_identified_eco = analysis_data[
        (raw_flag & funding.isin(["ECO4 A/W"])) | analysis_data["eco4_eligible"]
    ]
    all_identified_gbis = analysis_data[
        (raw_flag & funding.isin(["ECO4 GBIS (ECO+)"])) |
        (analysis_data["gbis_eligible"] & analysis_data["eco4_eligible"].isin([False, None]))
    ]
    warmfront_identified = analysis_data[raw_flag]

    # Of the ECO jobs, what proportion do we get right
    sold_as_eco = warmfront_identified["funding_scheme"].isin(["ECO4 A/W", "AFFORDABLE WARMTH"])
    warmfront_identified_eco = warmfront_identified[sold_as_eco]
    eco_success_rate = warmfront_identified_eco["eco4_eligible"].sum() / len(warmfront_identified_eco)
    sold_as_gbis = warmfront_identified["funding_scheme"].isin(["ECO4 GBIS (ECO+)"])
    warmfront_identified_gbis = warmfront_identified[sold_as_gbis]
    # No gbis for this
    # gbis_success_rate = warmfront_identified_gbis["gbis_eligible"].sum() / warmfront_identified_gbis.shape[0]

    # Additional identified
    additional_identified_eco = analysis_data[eco4_now & not_flagged]
    additional_identified_eco["eligibility_classification"].value_counts()
    eco4_not_now = analysis_data["eco4_eligible"] == False
    additional_identified_gbis = len(analysis_data[gbis_now & eco4_not_now & not_flagged])

    # Future
    eco4_future = analysis_data["eco4_eligible_future"] == True
    additional_identified_eco_future = len(analysis_data[eco4_future & not_flagged])
    gbis_future = analysis_data["gbis_eligible_future"] == True
    eco4_not_future = analysis_data["eco4_eligible_future"] == False
    additional_identified_gbis_future = len(analysis_data[gbis_future & eco4_not_future & not_flagged])
 def app():
    """
    Entry point for the HA24 run: load the asset/survey data, fetch the cleaning inputs from S3 and run
    get_epc_data over every property.

    Nothing is returned or written out. The commented-out snippets at the bottom are the manual pickle
    save/restore steps used when running this by hand.
    """
    data_bucket = "retrofit-data-dev"

    data, survey_list = load_data()
    # Synthetic per-row identifier; get_epc_data uses it as the key when joining predictions back on
    data["row_id"] = [f"ha24_{position}" for position in range(len(data))]

    packed_cleaned = read_from_s3(
        s3_file_name="cleaned_epc_data/cleaned.bson",
        bucket_name=data_bucket
    )
    cleaned = msgpack.unpackb(packed_cleaned, raw=False)

    cleaning_data = read_dataframe_from_s3_parquet(
        bucket_name=data_bucket, file_key="sap_change_model/cleaning_dataset.parquet",
    )

    # A single timestamp is shared by everything produced in this run
    created_at = datetime.now().isoformat()

    solar_inputs = SolarPhotoSupply.load(bucket=data_bucket)
    photo_supply_lookup, floor_area_decile_thresholds = solar_inputs

    results_df, scoring_data, nodata = get_epc_data(
        data, cleaned, cleaning_data, created_at, photo_supply_lookup, floor_area_decile_thresholds
    )

    # Pickle results just in case
    # import pickle
    # with open("ha24_10_jan.pickle", "wb") as f:
    #     pickle.dump(
    #         {
    #             "scoring_data": scoring_data,
    #             "results": results_df,
    #             "nodata": nodata
    #         }, f
    #     )
    # Read in pickle
    # import pickle
    # with open("ha24_10_jan.pickle", "rb") as f:
    #     saved = pickle.load(f)
    # scoring_data = saved["scoring_data"]
    # results_df = saved["results"]
    # nodata = saved["nodata"]
--- a/etl/eligibility/ha_15_32/ha25_app.py
+++ b/etl/eligibility/ha_15_32/ha25_app.py
@ -0,0 +1,883 @@
 import os
 import msgpack
 import openpyxl
 from pathlib import Path
 from datetime import datetime
 import pandas as pd
 import numpy as np
 from utils.s3 import read_from_s3
 from utils.logger import setup_logger
 from dotenv import load_dotenv
 from utils.s3 import read_dataframe_from_s3_parquet
 from tqdm import tqdm
 from backend.SearchEpc import SearchEpc
 from etl.eligibility.Eligibility import Eligibility
 from etl.eligibility.ha_15_32.app import prepare_model_data_row
 from etl.epc.DataProcessor import DataProcessor
 from etl.epc.settings import COLUMNS_TO_MERGE_ON
 from backend.ml_models.api import ModelApi
 from etl.solar.SolarPhotoSupply import SolarPhotoSupply
 from recommendations.recommendation_utils import calculate_cavity_age
 from recommendation_utils import convert_thickness_to_numeric
 import re
 # The .env file sits next to this module. Path(__file__).parent is already etl/eligibility/ha_15_32, so
 # appending those directories again (as before) pointed at a path that cannot exist.
 ENV_FILE = Path(__file__).parent / ".env"
 logger = setup_logger()
 # Load the .env file BEFORE reading anything from the environment; previously the token was read first, so a
 # value defined only in .env was never picked up.
 load_dotenv(ENV_FILE)
 EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
 def load_data():
    workbook = openpyxl.load_workbook('etl/eligibility/ha_15_32/HESTIA - HA 25 ASSET LIST.xlsx', data_only=True)
    sheet = workbook.active
    rows_data = []
    rows_colors = []
    for row in sheet.iter_rows(min_row=1, values_only=True):  # use values_only=True to get values
        row_data = list(row)  # No need for comprehension, values_only=True returns a tuple of values
        rows_data.append(row_data)
    # Headers are on the final row. Pop them off and store them and then remove them from rows_data
    headers = rows_data.pop()
    # The postcode header is None, so we replace it with "postcode"
    headers[-1] = "postcode"
    # Handle colours separately
    for row in sheet.iter_rows(min_row=1, values_only=False):
        # Assume first cell color is indicative of entire row
        row_color = row[0].fill.start_color.index if row[0].fill.start_color.index != '00000000' else None
        rows_colors.append(row_color)
    # Remove the final row of colours, which is the header
    rows_colors.pop()
    asset_list = pd.DataFrame(rows_data, columns=headers)
    asset_list['row_color'] = rows_colors
    asset_list["row_colour_name"] = np.where(
        asset_list["row_color"] == "FFFF0000", "red",
        np.where(asset_list["row_color"] == "FF00B050", "green", "yellow")
    )
    asset_list["row_colour_code"] = np.where(
        asset_list["row_colour_name"] == "red", "does not meet criteria",
        np.where(asset_list["row_colour_name"] == "green", "identified potential eco", "maybe in the future")
    )
    asset_list["address"] = asset_list["T1_Address"].copy().str.lower()
    asset_list["address"] = asset_list["address"].str.replace("flat", "")
    asset_list["address"] = asset_list["address"].str.strip()
    split_addresses = asset_list['address'].str.split(' ', expand=True)
    split_addresses.columns = ['HouseNo', 'address2', 'address3', 'address4', 'address5', 'address6', 'address7',
                               'address8',
                               'address9', 'address10', 'address11', 'address12', 'address13', 'address14', ]
    split_addresses["HouseNo"] = split_addresses["HouseNo"].str.replace(";", "")
    # We could re-concatenate but we only care about HouseNo for the moment
    asset_list = pd.concat([asset_list, split_addresses[["HouseNo"]]], axis=1)
    asset_list["postcode"] = asset_list["postcode"].str.strip()
    # We analysis historical ECO3 survey list
    eco3_survey_workbook = openpyxl.load_workbook(f'etl/eligibility/ha_15_32/HESTIA - HA 25 ECO3 SURVEY LIST.xlsx')
    eco3_survey_sheet = eco3_survey_workbook["CAVITY"]
    eco3_survey_rows = []
    eco3_survey_colors = []
    for row in eco3_survey_sheet.iter_rows(min_row=2, values_only=False):  # Assuming the first row is headers
        row_data = [cell.value for cell in row]  # This will get you the cell values
        row_color = row[0].fill.start_color.index if row[0].fill.start_color.index != '00000000' else None
        # row_color = COLOR_INDEX[row_color]
        eco3_survey_rows.append(row_data)
        eco3_survey_colors.append(row_color)
    # Some adhoc analysis on the eco3 survey list, just to get completion and cancellation rates historically
    eco3_survey_list = pd.DataFrame(eco3_survey_rows, columns=[cell.value for cell in eco3_survey_sheet[1]])
    eco3_survey_list["row_colour"] = eco3_survey_colors
    # Remove rows where street name is missing
    eco3_survey_list = eco3_survey_list[~pd.isnull(eco3_survey_list["Street / Block Name"])]
    # We need to parse the row colours
    # We have the following mappings:
    # FF7030A0: purple
    # FF92D050: green
    # FFFF0000: red
    # FFFFFF00: yellow
    # FF38FD23: green
    eco3_survey_list["row_colour_name"] = np.where(
        eco3_survey_list["row_colour"] == "FF7030A0", "purple",
        np.where(eco3_survey_list["row_colour"] == "FF92D050", "green",
                 np.where(eco3_survey_list["row_colour"] == "FFFF0000", "red",
                          np.where(eco3_survey_list["row_colour"] == "FFFFFF00", "yellow",
                                   np.where(eco3_survey_list["row_colour"] == "FF38FD23", "green", "unknown")
                                   )
                          )
                 )
    )
    # We map the meaning:
    # red: cancelled
    # green: installed advised install complete
    # purple: installer advised install complete + post works EPC
    # yellow: filler row - drop
    eco3_survey_list["row_colour_code"] = np.where(
        eco3_survey_list["row_colour_name"] == "red", "cancelled",
        np.where(eco3_survey_list["row_colour_name"] == "green", "installed advised install complete",
                 np.where(eco3_survey_list["row_colour_name"] == "purple",
                          "installer advised install complete + post works EPC",
                          np.where(eco3_survey_list["row_colour_name"] == "yellow", "filler row - drop", "unknown")
                          )
                 )
    )
    # This is good enough for the indicative cancellation rates
    # We now read in the indicative survey list which identified pospects for ECO4 works
    eco4_survey_workbook = openpyxl.load_workbook(
        f'etl/eligibility/ha_15_32/HESTIA - HA 25 ADHOC ISOLATED IDENTIFIED PROPERTIES FOR CWI.xlsx'
    )
    eco4_prospect_survey_sheet = eco4_survey_workbook["LiveWest"]
    eco4_prospects_survey_rows = []
    eco4_prospects_survey_colors = []
    for row in eco4_prospect_survey_sheet.iter_rows(min_row=2, values_only=False):  # Assuming the first row is headers
        row_data = [cell.value for cell in row]  # This will get you the cell values
        row_color = row[0].fill.start_color.index if row[0].fill.start_color.index != '00000000' else None
        # row_color = COLOR_INDEX[row_color]
        eco4_prospects_survey_rows.append(row_data)
        eco4_prospects_survey_colors.append(row_color)
    # Some adhoc analysis on the eco3 survey list, just to get completion and cancellation rates historically
    eco4_prospects_survey_list = pd.DataFrame(
        eco4_prospects_survey_rows, columns=[cell.value for cell in eco4_prospect_survey_sheet[1]]
    )
    eco4_prospects_survey_list["row_colour"] = eco4_prospects_survey_colors
    eco4_prospects_survey_list["ADDRESS 1"] = eco4_prospects_survey_list["ADDRESS 1"].str.lower()
    eco4_prospects_survey_list["ADDRESS 1"] = eco4_prospects_survey_list["ADDRESS 1"].str.strip()
    eco4_prospects_survey_list = eco4_prospects_survey_list[~pd.isnull(eco4_prospects_survey_list["ADDRESS 1"])]
    eco4_prospects_survey_list["survey_key"] = ["survey_" + str(i) for i in range(0, len(eco4_prospects_survey_list))]
    # Correct some errors in the survey list
    eco4_prospects_survey_list["POSTCODE"] = np.where(
        (eco4_prospects_survey_list["ADDRESS 1"] == "berry park") &
        (eco4_prospects_survey_list["POSTCODE"] == "PL12 6HP"),
        "PL12 6EN",
        eco4_prospects_survey_list["POSTCODE"]
    )
    # Remove semi colons from address in asset and survey list
    asset_list["T1_Address"] = asset_list["T1_Address"].str.replace(";", "")
    eco4_prospects_survey_list["ADDRESS 1"] = eco4_prospects_survey_list["ADDRESS 1"].str.replace(";", "")
    # In the prosepcts survey list, we have 6 WALKHAM MEADOWS listed twice, which should be 6a and 6b
    eco4_prospects_survey_list.loc[838, "NO"] = "6a"
    eco4_prospects_survey_list.loc[839, "NO"] = "6b"
    # 3, 7, 9 BOLDVENTURE ROAD should be BOLDVENTURE CLOSE
    eco4_prospects_survey_list["ADDRESS 1"] = np.where(
        (eco4_prospects_survey_list["ADDRESS 1"] == "boldventure road") &
        (eco4_prospects_survey_list["NO"].isin([3, 7, 9])),
        "boldventure close",
        eco4_prospects_survey_list["ADDRESS 1"]
    )
    eco4_prospects_survey_list["ADDRESS 1"] = np.where(
        (eco4_prospects_survey_list["ADDRESS 1"] == "old farm road") & (
            eco4_prospects_survey_list["POSTCODE"] == "PL5 1EP"),
        "old school road",
        eco4_prospects_survey_list["ADDRESS 1"]
    )
    eco4_prospects_survey_list["ADDRESS 1"] = np.where(
        (eco4_prospects_survey_list["ADDRESS 1"] == "croft orchard") & (
            eco4_prospects_survey_list["POSTCODE"] == "TQ12 6RP") & (
            eco4_prospects_survey_list["NO"] == 52),
        "drum way",
        eco4_prospects_survey_list["ADDRESS 1"]
    )
    # String replace
    eco4_prospects_survey_list["ADDRESS 1"] = eco4_prospects_survey_list["ADDRESS 1"].str.replace(
        "the gulls, collaton road", "the gulls collaton road"
    )
    eco4_prospects_survey_list["ADDRESS 1"] = eco4_prospects_survey_list["ADDRESS 1"].str.replace(
        "crows-an-eglose", "crows-an-eglos"
    )
    # We have a high volume of rows that do not match
    matched = []
    nomatch = []
    for _, row in tqdm(eco4_prospects_survey_list.iterrows(), total=len(eco4_prospects_survey_list)):
        # Not in the asset list
        if (row["ADDRESS 1"] == "berry park") and row["NO"] in [40, 42] and row["POSTCODE"] == "PL12 6EN":
            nomatch.append(row.to_dict())
            continue
        # Not in the asset list
        if (row["ADDRESS 1"] == "roberts road") and row["NO"] == 23 and row["POSTCODE"] == "PL5 1DP":
            nomatch.append(row.to_dict())
            continue
        # Not in the asset list
        if row["ADDRESS 1"] in [
            "kaynton mead", "broadmoor lane", "hoopers barton", "ecos court", "selwood road",
            "castle street"
        ]:
            nomatch.append(row.to_dict())
            continue
        house_number = row["NO"]
        if isinstance(house_number, str):
            house_number = house_number.lower()
            if "flat" in house_number:
                house_number = house_number.split("flat")[1].strip()
        # Filter on the first line of the address
        df = asset_list[asset_list["T1_Address"].str.lower().str.contains(row["ADDRESS 1"].lower())].copy()
        if house_number is not None:
            if df.shape[0] != 1:
                df = df[df["T1_Address"].str.lower().str.contains(str(house_number))]
        if df.shape[0] != 1:
            if house_number is not None:
                df = df[df["HouseNo"] == str(house_number)]
            if df.shape[0] != 1:
                if row["POSTCODE"] is not None:
                    df = df[df["postcode"].str.lower().str.contains(row["POSTCODE"].lower())]
                if df.shape[0] != 1:
                    nomatch.append(row.to_dict())
                    continue
        matched.append(
            {
                "survey_key": row["survey_key"],
                "matched_address": df["T1_Address"].values[0],
                "survey_house_no": row["NO"],
                "survey_street_name": row["ADDRESS 1"],
                "survey_postcode": row["POSTCODE"],
            }
        )
    nomatch = pd.DataFrame(nomatch)
    matched = pd.DataFrame(matched)
    matched["warmfront_identified"] = True
    # Combine asset list and surveys
    data = asset_list.merge(
        matched, how="left", left_on="T1_Address", right_on="matched_address",
    )
    data["warmfront_identified"] = data["warmfront_identified"].fillna(False)
    lost_identified_properties = eco4_prospects_survey_list[
        ~eco4_prospects_survey_list["survey_key"].isin(matched["survey_key"])
    ]
    return data, eco4_prospects_survey_list, lost_identified_properties
 def map_year_to_age_band(year):
    """
    Map a build year onto the matching "England and Wales: ..." construction age band label.
    :param year: The build year; anything accepted by int() (e.g. 1975, "1975", 1975.0)
    :return: The age band label, or "Invalid Year" when the value cannot be converted to an integer
    """
    try:
        year = int(year)
    except (TypeError, ValueError, OverflowError):
        # TypeError covers None, ValueError covers non-numeric strings and NaN, OverflowError covers infinity
        return "Invalid Year"  # Or any other way you want to handle invalid inputs
    if year < 1900:
        return "England and Wales: before 1900"
    # (inclusive upper bound, label) pairs in ascending order; the first bound that fits wins
    age_bands = (
        (1929, "England and Wales: 1900-1929"),
        (1949, "England and Wales: 1930-1949"),
        (1966, "England and Wales: 1950-1966"),
        (1975, "England and Wales: 1967-1975"),
        (1982, "England and Wales: 1976-1982"),
        (1990, "England and Wales: 1983-1990"),
        (1995, "England and Wales: 1991-1995"),
        (2002, "England and Wales: 1996-2002"),
        (2006, "England and Wales: 2003-2006"),
        (2011, "England and Wales: 2007-2011"),
    )
    for upper_bound, label in age_bands:
        if year <= upper_bound:
            return label
    # All remaining years are 2012 onwards
    return "England and Wales: 2012 onwards"
 def get_epc_data(data, cleaned, cleaning_data, created_at, photo_supply_lookup, floor_area_decile_thresholds):
    """
    For every property in the asset list, search for an EPC, run the eligibility checks and, for
    properties that pass the ECO4 warmfront check, build model scoring rows. The scoring rows are then
    cleaned, sent to the model API and the predicted SAP uplift is turned into a confidence classification.
    :param data: DataFrame of asset list properties (reads HouseNo, postcode, address, T1_AssetType,
        T1_Address, Build Yr and row_id). An iterable of (index, row) pairs is also accepted
    :param cleaned: Cleaned EPC lookup passed through to Eligibility, calculate_cavity_age and
        prepare_model_data_row
    :param cleaning_data: Cleaning dataset passed to prepare_model_data_row and DataProcessor
    :param created_at: Timestamp used for the scoring rows and the ModelApi run
    :param photo_supply_lookup: Passed through to prepare_model_data_row
    :param floor_area_decile_thresholds: Passed through to prepare_model_data_row
    :return: Tuple of (results_df, scoring_data, nodata) - the per-property results DataFrame, the raw
        list of scoring rows and the list of properties for which no EPC was found
    """
    scoring_data = []
    results = []
    nodata = []
    property_type_lookup = {
        "Flat": {"property-type": "Flat", "built-form": None},
        "Mid Terrace House": {"property-type": "House", "built-form": "Mid-Terrace"},
        "End Terrace House": {"property-type": "House", "built-form": "End-Terrace"},
        "Maisonnette": {"property-type": "Flat", "built-form": None},
        "Semi Detached House": {"property-type": "House", "built-form": "Semi-Detached"},
        "Detached House": {"property-type": "House", "built-form": "Detached"},
        "Coach House": {"property-type": "House", "built-form": "Detached"},
        "Bungalow": {"property-type": "Bungalow", "built-form": None},
        "Detached Bungalow": {"property-type": "Bungalow", "built-form": "Detached"},
        "House": {"property-type": "House", "built-form": None},
        "Semi Detached Bung": {"property-type": "Bungalow", "built-form": "Semi-Detached"},
        "Bedspace": {"property-type": None, "built-form": None},
        "Office Buildings": {"property-type": None, "built-form": None},
        "End Terrace Bungalow": {"property-type": "Bungalow", "built-form": "End-Terrace"},
        "Mid Terrace Bungalow": {"property-type": "Bungalow", "built-form": "Mid-Terrace"},
        "Bedsit": {"property-type": "Flat", "built-form": None},
        "Mid Terrace Housekeeping": {"property-type": "House", "built-form": "Mid-Terrace"},
        "Mid Terrace Housekeeping ": {"property-type": "House", "built-form": "Mid-Terrace"},
        "End Terrace Housex": {"property-type": "House", "built-form": "End-Terrace"},
        "Guest Room": {"property-type": None, "built-form": None}
    }
    # Iterating a DataFrame directly yields its column labels, not its rows, so the (index, row) pairs
    # have to come from iterrows(). Any other iterable of pairs is passed through untouched.
    rows = data.iterrows() if isinstance(data, pd.DataFrame) else data
    for _, property_meta in tqdm(rows, total=len(data)):
        searcher = SearchEpc(
            address1=property_meta["HouseNo"],
            postcode=property_meta["postcode"],
            auth_token=EPC_AUTH_TOKEN,
            os_api_key=None,
            full_address=property_meta["address"]
        )
        # The asset type from the asset list is used to seed the property type and built form
        asset_type = property_type_lookup[property_meta["T1_AssetType"]]
        searcher.ordnance_survey_client.property_type = asset_type["property-type"]
        searcher.ordnance_survey_client.built_form = asset_type["built-form"]
        searcher.find_property(skip_os=True)
        if searcher.newest_epc is None:
            nodata.append(property_meta)
            continue
        if searcher.newest_epc.get("estimated"):
            # We insert the row ID as our proxy for UPRN
            proxy_uprn = int(property_meta["row_id"].split("_")[1])
            searcher.newest_epc["uprn"] = proxy_uprn
        newest_epc = searcher.newest_epc
        older_epcs = searcher.older_epcs
        full_sap_epc = searcher.full_sap_epc
        # NOTE: falling back to the penultimate EPC when the newest one is not eligible is deliberately
        # disabled here; only the newest EPC is assessed
        eligibility = Eligibility(epc=newest_epc, cleaned=cleaned)
        eligibility.check_gbis_warmfront()
        eligibility.check_eco4_warmfront()
        # If the property is a cavity wall and it's filled, we produce an estimate for the age of the cavity
        # Loft MUST be suitable
        cavity_age = None
        if (
            eligibility.walls["is_cavity_wall"] and
            eligibility.walls["is_filled_cavity"] and
            eligibility.loft["suitability"] and
            eligibility.eco4_warmfront["message"] == "Failed due to full cavity - check cavity age"
        ):
            # We check the age of the cavity and if it's particularly old, we flag it
            cavity_age = calculate_cavity_age(newest_epc, older_epcs, cleaned)
        # Full checks
        eligibility.check_gbis()
        eligibility.check_eco4()
        if eligibility.eco4_warmfront["eligible"]:
            # Fill in the fields the model needs when the EPC does not provide them
            if eligibility.epc["uprn"] in ["", None]:
                eligibility.epc["uprn"] = int(property_meta["row_id"].split("_")[1])
            if eligibility.epc["construction-age-band"] in ["", None]:
                eligibility.epc["construction-age-band"] = map_year_to_age_band(property_meta["Build Yr"])
            # This is not the right place to do this but this is temp
            if eligibility.epc["extension-count"] in ["", None]:
                eligibility.epc["extension-count"] = 0
            # Not in the right place but temp
            if eligibility.epc["built-form"] in ["", None]:
                if not older_epcs:
                    eligibility.epc["built-form"] = "Mid-Terrace"
            scoring_dictionary = prepare_model_data_row(
                property_id=property_meta["row_id"],
                modelling_epc=eligibility.epc,
                cleaned=cleaned,
                cleaning_data=cleaning_data,
                created_at=created_at,
                old_data=older_epcs,
                full_sap_epc=full_sap_epc,
                photo_supply_lookup=photo_supply_lookup,
                floor_area_decile_thresholds=floor_area_decile_thresholds,
            )
            scoring_data.extend(scoring_dictionary)
        results.append(
            {
                "row_id": property_meta["row_id"],
                "uprn": eligibility.epc["uprn"],
                "Address": property_meta["T1_Address"],
                "Postcode": property_meta["postcode"],
                "property_type": eligibility.epc["property-type"],
                "gbis_eligible": eligibility.gbis_warmfront,
                "eco4_eligible": eligibility.eco4_warmfront["eligible"],
                "eco4_message": eligibility.eco4_warmfront["message"],
                "sap": float(eligibility.epc["current-energy-efficiency"]),
                "gbis_eligible_future": eligibility.gbis["eligible"],
                "gbis_eligible_future_message": eligibility.gbis["message"],
                "eco4_eligible_future": eligibility.eco4["eligible"],
                "eco4_eligible_future_message": eligibility.eco4["message"],
                # Property components
                "roof": eligibility.roof["clean_description"],
                "walls": eligibility.walls["clean_description"],
                "cavity_type": eligibility.cavity["type"],
                "heating": eligibility.epc["mainheat-description"],
                "tenure": eligibility.tenure,
                "date_epc": eligibility.epc["lodgement-date"],
                "cavity_age": cavity_age,
                **eligibility.walls,
                **eligibility.roof,
            }
        )
    scoring_df = pd.DataFrame(scoring_data)
    # Perform the same cleaning as in the model - first clean number of room variables though
    scoring_df = DataProcessor.apply_averages_cleaning(
        data_to_clean=scoring_df,
        cleaning_data=cleaning_data,
        cols_to_merge_on=['PROPERTY_TYPE', 'BUILT_FORM', 'CONSTRUCTION_AGE_BAND', 'LOCAL_AUTHORITY'],
        colnames=["NUMBER_HABITABLE_ROOMS", "NUMBER_HEATED_ROOMS"],
    )
    scoring_df = DataProcessor.apply_averages_cleaning(
        data_to_clean=scoring_df,
        cleaning_data=cleaning_data,
        cols_to_merge_on=COLUMNS_TO_MERGE_ON + ["LOCAL_AUTHORITY"],
    ).drop(columns=["LOCAL_AUTHORITY"])
    scoring_df = DataProcessor.clean_missings_after_description_process(
        scoring_df,
        ignore_cols=[c for c in scoring_df.columns if ("thermal_transmittance" in c) or (
            "insulation_thickness" in c) or ("ENERGY_EFF" in c)]
    )
    scoring_df = DataProcessor.clean_efficiency_variables(scoring_df)
    scoring_df["UPRN"] = scoring_df["UPRN"].astype(int)
    model_api = ModelApi(portfolio_id="ha33-eligibility", timestamp=created_at)
    all_predictions = model_api.predict_all(
        df=scoring_df,
        bucket="retrofit-data-dev",
        prediction_buckets={
            "sap_change_predictions": "retrofit-sap-predictions-dev",
            "heat_demand_predictions": "retrofit-heat-predictions-dev",
            "carbon_change_predictions": "retrofit-carbon-predictions-dev"
        }
    )
    predictions = all_predictions["sap_change_predictions"].copy()
    results_df = pd.DataFrame(results)
    predictions = predictions.rename(columns={"property_id": "row_id"}).merge(
        results_df[["row_id", "sap"]], how="left", on="row_id"
    )
    # Uplift per prediction row is predicted SAP minus current SAP; uplifts are summed per property
    predictions["sap_uplift"] = predictions["predictions"] - predictions["sap"]
    predictions = predictions.groupby("row_id")["sap_uplift"].sum().reset_index()
    results_df = results_df.merge(
        predictions[["sap_uplift", "row_id"]],
        how="left",
        on="row_id"
    )
    results_df["post_install_sap"] = results_df["sap"] + results_df["sap_uplift"]
    eligibility_assessment = []
    for _, row in results_df[results_df["eco4_eligible"] == True].iterrows():
        # The upgrade requirements are dependent on the current SAP
        # A current SAP of 38 or below (the F/G case in the original note) uses the lower set of
        # post-install thresholds; everything else uses the higher set
        if row["sap"] <= 38:
            if row["post_install_sap"] >= 57:
                eligibility_classification = "highest confidence"
            elif row["post_install_sap"] >= 55:
                eligibility_classification = "high confidence"
            elif row["post_install_sap"] >= 53:
                eligibility_classification = "medium confidence"
            else:
                eligibility_classification = "unlikely"
        else:
            if row["post_install_sap"] >= 71:
                eligibility_classification = "highest confidence"
            elif row["post_install_sap"] >= 69:
                eligibility_classification = "high confidence"
            elif row["post_install_sap"] >= 67:
                eligibility_classification = "medium confidence"
            else:
                eligibility_classification = "unlikely"
        eligibility_assessment.append(
            {
                "row_id": row["row_id"],
                "eligibility_classification": eligibility_classification
            }
        )
    # Explicit columns keep the merge key present even when no property was ECO4 eligible
    eligibility_assessment = pd.DataFrame(
        eligibility_assessment, columns=["row_id", "eligibility_classification"]
    )
    results_df = results_df.merge(
        eligibility_assessment, how="left", on="row_id"
    )
    return results_df, scoring_data, nodata
 def get_epc_data_for_lost_surveys(
    lost_identified_properties, cleaned, cleaning_data, created_at, photo_supply_lookup,
    floor_area_decile_thresholds
 ):
    """
    Run the EPC search, eligibility checks and model scoring for surveyed properties that could not be
    matched to the asset list. If the newest EPC fails both warmfront checks, the penultimate EPC is
    assessed instead.
    :param lost_identified_properties: DataFrame of unmatched survey rows (reads NO, ADDRESS 1, ADDRESS 2,
        ADDRESS 3, POSTCODE and PROPERTY TYPE). A row_id column is written onto this frame in place
    :param cleaned: Cleaned EPC lookup passed through to Eligibility and prepare_model_data_row
    :param cleaning_data: Cleaning dataset passed to prepare_model_data_row and DataProcessor
    :param created_at: Timestamp used for the scoring rows and the ModelApi run
    :param photo_supply_lookup: Passed through to prepare_model_data_row
    :param floor_area_decile_thresholds: Passed through to prepare_model_data_row
    :return: Tuple of (results_df, scoring_data, nodata) - the per-property results DataFrame, the raw
        list of scoring rows and the list of properties for which no EPC was found
    """
    lost_identified_properties["row_id"] = [
        f"lost_surveys_ha25_{i}" for i in range(len(lost_identified_properties))
    ]
    scoring_data = []
    results = []
    nodata = []
    # NOTE(review): every survey property type, including the flat/floor descriptions, maps to "House"
    # here - confirm that this is intended
    property_type_lookup = {
        "MID-TERRACE": {"property-type": "House", "built-form": "Mid-Terrace"},
        "N/A": {"property-type": "House", "built-form": None},
        "END-TERRACE": {"property-type": "House", "built-form": "End-Terrace"},
        "GROUND-FLOOR": {"property-type": "House", "built-form": None},
        "TOP-FLOOR": {"property-type": "House", "built-form": None},
        "SEMI-DETACHED": {"property-type": "House", "built-form": "Semi-Detached"},
        "MID-FLOOR": {"property-type": "House", "built-form": None},
        "TOP-FLOOR FLAT": {"property-type": "House", "built-form": None},
        "DETACHED": {"property-type": "House", "built-form": "Detached"},
        "MID-FLOOR FLAT": {"property-type": "House", "built-form": None},
        "SEMI- DETACHED": {"property-type": "House", "built-form": "Semi-Detached"},
        "NO EPC ON GOV": {"property-type": "House", "built-form": None},
        "Top-floor flat": {"property-type": "House", "built-form": None},
        "GROUND-FLOOR FLAT": {"property-type": "House", "built-form": None},
        "NOT ON GOV SITE": {"property-type": "House", "built-form": None}
    }
    for _, property_meta in tqdm(lost_identified_properties.iterrows(), total=len(lost_identified_properties)):
        # pd.isnull catches both None and the NaN that pandas uses for missing cells
        if pd.isnull(property_meta["POSTCODE"]):
            continue
        full_address = ", ".join(
            [str(x) for x in [
                property_meta["NO"], property_meta["ADDRESS 1"], property_meta["ADDRESS 2"], property_meta["ADDRESS 3"]
            ] if not pd.isnull(x)]
        )
        searcher = SearchEpc(
            address1=str(property_meta["NO"]),
            postcode=property_meta["POSTCODE"],
            auth_token=EPC_AUTH_TOKEN,
            os_api_key=None,
            full_address=full_address
        )
        property_type_key = property_meta["PROPERTY TYPE"]
        if not pd.isnull(property_type_key):
            survey_type = property_type_lookup[property_type_key.strip()]
            searcher.ordnance_survey_client.property_type = survey_type["property-type"]
            searcher.ordnance_survey_client.built_form = survey_type["built-form"]
        searcher.find_property(skip_os=True)
        if searcher.newest_epc is None:
            nodata.append(property_meta)
            continue
        if searcher.newest_epc.get("estimated"):
            # We insert the row ID as our proxy for UPRN
            proxy_uprn = int(property_meta["row_id"].split("_")[-1])
            searcher.newest_epc["uprn"] = proxy_uprn
        newest_epc = searcher.newest_epc
        older_epcs = searcher.older_epcs
        full_sap_epc = searcher.full_sap_epc
        # We also want to get the penultimate epc
        penultimate_epc, _ = searcher.filter_newest_epc(older_epcs)
        if not penultimate_epc:
            penultimate_epc = newest_epc
        eligibility = Eligibility(epc=newest_epc, cleaned=cleaned)
        eligibility.check_gbis_warmfront()
        eligibility.check_eco4_warmfront()
        if (not eligibility.eco4_warmfront["eligible"]) and (not eligibility.gbis_warmfront):
            eligibility = Eligibility(epc=penultimate_epc, cleaned=cleaned)
            eligibility.check_gbis_warmfront()
            eligibility.check_eco4_warmfront()
            # If this is the case, we need to update the older epcs
            # We don't update just to make data cleaning easier
            if penultimate_epc.get("estimated") is None:
                older_epcs = [x for x in searcher.data["rows"] if x["lmk-key"] != penultimate_epc["lmk-key"]]
        # Full checks
        eligibility.check_gbis()
        eligibility.check_eco4()
        # Only properties with a known construction age band are scored
        if eligibility.eco4_warmfront["eligible"] and (eligibility.epc["construction-age-band"] not in ["", None]):
            if eligibility.epc["uprn"] in ["", None]:
                # row_id looks like "lost_surveys_ha25_<n>", so the numeric part is the LAST token
                eligibility.epc["uprn"] = int(property_meta["row_id"].split("_")[-1])
            scoring_dictionary = prepare_model_data_row(
                property_id=property_meta["row_id"],
                modelling_epc=eligibility.epc,
                cleaned=cleaned,
                cleaning_data=cleaning_data,
                created_at=created_at,
                old_data=older_epcs,
                full_sap_epc=full_sap_epc,
                photo_supply_lookup=photo_supply_lookup,
                floor_area_decile_thresholds=floor_area_decile_thresholds,
            )
            scoring_data.extend(scoring_dictionary)
        results.append(
            {
                "row_id": property_meta["row_id"],
                "uprn": eligibility.epc["uprn"],
                "Address": property_meta["ADDRESS 1"],
                "Postcode": property_meta["POSTCODE"],
                "property_type": eligibility.epc["property-type"],
                "gbis_eligible": eligibility.gbis_warmfront,
                "eco4_eligible": eligibility.eco4_warmfront["eligible"],
                "eco4_message": eligibility.eco4_warmfront["message"],
                "sap": float(eligibility.epc["current-energy-efficiency"]),
                "gbis_eligible_future": eligibility.gbis["eligible"],
                "gbis_eligible_future_message": eligibility.gbis["message"],
                "eco4_eligible_future": eligibility.eco4["eligible"],
                "eco4_eligible_future_message": eligibility.eco4["message"],
                # Property components
                "roof": eligibility.roof["clean_description"],
                "walls": eligibility.walls["clean_description"],
                "cavity_type": eligibility.cavity["type"],
                "heating": eligibility.epc["mainheat-description"],
                "tenure": eligibility.tenure,
                "date_epc": eligibility.epc["lodgement-date"],
                **eligibility.walls,
                **eligibility.roof,
            }
        )
    scoring_df = pd.DataFrame(scoring_data)
    # Perform the same cleaning as in the model - first clean number of room variables though
    scoring_df = DataProcessor.apply_averages_cleaning(
        data_to_clean=scoring_df,
        cleaning_data=cleaning_data,
        cols_to_merge_on=['PROPERTY_TYPE', 'BUILT_FORM', 'CONSTRUCTION_AGE_BAND', 'LOCAL_AUTHORITY'],
        colnames=["NUMBER_HABITABLE_ROOMS", "NUMBER_HEATED_ROOMS"],
    )
    scoring_df = DataProcessor.apply_averages_cleaning(
        data_to_clean=scoring_df,
        cleaning_data=cleaning_data,
        cols_to_merge_on=COLUMNS_TO_MERGE_ON + ["LOCAL_AUTHORITY"],
    ).drop(columns=["LOCAL_AUTHORITY"])
    scoring_df = DataProcessor.clean_missings_after_description_process(
        scoring_df,
        ignore_cols=[c for c in scoring_df.columns if ("thermal_transmittance" in c) or (
            "insulation_thickness" in c) or ("ENERGY_EFF" in c)]
    )
    scoring_df = DataProcessor.clean_efficiency_variables(scoring_df)
    scoring_df["UPRN"] = scoring_df["UPRN"].astype(int)
    model_api = ModelApi(portfolio_id="ha33-eligibility", timestamp=created_at)
    all_predictions = model_api.predict_all(
        df=scoring_df,
        bucket="retrofit-data-dev",
        prediction_buckets={
            "sap_change_predictions": "retrofit-sap-predictions-dev",
            "heat_demand_predictions": "retrofit-heat-predictions-dev",
            "carbon_change_predictions": "retrofit-carbon-predictions-dev"
        }
    )
    predictions = all_predictions["sap_change_predictions"].copy()
    results_df = pd.DataFrame(results)
    predictions = predictions.rename(columns={"property_id": "row_id"}).merge(
        results_df[["row_id", "sap"]], how="left", on="row_id"
    )
    # Uplift per prediction row is predicted SAP minus current SAP; uplifts are summed per property
    predictions["sap_uplift"] = predictions["predictions"] - predictions["sap"]
    predictions = predictions.groupby("row_id")["sap_uplift"].sum().reset_index()
    results_df = results_df.merge(
        predictions[["sap_uplift", "row_id"]],
        how="left",
        on="row_id"
    )
    results_df["post_install_sap"] = results_df["sap"] + results_df["sap_uplift"]
    eligibility_assessment = []
    for _, row in results_df[results_df["eco4_eligible"] == True].iterrows():
        # The upgrade requirements are dependent on the current SAP
        # A current SAP of 38 or below (the F/G case in the original note) uses the lower set of
        # post-install thresholds; everything else uses the higher set
        if row["sap"] <= 38:
            if row["post_install_sap"] >= 57:
                eligibility_classification = "highest confidence"
            elif row["post_install_sap"] >= 55:
                eligibility_classification = "high confidence"
            elif row["post_install_sap"] >= 53:
                eligibility_classification = "medium confidence"
            else:
                eligibility_classification = "unlikely"
        else:
            if row["post_install_sap"] >= 71:
                eligibility_classification = "highest confidence"
            elif row["post_install_sap"] >= 69:
                eligibility_classification = "high confidence"
            elif row["post_install_sap"] >= 67:
                eligibility_classification = "medium confidence"
            else:
                eligibility_classification = "unlikely"
        eligibility_assessment.append(
            {
                "row_id": row["row_id"],
                "eligibility_classification": eligibility_classification
            }
        )
    # Explicit columns keep the merge key present even when no property was ECO4 eligible
    eligibility_assessment = pd.DataFrame(
        eligibility_assessment, columns=["row_id", "eligibility_classification"]
    )
    results_df = results_df.merge(
        eligibility_assessment, how="left", on="row_id"
    )
    return results_df, scoring_data, nodata
 def analyse_results(results_df, data, eco4_prospects_survey_list):
    """
    Exploratory breakdown of the eligibility results for the asset list. Joins the results onto the
    asset list and the survey list, derives a numeric roof insulation thickness and slices out the
    warmfront-identified, "ideal" ECO4 and remaining GBIS groups. Nothing is returned; the slices only
    exist as local variables for inspection.
    :param results_df: Per-property results, keyed on row_id
    :param data: Asset list with row_id, survey_key and warmfront_identified columns
    :param eco4_prospects_survey_list: Survey list with survey_key, ADDRESS 1, NO and POSTCODE columns
    """
    survey_columns = ["survey_key", "ADDRESS 1", "NO", "POSTCODE"]
    asset_keys = data[["row_id", "survey_key", "warmfront_identified"]]
    analysis_data = asset_keys.merge(results_df, how="left", on="row_id")
    analysis_data = analysis_data.merge(
        eco4_prospects_survey_list[survey_columns], how="left", on="survey_key"
    )
    # NEW
    # Missing thickness values are normalised to None before the numeric conversion
    raw_thickness = analysis_data["roof_insulation_thickness"]
    analysis_data["roof_insulation_thickness"] = np.where(pd.isnull(raw_thickness), None, raw_thickness)
    analysis_data["roof_insulation_thickness_numeric"] = analysis_data["roof_insulation_thickness"].apply(
        lambda thickness: convert_thickness_to_numeric(thickness, is_flat=False, is_pitched=True)
    )
    is_warmfront = analysis_data["warmfront_identified"] == True
    warmfront_identified = analysis_data[is_warmfront]  # 2204
    # Because we don't know which property is for which scheme, we'll just look at what we found
    is_eco4 = analysis_data["eco4_eligible"] == True
    has_thin_roof = analysis_data["roof_insulation_thickness_numeric"] <= 100
    has_low_sap = analysis_data["sap"] <= 54
    ideal_eco4 = analysis_data[is_eco4 & has_thin_roof & has_low_sap]  # 335
    # GBIS is whatever is GBIS eligible and not already counted as an ideal ECO4 property
    is_gbis = analysis_data["gbis_eligible"] == True
    in_ideal_eco4 = analysis_data["row_id"].isin(ideal_eco4["row_id"].values)
    gbis = analysis_data[is_gbis & ~in_ideal_eco4]
    ideal_eco4 = ideal_eco4[ideal_eco4["sap"] <= 54]
 def analyse_lost_surveys(results_df):
    """
    Exploratory breakdown of the results for the surveys that were not matched to the asset list.
    Derives a numeric roof insulation thickness (written onto results_df in place) and slices out the
    "ideal" ECO4 and remaining GBIS groups. Nothing is returned; the slices only exist as local
    variables for inspection.
    :param results_df: Per-property results with roof_insulation_thickness, eco4_eligible,
        gbis_eligible, sap and row_id columns
    """
    # Missing thickness values are normalised to None before the numeric conversion
    raw_thickness = results_df["roof_insulation_thickness"]
    results_df["roof_insulation_thickness"] = np.where(pd.isnull(raw_thickness), None, raw_thickness)
    results_df["roof_insulation_thickness_numeric"] = results_df["roof_insulation_thickness"].apply(
        lambda thickness: convert_thickness_to_numeric(thickness, is_flat=False, is_pitched=True)
    )
    is_eco4 = results_df["eco4_eligible"] == True
    has_thin_roof = results_df["roof_insulation_thickness_numeric"] <= 100
    has_low_sap = results_df["sap"] <= 54
    ideal_eco4 = results_df[is_eco4 & has_thin_roof & has_low_sap]  # 25
    # GBIS is whatever is GBIS eligible and not already counted as an ideal ECO4 property
    is_gbis = results_df["gbis_eligible"] == True
    in_ideal_eco4 = results_df["row_id"].isin(ideal_eco4["row_id"].values)
    gbis = results_df[is_gbis & ~in_ideal_eco4]  # 82
 def app():
    """
    Entry point for the eligibility run: loads the asset list and survey data, pulls the cleaned EPC
    lookup and cleaning dataset from S3, runs get_epc_data and then reloads previously pickled outputs.
    """
    data, eco4_prospects_survey_list, lost_identified_properties = load_data()
    # Each asset list row gets a synthetic id of the form "ha25_<position>"
    data["row_id"] = ["ha25_" + str(i) for i in range(0, len(data))]
    cleaned = read_from_s3(
        s3_file_name="cleaned_epc_data/cleaned.bson",
        bucket_name="retrofit-data-dev"
    )
    # The S3 payload is decoded with msgpack
    cleaned = msgpack.unpackb(cleaned, raw=False)
    cleaning_data = read_dataframe_from_s3_parquet(
        bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet",
    )
    created_at = datetime.now().isoformat()
    photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket="retrofit-data-dev")
    results_df, scoring_data, nodata = get_epc_data(
        data, cleaned, cleaning_data, created_at, photo_supply_lookup, floor_area_decile_thresholds
    )
    # Pickle the outputs
    # Old data was ha25.pickle
    # import pickle
    # with open("ha25_10_jan.pickle", "wb") as f:
    #     pickle.dump(
    #         {
    #             "results_df": results_df,
    #             "scoring_data": scoring_data,
    #             "nodata": nodata
    #         },
    #         f
    #     )
    # Load in pickle
    # NOTE(review): the values loaded below overwrite the results_df, scoring_data and nodata that
    # get_epc_data has just returned, so the fresh run above is discarded - confirm this is intended
    # NOTE(review): pickle.load must only ever be used on trusted, locally produced files
    import pickle
    with open("ha25_10_jan.pickle", "rb") as f:
        saved = pickle.load(f)
    results_df = saved["results_df"]
    scoring_data = saved["scoring_data"]
    nodata = saved["nodata"]
--- a/etl/eligibility/ha_15_32/ha33_app.py
+++ b/etl/eligibility/ha_15_32/ha33_app.py
@ -264,21 +264,21 @@ def get_ha_33data(data, cleaned, cleaning_data, created_at):
 def analyse_ha_33(results_df, data):
-    results_df_social = results_df[results_df["tenure"] == "Rented (social)"]
+    # results_df_social = results_df[results_df["tenure"] == "Rented (social)"]
    #
    # results_df_social["tenure"].value_counts()
-    results_df_social["tenure"].value_counts()
+    data[data["row_id"].isin(results_df["row_id"].values)]["PROPERTY TYPE"].value_counts()
-    data[data["row_id"].isin(results_df_social["row_id"].values)]["PROPERTY TYPE"].value_counts()
+    n_identified = (results_df["gbis_eligible"] | results_df["eco4_eligible"]).sum()
    n_eco4 = results_df["eco4_eligible"].sum()
    n_gbis = results_df[~results_df["eco4_eligible"]]["gbis_eligible"].sum()
-    n_identified = (results_df_social["gbis_eligible"] | results_df_social["eco4_eligible"]).sum()
+    eco_eligibile = results_df[results_df["eco4_eligible"]]
    n_eco4 = results_df_social["eco4_eligible"].sum()
    n_gbis = results_df_social[~results_df_social["eco4_eligible"]]["gbis_eligible"].sum()
    eco_eligibile = results_df_social[results_df_social["eco4_eligible"]]
    eco_eligibile["walls"].value_counts()
    eco_eligibile["roof"].value_counts()
-    results_df_social[results_df_social["gbis_eligible"] | results_df_social["eco4_eligible"]]["tenure"].value_counts()
+    results_df[results_df["gbis_eligible"] | results_df["eco4_eligible"]]["tenure"].value_counts()
    results_df_social["eligibility_classification"].value_counts()
@ -316,3 +316,11 @@ def app():
    created_at = datetime.now().isoformat()
    results_df, _, _ = get_ha_33data(data, cleaned, cleaning_data, created_at)
    # Read in
    import pickle
    with open("ha33_results.pickle", "rb") as f:
        data = pickle.load(f)
    results_df = pd.DataFrame(data["results"])
    scoring_data = data["scoring_data"]
    nodata = data["nodata"]
--- a/etl/eligibility/ha_15_32/ha4_app.py
+++ b/etl/eligibility/ha_15_32/ha4_app.py
@ -0,0 +1,328 @@
 import os
 import msgpack
 from pathlib import Path
 from datetime import datetime
 import numpy as np
 import pandas as pd
 from utils.s3 import read_from_s3
 from utils.logger import setup_logger
 from dotenv import load_dotenv
 from utils.s3 import read_dataframe_from_s3_parquet
 from tqdm import tqdm
 from backend.SearchEpc import SearchEpc
 from etl.eligibility.Eligibility import Eligibility
 from etl.eligibility.ha_15_32.app import prepare_model_data_row
 from etl.epc.DataProcessor import DataProcessor
 from etl.epc.settings import COLUMNS_TO_MERGE_ON
 from backend.ml_models.api import ModelApi
 from etl.solar.SolarPhotoSupply import SolarPhotoSupply
 from recommendations.recommendation_utils import calculate_cavity_age
 from recommendation_utils import convert_thickness_to_numeric
 import re
 # NOTE(review): this path is built relative to the directory containing this module and then appends
 # "etl/eligibility/ha_15_32" again - confirm this is where the .env file actually lives
 ENV_FILE = Path(__file__).parent / "etl" / "eligibility" / "ha_15_32" / ".env"
 logger = setup_logger()
 # The .env file must be loaded BEFORE any variable is read from the environment, otherwise a token that is
 # only defined in the .env file is read as None
 load_dotenv(ENV_FILE)
 EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
 def load_ha_4():
    """
    Read the HA 4 asset list from its CSV file.

    As a side effect, the pandas display options are widened so that large frames can be inspected
    interactively.

    :return: The asset list as a DataFrame
    """
    display_settings = {
        'display.max_rows': 500,
        'display.max_columns': 500,
        'display.width': 1000,
    }
    for option_name, option_value in display_settings.items():
        pd.set_option(option_name, option_value)
    return pd.read_csv("etl/eligibility/ha_15_32/HA 4 Asset List.csv", low_memory=False)
 def standardise_ha_4(data):
    """
    Standardise the HA 4 asset list so that it can be used to search for EPCs.

    :param data: The asset list; the "Location Name" and "Post Code" columns are read
    :return: A cleaned copy of the asset list - the frame passed in is left untouched
    """
    # Work on a copy so that the caller's frame is never modified in place
    data = data.copy()
    # Location name contains some strings like {0664} which we remove. The pattern is a raw string because
    # "\{" is not a valid escape sequence in a normal string literal
    data["Location Name"] = data["Location Name"].str.replace(r"\{.*?\}", "", regex=True)
    # Trim whitespace from either end of location name
    data["Location Name"] = data["Location Name"].str.strip()
    # Remove any unusable postcodes - these are rows whose postcode is exactly two backslashes
    data = data[data["Post Code"] != "\\\\"].copy()
    # Some specific replacements
    data["Location Name"] = np.where(
        data["Location Name"] == "Calderbrook Pl & Cog La",
        "Calderbrook Place",
        data["Location Name"]
    )
    return data
 def _ha4_confidence_band(sap, post_install_sap):
    """
    Classify how confident we are that a property reaches the required SAP score after the install.

    :param sap: The current SAP score of the property
    :param post_install_sap: The predicted SAP score once the measures have been installed
    :return: One of "highest confidence", "high confidence", "medium confidence" or "unlikely"
    """
    # The upgrade requirements are dependent on the current SAP: properties with a SAP of 38 or below are
    # assessed against the lower set of thresholds
    thresholds = (57, 55, 53) if sap <= 38 else (71, 69, 67)
    labels = ("highest confidence", "high confidence", "medium confidence")
    for threshold, label in zip(thresholds, labels):
        if post_install_sap >= threshold:
            return label
    return "unlikely"
 def get_ha_4_data(data, cleaned, cleaning_data, created_at, photo_supply_lookup, floor_area_decile_thresholds):
    """
    Search for the EPCs of every entry in the HA 4 asset list, check the eligibility of each dwelling that is
    found and score the ECO4 eligible dwellings with the SAP change model.

    :param data: The asset list; "Address Line 1", "Location Name" and "Post Code" are read from each row
    :param cleaned: The cleaned EPC lookup, passed to Eligibility and prepare_model_data_row
    :param cleaning_data: The averages dataset used to fill missing values in the scoring data
    :param created_at: Timestamp passed to prepare_model_data_row and ModelApi
    :param photo_supply_lookup: Forwarded to prepare_model_data_row
    :param floor_area_decile_thresholds: Forwarded to prepare_model_data_row
    :return: A tuple of (results_df, scoring_data, nodata)
    """
    scoring_data = []
    results = []
    nodata = []
    for _, property_meta in tqdm(data.iterrows(), total=len(data)):
        # For many of the entries in this dataset, we're actually given an entire building, so we collect the
        # EPCs for every dwelling in the building
        # The search is not filtered by property type: this module defines no archetype lookup for HA 4
        searcher = SearchEpc(
            address1=property_meta["Address Line 1"],
            postcode=property_meta["Post Code"],
            auth_token=EPC_AUTH_TOKEN,
            os_api_key=None,
            property_type=None,
        )
        searcher.find_property(skip_os=True)
        if searcher.newest_epc is None:
            # Nothing found for the address line, so we fall back to the location name
            searcher = SearchEpc(
                address1=property_meta["Location Name"],
                postcode=property_meta["Post Code"],
                auth_token=EPC_AUTH_TOKEN,
                os_api_key=None,
                property_type=None,
            )
            searcher.search()
        # NOTE(review): the fallback above only calls search(); confirm that search() populates newest_epc,
        # otherwise every fallback ends up in nodata here
        if searcher.newest_epc is None:
            nodata.append(property_meta.to_dict())
            continue
        searcher.search()
        if searcher.data is None:
            nodata.append(property_meta.to_dict())
            continue
        epcs = pd.DataFrame(searcher.data["rows"])
        # Take the newest EPC by UPRN
        epcs = epcs.sort_values(by=["lodgement-date"], ascending=False)
        newest_epcs = epcs.drop_duplicates(subset=["uprn"], keep="first")
        # For each EPC, we now check eligibility
        for _, epc in newest_epcs.iterrows():
            eligibility = Eligibility(epc=epc.to_dict(), cleaned=cleaned)
            eligibility.check_gbis_warmfront()
            eligibility.check_eco4_warmfront()
            # The full gbis and eco4 checks are always run, they feed the "future" columns of the results
            eligibility.check_gbis()
            eligibility.check_eco4()
            if eligibility.eco4_warmfront["eligible"]:
                same_dwelling = epcs["uprn"] == epc["uprn"]
                # The older EPCs are every other certificate lodged against the same UPRN
                old_data = epcs[same_dwelling & (epcs["lmk-key"] != epc["lmk-key"])].to_dict("records")
                full_sap_epc = epcs[
                    same_dwelling & (epcs["transaction-type"] == "new dwelling")
                    ].to_dict("records")
                scoring_dictionary = prepare_model_data_row(
                    property_id=eligibility.epc["uprn"],
                    modelling_epc=eligibility.epc,
                    cleaned=cleaned,
                    cleaning_data=cleaning_data,
                    created_at=created_at,
                    old_data=old_data,
                    full_sap_epc=full_sap_epc,
                    photo_supply_lookup=photo_supply_lookup,
                    floor_area_decile_thresholds=floor_area_decile_thresholds
                )
                scoring_data.extend(scoring_dictionary)
            results.append(
                {
                    "uprn": epc["uprn"],
                    "Location Name": property_meta["Location Name"],
                    "Post Code": property_meta["Post Code"],
                    "property_type": eligibility.epc["property-type"],
                    "gbis_eligible": eligibility.gbis_warmfront,
                    "eco4_eligible": eligibility.eco4_warmfront["eligible"],
                    "eco4_message": eligibility.eco4_warmfront["message"],
                    "sap": float(eligibility.epc["current-energy-efficiency"]),
                    "gbis_eligible_future": eligibility.gbis["eligible"],
                    "gbis_eligible_future_message": eligibility.gbis["message"],
                    "eco4_eligible_future": eligibility.eco4["eligible"],
                    "eco4_eligible_future_message": eligibility.eco4["message"],
                    # Property components
                    "roof": eligibility.roof["clean_description"],
                    "walls": eligibility.walls["clean_description"],
                    "cavity_type": eligibility.cavity["type"],
                    "heating": eligibility.epc["mainheat-description"],
                    "tenure": eligibility.tenure,
                    "date_epc": eligibility.epc["lodgement-date"],
                }
            )
    scoring_df = pd.DataFrame(scoring_data)
    # Perform the same cleaning as in the model - first clean number of room variables though
    scoring_df = DataProcessor.apply_averages_cleaning(
        data_to_clean=scoring_df,
        cleaning_data=cleaning_data,
        cols_to_merge_on=['PROPERTY_TYPE', 'BUILT_FORM', 'CONSTRUCTION_AGE_BAND', 'LOCAL_AUTHORITY'],
        colnames=["NUMBER_HABITABLE_ROOMS", "NUMBER_HEATED_ROOMS"],
    )
    scoring_df = DataProcessor.apply_averages_cleaning(
        data_to_clean=scoring_df,
        cleaning_data=cleaning_data,
        cols_to_merge_on=COLUMNS_TO_MERGE_ON + ["LOCAL_AUTHORITY"],
    ).drop(columns=["LOCAL_AUTHORITY"])
    scoring_df = DataProcessor.clean_missings_after_description_process(
        scoring_df,
        ignore_cols=[c for c in scoring_df.columns if ("thermal_transmittance" in c) or (
            "insulation_thickness" in c) or ("ENERGY_EFF" in c)]
    )
    scoring_df = DataProcessor.clean_efficiency_variables(scoring_df)
    # NOTE(review): the portfolio id is "ha33-eligibility" although this is the HA 4 run - confirm
    model_api = ModelApi(portfolio_id="ha33-eligibility", timestamp=created_at)
    all_predictions = model_api.predict_all(
        df=scoring_df,
        bucket="retrofit-data-dev",
        prediction_buckets={
            "sap_change_predictions": "retrofit-sap-predictions-dev",
            "heat_demand_predictions": "retrofit-heat-predictions-dev",
            "carbon_change_predictions": "retrofit-carbon-predictions-dev"
        }
    )
    predictions = all_predictions["sap_change_predictions"].copy()
    results_df = pd.DataFrame(results)
    predictions = predictions.rename(columns={"property_id": "uprn"}).merge(
        results_df[["uprn", "sap"]], how="left", on="uprn"
    )
    # The uplift of every prediction row is summed to give a single uplift per UPRN
    predictions["sap_uplift"] = predictions["predictions"] - predictions["sap"]
    predictions = predictions.groupby("uprn")["sap_uplift"].sum().reset_index()
    results_df = results_df.merge(
        predictions[["sap_uplift", "uprn"]],
        how="left",
        on="uprn"
    )
    results_df["post_install_sap"] = results_df["sap"] + results_df["sap_uplift"]
    results_df = results_df[~pd.isnull(results_df["uprn"])]
    eco4_rows = results_df[results_df["eco4_eligible"] == True]
    # The columns are given explicitly so that the merge below still finds "uprn" when nothing is eligible
    eligibility_assessment = pd.DataFrame(
        [
            {
                "uprn": row["uprn"],
                "eligibility_classification": _ha4_confidence_band(row["sap"], row["post_install_sap"])
            }
            for _, row in eco4_rows.iterrows()
        ],
        columns=["uprn", "eligibility_classification"]
    )
    results_df = results_df.merge(
        eligibility_assessment, how="left", on="uprn"
    )
    # We have some properties that are duplicated so we take just one instance
    results_df = results_df.drop_duplicates(subset=["uprn"])
    return results_df, scoring_data, nodata
 def analyse_ha_4(results_df, data):
    """
    Exploratory summary of the HA 4 results. Every figure is computed into a local variable for interactive
    inspection; nothing is returned.

    :param results_df: The results frame with the current and future eligibility flags
    :param data: The asset list (not used here)
    """
    gbis_now = results_df["gbis_eligible"]
    eco4_now = results_df["eco4_eligible"]
    identified_now = gbis_now | eco4_now
    # Headline counts - gbis is only counted where the property is not already eco4 eligible
    n_identified = identified_now.sum()
    n_eco4 = eco4_now.sum()
    n_gbis = results_df[~eco4_now]["gbis_eligible"].sum()
    eco4_rows = results_df[eco4_now]
    eco4_rows["eligibility_classification"].value_counts()
    # Properties that are not identified today but pass one of the full checks
    eco4_later = results_df["eco4_eligible_future"] == True
    eco4_not_later = results_df["eco4_eligible_future"] == False
    gbis_later = results_df["gbis_eligible_future"] == True
    not_identified = ~identified_now
    future_possibilities_eco = results_df[eco4_later & not_identified].copy()
    future_possibilities_gbis = results_df[gbis_later & eco4_not_later & not_identified].copy()
    total_future_possibilities = len(future_possibilities_eco.index) + len(future_possibilities_gbis.index)
 def app():
    """
    Run the HA 4 eligibility process end to end: load and standardise the asset list, fetch the cleaning
    inputs from S3 and produce the results through get_ha_4_data.
    """
    bucket = "retrofit-data-dev"
    data = standardise_ha_4(load_ha_4())
    # Every row gets an identifier of the form h4<position>
    data["row_id"] = [f"h4{position}" for position in range(len(data))]
    packed_cleaned = read_from_s3(s3_file_name="cleaned_epc_data/cleaned.bson", bucket_name=bucket)
    cleaned = msgpack.unpackb(packed_cleaned, raw=False)
    cleaning_data = read_dataframe_from_s3_parquet(
        bucket_name=bucket,
        file_key="sap_change_model/cleaning_dataset.parquet",
    )
    created_at = datetime.now().isoformat()
    photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket=bucket)
    results_df, scoring_data, nodata = get_ha_4_data(
        data, cleaned, cleaning_data, created_at, photo_supply_lookup, floor_area_decile_thresholds
    )
    # Scratch snippet - store the outputs locally as a pickle
    # import pickle
    # with open("ha_4.pickle", "wb") as f:
    #     pickle.dump(
    #         {
    #             "results_df": results_df,
    #             "scoring_data": scoring_data,
    #             "nodata": nodata
    #         }, f)
    # Scratch snippet - read the outputs back in
    # import pickle
    # with open("ha_4.pickle", "rb") as f:
    #     data = pickle.load(f)
    # results_df = data["results_df"]
    # scoring_data = data["scoring_data"]
    # nodata = data["nodata"]
--- a/etl/eligibility/ha_15_32/ha7_app.py
+++ b/etl/eligibility/ha_15_32/ha7_app.py
@ -0,0 +1,383 @@
 import os
 import msgpack
 import openpyxl
 from openpyxl.styles.colors import COLOR_INDEX
 from pathlib import Path
 from datetime import datetime
 import pandas as pd
 import numpy as np
 from utils.s3 import read_from_s3, read_dataframe_from_s3_parquet
 from utils.logger import setup_logger
 from dotenv import load_dotenv
 from tqdm import tqdm
 from backend.SearchEpc import SearchEpc
 from etl.eligibility.Eligibility import Eligibility
 from etl.eligibility.ha_15_32.app import prepare_model_data_row
 from etl.epc.DataProcessor import DataProcessor
 from etl.epc.settings import COLUMNS_TO_MERGE_ON
 from backend.ml_models.api import ModelApi
 from etl.solar.SolarPhotoSupply import SolarPhotoSupply
 from recommendations.recommendation_utils import calculate_cavity_age
 from recommendation_utils import convert_thickness_to_numeric
 # Location of the .env file holding the API credentials
 # NOTE(review): the path is built relative to the directory containing this module and then appends
 # "etl/eligibility/ha_15_32" again - confirm this is where the .env file actually lives
 ENV_FILE = Path(__file__).parent / "etl" / "eligibility" / "ha_15_32" / ".env"
 logger = setup_logger()
 # The .env file is loaded first so that the variables read below can come from it
 load_dotenv(ENV_FILE)
 EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
 OS_API_KEY = os.getenv("ORDNANCE_SURVEY_API_KEY")
 def _resolve_fill_colour(colour_index):
    """
    Turn the colour "index" of a cell fill into a colour string, or None when the cell has no fill.

    :param colour_index: The value of fill.start_color.index - either a position in COLOR_INDEX or a
        colour string
    :return: The colour string, or None for an unfilled cell
    """
    if colour_index is None or colour_index == '00000000':
        return None
    if isinstance(colour_index, int):
        # Indexed colours are looked up in the openpyxl palette
        return COLOR_INDEX[colour_index]
    # Anything else is already a colour string and cannot be used to index the palette
    return colour_index
 def load_data():
    """
    Load the data from the excel, recording the fill colour of the first cell of every row.

    The colour is translated into "row_colour_name" and "row_code". Rows whose first cell has no fill get
    None in both columns.

    :return: The asset list as a DataFrame
    """
    workbook = openpyxl.load_workbook('etl/eligibility/ha_15_32/HESTIA - HA 7 ASSET LIST.xlsx')
    sheet = workbook.active
    # Prepare lists to collect rows data and their colors
    rows_data = []
    rows_colors = []
    for row in sheet.iter_rows(min_row=2, values_only=False):  # Assuming the first row is headers
        rows_data.append([cell.value for cell in row])
        rows_colors.append(_resolve_fill_colour(row[0].fill.start_color.index))
    df = pd.DataFrame(rows_data, columns=[cell.value for cell in sheet[1]])
    # Add the row colors as a new column
    df['row_color'] = rows_colors
    # Name the ninth column. The labels are reassigned as a whole rather than written into the index buffer
    column_names = list(df.columns)
    column_names[8] = "is_active"
    df.columns = column_names
    # Remove None columns
    df = df.dropna(axis=1, how='all')
    # We now parse the colours - any filled colour that is not listed here is treated as yellow
    colour_names = {"0000FFFF": "red", "00FF00FF": "green"}
    df["row_colour_name"] = [
        None if colour is None else colour_names.get(colour, "yellow") for colour in rows_colors
    ]
    row_codes = {"red": "invalid", "green": "potential ECO4", "yellow": "needs criteria change"}
    df["row_code"] = [row_codes.get(colour_name) for colour_name in df["row_colour_name"]]
    return df
 def _ha7_confidence_band(sap, post_install_sap):
    """
    Classify how confident we are that a property reaches the required SAP score after the install.

    :param sap: The current SAP score of the property
    :param post_install_sap: The predicted SAP score once the measures have been installed
    :return: One of "highest confidence", "high confidence", "medium confidence" or "unlikely"
    """
    # The upgrade requirements are dependent on the current SAP: properties with a SAP of 38 or below are
    # assessed against the lower set of thresholds
    thresholds = (57, 55, 53) if sap <= 38 else (71, 69, 67)
    labels = ("highest confidence", "high confidence", "medium confidence")
    for threshold, label in zip(thresholds, labels):
        if post_install_sap >= threshold:
            return label
    return "unlikely"
 def get_ha7_data(data, cleaned, cleaning_data, created_at, photo_supply_lookup, floor_area_decile_thresholds):
    """
    Find the newest EPC for every property in the HA 7 asset list, check its eligibility and score the ECO4
    eligible properties with the SAP change model.

    :param data: The asset list; "Address", "Address2", "Postcode", "Archetype" and "row_id" are read
    :param cleaned: The cleaned EPC lookup, passed to Eligibility, calculate_cavity_age and
        prepare_model_data_row
    :param cleaning_data: The averages dataset used to fill missing values in the scoring data
    :param created_at: Timestamp passed to prepare_model_data_row and ModelApi
    :param photo_supply_lookup: Forwarded to prepare_model_data_row
    :param floor_area_decile_thresholds: Forwarded to prepare_model_data_row
    :return: A tuple of (results_df, scoring_data, nodata) where nodata holds the row ids with no EPC
    """
    property_type_lookup = {
        # "Mid Terrace": "Mid-Terrace",
        # "End Terrace": "End-Terrace",
        # "Semi Detached": "Semi-Detached",
        # "Detached": "Detached",
        "House": "House",
        "Flat": "Flat",
        "Bungalow": "Bungalow",
        "Maisonette": "Maisonette",
    }
    scoring_data = []
    results = []
    nodata = []
    for _, house in tqdm(data.iterrows(), total=len(data)):
        # Fall back to the second address field when the first one is empty
        address = house["Address"] or house["Address2"]
        searcher = SearchEpc(
            address1=address,
            postcode=house["Postcode"],
            auth_token=EPC_AUTH_TOKEN,
            os_api_key=None,
            property_type=property_type_lookup.get(house["Archetype"]),
        )
        searcher.find_property(skip_os=True)
        if searcher.newest_epc is None:
            nodata.append(house["row_id"])
            continue
        newest_epc = searcher.newest_epc
        older_epcs = searcher.older_epcs
        full_sap_epc = searcher.full_sap_epc
        eligibility = Eligibility(epc=newest_epc, cleaned=cleaned)
        eligibility.check_gbis_warmfront()
        eligibility.check_eco4_warmfront()
        # If the property is a cavity wall and it's filled, we produce an estimate for the age of the cavity
        # Loft MUST be suitable
        cavity_age = None
        if (
            eligibility.walls["is_cavity_wall"] and
            eligibility.walls["is_filled_cavity"] and
            eligibility.loft["suitability"] and
            eligibility.eco4_warmfront["message"] == "Failed due to full cavity - check cavity age"
        ):
            # We check the age of the cavity and if it's particularly old, we flag it
            cavity_age = calculate_cavity_age(newest_epc, older_epcs, cleaned)
        # The full gbis and eco4 checks are always run, they feed the "future" columns of the results
        eligibility.check_gbis()
        eligibility.check_eco4()
        if eligibility.eco4_warmfront["eligible"]:
            scoring_dictionary = prepare_model_data_row(
                property_id=house["row_id"],
                modelling_epc=eligibility.epc,
                cleaned=cleaned,
                cleaning_data=cleaning_data,
                created_at=created_at,
                old_data=older_epcs,
                full_sap_epc=full_sap_epc,
                photo_supply_lookup=photo_supply_lookup,
                floor_area_decile_thresholds=floor_area_decile_thresholds
            )
            scoring_data.extend(scoring_dictionary)
        # Whatever the outcome of the checks, we make a record of the property
        # NOTE(review): "address" records house["Address"] even when the search used "Address2" - confirm
        results.append(
            {
                "row_id": house["row_id"],
                "address": house["Address"],
                "postcode": house["Postcode"],
                "gbis_eligible": eligibility.gbis_warmfront,
                "eco4_eligible": eligibility.eco4_warmfront["eligible"],
                "eco4_message": eligibility.eco4_warmfront["message"],
                "sap": float(eligibility.epc["current-energy-efficiency"]),
                "gbis_eligible_future": eligibility.gbis["eligible"],
                "gbis_eligible_future_message": eligibility.gbis["message"],
                "eco4_eligible_future": eligibility.eco4["eligible"],
                "eco4_eligible_future_message": eligibility.eco4["message"],
                # Property components
                "roof": eligibility.roof["clean_description"],
                "walls": eligibility.walls["clean_description"],
                "heating": eligibility.epc["mainheat-description"],
                "tenure": eligibility.tenure,
                "date_epc": eligibility.epc["lodgement-date"],
                # The unpacked dictionaries come later, so any key they share with the entries above wins
                **newest_epc,
                "cavity_age": cavity_age,
                **eligibility.walls,
                **eligibility.roof,
            }
        )
    scoring_df = pd.DataFrame(scoring_data)
    # Implement the same process that is being used in the recommendation engine to clean scoring_df
    # Perform the same cleaning as in the model - first clean number of room variables though
    scoring_df = DataProcessor.apply_averages_cleaning(
        data_to_clean=scoring_df,
        cleaning_data=cleaning_data,
        cols_to_merge_on=['PROPERTY_TYPE', 'BUILT_FORM', 'CONSTRUCTION_AGE_BAND', 'LOCAL_AUTHORITY'],
        colnames=["NUMBER_HABITABLE_ROOMS", "NUMBER_HEATED_ROOMS"],
    )
    scoring_df = DataProcessor.apply_averages_cleaning(
        data_to_clean=scoring_df,
        cleaning_data=cleaning_data,
        cols_to_merge_on=COLUMNS_TO_MERGE_ON + ["LOCAL_AUTHORITY"],
    ).drop(columns=["LOCAL_AUTHORITY"])
    scoring_df = DataProcessor.clean_missings_after_description_process(
        scoring_df,
        ignore_cols=[c for c in scoring_df.columns if ("thermal_transmittance" in c) or (
            "insulation_thickness" in c) or ("ENERGY_EFF" in c)]
    )
    scoring_df = DataProcessor.clean_efficiency_variables(scoring_df)
    # NOTE(review): the portfolio id is "ha33-eligibility" although this is the HA 7 run - confirm
    model_api = ModelApi(portfolio_id="ha33-eligibility", timestamp=created_at)
    all_predictions = model_api.predict_all(
        df=scoring_df,
        bucket="retrofit-data-dev",
        prediction_buckets={
            "sap_change_predictions": "retrofit-sap-predictions-dev",
            "heat_demand_predictions": "retrofit-heat-predictions-dev",
            "carbon_change_predictions": "retrofit-carbon-predictions-dev"
        }
    )
    predictions = all_predictions["sap_change_predictions"].copy()
    results_df = pd.DataFrame(results)
    predictions = predictions.rename(columns={"property_id": "row_id"}).merge(
        results_df[["row_id", "sap"]], how="left", on="row_id"
    )
    # The uplift of every prediction row is summed to give a single uplift per property
    predictions["sap_uplift"] = predictions["predictions"] - predictions["sap"]
    predictions = predictions.groupby("row_id")["sap_uplift"].sum().reset_index()
    results_df = results_df.merge(
        predictions[["sap_uplift", "row_id"]],
        how="left",
        on="row_id"
    )
    results_df["post_install_sap"] = results_df["sap"] + results_df["sap_uplift"]
    eco4_rows = results_df[results_df["eco4_eligible"] == True]
    # The columns are given explicitly so that the merge below still finds "row_id" when nothing is eligible
    eligibility_assessment = pd.DataFrame(
        [
            {
                "row_id": row["row_id"],
                "eligibility_classification": _ha7_confidence_band(row["sap"], row["post_install_sap"])
            }
            for _, row in eco4_rows.iterrows()
        ],
        columns=["row_id", "eligibility_classification"]
    )
    results_df = results_df.merge(
        eligibility_assessment, how="left", on="row_id"
    )
    return results_df, scoring_data, nodata
 def analyse_ha_7(results_df, data):
    """
    Exploratory summary of the HA 7 results joined to the colour coding of the asset list. Every figure is
    computed into a local variable for interactive inspection; nothing is returned.

    :param results_df: The results frame produced for HA 7
    :param data: The asset list, providing "row_code", "Property Type" and "Construction Year Band"
    """
    asset_columns = ["row_id", "row_code", "Property Type", "Construction Year Band"]
    analysis_data = results_df.merge(data[asset_columns], how="left", on="row_id")
    analysis_data["row_code"].value_counts()
    # NEW
    # Missing thicknesses are replaced with None before the conversion to a number
    raw_thickness = analysis_data["roof_insulation_thickness"]
    analysis_data["roof_insulation_thickness"] = np.where(pd.isnull(raw_thickness), None, raw_thickness)
    analysis_data["roof_insulation_thickness_numeric"] = analysis_data["roof_insulation_thickness"].apply(
        lambda thickness: convert_thickness_to_numeric(thickness, is_flat=False, is_pitched=True)
    )
    is_eco4 = analysis_data["eco4_eligible"] == True
    ideal_eco4 = analysis_data[is_eco4 & (analysis_data["roof_insulation_thickness_numeric"] <= 100)]
    secondary_eco4_warmfront_not_sold = analysis_data[
        is_eco4 & (analysis_data["roof_insulation_thickness_numeric"] > 100)
        ]
    # underperforming cavities
    failed_on_full_cavity = analysis_data["eco4_message"] == "Failed due to full cavity - check cavity age"
    cavity_is_old = analysis_data["cavity_age"] > 9 * 365
    underperforming_cavities = analysis_data[
        failed_on_full_cavity & cavity_is_old & (analysis_data["roof_insulation_thickness_numeric"] <= 100)
        ]
    identified_gbis_not_sold = analysis_data[
        (analysis_data["gbis_eligible"] == True) & (analysis_data["eco4_eligible"] == False)
        ]
    wf_identified = analysis_data[analysis_data["row_code"] == "potential ECO4"]
    # END NEW
    warmfront_identification = analysis_data["row_code"].value_counts()
    warmfront_identified = analysis_data[analysis_data["row_code"] == "potential ECO4"]
    warmfront_identified["walls"].value_counts(normalize=True)
    analysis_data["Construction Year Band"].value_counts(normalize=True)
    # Number of days from today
    epc_dates = pd.to_datetime(warmfront_identified["date_epc"])
    days_to_today = (datetime.now() - epc_dates).dt.days
    days_to_today.mean()
    property_types = analysis_data["Property Type"].value_counts()
    gbis_now = results_df["gbis_eligible"]
    eco4_now = results_df["eco4_eligible"]
    identified_now = gbis_now | eco4_now
    n_identified = identified_now.sum()
    eco_identified = results_df[eco4_now]
    n_eco4 = eco_identified["eco4_eligible"].sum()
    gbis_identified = results_df[~eco4_now & gbis_now]
    n_gbis = results_df[~eco4_now]["gbis_eligible"].sum()
    eco4_rows = results_df[eco4_now]
    eco4_rows["eligibility_classification"].value_counts()
    # Properties that are not identified today but pass one of the full checks
    eco4_later = results_df["eco4_eligible_future"] == True
    eco4_not_later = results_df["eco4_eligible_future"] == False
    gbis_later = results_df["gbis_eligible_future"] == True
    not_identified = ~identified_now
    future_possibilities_eco = results_df[eco4_later & not_identified].copy()
    future_possibilities_gbis = results_df[gbis_later & eco4_not_later & not_identified].copy()
    total_future_possibilities = len(future_possibilities_eco.index) + len(future_possibilities_gbis.index)
 def app():
    """
    Run the HA 7 eligibility process end to end: load the asset list, fetch the cleaning inputs from S3 and
    produce the results through get_ha7_data.
    """
    bucket = "retrofit-data-dev"
    data = load_data()
    # Every row gets an identifier of the form ha7<position>
    data["row_id"] = [f"ha7{position}" for position in range(len(data))]
    packed_cleaned = read_from_s3(s3_file_name="cleaned_epc_data/cleaned.bson", bucket_name=bucket)
    cleaned = msgpack.unpackb(packed_cleaned, raw=False)
    cleaning_data = read_dataframe_from_s3_parquet(
        bucket_name=bucket,
        file_key="sap_change_model/cleaning_dataset.parquet",
    )
    photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket=bucket)
    created_at = datetime.now().isoformat()
    results_df, scoring_data, nodata = get_ha7_data(
        data=data,
        cleaned=cleaned,
        cleaning_data=cleaning_data,
        created_at=created_at,
        photo_supply_lookup=photo_supply_lookup,
        floor_area_decile_thresholds=floor_area_decile_thresholds,
    )
    # Scratch snippet - pickle the results
    # import pickle
    # with open("ha7_results_jan_10.pkl", "wb") as f:
    #     pickle.dump({"results_df": results_df, "scoring_data": scoring_data, "nodata": nodata}, f)
    # Scratch snippet - read in the old data
    # import pickle
    # with open("ha7_results_jan_10.pkl", "rb") as f:
    #     old_data = pickle.load(f)
    # results_df = old_data["results_df"]
    # scoring_data = old_data["scoring_data"]
    # nodata = old_data["nodata"]
--- a/etl/epc/DataProcessor.py
+++ b/etl/epc/DataProcessor.py
@ -766,12 +766,16 @@ class EPCDataProcessor:
            how='left'
        )
        global_averages = cleaning_data[cols_to_clean].mean()
        # Fill NaN values with averages
        for col in cols_to_clean:
            data_to_clean[col].fillna(data_to_clean[f"{col}_AVERAGE"], inplace=True)
            data_to_clean.drop(columns=[f"{col}_AVERAGE"], inplace=True)
            # If we still have missings
            data_to_clean[col].fillna(data_to_clean[col].mean(), inplace=True)
            # Final step if we still have missings - use global mean
            data_to_clean[col].fillna(global_averages[col], inplace=True)
        return data_to_clean
--- a/etl/epc/property_change_app.py
+++ b/etl/epc/property_change_app.py
@ -23,6 +23,12 @@ def main():
    pd.DataFrame(epc_pipeline.compiled_all_equal_rows).to_parquet("refactor_datasets/all_equal_rows.parquet")
    pd.concat(epc_pipeline.compiled_cleaning_averages).to_parquet("refactor_datasets/cleaning_averages.parquet")
    from utils.s3 import read_dataframe_from_s3_parquet
    dataset = read_dataframe_from_s3_parquet(
        bucket_name="retrofit-data-dev",
        file_key="sap_change_model/dataset_test.parquet",
    )
 if __name__ == "__main__":
    main()
--- a/etl/epc_clean/epc_attributes/MainheatAttributes.py
+++ b/etl/epc_clean/epc_attributes/MainheatAttributes.py
@ -16,6 +16,7 @@ class MainHeatAttributes(Definitions):
        "solar assisted heat pump",
        "exhaust source heat pump",
        "community heat pump",
        "portable electric heating"
    ]
    FUEL_TYPES = ["electric", "mains gas", "wood logs", "coal", "oil", "wood pellets", "anthracite",
                  "dual fuel mineral and wood", "smokeless fuel", "lpg", "b30k"]
--- a/etl/epc_clean/epc_attributes/WallAttributes.py
+++ b/etl/epc_clean/epc_attributes/WallAttributes.py
@ -152,4 +152,7 @@ class WallAttributes(Definitions):
            else:
                result["insulation_thickness"] = "average"
        if result["is_cavity_wall"] & result["is_as_built"] & (result["insulation_thickness"] == "average"):
            result["is_filled_cavity"] = True
        return result
--- a/etl/epc_clean/epc_attributes/WindowAttributes.py
+++ b/etl/epc_clean/epc_attributes/WindowAttributes.py
@ -52,7 +52,7 @@ class WindowAttributes(Definitions):
                raise ValueError('Invalid description')
    def process(self) -> Dict[str, Union[str, bool]]:
-        result: Dict[str, Union[str, bool]] = {
+        result: Dict[str, Union[str, bool, None]] = {
            "has_glazing": False,
            "glazing_coverage": None,
            "glazing_type": None,
@ -80,7 +80,11 @@ class WindowAttributes(Definitions):
                        break
        # If we didn't find any coverage or type, we assume full coverage
-        if not result["glazing_coverage"]:
+        if (not result["glazing_coverage"]) & (result["glazing_type"] != "single"):
            result["glazing_coverage"] = "full"
        # We reset some values if the glazing is single
        if result["glazing_type"] == "single":
            result["has_glazing"] = False
        return result
--- a/etl/epc_clean/tests/test_data/test_mainheat_attributes_cases.py
+++ b/etl/epc_clean/tests/test_data/test_mainheat_attributes_cases.py
@ -1652,4 +1652,17 @@ mainheat_cases = [
     'has_electricaire': False, 'has_assumed_for_most_rooms': False, 'has_underfloor_heating': False,
     "has_electric_heat_pumps": False,
     "has_micro-cogeneration": False},
    {'original_description': 'Portable electric heating assumed for most rooms', 'has_radiators': False,
     'has_fan_coil_units': False, 'has_pipes_in_screed_above_insulation': False,
     'has_pipes_in_insulated_timber_floor': False, 'has_pipes_in_concrete_slab': False, 'has_boiler': False,
     'has_air_source_heat_pump': False, 'has_room_heaters': False, 'has_electric_storage_heaters': False,
     'has_warm_air': False, 'has_electric_underfloor_heating': False, 'has_electric_ceiling_heating': False,
     'has_community_scheme': False, 'has_ground_source_heat_pump': False, 'has_no_system_present': False,
     'has_portable_electric_heaters': False, 'has_water_source_heat_pump': False, 'has_electric_heat_pump': False,
     'has_micro-cogeneration': False, 'has_solar_assisted_heat_pump': False, 'has_exhaust_source_heat_pump': False,
     'has_community_heat_pump': False, 'has_portable_electric_heating': True, 'has_electric': True,
     'has_mains_gas': False, 'has_wood_logs': False, 'has_coal': False, 'has_oil': False, 'has_wood_pellets': False,
     'has_anthracite': False, 'has_dual_fuel_mineral_and_wood': False, 'has_smokeless_fuel': False, 'has_lpg': False,
     'has_b30k': False, 'has_assumed': True, 'has_electricaire': False, 'has_assumed_for_most_rooms': True,
     'has_underfloor_heating': False}
 ]
--- a/etl/epc_clean/tests/test_data/test_wall_attributes_cases.py
+++ b/etl/epc_clean/tests/test_data/test_wall_attributes_cases.py
@ -550,7 +550,7 @@ wall_cases = [
     'is_as_built': False, 'is_cob': False, 'is_assumed': False, 'is_sandstone_or_limestone': False,
     'insulation_thickness': None, 'external_insulation': False, 'internal_insulation': False},
    {'original_description': 'Cavity wall, as built, insulated (assumed)', 'thermal_transmittance': None,
-     'thermal_transmittance_unit': None, 'is_cavity_wall': True, 'is_filled_cavity': False, 'is_solid_brick': False,
+     'thermal_transmittance_unit': None, 'is_cavity_wall': True, 'is_filled_cavity': True, 'is_solid_brick': False,
     'is_system_built': False, 'is_timber_frame': False, 'is_granite_or_whinstone': False, 'is_as_built': True,
     'is_cob': False, 'is_assumed': True, 'is_sandstone_or_limestone': False, 'insulation_thickness': 'average',
     'external_insulation': False, 'internal_insulation': False},
@ -727,7 +727,7 @@ wall_cases = [
     'external_insulation': False, 'internal_insulation': False},
    {'original_description': 'Waliau ceudod, fel yGÇÖu hadeiladwyd, wediGÇÖu hinswleiddio (rhagdybiaeth)',
     'thermal_transmittance': None,
-     'thermal_transmittance_unit': None, 'is_cavity_wall': True, 'is_filled_cavity': False, 'is_solid_brick': False,
+     'thermal_transmittance_unit': None, 'is_cavity_wall': True, 'is_filled_cavity': True, 'is_solid_brick': False,
     'is_system_built': False, 'is_timber_frame': False, 'is_granite_or_whinstone': False, 'is_as_built': True,
     'is_cob': False, 'is_assumed': True, 'is_sandstone_or_limestone': False, 'insulation_thickness': 'average',
     'external_insulation': False, 'internal_insulation': False},
--- a/etl/epc_clean/tests/test_data/test_window_attributes_cases.py
+++ b/etl/epc_clean/tests/test_data/test_window_attributes_cases.py
@ -30,7 +30,8 @@ windows_cases = [
     'glazing_type': 'triple', 'no_data': False},
    {'original_description': 'Gwydrau triphlyg rhannol', 'has_glazing': True, 'glazing_coverage': 'partial',
     'glazing_type': 'triple', 'no_data': False},
-    {'original_description': 'Single glazed', 'has_glazing': True, 'glazing_coverage': 'full', 'glazing_type': 'single',
+    {'original_description': 'Single glazed', 'has_glazing': False, 'glazing_coverage': None,
     'glazing_type': 'single',
     'no_data': False},
    {'original_description': 'Some double glazing', 'has_glazing': True, 'glazing_coverage': 'partial',
     'glazing_type': 'double', 'no_data': False},
@ -46,7 +47,8 @@ windows_cases = [
     'glazing_type': 'double', 'no_data': False},
    {'original_description': 'Gwydrau dwbl gan mwyaf', 'has_glazing': True, 'glazing_coverage': 'most',
     'glazing_type': 'double', 'no_data': False},
-    {'original_description': 'Gwydrau sengl', 'has_glazing': True, 'glazing_coverage': 'full', 'glazing_type': 'single',
+    {'original_description': 'Gwydrau sengl', 'has_glazing': False, 'glazing_coverage': None,
     'glazing_type': 'single',
     'no_data': False},
    {'original_description': 'Ffenestri perfformiad uchel', 'has_glazing': True, 'glazing_coverage': 'full',
     'glazing_type': 'high performance', 'no_data': False},
--- a/etl/epc_clean/tests/test_roof_attributes.py
+++ b/etl/epc_clean/tests/test_roof_attributes.py
@ -3,12 +3,13 @@ from pathlib import Path
 from etl.epc_clean.tests.test_data.test_roof_attributes_cases import clean_roof_test_cases
 from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes
 # For local testing
-if __file__ == "<input>":
+# if __file__ == "<input>":
-    input_data_path = Path("./model_data/tests/test_data/EpcClean_inputs.obj")
+#     input_data_path = Path("./model_data/tests/test_data/EpcClean_inputs.obj")
-else:
+# else:
-    current_file_path = Path(__file__)
+#     current_file_path = Path(__file__)
-    input_data_path = current_file_path.parent / 'test_data' / 'EpcClean_inputs.obj'
+#     input_data_path = current_file_path.parent / 'test_data' / 'EpcClean_inputs.obj'
 class TestRoofAttributes:
@ -88,7 +89,12 @@ class TestRoofAttributes:
    def test_clean_roof_no_description(self):
        roof = RoofAttributes('').process()
-        assert roof == {}
+        assert roof == {
            'thermal_transmittance': False, 'thermal_transmittance_unit': False, 'is_pitched': False,
            'is_roof_room': False, 'is_loft': False, 'is_flat': False, 'is_thatched': False,
            'is_at_rafters': False, 'is_assumed': False, 'has_dwelling_above': False, 'is_valid': False,
            'insulation_thickness': False
        }
    def test_clean_roof_edge_cases(self):
        # Insulation thickness edge case
--- a/etl/solar/SolarPhotoSupply.py
+++ b/etl/solar/SolarPhotoSupply.py
@ -0,0 +1,244 @@
 import pandas as pd
 from tqdm import tqdm
 from utils.s3 import save_dataframe_to_s3_parquet, read_dataframe_from_s3_parquet
 from utils.logger import setup_logger
 logger = setup_logger()
 class SolarPhotoSupply:
    """
    Builds a lookup of typical PHOTO_SUPPLY values from EPC certificate csv files, grouped by property
    characteristics and floor area decile, and provides helpers to store, load and query that lookup.
    """
    DATASET_COLUMNS = [
        "UPRN", "PROPERTY_TYPE", "TENURE", "BUILT_FORM", "ROOF_DESCRIPTION", "PHOTO_SUPPLY", "TOTAL_FLOOR_AREA",
        "CONSTRUCTION_AGE_BAND", "SOLAR_WATER_HEATING_FLAG"
    ]

    def __init__(self, file_directories, cleaned_lookup):
        """
        Initialize the SolarPhotoSupply class with file directories and a cleaned lookup. Currently, this class
        just works with locally stored data, but this could be extended to work with data stored in S3.
        :param file_directories: A list of directories where files are stored. Each directory is expected to
            contain a "certificates.csv" file.
        :param cleaned_lookup: A dictionary containing cleaned lookup data. Only the "roof-description" entry
            is read.
        """
        self.file_directories = file_directories
        self.results = []
        self.decile_thresholds = None
        self.roof_lookup = pd.DataFrame(cleaned_lookup.get("roof-description"))
        self.photo_supply_lookup = pd.DataFrame()
        self.floor_area_decile_thresholds = pd.DataFrame()

    def create_dataset(self):
        """
        Create a dataset from the provided file directories. This method processes the data files,
        applies transformations, and aggregates data into a useful format.
        On success it populates self.results, self.decile_thresholds, self.photo_supply_lookup and
        self.floor_area_decile_thresholds.
        :raises ValueError: If there is no roof lookup data, or if no file directories were provided.
        """
        if self.roof_lookup.empty:
            raise ValueError("No roof lookup data")
        results = []
        logger.info("Creating solar photo supply dataset")
        for directory in tqdm(self.file_directories):
            filepath = directory / "certificates.csv"
            df = pd.read_csv(filepath, low_memory=False)
            # Drop rows that have a missing UPRN, PROPERTY_TYPE, BUILT_FORM, CONSTRUCTION_AGE_BAND or
            # TOTAL_FLOOR_AREA. This must happen before the UPRN conversion, which cannot handle missing values
            df = df.dropna(
                subset=["UPRN", "PROPERTY_TYPE", "BUILT_FORM", "CONSTRUCTION_AGE_BAND", "TOTAL_FLOOR_AREA"]
            )
            df["UPRN"] = df["UPRN"].astype(int).astype(str)
            # Take newest LODGEMENT_DATE per UPRN
            df = df.sort_values(by="LODGEMENT_DATE", ascending=False).drop_duplicates(subset=["UPRN"])
            data = df[self.DATASET_COLUMNS].copy()
            # Only keep certificates that actually report a photo supply
            data["PHOTO_SUPPLY"] = data["PHOTO_SUPPLY"].fillna(0)
            data = data[data["PHOTO_SUPPLY"] != 0]
            results.append(data)
        if not results:
            # pd.concat would raise a less helpful "No objects to concatenate" ValueError
            raise ValueError("No certificate data found - no file directories were provided")
        self.results = pd.concat(results)
        # Convert total floor area to deciles
        self.decile_thresholds = self.results["TOTAL_FLOOR_AREA"].quantile(
            [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
        ).values
        self.results["floor_area_decile"] = pd.cut(
            self.results["TOTAL_FLOOR_AREA"],
            bins=[0] + list(self.decile_thresholds) + [float('inf')],
            labels=False,
            include_lowest=True
        )
        # Convert tenure to lower
        self.results["TENURE"] = self.results["TENURE"].str.lower()
        self.results = self.results.merge(
            self.roof_lookup.drop(
                columns=[
                    "clean_description", "thermal_transmittance", "thermal_transmittance_unit", "insulation_thickness",
                    "is_assumed"
                ]
            ),
            left_on="ROOF_DESCRIPTION",
            right_on="original_description",
            how="left"
        )
        self.photo_supply_lookup = self.results.groupby(
            [
                "PROPERTY_TYPE", "BUILT_FORM", "TENURE", "is_pitched", "is_roof_room", "is_flat",
                "CONSTRUCTION_AGE_BAND", "floor_area_decile"
            ],
            observed=True
        ).agg(
            {
                "PHOTO_SUPPLY": ["median", "mean"],
            }
        ).reset_index()
        # Flatten the two-level column index, remove the trailing underscore left on the grouping columns
        # and convert the column names to lowercase
        flattened = ['_'.join(col).strip() for col in self.photo_supply_lookup.columns.values]
        self.photo_supply_lookup.columns = [
            (col[:-1] if col.endswith("_") else col).lower() for col in flattened
        ]
        self.floor_area_decile_thresholds = pd.DataFrame(
            self.decile_thresholds,
            columns=["floor_area_decile_thresholds"]
        )

    @staticmethod
    def classify_floor_area(new_area, thresholds):
        """
        Classify a given floor area into a decile based on provided thresholds.
        :param new_area: The new floor area to be classified.
        :param thresholds: A list of thresholds used for classification, in ascending order.
        :return: An integer representing the decile index. This is the index of the first threshold that is
            greater than or equal to new_area, or len(thresholds) if new_area exceeds every threshold.
        """
        for i, threshold in enumerate(thresholds):
            if new_area <= threshold:
                return i  # Returns the decile index (0 to 9)
        return len(thresholds)

    def save(self, bucket="retrofit-data-dev"):
        """
        Save the processed data to an S3 bucket in the parquet format. This method also handles
        logging and validation to ensure data is present before saving.
        :param bucket: The name of the S3 bucket to save to. Defaults to the dev bucket.
        :raises ValueError: If the photo supply lookup is empty (e.g. create_dataset has not been run).
        """
        if self.photo_supply_lookup.empty:
            raise ValueError("No data to save")
        logger.info("Storing outputs to S3")
        # Store this data in s3 as a parquet file
        save_dataframe_to_s3_parquet(
            df=self.photo_supply_lookup,
            bucket_name=bucket,
            file_key="solar_pv_supply/photo_supply_lookup.parquet",
        )
        save_dataframe_to_s3_parquet(
            df=self.floor_area_decile_thresholds,
            bucket_name=bucket,
            file_key="solar_pv_supply/floor_area_decile_thresholds.parquet",
        )

    @staticmethod
    def load(bucket):
        """
        Load datasets from an S3 bucket.
        :param bucket: The name of the S3 bucket to load data from.
        :return: A tuple containing photo supply lookup and floor area decile thresholds dataframes.
        """
        photo_supply_lookup = read_dataframe_from_s3_parquet(
            bucket_name=bucket, file_key="solar_pv_supply/photo_supply_lookup.parquet",
        )
        floor_area_decile_thresholds = read_dataframe_from_s3_parquet(
            bucket_name=bucket, file_key="solar_pv_supply/floor_area_decile_thresholds.parquet",
        )
        return photo_supply_lookup, floor_area_decile_thresholds

    @classmethod
    def filter_photo_supply_lookup(
        cls,
        photo_supply_lookup: pd.DataFrame,
        floor_area_decile_thresholds: pd.DataFrame,
        tenure: str,
        built_form: str,
        property_type: str,
        construction_age_band: str,
        is_flat: bool,
        is_pitched: bool,
        is_roof_room: bool,
        floor_area: float
    ):
        """
        Filter the photo supply lookup to find the most appropriate photo supply for a given property.
        :param photo_supply_lookup: The photo supply lookup dataframe.
        :param floor_area_decile_thresholds: The floor area decile thresholds dataframe.
        :param tenure: The tenure of the property.
        :param built_form: The built form of the property.
        :param property_type: The property type of the property.
        :param construction_age_band: The construction age band of the property.
        :param is_flat: Whether the property has a flat roof.
        :param is_pitched: Whether the property has a pitched roof.
        :param is_roof_room: Whether the property has a roof room.
        :param floor_area: The floor area of the property.
        :return: The matching rows of photo_supply_lookup. This may contain more than one row when the match
            had to fall back to a more aggregated level or the floor area decile is not present.
        :raises ValueError: If nothing matches on tenure, built form and property type.
        :raises AttributeError: If tenure is not a string.
        """
        # Convert the tenure to lower case, as is done in the creation of the dataset
        tenure = tenure.lower()
        # We remap the "not defined" tenure to a shortened form
        # NOTE(review): presumably the lookup data holds this value truncated - confirm against the dataset
        tenure = {
            "not defined - use in the case of a new dwelling for which the intended tenure in not known. it is not to "
            "be used for an existing dwelling":
                "not defined - use in the case of a new dwelling for which the intended tenure in not known. it is no"
        }.get(tenure, tenure)
        # The broad match is shared by the full match and the fallback
        broad_match = (
            (photo_supply_lookup["tenure"] == tenure) &
            (photo_supply_lookup["built_form"] == built_form) &
            (photo_supply_lookup["property_type"] == property_type)
        )
        photo_supply_matched = photo_supply_lookup[
            broad_match &
            (photo_supply_lookup["construction_age_band"] == construction_age_band) &
            (photo_supply_lookup["is_flat"] == is_flat) &
            (photo_supply_lookup["is_pitched"] == is_pitched) &
            (photo_supply_lookup["is_roof_room"] == is_roof_room)
        ]
        if photo_supply_matched.empty:
            # There are a small number of cases where we don't get a full match so try again with a more aggregated
            # average
            photo_supply_matched = photo_supply_lookup[broad_match]
            if construction_age_band in photo_supply_matched["construction_age_band"].values:
                photo_supply_matched = photo_supply_matched[
                    photo_supply_matched["construction_age_band"] == construction_age_band
                ]
            if photo_supply_matched.empty:
                raise ValueError("No photo supply matches")
        floor_area_decile = cls.classify_floor_area(
            floor_area, floor_area_decile_thresholds["floor_area_decile_thresholds"].values
        )
        # Only narrow down by decile when that decile is available, otherwise keep every remaining match
        if floor_area_decile in photo_supply_matched["floor_area_decile"].values:
            photo_supply_matched = photo_supply_matched[
                photo_supply_matched["floor_area_decile"] == floor_area_decile
            ]
        return photo_supply_matched
--- a/etl/solar/app.py
+++ b/etl/solar/app.py
@ -0,0 +1,31 @@
 from pathlib import Path
 from etl.epc.property_change_app import get_cleaned
 from etl.solar.SolarPhotoSupply import SolarPhotoSupply
 DATA_DIRECTORY = Path(__file__).parent / "local_data" / "all-domestic-certificates"
 def app():
    """
    Read the EPC data and build a lookup of sensible values for the photo-supply variable, which the EPC data
    describes as:
    "Percentage of photovoltaic area as a percentage of total roof area. 0% indicates that a Photovoltaic Supply
    is not present in the property."
    When recommending solar, the retrofit is simulated by increasing this value from 0, so a sensible target
    figure is needed. This function collects every sub-directory of DATA_DIRECTORY, builds the photo supply
    dataset from them using the cleaned lookup, and saves the result.
    :return:
    """
    epc_directories = [path for path in DATA_DIRECTORY.iterdir() if path.is_dir()]
    client = SolarPhotoSupply(file_directories=epc_directories, cleaned_lookup=get_cleaned())
    client.create_dataset()
    client.save()
--- a/etl/solar/tests/test_solar_photo_supply.py
+++ b/etl/solar/tests/test_solar_photo_supply.py
@ -0,0 +1,109 @@
 import unittest
 import pandas as pd
 from etl.solar.SolarPhotoSupply import SolarPhotoSupply
 class TestSolarPhotoSupply(unittest.TestCase):
    """Tests for SolarPhotoSupply.filter_photo_supply_lookup and SolarPhotoSupply.classify_floor_area."""

    def setUp(self):
        # Two-row mock lookup: row 0 is a leasehold detached house, row 1 a freehold semi-detached flat
        lookup_rows = {
            "tenure": ["leasehold", "freehold"],
            "built_form": ["detached", "semi-detached"],
            "property_type": ["house", "flat"],
            "construction_age_band": ["pre-1900", "1900-1929"],
            "is_flat": [False, True],
            "is_pitched": [True, False],
            "is_roof_room": [False, True],
            "floor_area_decile": [0, 1],
            "photo_supply": [100, 200]
        }
        self.photo_supply_lookup = pd.DataFrame(lookup_rows)
        self.floor_area_decile_thresholds = pd.DataFrame({"floor_area_decile_thresholds": [50, 100]})
        self.solar_photo_supply = SolarPhotoSupply([], {})

    def _filter(self, *property_args):
        # Run the filter against the mock lookup and thresholds; property_args are passed through positionally
        return self.solar_photo_supply.filter_photo_supply_lookup(
            self.photo_supply_lookup, self.floor_area_decile_thresholds, *property_args
        )

    def test_correct_filtering(self):
        matched = self._filter("leasehold", "detached", "house", "pre-1900", False, True, False, 45)
        self.assertEqual(len(matched), 1)
        self.assertEqual(matched.iloc[0]["photo_supply"], 100)

    def test_no_matches(self):
        # An unknown built form cannot match at any level of aggregation
        with self.assertRaises(ValueError):
            self._filter("leasehold", "unknown", "house", "pre-1900", False, True, False, 45)

    def test_floor_area_decile_matching(self):
        matched = self._filter("freehold", "semi-detached", "flat", "1900-1929", True, False, True, 60)
        self.assertEqual(len(matched), 1)
        self.assertEqual(matched.iloc[0]["photo_supply"], 200)

    def test_invalid_parameters(self):
        # 123 is an invalid type for tenure
        with self.assertRaises(AttributeError):
            self._filter(123, "detached", "house", "pre-1900", False, True, False, 45)

    def test_classify_floor_area(self):
        thresholds = [10, 20, 30, 40, 50]
        classifier = SolarPhotoSupply([], {})
        # Test Case 1: Valid floor area
        self.assertEqual(
            classifier.classify_floor_area(25, thresholds),
            2,
            "Decile classification did not match expected result"
        )
        # Test Case 2: Out of range floor area
        self.assertEqual(
            classifier.classify_floor_area(60, thresholds),
            len(thresholds),
            "Decile classification for out of range value is incorrect"
        )
 # Run the tests in this module when the file is executed directly as a script
 if __name__ == '__main__':
    unittest.main()
--- a/etl/testing_data/estimate_epc.py
+++ b/etl/testing_data/estimate_epc.py
@ -0,0 +1,194 @@
 from pathlib import Path
 from random import choices, sample
 import os
 import pandas as pd
 from tqdm import tqdm
 from dotenv import load_dotenv
 from utils.logger import setup_logger
 from backend.SearchEpc import SearchEpc, vartypes
 from BaseUtility import Definitions
 from etl.epc.settings import BUILT_FORM_REMAP
 ENV_FILE = Path(__file__).parent / "backend" / ".env"
 logger = setup_logger()
 DATA_DIRECTORY = Path(__file__).parent / "local_data" / "all-domestic-certificates"
 # Number of UPRNs evaluated per directory, and number of directories sampled
 DIR_SAMPLE_SIZE = 500
 N_DIRECTORIES = 50
 # The .env file must be loaded before the environment is read, otherwise a token that is only defined in
 # the .env file is not picked up
 load_dotenv(ENV_FILE)
 EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
 # String fields that are excluded from the categorical comparison
 CATETORICALS_TO_IGNORE = [
    "postcode", "constituency", "local-authority", "built-form", "property-type", "address1", "constituency-label",
    "building-reference-number", "address2", "posttown", "transaction-type", "lmk-key", "address3",
    "local-authority-label", "county",
 ]
 def check_numeric_performance(estimated_value, actual_value):
    """
    Measure how far an estimated numeric value is from the actual value, as a relative error.
    :param estimated_value: The value produced by the estimation process (may be missing).
    :param actual_value: The value recorded on the real EPC (may be missing).
    :return: None if there is no actual value to compare against, 1 if the estimate is missing or the actual
        value is 0 and the estimate is not, 0 if both are 0, otherwise abs(estimated - actual) / abs(actual).
    """
    # If we don't have anything to compare against, return None
    if pd.isnull(actual_value):
        return None
    # A missing estimate is treated as a complete miss
    if pd.isnull(estimated_value):
        return 1
    # A relative error is undefined for an actual value of 0, so it is either a perfect match or a complete miss
    if actual_value == 0:
        return 0 if estimated_value == 0 else 1
    # Divide by the magnitude of the actual value so a negative actual value doesn't produce a negative error
    return abs(estimated_value - actual_value) / abs(actual_value)
 def app():
    """
    This script is used to test the EPC estimation process.
    For a sample of directories, it takes a fixed sample of UPRNs, estimates an EPC for each property while
    excluding that property's own certificates, and compares the estimate with the newest real certificate.
    The per-property numeric and categorical success scores are aggregated (median) overall, by tenure, by
    property type and by property type & built form, and the aggregates are logged.
    :raises ValueError: If a vartype is neither numeric ("float", "Int64") nor "str".
    """
    numerical_vartypes = {key: value for key, value in vartypes.items() if value in ["float", "Int64"]}
    str_var_types = {key: value for key, value in vartypes.items() if value == "str"}
    # Make sure we haven't missed any keys
    if len(numerical_vartypes) + len(str_var_types) != len(vartypes):
        raise ValueError("Not all vartypes have been accounted for")
    # Drop some keys that aren't important
    for k in CATETORICALS_TO_IGNORE:
        str_var_types.pop(k, None)
    directories = [entry for entry in DATA_DIRECTORY.iterdir() if entry.is_dir()]
    # Sample without replacement, so the same directory is never evaluated (and counted) twice
    directory_sample = sample(directories, k=min(N_DIRECTORIES, len(directories)))
    results = []
    for directory in tqdm(directory_sample):
        filepath = directory / "certificates.csv"
        df = pd.read_csv(filepath, low_memory=False)
        # Drop missing UPRNs before converting to string - afterwards they would be the string "<NA>" and
        # would no longer be detected as missing
        df = df.dropna(subset=["UPRN"])
        df["UPRN"] = df["UPRN"].astype("Int64").astype("str")
        # uprn_sample = sample(df["UPRN"].unique().tolist(), DIR_SAMPLE_SIZE)
        # Take a fixed sample based on the first DIR_SAMPLE_SIZE uprns
        uprn_sample = sorted(df["UPRN"].unique().tolist())[:DIR_SAMPLE_SIZE]
        df_sample = df[df["UPRN"].isin(uprn_sample)]
        # Take the record with the newest LODGEMENT_DATETIME by uprn
        df_sample = df_sample.sort_values("LODGEMENT_DATETIME", ascending=False).drop_duplicates("UPRN")
        # Convert the columns to lower case and replace underscores with hyphens, the same as the api
        df_sample.columns = df_sample.columns.str.lower().str.replace("_", "-")
        # For each epc, we test the estimation process
        for _, epc in df_sample.iterrows():
            epc = epc.to_dict()
            address1 = epc["address1"]
            postcode = epc["postcode"]
            # Get all EPCs for this uprn and we make sure they get dropped from the estimate_epc function
            epcs_for_uprn = df[df["UPRN"] == epc["uprn"]]
            lmks_to_drop = epcs_for_uprn["LMK_KEY"].tolist()
            searcher = SearchEpc(address1, postcode, auth_token=EPC_AUTH_TOKEN, os_api_key="")
            searcher.uprn = epc["uprn"]
            # Perform the same remapping for built-form as in the Property class for this test, in case we get (e.g.)
            # Enclosed End-Terrace
            built_form = BUILT_FORM_REMAP.get(epc["built-form"], epc["built-form"])
            if ((epc["property-type"] == "Maisonette") & (built_form == "Detached")) or (
                built_form in Definitions.DATA_ANOMALY_MATCHES
            ):
                built_form = ""
            estimated_epc = searcher.estimate_epc(
                property_type=epc["property-type"], built_form=built_form, lmks_to_drop=lmks_to_drop
            )
            # We now compare the difference between the estimated and original
            # TODO: We can convert windows and lighting to numeric versions and estimate how close we are
            numeric_performance = {
                key: check_numeric_performance(estimated_epc[key], epc[key]) for key in numerical_vartypes
            }
            # Remove Nones
            numeric_performance = {key: value for key, value in numeric_performance.items() if value is not None}
            # Get an average. If no numeric field could be compared, there is no score for this property
            if numeric_performance:
                numeric_success = 1 - sum(numeric_performance.values()) / len(numeric_performance)
            else:
                numeric_success = None
            # categorical performance
            categorical_performance = {
                key: 0 if estimated_epc[key] != epc[key] else 1 for key in str_var_types
            }
            # Get an average
            categorical_success = sum(categorical_performance.values()) / len(categorical_performance)
            results.append(
                {
                    "uprn": epc["uprn"],
                    "numeric_success": numeric_success,
                    "categorical_success": categorical_success,
                    "property_type": epc["property-type"],
                    "built_form": epc["built-form"],
                    "tenure": epc["tenure"],
                }
            )
    # Get aggregate performance figures
    results_df = pd.DataFrame(results)
    results_df["tenure"] = results_df["tenure"].replace("Rented (social)", "rental (social)")
    avg_numeric_success = results_df["numeric_success"].median()
    avg_categorical_success = results_df["categorical_success"].median()
    # With 20 nearest homes
    # 0.7718100840549558
    # 0.5116279069767442
    # 100 nearest homes
    # 0.7859617377809409
    # 0.5348837209302325
    # Fixed sample, sqrt weights
    # Group by tenure
    by_tenure = results_df.groupby("tenure").agg(
        {"numeric_success": "median", "categorical_success": "median", "uprn": "count"}
    )
    pd.set_option('display.max_rows', 500)
    pd.set_option('display.max_columns', 500)
    pd.set_option('display.width', 1000)
    # With 20 nearest homes
    #                                                     numeric_success  categorical_success   uprn
    # tenure
    # NO DATA!                                                   0.847840             0.581395    278
    # Not defined - use in the case of a new dwelling...         0.930282             0.651163    617
    # Owner-occupied                                             0.770330             0.511628   2588
    # Rented (private)                                           0.791885             0.558140   1232
    # owner-occupied                                             0.741088             0.488372  10912
    # rental (private)                                           0.749064             0.488372   3252
    # rental (social)                                            0.822109             0.581395   3878
    # unknown                                                    0.895840             0.627907   1820
    # 100 nearest homes
    # tenure
    # NO DATA!                                                   0.899566             0.604651    233
    # Not defined - use in the case of a new dwelling...         0.927518             0.674419    608
    # Owner-occupied                                             0.777026             0.511628   3167
    # Rented (private)                                           0.805646             0.534884   1316
    # owner-occupied                                             0.762180             0.488372  10835
    # rental (private)                                           0.760503             0.511628   3181
    # rental (social)                                            0.830057             0.604651   3705
    # unknown                                                    0.899948             0.627907   1571
    # By property type - we also want to see how many properties we have for each property type
    by_property_type = results_df.groupby("property_type").agg(
        {"numeric_success": "median", "categorical_success": "median", "uprn": "count"}
    )
    # By property_type & built form
    by_property_type_built_form = results_df.groupby(["property_type", "built_form"]).agg(
        {"numeric_success": "median", "categorical_success": "median", "uprn": "count"}
    )
    # Report the figures, so the run produces a usable output
    logger.info("Median numeric success: %s", avg_numeric_success)
    logger.info("Median categorical success: %s", avg_categorical_success)
    logger.info("Performance by tenure:\n%s", by_tenure)
    logger.info("Performance by property type:\n%s", by_property_type)
    logger.info("Performance by property type and built form:\n%s", by_property_type_built_form)
--- a/etl/testing_data/no_epc_input.py
+++ b/etl/testing_data/no_epc_input.py
@ -0,0 +1,42 @@
 """
 This script will create an input csv for the recommendation engine and upload it to S3, which can be used for
 testing
 """
 import pandas as pd
 from utils.s3 import save_csv_to_s3
 USER_ID = 8
 PORTFOLIO_ID = 57
 def app():
    """
    Build a four-property test portfolio, upload it to S3 as a csv and print the request body that points
    at the uploaded file.
    NOTE(review): going by the "no_epc.csv" file name, these are presumably properties without an EPC - confirm
    :return:
    """
    addresses = ["21 Butler House", "22 Butler House", "23 Butler House", "24 Butler House"]
    test_file = pd.DataFrame(
        [{"address": address, "postcode": "E2 0PN", "Notes": None} for address in addresses]
    )
    # The S3 key is namespaced by user and portfolio
    filename = f"{USER_ID}/{PORTFOLIO_ID}/no_epc.csv"
    save_csv_to_s3(dataframe=test_file, bucket_name="retrofit-plan-inputs-dev", file_name=filename)
    # Print the request body, so it can be copied when triggering a run
    print(
        {
            "portfolio_id": str(PORTFOLIO_ID),
            "housing_type": "Social",
            "goal": "Increase EPC",
            "goal_value": "A",
            "trigger_file_path": filename
        }
    )
--- a/etl/testing_data/windows_portfolio.py
+++ b/etl/testing_data/windows_portfolio.py
@ -0,0 +1,43 @@
 """
 This script will create an input csv for the recommendation engine and upload it to S3, which can be used for
 testing
 """
 import pandas as pd
 from utils.s3 import save_csv_to_s3
 USER_ID = 8
 PORTFOLIO_ID = 56
 def app():
    """
    Build the test portfolio used for windows recommendations, upload it to S3 as a csv and print the
    request body that points at the uploaded file.
    :return:
    """
    properties = [
        ("3 Church Terrace", "LE13 0PW"),
        ("3, Main Street, Redmile", "NG13 0GA"),
        ("Manor House, Kennel Lane, Reepham", "LN3 4DZ"),
        ("13 Main Street", "LE14 2JU"),
        ("8 The Crescent, Coston Road, Buckminster", "NG33 5SF"),
    ]
    test_file = pd.DataFrame(
        [{"address": address, "postcode": postcode, "Notes": None} for address, postcode in properties]
    )
    # The S3 key is namespaced by user and portfolio
    filename = f"{USER_ID}/{PORTFOLIO_ID}/windows_portfolio_inputs.csv"
    save_csv_to_s3(dataframe=test_file, bucket_name="retrofit-plan-inputs-dev", file_name=filename)
    # Print the request body, so it can be copied when triggering a run
    print(
        {
            "portfolio_id": str(PORTFOLIO_ID),
            "housing_type": "Social",
            "goal": "Increase EPC",
            "goal_value": "A",
            "trigger_file_path": filename
        }
    )
--- a/recommendations/Costs.py
+++ b/recommendations/Costs.py
@ -18,6 +18,25 @@ regional_labour_variations = [
    {"Region": "Northern Ireland", "Adjustment_Factor": 0.76}
 ]
 # This data is based on the MCS database
 MCS_SOLAR_PV_COST_DATA = {
    "last_updated": "2024-01-04",
    "average_cost_per_kwh": 2013.94,
    "average_cost_per_kwh-Outer London": 2618.75,
    "average_cost_per_kwh-Inner London": 2618.75,
    "average_cost_per_kwh-South East England": 2083.33,
    "average_cost_per_kwh-South West England": 2113,
    "average_cost_per_kwh-East of England": 1973.86,
    "average_cost_per_kwh-East Midlands": 1981.86,
    "average_cost_per_kwh-West Midlands": 1926.55,
    "average_cost_per_kwh-North East England": 2028.49,
    "average_cost_per_kwh-North West England": 1620.42,
    "average_cost_per_kwh-Yorkshire and the Humber": 2060.9,
    "average_cost_per_kwh-Wales": 1898.83,
    "average_cost_per_kwh-Scotland": 1967.97,
    "average_cost_per_kwh-Northern Ireland": 2126.09,
 }
 class Costs:
    """
@ -42,7 +61,7 @@ class Costs:
    # We use a higher contingency rate for internal wall insulation because of the potential for issues with moving
    # fittings and trimming doors, as well as scope for damage to the existing wall during preparation.
-    IWI_CONTINGENCY = 0.15
+    IWI_CONTINGENCY = 0.2
    # Where there is more uncertainty, a higher contingency rate is used
    HIGH_RISK_CONTINGENCY = 0.2
@ -58,12 +77,22 @@ class Costs:
    # have a preliminaries of 12-14% so we use 12% as the median for the preliminaries rate.
    # For External wall insulation (EWI), we use 25% as the preliminaries rate if we think the property might
    # need scaffolding, otherwise we use 20%. This is to account for any site preparation that might be required
-    EWI_NO_SCAFFOLDING_PRELIMINARIES = 0.15
+    EWI_NO_SCAFFOLDING_PRELIMINARIES = 0.2
-    EWI_SCAFFOLDING_PRELIMINARIES = 0.20
+    EWI_SCAFFOLDING_PRELIMINARIES = 0.25
    VAT_RATE = 0.2
    PROFIT_MARGIN = 0.2
    # Based on this greenmatch article, on average, a Sash window is around 50% more expensive than a casement window.
    # Therefore, for a conservative cost estimate, and allowance for a more premium window type, we inflate the material
    # cost of the windows to allow for a sash window type
    # https://www.greenmatch.co.uk/windows/double-glazing/cost
    SASH_WINDOW_INFLATION_FACTOR = 1.5
    # Typically, secondary glazing can be installed for 25% of the cost of double glazed windows - to be conservative,
    # we scale the cost by half
    SECONDARY_GLAZING_SCALING_FACTOR = 0.5
    def __init__(self, property_instance):
        """
        Initializes the Costs class with a property instance.
@ -147,12 +176,16 @@ class Costs:
        """
        material_cost_per_m2 = material["material_cost"]
        # We inflate material costs due to recent price increases
        material_cost_per_m2 = material_cost_per_m2 * 1.5
        base_material_cost = material_cost_per_m2 * floor_area
        labour_cost = material["labour_cost"] * floor_area * self.labour_adjustment_factor
        subtotal_before_profit = base_material_cost + labour_cost
-        contingency_cost = subtotal_before_profit * self.CONTINGENCY
+        # We use high risk contingency because of the possibility of access issues and clearing existing insulation
        contingency_cost = subtotal_before_profit * self.HIGH_RISK_CONTINGENCY
        preliminaries_cost = subtotal_before_profit * self.PRELIMINARIES
        profit_cost = subtotal_before_profit * self.PROFIT_MARGIN
@ -719,3 +752,121 @@ class Costs:
            "labour_days": labour_days,
            "labour_cost": labour_costs
        }
    def window_glazing(self, number_of_windows, material, is_secondary_glazing=False):
        """
        We characterise the jobs to be done for window glazing as the following:
        1) Initial Assessment and Measurements: Before removing the existing window, it's essential to assess the
        condition of the window frame and opening. Precise measurements are taken to ensure the new double glazed
        windows fit perfectly.
        2) Remove the Existing Window: This involves carefully dismantling and removing the old single glazed window. It
        requires skill to avoid damaging the surrounding wall and the window frame (if it's to be reused).
        3) Dispose of the Existing Window: The old window, especially if it's a single glazed unit, needs to be
        disposed of responsibly. Glass and other materials should be recycled where possible.
        4) Surface Preparation: The window opening might need some preparation, especially if there's damage or if
        adjustments are needed to accommodate the new window. This can include repairing or replacing parts of the
        window frame, sealing gaps, and ensuring the opening is level and square.
        5) Install the Window Frame (if new frames are used): In many cases, double glazed windows come with their
        frames. These need to be installed securely into the window opening. This process involves aligning, leveling,
        and fixing the frame in place.
        6) Install the Window Sill: If a new window sill is required, it is installed at this stage. It needs to be
        correctly aligned with the frame and securely attached.
        7) Install the Double Glazed Glass Units: The glass units are carefully inserted into the frame. This step
        requires precision to ensure a snug fit without causing stress on the glass, which could lead to cracking or
        breaking.
        8) Sealing and Weatherproofing: After the glass units are in place, it's crucial to seal around the frame and
        between the glass and frame to ensure there are no drafts and that the installation is weather-tight. This
        typically involves applying silicone sealant or other appropriate sealing materials.
        9) Finishing Touches: This includes any cosmetic work, such as trimming, painting, or staining the frame and
        sill to match the rest of the property. It might also involve cleaning up any mess created during the
        installation.
        10) Inspection and Testing: Finally, the new windows should be inspected to ensure they open, close, and lock
        correctly. This is also a good time to check for any gaps or issues with the sealing.
        For this cost estimation process, we factor in initial assement into the preliminaries
        """
        material_cost = material["material_cost"] * number_of_windows
        labour_cost = (
            material["labour_cost"] * number_of_windows * self.labour_adjustment_factor
        )
        multiplier = self.SECONDARY_GLAZING_SCALING_FACTOR if is_secondary_glazing else (
            self.SASH_WINDOW_INFLATION_FACTOR)
        subtotal = (material_cost + labour_cost) * multiplier
        contingency_cost = subtotal * self.CONTINGENCY
        preliminaries_cost = subtotal * self.PRELIMINARIES
        profit_cost = subtotal * self.PROFIT_MARGIN
        subtotal_before_vat = subtotal + contingency_cost + preliminaries_cost + profit_cost
        vat_cost = subtotal_before_vat * self.VAT_RATE
        total_cost = subtotal_before_vat + vat_cost
        labour_hours = material["labour_hours_per_unit"] * number_of_windows
        labour_hours = labour_hours * self.SECONDARY_GLAZING_SCALING_FACTOR if is_secondary_glazing else labour_hours
        # Assume a team of 2
        labour_days = (labour_hours / 8) / 2
        return {
            "total": total_cost,
            "subtotal": subtotal_before_vat,
            "vat": vat_cost,
            "contingency": contingency_cost,
            "preliminaries": preliminaries_cost,
            "material": material_cost,
            "profit": profit_cost,
            "labour_hours": labour_hours,
            "labour_cost": labour_cost,
            "labour_days": labour_days
        }
    def solar_pv(self, wattage: float):
        """
        Calculates the total cost for solar PV based on data provided by the MCS dashboard, which contains
        costing data for installations of renewable and clean energy measures.
        The data in the dashboard is filtered on domestic building installations and then the data across the
        various regions is manually collected. There is currently no automated way to get the data from the MCS
        dashboard
        Price can also be benchmarked against this checkatrade article:
        https://www.checkatrade.com/blog/cost-guides/cost-of-solar-panel-installation/
        :param wattage: Peak wattage of the solar PV system
        :return: Dictionary with "total", "subtotal" (total with VAT removed), "vat", "labour_hours" and
        "labour_days"
        """
        # Get the cost data relevant to the region. If the region is missing or has no entry in the MCS data, we
        # fall back to the un-suffixed average rather than failing the whole recommendation run
        regional_cost = MCS_SOLAR_PV_COST_DATA.get(
            f"average_cost_per_kwh-{self.region}",
            MCS_SOLAR_PV_COST_DATA["average_cost_per_kwh"],
        )
        kw = wattage / 1000
        total_cost = kw * regional_cost
        # The MCS figure is treated as VAT inclusive, so the VAT is backed out of the total
        subtotal_before_vat = total_cost / (1 + self.VAT_RATE)
        vat = total_cost - subtotal_before_vat
        # Labour hours are based on estimates from online research but an average team seems to consist of 3 people
        # and most jobs take around 2 days. Assuming an 8 hour day for 3 people across 2 days, gives us 48 hours of
        # labour
        team_size = 3
        hours_per_day = 8
        labour_days = 2
        return {
            "total": total_cost,
            "subtotal": subtotal_before_vat,
            "vat": vat,
            "labour_hours": team_size * hours_per_day * labour_days,
            "labour_days": labour_days,
        }
--- a/recommendations/Recommendations.py
+++ b/recommendations/Recommendations.py
@ -6,6 +6,8 @@ from recommendations.RoofRecommendations import RoofRecommendations
 from recommendations.VentilationRecommendations import VentilationRecommendations
 from recommendations.FireplaceRecommendations import FireplaceRecommendations
 from recommendations.LightingRecommendations import LightingRecommendations
 from recommendations.SolarPvRecommendations import SolarPvRecommendations
 from recommendations.WindowsRecommendations import WindowsRecommendations
 from backend.ml_models.AnnualBillSavings import AnnualBillSavings
@ -35,6 +37,8 @@ class Recommendations:
        )
        self.fireplace_recommender = FireplaceRecommendations(property_instance=property_instance)
        self.lighting_recommender = LightingRecommendations(property_instance=property_instance, materials=materials)
        self.windows_recommender = WindowsRecommendations(property_instance=property_instance, materials=materials)
        self.solar_recommender = SolarPvRecommendations(property_instance=property_instance)
    def recommend(self):
@ -77,6 +81,16 @@ class Recommendations:
        if self.lighting_recommender.recommendation:
            property_recommendations.append(self.lighting_recommender.recommendation)
        # Windows recommendations
        self.windows_recommender.recommend()
        if self.windows_recommender.recommendation:
            property_recommendations.append(self.windows_recommender.recommendation)
        # Solar recommendations
        self.solar_recommender.recommend()
        if self.solar_recommender.recommendation:
            property_recommendations.append(self.solar_recommender.recommendation)
        # We insert temporary ids into the recommendations which is important for the optimiser later
        property_recommendations = self.insert_temp_recommendation_id(property_recommendations)
@ -148,6 +162,8 @@ class Recommendations:
                    # For the moment, we cap the number of SAP points that can be achieved by ventilation at 2
                    rec["sap_points"] = min(rec["sap_points"], VentilationRecommendations.SAP_LIMIT)
                # Round to 2 decimal places
                rec["sap_points"] = round(rec["sap_points"], 2)
                rec["co2_equivalent_savings"] = float(property_instance.data["co2-emissions-current"]) - new_carbon
                # Energy consumption current is per meter squared, so we need to multiply by the floor area to get
--- a/recommendations/SolarPvRecommendations.py
+++ b/recommendations/SolarPvRecommendations.py
@ -0,0 +1,65 @@
 import numpy as np
 from recommendations.Costs import Costs
 class SolarPvRecommendations:
    """
    Builds a solar PV recommendation for a property, sized from the usable roof area and priced with
    Costs.solar_pv. The result is stored in self.recommendation, which stays empty when nothing is recommended.
    """
    # Approximate area of the solar panels (same unit as property.solar_pv_roof_area)
    SOLAR_PANEL_AREA = 1.6
    # Wattage per panel
    SOLAR_PANEL_WATTAGE = 360

    def __init__(self, property_instance):
        """
        :param property_instance: Instance of the Property class, for the home associated to property_id
        """
        self.property = property_instance
        self.costs = Costs(self.property)
        self.recommendation = []

    def _has_existing_solar_pv(self):
        """
        Returns True when the photo-supply field indicates that the property already has solar PV.
        If there is no existing solar PV, the photo-supply field will be None, 0 or a data anomaly marker.
        """
        photo_supply = self.property.data["photo-supply"]
        if photo_supply is None or photo_supply == 0:
            return False
        anomalies = self.property.DATA_ANOMALY_MATCHES
        # NOTE(review): DATA_ANOMALY_MATCHES looks like a collection of anomaly markers - confirm against
        # Property. When it is a collection we test membership (comparing the value with the collection itself
        # would never match); a single marker is compared directly.
        if isinstance(anomalies, (list, tuple, set, frozenset)):
            return photo_supply not in anomalies
        return photo_supply != anomalies

    def recommend(self):
        """
        We check if a property is potentially suitable for solar PV based on the following criteria:
        - The property is a house or bungalow
        - The property has a flat or pitched roof, or a roof room
        - The property does not have existing solar pv
        - The roof can fit at least one panel
        If all of these hold, self.recommendation is set to a single solar_pv recommendation
        :return:
        """
        is_valid_property_type = self.property.data["property-type"] in ["House", "Bungalow"]
        is_valid_roof_type = (
            self.property.roof["is_flat"] or self.property.roof["is_pitched"] or self.property.roof["is_roof_room"]
        )
        if not is_valid_property_type or not is_valid_roof_type or self._has_existing_solar_pv():
            return
        # We now have a property which is potentially suitable for solar PV
        number_solar_panels = np.floor(self.property.solar_pv_roof_area / self.SOLAR_PANEL_AREA)
        # A roof that cannot fit a single panel (or has an unknown/NaN area) would otherwise produce a
        # zero-cost "0 kWp" recommendation, so we stop here
        if not number_solar_panels >= 1:
            return
        # Plain int so that the wattage and the derived costs are not numpy scalars
        solar_panel_wattage = int(number_solar_panels) * self.SOLAR_PANEL_WATTAGE
        # Given the wattage, we estimate the cost of the solar PV system. This is based on the MCS database
        # of solar PV installations
        cost_result = self.costs.solar_pv(wattage=solar_panel_wattage)
        kw = int(np.round(solar_panel_wattage / 1000))
        self.recommendation = [
            {
                "parts": [],
                "type": "solar_pv",
                "description": f"Install a {kw} kilowatt-peak (kWp) solar photovoltaic (PV) panel system on the roof",
                "starting_u_value": None,
                "new_u_value": None,
                "sap_points": None,
                **cost_result,
                # This is required for simulating the SAP impact. solar_pv_percentage is between 0 & 1 so we scale
                # back up here
                "photo_supply": 100 * self.property.solar_pv_percentage
            }
        ]
--- a/recommendations/WindowsRecommendations.py
+++ b/recommendations/WindowsRecommendations.py
@ -0,0 +1,97 @@
 from typing import List
 import numpy as np
 from backend.Property import Property
 from recommendations.Costs import Costs
 class WindowsRecommendations:
    """
    Builds a window glazing recommendation for a property, priced with Costs.window_glazing. The result is
    stored in self.recommendation, which stays empty when nothing is recommended.
    """
    # If the property has existing glazing, we scale down the number of windows that need to be glazed
    COVERAGE_MAP = {
        # If most of the windows have already been glazed, we assume that 2/3 are glazed and 1/3 are remaining to be
        # glazed
        "most": 0.33,
        # If glazing is partial, we assume 50/50 split between glazed and unglazed
        "partial": 0.5
    }

    def __init__(self, property_instance: "Property", materials: List):
        """
        :param property_instance: Instance of the Property class for the home being assessed
        :param materials: List of material records; exactly one must have type "windows_glazing"
        :raises ValueError: If there is not exactly one window glazing material
        """
        self.property = property_instance
        self.costs = Costs(self.property)
        self.recommendation = []
        glazing_materials = [
            material for material in materials if material["type"] == "windows_glazing"
        ]
        if len(glazing_materials) != 1:
            raise ValueError("There should only be one window glazing material")
        self.glazing_material = glazing_materials[0]

    def _remaining_window_fraction(self):
        """
        Returns the fraction of the windows which still need glazing. The recorded multi-glaze proportion is
        used when it is available, otherwise we fall back to COVERAGE_MAP for the glazing coverage.
        """
        proportion = self.property.data["multi-glaze-proportion"]
        if proportion not in ("", None):
            # float rather than int so that values such as "50.0" are also accepted
            return 1 - (float(proportion) / 100)
        return self.COVERAGE_MAP.get(self.property.windows["glazing_coverage"], 1)

    def recommend(self):
        """
        This method will recommend the best possible glazing options for a property.
        In order to do this, we need to estimate the number of windows that the home has. This information will be
        stored in the property object, under property.number_of_windows
        :raises ValueError: If the property has no number of windows
        :return:
        """
        # If the property has restricted measures (e.g. it is in a conservation area or is a listed building), it
        # becomes more difficult to install double glazing. It is still possible but is not practical as it
        # requires planning permission and might require a more expensive window type, such as timber.
        # Therefore, we price secondary glazing for these properties instead.
        number_of_windows = self.property.number_of_windows
        is_secondary_glazing = self.property.restricted_measures or (
            self.property.windows["glazing_type"] == "secondary"
        )
        if not number_of_windows:
            raise ValueError("Number of windows not specified")
        if self.property.windows["has_glazing"] and self.property.windows["glazing_coverage"] == "full":
            return
        # We scale the number of windows based on the proportion of existing glazing
        number_of_windows = int(np.ceil(number_of_windows * self._remaining_window_fraction()))
        # If every window is already glazed there is no job to price
        if number_of_windows <= 0:
            return
        # We then price the job based on the number of windows that there are
        cost_result = self.costs.window_glazing(
            number_of_windows=number_of_windows,
            material=self.glazing_material,
            is_secondary_glazing=is_secondary_glazing
        )
        glazing_type = "secondary glazing" if is_secondary_glazing else "double glazing"
        if self.property.windows["glazing_coverage"] in ["partial", "most"]:
            description = f"Install {glazing_type} to the remaining windows"
        else:
            description = f"Install {glazing_type} to all windows"
        if self.property.is_listed:
            description += ". Secondary glazing recommended due to listed building status"
        elif self.property.is_heritage:
            description += ". Secondary glazing recommended due to heritage building status"
        elif self.property.in_conservation_area:
            description += ". Secondary glazing recommended due to conservation area status"
        self.recommendation = [
            {
                "parts": [],
                "type": "windows_glazing",
                "description": description,
                "starting_u_value": None,
                "new_u_value": None,
                "sap_points": None,
                **cost_result,
                "is_secondary_glazing": is_secondary_glazing
            }
        ]
--- a/recommendations/recommendation_utils.py
+++ b/recommendations/recommendation_utils.py
@ -1,4 +1,5 @@
 import math
 from datetime import datetime
 from copy import deepcopy
 from typing import Union
@ -565,7 +566,7 @@ def estimate_external_wall_area(num_floors, floor_height, perimeter, built_form)
        'Detached': 4,
    }
-    exposed_wall_area = total_wall_area * (number_exposed_walls[built_form] / 4)
+    exposed_wall_area = total_wall_area * (number_exposed_walls.get(built_form, 3) / 4)
    return exposed_wall_area
@ -669,3 +670,87 @@ def esimtate_pitched_roof_area(floor_area: float, floor_height: float) -> float:
    area = 2 * (slope * wall_width)
    return area
 def estimate_windows(
    property_type, built_form, construction_age_band, floor_area, number_habitable_rooms, extension_count
 ):
    """
    Heuristically estimates the number of windows in a property.

    :param property_type: Property type, e.g. "House", "Bungalow", "Flat" or "Maisonette"
    :param built_form: Built form, e.g. "Detached", "Semi-Detached", "End-Terrace" or "Mid-Terrace"
    :param construction_age_band: Construction age band label of the property
    :param floor_area: Total floor area of the property
    :param number_habitable_rooms: Number of habitable rooms
    :param extension_count: Number of extensions
    :return: Estimated number of windows
    :raises ValueError: If the estimate comes out negative
    """
    # Base window count based on habitable rooms
    window_count = number_habitable_rooms
    # Additional windows for non-habitable rooms (e.g., kitchen, bathroom)
    # Assuming most houses will have at least one kitchen and one bathroom
    # Scale non-habitable windows with the number of habitable rooms
    non_habitable_base = 2  # Base for kitchen and bathroom
    extra_non_habitable = max(0, (number_habitable_rooms - 3) // 2)  # Extra for large houses
    window_count += non_habitable_base + extra_non_habitable
    # Adjustments based on built form and property type
    if property_type in ["House", "Bungalow"] and built_form in ["Semi-Detached", "Detached"]:
        built_form_lookup = {
            "Semi-Detached": 3,
            "Detached": 4,
        }
    else:
        # For everything else (flats, maisonettes and terraced houses/bungalows), adjustments are smaller
        built_form_lookup = {
            "Mid-Terrace": 0,
            "End-Terrace": 1,
            "Semi-Detached": 1,
            "Detached": 2,
        }
    window_count += built_form_lookup.get(built_form, 0)
    # Adjust for floor area (larger floor area might indicate more rooms/windows). Properties under 85 are
    # small to medium and keep the standard count
    if floor_area > 120:  # Very large properties
        # Likely to have significantly more or larger rooms
        window_count += 2
    elif floor_area >= 85:  # Medium to large properties
        # More rooms or larger rooms likely, potentially more windows
        window_count += 1
    # Adjust for construction age band
    if construction_age_band in ["England and Wales: before 1900", "England and Wales: 1900-1929"]:
        # Older houses with smaller, more numerous windows
        window_count += 1
    # Adjust for extensions (each extension might add windows)
    window_count += extension_count
    # Adjustments for specific property types
    if property_type in ["Flat", "Maisonette"]:
        # Flats might have fewer windows due to shared walls
        # Maisonettes might follow a similar pattern to flats or small houses
        window_count -= 1
    # Ensure window count is not negative
    if window_count < 0:
        raise ValueError("Window count cannot be negative.")
    return window_count
 def calculate_cavity_age(newest_epc, older_epcs, cleaned):
    """
    Calculates the number of days since the most recent EPC which recorded a filled cavity wall.

    :param newest_epc: The newest EPC record; "walls-description" and "lodgement-date" are read from it
    :param older_epcs: List of older EPC records for the same property, with the same keys
    :param cleaned: Cleaned description mappings; cleaned["walls-description"] is a list of dicts with
    "original_description", "is_cavity_wall" and "is_filled_cavity"
    :return: Age in days, or None if none of the EPCs map to a filled cavity wall
    """
    # Index the cleaned mappings once, keeping the first entry for each original description
    walls_lookup = {}
    for entry in cleaned["walls-description"]:
        walls_lookup.setdefault(entry["original_description"], entry)
    filled_cavity_dates = []
    for epc in [newest_epc] + older_epcs:
        # Get the cleaned mapping; EPCs with a description we have no mapping for are ignored
        mapped = walls_lookup.get(epc["walls-description"])
        if mapped is None:
            continue
        if mapped["is_cavity_wall"] and mapped["is_filled_cavity"]:
            # Parse before comparing so that the latest date does not depend on the string format
            filled_cavity_dates.append(pd.to_datetime(epc["lodgement-date"]))
    # Without a filled cavity record there is no date to measure from
    if not filled_cavity_dates:
        return None
    # NOTE(review): this measures from the most recent filled cavity record. If the intent is how long the
    # cavity has been filled, the earliest record may be the right reference - confirm
    cavity_age = (datetime.now() - max(filled_cavity_dates)).days
    return cavity_age
--- a/recommendations/tests/test_costs.py
+++ b/recommendations/tests/test_costs.py
@ -1,6 +1,7 @@
 from recommendations.Costs import Costs
 from unittest.mock import Mock
 import datetime
 import pytest
 class TestCosts:
@ -58,9 +59,9 @@ class TestCosts:
        )
        assert loft_results == {
-            'total': 430.21445040000003, 'subtotal': 358.512042, 'vat': 71.70240840000001,
+            'total': 639.4133610000001, 'subtotal': 532.8444675000001, 'vat': 106.56889350000002,
-            'contingency': 25.608003000000004, 'preliminaries': 25.608003000000004, 'material': 198.29923000000002,
+            'contingency': 71.045929, 'preliminaries': 35.5229645, 'material': 297.448845, 'profit': 71.045929,
-            'profit': 51.21600600000001, 'labour_hours': 3.685, 'labour_cost': 57.7808, 'labour_days': 0.460625
+            'labour_hours': 3.685, 'labour_cost': 57.7808, 'labour_days': 0.460625
        }
    def test_internal_wall_insulation(self):
@ -176,11 +177,9 @@ class TestCosts:
        )
        assert iwi_results == {
-            'total': 6650.889456921851, 'subtotal': 5542.407880768209, 'vat': 1108.4815761536418,
+            'total': 6880.2304726777775, 'subtotal': 5733.525393898148, 'vat': 1146.7050787796295,
-            'contingency': 573.3525393898148, 'preliminaries': 382.2350262598765,
+            'contingency': 764.470052519753, 'preliminaries': 382.2350262598765, 'material': 1747.488000615996,
-            'material': 1747.488000615996,
+            'profit': 764.470052519753, 'labour_hours': 88.23759388401297, 'labour_days': 2.757424808875405,
            'profit': 764.470052519753, 'labour_hours': 88.23759388401297,
            'labour_days': 2.757424808875405,
            'labour_cost': 1927.1602026551818
        }
@ -414,8 +413,8 @@ class TestCosts:
        )
        assert ewi_results == {
-            'total': 14561.688989159393, 'subtotal': 12134.740824299493, 'vat': 2426.948164859899,
+            'total': 15047.078622131372, 'subtotal': 12539.232185109477, 'vat': 2507.8464370218953,
-            'contingency': 808.9827216199662, 'preliminaries': 1617.9654432399325, 'material': 4020.565147410677,
+            'contingency': 808.9827216199662, 'preliminaries': 2022.4568040499155, 'material': 4020.565147410677,
            'profit': 1617.9654432399325, 'labour_hours': 187.02533486285358, 'labour_days': 5.8445417144641745,
            'labour_cost': 3921.5600094613983
        }
@ -499,3 +498,48 @@ class TestCosts:
                                           'labour_hours': 24.79, 'labour_days': 1.549375, 'labour_cost': 186.9032}
        assert costs.labour_adjustment_factor == 0.88
    # Parametrised property stub for the regional tests: (county, expected region, expected total cost)
    @pytest.fixture(params=[
        ("Northamptonshire", "East Midlands", 7927.44),
        ("Greater London Authority", "Inner London", 10475.0),
        ("Adur", "South East England", 8333.32),
        ("Bournemouth", "South West England", 8452),
        ("Basildon", "East of England", 7895.44),
        ("Birmingham", "West Midlands", 7706.2),
        ("County Durham", "North East England", 8113.96),
        ("Allerdale", "North West England", 6481.68),
        ("York", "Yorkshire and the Humber", 8243.6),
        ("Cardiff", "Wales", 7595.32),
        ("Glasgow City", "Scotland", 7871.88),
        ("Belfast", "Northern Ireland", 8504.36)
    ])
    def mock_property_with_region(self, request):
        """
        Yields one case per parameter: a mocked property whose data only contains the county, together with
        the region and the total cost that the test expects for it.
        """
        property_stub = Mock()
        property_stub.data = {"county": request.param[0]}
        return (property_stub, *request.param[1:])
    # Solar PV total cost for a range of system sizes in a single county
    @pytest.mark.parametrize("wattage, expected_cost", [
        (3000, 5945.58),
        (4000, 7927.44),
        (5000, 9909.3),
        (6000, 11891.16),
    ])
    def test_solar_pv_different_wattages(self, wattage, expected_cost):
        """
        The expected totals correspond to 1981.86 per kW, so the total should grow in proportion to the wattage.
        """
        property_stub = Mock()
        property_stub.data = {"county": "Mansfield"}
        total = Costs(property_stub).solar_pv(wattage)["total"]
        assert total == pytest.approx(expected_cost, rel=0.01)
    def test_solar_pv_regional_variation(self, mock_property_with_region):
        """
        For each county in the fixture, checks the region that Costs resolves and the total cost of a
        4000 W system in that region.
        """
        property_instance, expected_region, expected_cost = mock_property_with_region
        costs = Costs(property_instance)
        # The region has to be right before the price is worth checking
        assert costs.region == expected_region
        # Fixed wattage of 4000 for every region
        total = costs.solar_pv(4000)["total"]
        assert total == pytest.approx(expected_cost, rel=0.01)
--- a/recommendations/tests/test_data/materials.py
+++ b/recommendations/tests/test_data/materials.py
@ -942,8 +942,24 @@ materials = [
             'https://www.hamuch.com/cost/led-spot-light#:~:text=It%20costs%20an%20average%20of,'
             'will%20drive%20up%20the%20cost.',
     'created_at': datetime.datetime(2023, 11, 28, 22, 49, 12, 244907), 'is_active': True, 'prime_material_cost': None,
-     'material_cost': 20.0, 'labour_cost': 46.0, 'labour_hours_per_unit': 0.8, 'plant_cost': 0.0, 'total_cost': 66.0,
+     'material_cost': 20.0, 'labour_cost': 15.0, 'labour_hours_per_unit': 0.8, 'plant_cost': 0.0, 'total_cost': 66.0,
     'notes': 'We estimate the unit economics from the checkatrade article. We assume that the average job consists '
              'of installing 6 lights based on the hamuch article. We use the median value of 400 for a job of 6 '
-              'lights'}
+              'lights'},
    {'id': 1235, 'type': 'windows_glazing',
     'description': 'uPVC windows; Profile 22 or other equal and approved; reinforced where appropriate with '
                    'aluminium alloy; in refurbishment work, including standard ironmongery; sills and factory glazed '
                    'with low-e 24 mm double glazing; removing existing windows and fixing new in position; including '
                    'lugs plugged and screwed to brickwork or blockwork; Casement/fixed light; including vents; '
                    'e.p.d.m. glazing gaskets and weather seals; 1770 mm × 1200 mm; ref P312WW',
     'depth': 0.0, 'depth_unit': None, 'cost': None, 'cost_unit': 'gbp_per_unit', 'r_value_per_mm': None,
     'r_value_unit': 'square_meter_kelvin_per_watt', 'thermal_conductivity': None, 'thermal_conductivity_unit': None,
     'link': 'SPONs',
     'created_at': datetime.datetime(2023, 11, 28, 22, 49, 12, 244907),
     'is_active': True, 'prime_material_cost': 176.55,
     'material_cost': 182.25, 'labour_cost': 163.36, 'labour_hours_per_unit': 6.5, 'plant_cost': 0.0,
     'total_cost': 345.61,
     'notes': 'This is the cost of removal of existing windows and installation of new windows. This is a casement '
              'style window, which is the most common but also the cheapest style. In the cost estimation framework, '
              'we can inflate prices for different finishes, to be conservative on price.'}
 ]
--- a/recommendations/tests/test_fireplace_recommendations.py
+++ b/recommendations/tests/test_fireplace_recommendations.py
@ -6,7 +6,7 @@ from recommendations.FireplaceRecommendations import FireplaceRecommendations
 class TestFirepaceRecommendations:
    def test_no_fireplaces(self):
-        property_instance = Property(id=0, address1="fake", postcode="fake", epc_client=Mock())
+        property_instance = Property(id=0, address="fake", postcode="fake")
        property_instance.data = {
            "number-open-fireplaces": 0
        }
@ -22,7 +22,7 @@ class TestFirepaceRecommendations:
        assert recommender.recommendation is None
    def test_one_fireplace(self):
-        property_instance = Property(id=0, address1="fake", postcode="fake", epc_client=Mock())
+        property_instance = Property(id=0, address="fake", postcode="fake")
        property_instance.data = {
            "number-open-fireplaces": 1
        }
@ -40,7 +40,7 @@ class TestFirepaceRecommendations:
        assert recommender.recommendation[0]["total"] == 300
    def test_multiple_fireplaces(self):
-        property_instance = Property(id=0, address1="fake", postcode="fake", epc_client=Mock())
+        property_instance = Property(id=0, address="fake", postcode="fake")
        property_instance.data = {
            "number-open-fireplaces": 3
        }
--- a/recommendations/tests/test_floor_recommendations.py
+++ b/recommendations/tests/test_floor_recommendations.py
@ -21,16 +21,6 @@ class TestFloorRecommendations:
        ) as f:
            return pickle.load(f)
    @pytest.fixture
    def mock_floor_rec_instance(self):
        # Creating a mock instance of WallRecommendations with the necessary attributes
        property_mock = Mock()
        property_mock.full_sap_epc = {"lodgement-date": "2000-01-01"}
        property_mock.data = {"county": "York"}
        mock_wall_rec_instance = FloorRecommendations(property_mock, materials)
        return mock_wall_rec_instance
    def test_init(self, input_properties):
        input_properties[0].insulation_floor_area = 50
        input_properties[0].insulation_wall_area = 90
@ -68,6 +58,7 @@ class TestFloorRecommendations:
        input_properties[2].wall_type = "solid brick"
        input_properties[2].floor_type = "suspended"
        input_properties[2].number_of_floors = 1
        input_properties[2].floor_level = 0
        recommender = FloorRecommendations(property_instance=input_properties[2], materials=materials)
        assert recommender.estimated_u_value is None
@ -93,6 +84,8 @@ class TestFloorRecommendations:
        input_properties[3].insulation_floor_area = 100
        input_properties[3].insulation_wall_area = 100
        input_properties[3].number_of_floors = 1
        input_properties[3].floor_level = 0
        recommender = FloorRecommendations(property_instance=input_properties[3], materials=materials)
        assert recommender.estimated_u_value is None
        recommender.recommend()
@ -114,6 +107,7 @@ class TestFloorRecommendations:
        input_properties[4].wall_type = "solid brick"
        input_properties[4].floor_type = "solid"
        input_properties[4].number_of_floors = 1
        input_properties[4].floor_level = 0
        # In this case, we have no county, so it should use the local-authority-label if possible
        input_properties[4].data["county"] = ""
--- a/recommendations/tests/test_lighting_recommendations.py
+++ b/recommendations/tests/test_lighting_recommendations.py
@ -9,7 +9,7 @@ from recommendations.tests.test_data.materials import materials
 class TestLightingRecommendations:
    def test_init_invalid_materials(self):
-        input_property0 = Property(id=1, postcode="F4k3 6", address1="623 fake street", epc_client=Mock())
+        input_property0 = Property(id=1, postcode="F4k3 6", address="623 fake street")
        input_property0.lighting = {"low_energy_proportion": 0}
        input_property0.data = {"county": "Greater London Authority"}
        # Test for invalid materials
@ -18,7 +18,7 @@ class TestLightingRecommendations:
    def test_recommend_no_action_needed(self):
        # Case where no recommendation is needed
-        input_property1 = Property(id=1, postcode="F4k3 6", address1="623 fake street", epc_client=Mock())
+        input_property1 = Property(id=1, postcode="F4k3 6", address="623 fake street")
        input_property1.lighting = {"low_energy_proportion": 100}
        input_property1.data = {"county": "Greater London Authority"}
@ -28,7 +28,7 @@ class TestLightingRecommendations:
    def test_recommend_action_needed(self):
        # Case where recommendation is needed
-        input_property1 = Property(id=1, postcode="F4k3 6", address1="623 fake street", epc_client=Mock())
+        input_property1 = Property(id=1, postcode="F4k3 6", address="623 fake street")
        input_property1.lighting = {"low_energy_proportion": 100}
        input_property1.data = {"county": "Greater London Authority"}
        input_property1.lighting = {"low_energy_proportion": 0.80}
@ -40,8 +40,7 @@ class TestLightingRecommendations:
        assert lr.recommendation == [
            {'parts': [], 'type': 'low_energy_lighting', 'description': 'Install low energy lighting in 4 outlets',
-             'starting_u_value': None, 'new_u_value': None, 'sap_points': 0.4, 'total': 458.976, 'subtotal': 382.48,
+             'starting_u_value': None, 'new_u_value': None, 'sap_points': 0.4, 'total': 240.24,
-             'vat': 76.49600000000001, 'contingency': 27.320000000000007, 'preliminaries': 27.320000000000007,
+             'subtotal': 200.20000000000002, 'vat': 40.040000000000006, 'contingency': 14.3, 'preliminaries': 14.3,
-             'material': 80.0, 'profit': 54.640000000000015, 'labour_hours': 3.2, 'labour_days': 0.4,
+             'material': 80.0, 'profit': 28.6, 'labour_hours': 3.2, 'labour_days': 0.4, 'labour_cost': 63.0}
             'labour_cost': 193.20000000000002}
        ]
--- a/recommendations/tests/test_recommendation_utils.py
+++ b/recommendations/tests/test_recommendation_utils.py
@ -427,3 +427,106 @@ def test_external_wall_area():
    for num_floors, floor_height, perimeter, built_form, expected in test_cases:
        result = recommendation_utils.estimate_external_wall_area(num_floors, floor_height, perimeter, built_form)
        assert result == expected, f"Test failed for {built_form}: Expected {expected}, got {result}"
 def test_estimate_windows():
    """
    Check estimate_windows against a set of reference properties.
    Each scenario pairs the keyword arguments describing one property with the number of windows we accept as a
    reasonable estimate for it. Scenarios are checked in order, so the first mismatch fails the test.
    """
    scenarios = [
        # Based on data from an EPR that has 4 windows
        (
            dict(
                property_type="Flat",
                built_form="Semi-Detached",
                construction_age_band="England and Wales: 1976-1982",
                floor_area=37,
                number_habitable_rooms=2,
                extension_count=0,
            ),
            4,
        ),
        # Based on data from an EPR that has 7 windows, however two of the windows were very small, having areas of
        # 0.21m^2 and 0.3m^2 respectively. We see 6 as a reasonable estimate for the number of windows
        (
            dict(
                property_type="House",
                built_form="Mid-Terrace",
                construction_age_band="England and Wales: 1950-1966",
                floor_area=69,
                number_habitable_rooms=4,
                extension_count=0,
            ),
            6,
        ),
        # Based on data from an EPR on a bungalow that has 6 windows. Two of the windows are small, both with an
        # area of 0.4m^2, and so 5 windows is an acceptable estimate
        (
            dict(
                property_type="Bungalow",
                built_form="Mid-Terrace",
                construction_age_band="England and Wales: 1967-1975",
                floor_area=56,
                number_habitable_rooms=3,
                extension_count=0,
            ),
            5,
        ),
        # Based on data from an EPR on an end terrace house that has 8 windows. One of the windows is very small,
        # with an area of 0.25 m^2, and so 7 windows is an acceptable estimate
        (
            dict(
                property_type="House",
                built_form="End-Terrace",
                construction_age_band="England and Wales: 1967-1975",
                floor_area=77.28,
                number_habitable_rooms=4,
                extension_count=0,
            ),
            7,
        ),
        # Based on data from an EPR on a semi-detached house that has 11 windows according to the associated
        # condition report. Right now, we estimate 12 windows for this property
        (
            dict(
                property_type="House",
                built_form="Semi-Detached",
                construction_age_band="England and Wales: 1950-1966",
                floor_area=88.4,
                number_habitable_rooms=5,
                extension_count=0,
            ),
            12,
        ),
        # Based on Khalim's flat which has 3 windows. There is no construction age band on the EPC. The windows are
        # large, so 5 windows is a reasonable estimate
        (
            dict(
                property_type="Flat",
                built_form="",
                construction_age_band="",
                floor_area=100,
                number_habitable_rooms=3,
                extension_count=0,
            ),
            5,
        ),
        # Based on an EPR semi-detached house, though we don't have the exact number of windows. We estimate 10
        (
            dict(
                property_type="House",
                built_form="Semi-Detached",
                construction_age_band="England and Wales: 1967-1975",
                floor_area=85,
                number_habitable_rooms=4,
                extension_count=0,
            ),
            10,
        ),
        # Based on Khalim's parents' flat
        (
            dict(
                property_type="Flat",
                built_form="End-Terrace",
                construction_age_band="",
                floor_area=50,
                number_habitable_rooms=3,
                extension_count=0,
            ),
            5,
        ),
    ]
    for inputs, expected in scenarios:
        estimated = recommendation_utils.estimate_windows(**inputs)
        assert estimated == expected, f"Expected {expected} windows, got {estimated}"
--- a/recommendations/tests/test_roof_recommendations.py
+++ b/recommendations/tests/test_roof_recommendations.py
@ -1,5 +1,4 @@
 from backend.Property import Property
 from unittest.mock import Mock
 from recommendations.RoofRecommendations import RoofRecommendations
 from recommendations.tests.test_data.materials import materials
@ -7,7 +6,7 @@ from recommendations.tests.test_data.materials import materials
 class TestRoofRecommendations:
    def test_loft_insulation_recommendation_no_insulation(self):
-        property_instance = Property(id=0, address1="fake", postcode="fake", epc_client=Mock())
+        property_instance = Property(id=0, address="fake", postcode="fake")
        property_instance.age_band = "F"
        property_instance.insulation_floor_area = 100
        property_instance.roof = {
@ -32,7 +31,7 @@ class TestRoofRecommendations:
        assert len(roof_recommender.recommendations)
    def test_loft_insulation_recommendation_50mm_insulation(self):
-        property_instance2 = Property(id=0, address1="fake", postcode="fake", epc_client=Mock())
+        property_instance2 = Property(id=0, address="fake", postcode="fake")
        property_instance2.age_band = "F"
        property_instance2.insulation_floor_area = 100
        property_instance2.roof = {
@ -54,11 +53,11 @@ class TestRoofRecommendations:
        assert len(roof_recommender2.recommendations) == 1
-        assert roof_recommender2.recommendations[0]["total"] == 1310.56464
+        assert roof_recommender2.recommendations[0]["total"] == 1936.9206000000004
        assert roof_recommender2.recommendations[0]["new_u_value"] == 0.14
        assert roof_recommender2.recommendations[0]["starting_u_value"] == 0.68
-        property_instance3 = Property(id=0, address1="fake", postcode="fake", epc_client=Mock())
+        property_instance3 = Property(id=0, address="fake", postcode="fake")
        property_instance3.age_band = "F"
        property_instance3.insulation_floor_area = 100
        property_instance3.roof = {
@ -83,7 +82,7 @@ class TestRoofRecommendations:
        assert roof_recommender3.recommendations[0]["parts"][0]["depth"] == 270
    def test_loft_insulation_recommendation_150mm_insulation(self):
-        property_instance4 = Property(id=0, address1="fake", postcode="fake", epc_client=Mock())
+        property_instance4 = Property(id=0, address="fake", postcode="fake")
        property_instance4.age_band = "F"
        property_instance4.insulation_floor_area = 100
        property_instance4.roof = {
@ -105,12 +104,12 @@ class TestRoofRecommendations:
        assert len(roof_recommender4.recommendations) == 4
-        assert roof_recommender4.recommendations[0]["total"] == 788.0544
+        assert roof_recommender4.recommendations[0]["total"] == 1128.744
        assert roof_recommender4.recommendations[0]["new_u_value"] == 0.15
        assert roof_recommender4.recommendations[0]["starting_u_value"] == 0.3
        assert roof_recommender4.recommendations[0]["parts"][0]["depth"] == 150
-        property_instance5 = Property(id=0, address1="fake", postcode="fake", epc_client=Mock())
+        property_instance5 = Property(id=0, address="fake", postcode="fake")
        property_instance5.age_band = "F"
        property_instance5.insulation_floor_area = 100
        property_instance5.roof = {
@ -137,7 +136,7 @@ class TestRoofRecommendations:
    def test_loft_insulation_recommendation_270mm_insulation(self):
        # We shouldn't recommend anything in this case
-        property_instance6 = Property(id=0, address1="fake", postcode="fake", epc_client=Mock())
+        property_instance6 = Property(id=0, address="fake", postcode="fake")
        property_instance6.age_band = "F"
        property_instance6.insulation_floor_area = 100
        property_instance6.roof = {
@ -278,7 +277,7 @@ class TestRoofRecommendations:
    #            "Insulate your room roof with 270mm of Example room roof insulation"
    def test_flat_no_insulation(self):
-        property_instance11 = Property(id=11, address1="fake", postcode="fake", epc_client=Mock())
+        property_instance11 = Property(id=11, address="fake", postcode="fake")
        property_instance11.age_band = "D"
        property_instance11.insulation_floor_area = 33.5
        property_instance11.perimeter = 24
@ -307,7 +306,7 @@ class TestRoofRecommendations:
               "Insulate the home's flat roof with 150mm of Ecotherm Eco-Versal General Purpose Insulation Board"
    def test_flat_insulated(self):
-        property_instance12 = Property(id=12, address1="fake", postcode="fake", epc_client=Mock())
+        property_instance12 = Property(id=12, address="fake", postcode="fake")
        property_instance12.age_band = "D"
        property_instance12.insulation_floor_area = 40
        property_instance12.perimeter = 30
@ -331,7 +330,7 @@ class TestRoofRecommendations:
        assert not roof_recommender12.recommendations
    def test_flat_limited_insulation(self):
-        property_instance13 = Property(id=12, address1="fake", postcode="fake", epc_client=Mock())
+        property_instance13 = Property(id=12, address="fake", postcode="fake")
        property_instance13.age_band = "D"
        property_instance13.insulation_floor_area = 40
        property_instance13.perimeter = 40
@ -363,7 +362,7 @@ class TestRoofRecommendations:
               "Insulate the home's flat roof with 150mm of Ecotherm Eco-Versal General Purpose Insulation Board"
    def test_property_above(self):
-        property_instance14 = Property(id=0, address1="fake", postcode="fake", epc_client=Mock())
+        property_instance14 = Property(id=0, address="fake", postcode="fake")
        property_instance14.age_band = "F"
        property_instance14.insulation_floor_area = 100
        property_instance14.roof = {
--- a/recommendations/tests/test_solar_pv_recommendations.py
+++ b/recommendations/tests/test_solar_pv_recommendations.py
@ -0,0 +1,79 @@
 import pytest
 from recommendations.SolarPvRecommendations import SolarPvRecommendations
 from backend.Property import Property
 class TestSolarPvRecommendations:
    """
    Tests for SolarPvRecommendations.recommend(), covering properties that should receive no recommendation and one
    property that should receive a solar PV recommendation.
    """

    @pytest.fixture
    def property_instance_invalid_type(self):
        """
        Property with an invalid property type
        """
        candidate = Property(id=1, address="", postcode="")
        candidate.data = {"property-type": "InvalidType", "county": "Broxbourne", "photo-supply": None}
        candidate.roof = {"is_flat": False, "is_pitched": False, "is_roof_room": False}
        return candidate

    @pytest.fixture
    def property_instance_invalid_roof(self):
        """
        House whose roof is flagged as neither flat, pitched nor roof room
        """
        candidate = Property(id=1, address="", postcode="")
        candidate.data = {"county": "Huntingdonshire", "property-type": "House", "photo-supply": None}
        candidate.roof = {"is_flat": False, "is_pitched": False, "is_roof_room": False}
        return candidate

    @pytest.fixture
    def property_instance_has_solar_pv(self):
        """
        House with a flat roof that already has a photo-supply value recorded, i.e. existing solar pv
        """
        candidate = Property(id=1, address="", postcode="")
        candidate.data = {"photo-supply": "40", "county": "Huntingdonshire", "property-type": "House"}
        candidate.roof = {"is_flat": True}
        return candidate

    @pytest.fixture
    def property_instance_valid_all(self):
        """
        House with a flat roof, no photo-supply and solar pv roof figures set, expected to pass all conditions
        """
        candidate = Property(id=1, address="", postcode="")
        candidate.solar_pv_roof_area = 20
        candidate.solar_pv_percentage = 40
        candidate.data = {"property-type": "House", "photo-supply": None, "county": "Huntingdonshire"}
        candidate.roof = {"is_flat": True}
        return candidate

    def test_invalid_property_type(self, property_instance_invalid_type):
        """
        An invalid property type should produce no recommendation
        """
        recommender = SolarPvRecommendations(property_instance_invalid_type)
        recommender.recommend()
        assert not recommender.recommendation

    def test_invalid_roof_type(self, property_instance_invalid_roof):
        """
        A roof that is not flat, pitched or roof room should produce no recommendation
        """
        recommender = SolarPvRecommendations(property_instance_invalid_roof)
        recommender.recommend()
        assert not recommender.recommendation

    def test_existing_solar_pv(self, property_instance_has_solar_pv):
        """
        A property that already has solar pv should produce no recommendation
        """
        recommender = SolarPvRecommendations(property_instance_has_solar_pv)
        recommender.recommend()
        assert not recommender.recommendation

    def test_valid_all_conditions(self, property_instance_valid_all):
        """
        A property passing all conditions should receive exactly one solar pv recommendation with these values
        """
        recommender = SolarPvRecommendations(property_instance_valid_all)
        recommender.recommend()
        expected_recommendation = {
            "parts": [],
            "type": "solar_pv",
            "description": "Install a 4 kilowatt-peak (kWp) solar photovoltaic (PV) panel system on the roof",
            "starting_u_value": None,
            "new_u_value": None,
            "sap_points": None,
            "total": 8527.0752,
            "subtotal": 7105.896,
            "vat": 1421.1791999999996,
            "labour_hours": 72,
            "labour_days": 2,
            "photo_supply": 4000,
        }
        assert recommender.recommendation == [expected_recommendation]
--- a/recommendations/tests/test_ventilation_recommendations.py
+++ b/recommendations/tests/test_ventilation_recommendations.py
@ -1,5 +1,4 @@
 from backend.Property import Property
 from unittest.mock import Mock
 from recommendations.VentilationRecommendations import VentilationRecommendations
 from recommendations.tests.test_data.materials import materials
@ -7,7 +6,7 @@ from recommendations.tests.test_data.materials import materials
 class TestVentilationRecommendations:
    def test_natural_ventilation(self):
-        input_property1 = Property(id=1, postcode="F4k3 6", address1="623 fake street", epc_client=Mock())
+        input_property1 = Property(id=1, postcode="F4k3 6", address="623 fake street")
        input_property1.data = {"mechanical-ventilation": "natural"}
        recommender = VentilationRecommendations(
@ -28,7 +27,7 @@ class TestVentilationRecommendations:
        assert recommender.recommendation[0]["parts"][0]["quantity"] == 2
    def test_missing_ventilation(self):
-        input_property2 = Property(id=1, postcode="F4k3 6", address1="623 fake street", epc_client=Mock())
+        input_property2 = Property(id=1, postcode="F4k3 6", address="623 fake street")
        input_property2.data = {"mechanical-ventilation": None}
        recommender2 = VentilationRecommendations(
@ -49,7 +48,7 @@ class TestVentilationRecommendations:
        assert recommender2.recommendation[0]["parts"][0]["quantity"] == 2
    def test_nodata_ventilation(self):
-        input_property3 = Property(id=1, postcode="F4k3 6", address1="623 fake street", epc_client=Mock())
+        input_property3 = Property(id=1, postcode="F4k3 6", address="623 fake street")
        input_property3.data = {"mechanical-ventilation": "NO DATA!!"}
        recommender3 = VentilationRecommendations(
@ -70,7 +69,7 @@ class TestVentilationRecommendations:
        assert recommender3.recommendation[0]["parts"][0]["quantity"] == 2
    def test_existing_ventilation_1(self):
-        input_property4 = Property(id=1, postcode="F4k3 6", address1="623 fake street", epc_client=Mock())
+        input_property4 = Property(id=1, postcode="F4k3 6", address="623 fake street")
        input_property4.data = {"mechanical-ventilation": 'mechanical, extract only'}
        recommender4 = VentilationRecommendations(
@ -86,7 +85,7 @@ class TestVentilationRecommendations:
        assert recommender4.has_ventilaion
    def test_existing_ventilation_2(self):
-        input_property5 = Property(id=1, postcode="F4k3 6", address1="623 fake street", epc_client=Mock())
+        input_property5 = Property(id=1, postcode="F4k3 6", address="623 fake street")
        input_property5.data = {"mechanical-ventilation": 'mechanical, supply and extract'}
        recommender5 = VentilationRecommendations(
--- a/recommendations/tests/test_wall_recommendations.py
+++ b/recommendations/tests/test_wall_recommendations.py
@ -231,7 +231,7 @@ class TestWallRecommendationsBase:
 class TestCavityWallRecommensations:
    def test_fill_empty_cavity(self):
-        input_property = Property(id=1, postcode="F4k3", address1="123 fake street", epc_client=Mock())
+        input_property = Property(id=1, postcode="F4k3", address="123 fake street")
        input_property.walls = {
            'original_description': 'Cavity wall, as built, no insulation (assumed)',
            'clean_description': 'Cavity wall, as built, no insulation',
@ -265,7 +265,7 @@ class TestCavityWallRecommensations:
        assert np.isclose(recommender.recommendations[1]["total"], 2004.6600000000003)
    def test_fill_partial_filled_cavity(self):
-        input_property = Property(id=1, postcode="F4k3", address1="123 fake street", epc_client=Mock())
+        input_property = Property(id=1, postcode="F4k3", address="123 fake street")
        input_property.walls = {
            'original_description': 'Cavity wall, as built, partial insulation (assumed)',
            'clean_description': 'Cavity wall, as built, partial insulation',
@ -299,7 +299,7 @@ class TestCavityWallRecommensations:
        assert np.isclose(recommender.recommendations[1]["total"], 1999.9350000000002)
    def test_system_built_wall(self):
-        input_property2 = Property(id=1, postcode="F4k3 2", address1="223 fake street", epc_client=Mock())
+        input_property2 = Property(id=1, postcode="F4k3 2", address="223 fake street")
        input_property2.walls = {
            'original_description': 'System built, as built, no insulation (assumed)',
            'clean_description': 'System built, as built, no insulation',
@ -331,22 +331,22 @@ class TestCavityWallRecommensations:
        assert len(recommender2.recommendations) == 9
        assert recommender2.estimated_u_value == 1
        assert np.isclose(recommender2.recommendations[0]["new_u_value"], 0.19)
-        assert np.isclose(recommender2.recommendations[0]["total"], 15899.9616)
+        assert np.isclose(recommender2.recommendations[0]["total"], 16429.960320000002)
        assert recommender2.recommendations[0]["parts"][0]["type"] == "external_wall_insulation"
        assert recommender2.recommendations[0]["parts"][0]["depth"] == 100
        assert np.isclose(recommender2.recommendations[8]["new_u_value"], 0.23)
-        assert np.isclose(recommender2.recommendations[8]["total"], 10916.3424)
+        assert np.isclose(recommender2.recommendations[8]["total"], 11292.768)
        assert recommender2.recommendations[8]["parts"][0]["type"] == "internal_wall_insulation"
        assert recommender2.recommendations[8]["parts"][0]["depth"] == 72.5
        assert np.isclose(recommender2.recommendations[6]["new_u_value"], 0.29)
-        assert np.isclose(recommender2.recommendations[6]["total"], 10621.934399999998)
+        assert np.isclose(recommender2.recommendations[6]["total"], 10988.208)
        assert recommender2.recommendations[6]["parts"][0]["type"] == "internal_wall_insulation"
        assert recommender2.recommendations[6]["parts"][0]["depth"] == 52.5
    def test_timber_frame_wall(self):
-        input_property3 = Property(id=1, postcode="F4k3 2", address1="223 fake street", epc_client=Mock())
+        input_property3 = Property(id=1, postcode="F4k3 2", address="223 fake street")
        input_property3.walls = {
            'original_description': 'Timber frame, as built, no insulation (assumed)',
            'clean_description': 'Timber frame, as built, no insulation',
@ -378,17 +378,17 @@ class TestCavityWallRecommensations:
        assert len(recommender3.recommendations) == 6
        assert recommender3.estimated_u_value == 1.9
        assert np.isclose(recommender3.recommendations[0]["new_u_value"], 0.2)
-        assert np.isclose(recommender3.recommendations[0]["total"], 13117.46832)
+        assert np.isclose(recommender3.recommendations[0]["total"], 13554.717263999999)
        assert recommender3.recommendations[0]["parts"][0]["type"] == "external_wall_insulation"
        assert recommender3.recommendations[0]["parts"][0]["depth"] == 100.0
        assert np.isclose(recommender3.recommendations[1]["new_u_value"], 0.23)
-        assert np.isclose(recommender3.recommendations[1]["total"], 34070.50944)
+        assert np.isclose(recommender3.recommendations[1]["total"], 35206.19308800001)
        assert recommender3.recommendations[1]["parts"][0]["type"] == "external_wall_insulation"
        assert recommender3.recommendations[1]["parts"][0]["depth"] == 150.0
    def test_granite_or_whinstone_wall(self):
-        input_property4 = Property(id=1, postcode="F4k3 2", address1="223 fake street", epc_client=Mock())
+        input_property4 = Property(id=1, postcode="F4k3 2", address="223 fake street")
        input_property4.walls = {
            'original_description': 'Granite or whinstone, as built, no insulation (assumed)',
            'clean_description': 'Granite or whinstone, as built, no insulation',
@ -420,17 +420,17 @@ class TestCavityWallRecommensations:
        assert len(recommender4.recommendations) == 6
        assert recommender4.estimated_u_value == 2.3
        assert np.isclose(recommender4.recommendations[0]["new_u_value"], 0.21)
-        assert np.isclose(recommender4.recommendations[0]["total"], 28562.514352)
+        assert np.isclose(recommender4.recommendations[0]["total"], 29547.42864)
        assert recommender4.recommendations[0]["parts"][0]["type"] == "external_wall_insulation"
        assert recommender4.recommendations[0]["parts"][0]["depth"] == 100
        assert np.isclose(recommender4.recommendations[1]["new_u_value"], 0.23)
-        assert np.isclose(recommender4.recommendations[1]["total"], 74186.52678400002)
+        assert np.isclose(recommender4.recommendations[1]["total"], 76744.68288000001)
        assert recommender4.recommendations[1]["parts"][0]["type"] == "external_wall_insulation"
        assert recommender4.recommendations[1]["parts"][0]["depth"] == 150
    def test_cob_wall(self):
-        input_property5 = Property(id=1, postcode="F4k3 2", address1="223 fake street", epc_client=Mock())
+        input_property5 = Property(id=1, postcode="F4k3 2", address="223 fake street")
        input_property5.walls = {
            'original_description': 'Cob, as built',
            'clean_description': 'Cob, as built',
@ -462,17 +462,17 @@ class TestCavityWallRecommensations:
        assert len(recommender5.recommendations) == 5
        assert recommender5.estimated_u_value == 0.8
        assert np.isclose(recommender5.recommendations[0]["new_u_value"], 0.29)
-        assert np.isclose(recommender5.recommendations[0]["total"], 8665.040384000002)
+        assert np.isclose(recommender5.recommendations[0]["total"], 8963.834880000002)
        assert recommender5.recommendations[0]["parts"][0]["type"] == "external_wall_insulation"
        assert recommender5.recommendations[0]["parts"][0]["depth"] == 50
        assert np.isclose(recommender5.recommendations[3]["new_u_value"], 0.26)
-        assert np.isclose(recommender5.recommendations[3]["total"], 20078.742992)
+        assert np.isclose(recommender5.recommendations[3]["total"], 20771.11344)
        assert recommender5.recommendations[3]["parts"][0]["type"] == "internal_wall_insulation"
        assert recommender5.recommendations[3]["parts"][0]["depth"] == 100
    def test_sandstone_or_limestone_wall(self):
-        input_property6 = Property(id=1, postcode="F4k3 6", address1="623 fake street", epc_client=Mock())
+        input_property6 = Property(id=1, postcode="F4k3 6", address="623 fake street")
        input_property6.walls = {
            'original_description': 'Sandstone or limestone, as built, no insulation (assumed)',
            'clean_description': 'Sandstone or limestone, as built, no insulation',
@ -504,16 +504,16 @@ class TestCavityWallRecommensations:
        assert len(recommender6.recommendations) == 9
        assert recommender6.estimated_u_value == 1
        assert np.isclose(recommender6.recommendations[0]["new_u_value"], 0.19)
-        assert np.isclose(recommender6.recommendations[0]["total"], 44829.0584)
+        assert np.isclose(recommender6.recommendations[0]["total"], 46374.888000000006)
        assert recommender6.recommendations[0]["parts"][0]["type"] == "external_wall_insulation"
        assert recommender6.recommendations[0]["parts"][0]["depth"] == 100
        assert np.isclose(recommender6.recommendations[2]["new_u_value"], 0.21)
-        assert np.isclose(recommender6.recommendations[2]["total"], 116436.25280000002)
+        assert np.isclose(recommender6.recommendations[2]["total"], 120451.29600000002)
        assert recommender6.recommendations[2]["parts"][0]["type"] == "external_wall_insulation"
        assert recommender6.recommendations[2]["parts"][0]["depth"] == 150
        assert np.isclose(recommender6.recommendations[4]["new_u_value"], 0.28)
-        assert np.isclose(recommender6.recommendations[4]["total"], 91267.0136)
+        assert np.isclose(recommender6.recommendations[4]["total"], 94414.15199999999)
        assert recommender6.recommendations[4]["parts"][0]["type"] == "internal_wall_insulation"
        assert recommender6.recommendations[4]["parts"][0]["depth"] == 100
--- a/recommendations/tests/test_window_recommendations.py
+++ b/recommendations/tests/test_window_recommendations.py
@ -0,0 +1,252 @@
 from recommendations.WindowsRecommendations import WindowsRecommendations
 from backend.Property import Property
 from recommendations.tests.test_data.materials import materials
 class TestWindowRecommendations:
    def test_fully_single_glazed(self):
        """
        For this property, we expect all windows to be single glazed and should recommend full double glazing.

        Float-valued fields of the recommendation (costs, labour figures) are compared with a tolerance
        rather than exact equality, so the test does not depend on the exact order of the floating point
        arithmetic used to produce them. Every other field is still compared exactly.
        :return:
        """
        import math

        property_1 = Property(
            id=1,
            postcode='1',
            address='1',
            data={
                "county": "Wychavon",
                "multi-glaze-proportion": 0,
                "uprn": 0
            }
        )
        property_1.windows = {
            'original_description': 'Single glazed', 'has_glazing': False, 'glazing_coverage': 'full',
            'glazing_type': 'single',
            'no_data': False
        }
        property_1.number_of_windows = 7
        recommender = WindowsRecommendations(property_instance=property_1, materials=materials)
        # Nothing should be recommended until recommend() has been called
        assert not recommender.recommendation
        recommender.recommend()

        expected = {
            'parts': [], 'type': 'windows_glazing', 'description': 'Install double glazing to all windows',
            'starting_u_value': None, 'new_u_value': None, 'sap_points': None, 'total': 5721.943248,
            'subtotal': 4768.28604, 'vat': 953.6572080000001, 'contingency': 340.59186, 'preliminaries': 340.59186,
            'material': 1275.75, 'profit': 681.18372, 'labour_hours': 45.5, 'labour_cost': 994.8624,
            'labour_days': 2.84375, 'is_secondary_glazing': False
        }
        # Exactly one recommendation, carrying exactly the expected set of fields
        assert len(recommender.recommendation) == 1
        actual = recommender.recommendation[0]
        assert set(actual) == set(expected)
        for field, expected_value in expected.items():
            if isinstance(expected_value, float):
                assert math.isclose(actual[field], expected_value, rel_tol=1e-9), field
            else:
                assert actual[field] == expected_value, field
    def test_partial_double_glazed(self):
        """
        For this property, the double glazing is described as partial, therefore we recommend completion of
        double glazing.

        Float-valued fields of the recommendation (costs, labour figures) are compared with a tolerance
        rather than exact equality, so the test does not depend on the exact order of the floating point
        arithmetic used to produce them. Every other field is still compared exactly.
        :return:
        """
        import math

        property_2 = Property(
            id=1,
            postcode='1',
            address='1',
            data={
                "county": "Wychavon",
                "multi-glaze-proportion": 33,
                "uprn": 0
            }
        )
        property_2.windows = {'original_description': 'Mostly double glazing', 'has_glazing': True,
                              'glazing_coverage': 'most',
                              'glazing_type': 'double', 'no_data': False}
        property_2.number_of_windows = 7
        recommender2 = WindowsRecommendations(property_instance=property_2, materials=materials)
        # Nothing should be recommended until recommend() has been called
        assert not recommender2.recommendation
        recommender2.recommend()

        expected = {
            'parts': [], 'type': 'windows_glazing', 'description': 'Install double glazing to the remaining windows',
            'starting_u_value': None, 'new_u_value': None, 'sap_points': None, 'total': 4087.10232,
            'subtotal': 3405.9186, 'vat': 681.18372, 'contingency': 243.2799, 'preliminaries': 243.2799,
            'material': 911.25, 'profit': 486.5598, 'labour_hours': 32.5, 'labour_cost': 710.6160000000001,
            'labour_days': 2.03125, 'is_secondary_glazing': False
        }
        # Exactly one recommendation, carrying exactly the expected set of fields
        assert len(recommender2.recommendation) == 1
        actual = recommender2.recommendation[0]
        assert set(actual) == set(expected)
        for field, expected_value in expected.items():
            if isinstance(expected_value, float):
                assert math.isclose(actual[field], expected_value, rel_tol=1e-9), field
            else:
                assert actual[field] == expected_value, field
    def test_fully_double_glazed(self):
        """
        The windows are already described as fully double glazed, so no recommendation should be produced
        :return:
        """
        property_data = {
            "county": "Wychavon",
            "multi-glaze-proportion": 80,
            "uprn": 0,
        }
        glazing = {
            'original_description': 'Fully double glazed',
            'has_glazing': True,
            'glazing_coverage': 'full',
            'glazing_type': 'double',
            'no_data': False,
        }
        home = Property(id=1, postcode='1', address='1', data=property_data)
        home.windows = glazing
        home.number_of_windows = 7

        windows_recommender = WindowsRecommendations(property_instance=home, materials=materials)
        # Empty before recommend() has run ...
        assert not windows_recommender.recommendation
        windows_recommender.recommend()
        # ... and still empty afterwards
        assert not windows_recommender.recommendation
    def test_fully_secondary_glazed(self):
        """
        The windows are described as having full secondary glazing, so no recommendation should be produced
        :return:
        """
        property_data = {
            "county": "Wychavon",
            "multi-glaze-proportion": 100,
            "uprn": 0,
        }
        glazing = {
            'original_description': 'Full secondary glazing',
            'has_glazing': True,
            'glazing_coverage': 'full',
            'glazing_type': 'secondary',
            'no_data': False,
        }
        home = Property(id=1, postcode='1', address='1', data=property_data)
        home.windows = glazing
        home.number_of_windows = 7

        windows_recommender = WindowsRecommendations(property_instance=home, materials=materials)
        # Empty before recommend() has run ...
        assert not windows_recommender.recommendation
        windows_recommender.recommend()
        # ... and still empty afterwards
        assert not windows_recommender.recommendation
    def test_partial_secondary_glazing(self):
        """
        For this property, the secondary glazing is described as partial, therefore we expect a recommendation
        to install secondary glazing to the remaining windows.

        Float-valued fields of the recommendation (costs, labour figures) are compared with a tolerance
        rather than exact equality, so the test does not depend on the exact order of the floating point
        arithmetic used to produce them. Every other field is still compared exactly.
        :return:
        """
        import math

        property_5 = Property(
            id=1,
            postcode='1',
            address='1',
            data={
                "county": "Wychavon",
                "multi-glaze-proportion": 50,
                "uprn": 0
            }
        )
        property_5.windows = {'original_description': 'Partial secondary glazing', 'has_glazing': True,
                              'glazing_coverage': 'partial',
                              'glazing_type': 'secondary', 'no_data': False}
        property_5.number_of_windows = 7
        recommender5 = WindowsRecommendations(property_instance=property_5, materials=materials)
        # Nothing should be recommended until recommend() has been called
        assert not recommender5.recommendation
        recommender5.recommend()

        expected = {
            'parts': [], 'type': 'windows_glazing',
            'description': 'Install secondary glazing to the remaining windows',
            'starting_u_value': None, 'new_u_value': None, 'sap_points': None, 'total': 1089.893952,
            'subtotal': 908.24496, 'vat': 181.64899200000002, 'contingency': 64.87464, 'preliminaries': 64.87464,
            'material': 729.0, 'profit': 129.74928, 'labour_hours': 13.0, 'labour_cost': 568.4928,
            'labour_days': 0.8125, 'is_secondary_glazing': True
        }
        # Exactly one recommendation, carrying exactly the expected set of fields
        assert len(recommender5.recommendation) == 1
        actual = recommender5.recommendation[0]
        assert set(actual) == set(expected)
        for field, expected_value in expected.items():
            if isinstance(expected_value, float):
                assert math.isclose(actual[field], expected_value, rel_tol=1e-9), field
            else:
                assert actual[field] == expected_value, field
    def test_single_glazed_restricted_measures(self):
        """
        For this single glazed property, restricted_measures and is_heritage are both set, and we expect
        secondary glazing to be recommended for all windows.

        Float-valued fields of the recommendation (costs, labour figures) are compared with a tolerance
        rather than exact equality, so the test does not depend on the exact order of the floating point
        arithmetic used to produce them. Every other field is still compared exactly.
        :return:
        """
        import math

        property_6 = Property(
            id=1,
            postcode='1',
            address='1',
            data={
                "county": "Wychavon",
                "multi-glaze-proportion": 0,
                "uprn": 0
            }
        )
        property_6.windows = {'original_description': 'Single glazed', 'has_glazing': False, 'glazing_coverage': None,
                              'glazing_type': 'single',
                              'no_data': False}
        property_6.number_of_windows = 7
        property_6.restricted_measures = True
        property_6.is_heritage = True
        recommender6 = WindowsRecommendations(property_instance=property_6, materials=materials)
        # Nothing should be recommended until recommend() has been called
        assert not recommender6.recommendation
        recommender6.recommend()

        # NOTE(review): 'herigate' looks like a misspelling of 'heritage'. It is kept as-is because this
        # test compares against the description text returned by the recommender - confirm and fix both
        # places together.
        expected = {
            'parts': [], 'type': 'windows_glazing',
            'description': 'Install secondary glazing to all windows. Secondary '
                           'glazing recommended due to herigate building status',
            'starting_u_value': None, 'new_u_value': None, 'sap_points': None,
            'total': 1907.314416, 'subtotal': 1589.42868, 'vat': 317.885736,
            'contingency': 113.53062, 'preliminaries': 113.53062,
            'material': 1275.75, 'profit': 227.06124, 'labour_hours': 22.75,
            'labour_cost': 994.8624, 'labour_days': 1.421875, 'is_secondary_glazing': True
        }
        # Exactly one recommendation, carrying exactly the expected set of fields
        assert len(recommender6.recommendation) == 1
        actual = recommender6.recommendation[0]
        assert set(actual) == set(expected)
        for field, expected_value in expected.items():
            if isinstance(expected_value, float):
                assert math.isclose(actual[field], expected_value, rel_tol=1e-9), field
            else:
                assert actual[field] == expected_value, field
    def test_full_triple_glazed(self):
        """
        The windows are described as fully triple glazed, so no recommendation should be produced
        :return:
        """
        property_data = {
            "county": "Wychavon",
            "multi-glaze-proportion": 100,
            "uprn": 0,
        }
        glazing = {
            'original_description': 'Fully triple glazed',
            'has_glazing': True,
            'glazing_coverage': 'full',
            'glazing_type': 'triple',
            'no_data': False,
        }
        home = Property(id=1, postcode='1', address='1', data=property_data)
        home.windows = glazing
        home.number_of_windows = 7

        windows_recommender = WindowsRecommendations(property_instance=home, materials=materials)
        # Empty before recommend() has run ...
        assert not windows_recommender.recommendation
        windows_recommender.recommend()
        # ... and still empty afterwards
        assert not windows_recommender.recommendation
    def test_partial_triple_glazed(self):
        """
        We should just recommend double glazing to the remaining windows, since it's a cheaper option.

        Float-valued fields of the recommendation (costs, labour figures) are compared with a tolerance
        rather than exact equality, so the test does not depend on the exact order of the floating point
        arithmetic used to produce them. Every other field is still compared exactly.
        :return:
        """
        import math

        property_8 = Property(
            id=1,
            postcode='1',
            address='1',
            data={
                "county": "Wychavon",
                "multi-glaze-proportion": 80,
                "uprn": 1
            }
        )
        property_8.windows = {'original_description': 'Mostly triple glazing', 'has_glazing': True,
                              'glazing_coverage': 'most',
                              'glazing_type': 'triple', 'no_data': False}
        property_8.number_of_windows = 7
        recommender8 = WindowsRecommendations(property_instance=property_8, materials=materials)
        # Nothing should be recommended until recommend() has been called
        assert not recommender8.recommendation
        recommender8.recommend()

        expected = {
            'parts': [], 'type': 'windows_glazing', 'description': 'Install double glazing to the remaining windows',
            'starting_u_value': None, 'new_u_value': None, 'sap_points': None, 'total': 1634.840928,
            'subtotal': 1362.36744, 'vat': 272.47348800000003, 'contingency': 97.31196, 'preliminaries': 97.31196,
            'material': 364.5, 'profit': 194.62392, 'labour_hours': 13.0, 'labour_cost': 284.2464,
            'labour_days': 0.8125, 'is_secondary_glazing': False
        }
        # Exactly one recommendation, carrying exactly the expected set of fields
        assert len(recommender8.recommendation) == 1
        actual = recommender8.recommendation[0]
        assert set(actual) == set(expected)
        for field, expected_value in expected.items():
            if isinstance(expected_value, float):
                assert math.isclose(actual[field], expected_value, rel_tol=1e-9), field
            else:
                assert actual[field] == expected_value, field