handled electric community heating

2026-07-27 23:35:01 +00:00 · 2025-11-14 21:10:21 +00:00 · 2025-11-14 21:10:21 +00:00 · 3624b34dd0
commit 3624b34dd0
parent 4d30d6588d
5 changed files with 282 additions and 4 deletions
--- a/asset_list/utils.py
+++ b/asset_list/utils.py
@ -1,6 +1,8 @@
 import time
 import random
 import pandas as pd
+
+from adhoc.investigation import newest_epc
 from backend.SearchEpc import SearchEpc
 from etl.find_my_epc.RetrieveFindMyEpc import RetrieveFindMyEpc
 from tqdm import tqdm
@ -9,6 +11,132 @@ from utils.logger import setup_logger
 logger = setup_logger()


+def get_data_for_property(
+    address1: str,
+    postcode: str,
+    full_address: str,
+    property_type: str | None,
+    built_form: str | None,
+    uprn: str | float | None,
+    epc_auth_token: str,
+    find_my_epc_return_page: bool
+):
+    """
+    Fetch the EPC data and the FindMyEPC data for a single property.
+
+    The EPC is looked up via SearchEpc; if nothing is found and no UPRN was given, the search is
+    retried with a different first address line. FindMyEPC is then queried for the EPC's address.
+
+    :param address1: first address line; a house number is extracted from it where possible
+    :param postcode: postcode of the property
+    :param full_address: full address, passed through to SearchEpc
+    :param property_type: property type set on the search client; "block of flats" is skipped
+    :param built_form: built form set on the search client
+    :param uprn: UPRN used for the first search; null values (None/NaN) are treated as missing
+    :param epc_auth_token: auth token passed to SearchEpc
+    :param find_my_epc_return_page: when True, the FindMyEPC page text is returned as well
+    :return: None when the property is skipped or no EPC is found, otherwise the tuple
+        (newest_epc, older_epcs, find_my_epc_data, find_my_epc_page); the page is None unless requested
+    :raises Exception: when the FindMyEPC retrieval fails with an unexpected error
+    """
+    if property_type == "block of flats":
+        return None
+
+    house_number = str(address1).strip()
+    full_address = full_address.strip()
+    parsed_house_no = SearchEpc.get_house_number(address=house_number, postcode=postcode)
+    house_no = house_number if parsed_house_no is None else parsed_house_no
+    if pd.isnull(uprn):
+        uprn = None
+
+    searcher = SearchEpc(
+        address1=str(house_no),
+        postcode=postcode,
+        auth_token=epc_auth_token,
+        os_api_key="",
+        property_type=None,
+        fast=True,
+        full_address=full_address,
+        max_retries=5,
+        uprn=uprn
+    )
+    # Force the skipping of estimating the EPC
+    # We check if the property was split
+    searcher.ordnance_survey_client.property_type = property_type
+    searcher.ordnance_survey_client.built_form = built_form
+    searcher.find_property(skip_os=True)
+
+    # Nothing found and no UPRN to go on: retry with a different first address line
+    if searcher.newest_epc is None and uprn is None:
+        if parsed_house_no is None:
+            # Backup: second comma-separated part of the full address, else its first word
+            address_parts = full_address.split(",")
+            if len(address_parts) > 1:
+                add1 = address_parts[1].strip()
+            else:
+                add1 = full_address.split(" ")[0].strip()
+        else:
+            add1 = house_number
+        searcher = SearchEpc(
+            address1=add1,
+            postcode=postcode,
+            auth_token=epc_auth_token,
+            os_api_key="",
+            property_type=None,
+            fast=True,
+            full_address=full_address,
+            max_retries=5
+        )
+        # Flats and apartments are flagged explicitly on the client
+        if any(token in house_number.lower() for token in ("flat", "apartment", "apt")):
+            searcher.ordnance_survey_client.property_type = "Flat"
+        searcher.find_property(skip_os=True)
+
+    # As a final resort, we estimate the EPC
+    if property_type is not None and searcher.newest_epc is None:
+        searcher.ordnance_survey_client.property_type = property_type
+        searcher.ordnance_survey_client.built_form = built_form
+        searcher.find_property(skip_os=True)
+    if searcher.newest_epc is None:
+        return None
+
+    # Retrieve data from FindMyEPC; an empty result has the same shape as a successful one
+    empty_response = ({}, None) if find_my_epc_return_page else {}
+    try:
+        find_epc_searcher = RetrieveFindMyEpc(
+            address=searcher.newest_epc["address"],
+            postcode=searcher.newest_epc["postcode"]
+        )
+        find_epc_response = find_epc_searcher.retrieve_newest_find_my_epc_data(
+            return_page=find_my_epc_return_page
+        )
+    except ValueError as e:
+        if "No EPC found" in str(e) and "address1" in searcher.newest_epc:
+            try:
+                find_epc_searcher = RetrieveFindMyEpc(
+                    address=searcher.newest_epc["address1"], postcode=searcher.newest_epc["postcode"]
+                )
+                # return_page has to be forwarded on the retry too, or the unpacking below breaks
+                find_epc_response = find_epc_searcher.retrieve_newest_find_my_epc_data(
+                    return_page=find_my_epc_return_page
+                )
+            except ValueError as retry_err:
+                if "No EPC found" not in str(retry_err):
+                    logger.error(f"Error retrieving FindMyEPC data: {retry_err}")
+                    raise Exception(f"Error retrieving FindMyEPC data: {retry_err}") from retry_err
+                find_epc_response = empty_response
+        else:
+            find_epc_response = empty_response
+    except Exception as e:
+        raise Exception(f"Error retrieving FindMyEPC data: {e}") from e
+
+    # Normalise to a (data, page) pair; the page stays None unless it was requested
+    if not find_my_epc_return_page:
+        find_epc_response = (find_epc_response, None)
+    find_my_epc_data, find_my_epc_page = find_epc_response
+    return searcher.newest_epc, searcher.older_epcs, find_my_epc_data, find_my_epc_page
+
+
 def get_data(
    df,
    manual_uprn_map,
--- a/backend/Property.py
+++ b/backend/Property.py
@ -1221,11 +1221,12 @@ class Property:
                None: "Natural Gas (Community Scheme)",
                "mains gas": "Natural Gas (Community Scheme)",
                "biomass": "Smokeless Fuel",
+                "electricity": "Electricity"
            }
            if self.main_fuel["fuel_type"] in fuel_map:  # We assume when None as it's unknown
                self.heating_energy_source = fuel_map[self.main_fuel["fuel_type"]]
            else:
-                raise Exception("Implement me")
+                raise NotImplementedError(f"Unhandled fuel {self.main_fuel['fuel_type']}")

        if self.hotwater["heater_type"] is not None:
            self.hot_water_energy_source = heater_type_to_fuel[self.hotwater["heater_type"]]
@ -1247,7 +1248,7 @@ class Property:
                secondary_heating = self.data["secondheat-description"]
                self.hot_water_energy_source = assumptions.DESCRIPTIONS_TO_FUEL_TYPES[secondary_heating]["fuel"]
            else:
-                raise Exception("Investiage me")
+                raise NotImplementedError(f"Investiage me - unhandled hot water fuel {fuel}")
        else:
            self.hot_water_energy_source = hotwater_appliance_to_fuel[self.hotwater["appliance"]]

--- a/backend/tests/test_integration.py
+++ b/backend/tests/test_integration.py
@ -91,7 +91,7 @@ costs_by_floor_area = costs_by_floor_area.groupby("current-energy-efficiency")[
 ].mean().reset_index()

 sample_epc_data = epc_data[pd.to_datetime(epc_data["LODGEMENT_DATE"]) >= "2015-01-01"].drop_duplicates("UPRN").sample(
-    10000).reset_index(drop=True)
+    20000).reset_index(drop=True)

 # TODO: In Property find_energy_sources, sort out biomass community heating - what fuel type
 # TODO: We might be able to remove find_energy_sources entirely and remove estimate_electrical_consumption. It's used
--- a/etl/customers/peabody/Nov
+++ b/etl/customers/peabody/Nov
@ -0,0 +1,145 @@
+"""
+This script prepares the raw data that was sent over by Peabody for production of
+a standardised asset list
+
+They have sent over just short of 100,000 properties and so, to make this easier, we will do the following
+1) Break the data up into subsets of 25,000
+2) Combine the data provided into a single list
+"""
+import json
+import time
+import os
+import pandas as pd
+from tqdm import tqdm
+from dotenv import load_dotenv
+from asset_list.utils import get_data_for_property
+from utils.logger import setup_logger
+
+logger = setup_logger()
+
+load_dotenv(dotenv_path="backend/.env")
+EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")  # None when the variable is not set
+
+property_list = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody "
+    "- Data Extracts for Domna.xlsx",
+    sheet_name="Properties"
+)
+sustainability_data = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody "
+    "- Data Extracts for Domna.xlsx",
+    sheet_name="Sustainability"
+)
+
+# Basic overview:
+# 1) We have 10,634 postcodes. If we needed to make requests to the Ordnance Survey API for
+# all of these postcodes, it would cost at least £106, not accounting for double requests for postcodes
+# where we have more than 100 properties (WE DON'T!)
+# 2) This is on average 9.36 properties per postcode
+# 3) The UPRN in the property_list matches the Org Ref in the sustainability data. There
+#    is an additional UPRN column in the sustainability data which appears to be the Ordnance Survey UPRN
+# 4) There appear to be some anomalous records, e.g. a flat with 543 m2 floor area and another flat
+#    with 6 m2 floor area
+# 5) Based on the residential indicator, all properties appear to be residential
+# 6) We should do some quick calcs on how much it might cost to fetch all of the solar API data
+# 7) We have 8,785 missing UPRNs, which we should potentially try and fill
+# 8) In the backend, we should probably start storing the raw EPC input data to allow for much quicker
+# re-runs. All we really need to do is store the find my EPC data, perhaps against UPRN and RRN, as well
+# as the raw EPC data, against UPRN. This will be useful for scenario re-builds and will be much, much
+# quicker, as a starting point. Do we store in the database vs S3? TBC
+
+n_postcodes = property_list["Post Code"].nunique()
+postcode_summary = property_list.groupby("Post Code")["UPRN"].count().reset_index()
+postcode_summary["UPRN"].mean()  # NOTE(review): result is discarded; presumably inspected interactively
+
+test_match = property_list.merge(sustainability_data, left_on="UPRN", right_on="Org Ref")  # default inner join
+
+
+def classify_floor_area(x):
+    """Band a floor area in m2 as "0-72", "73-97", "98-199" or "200+"; None when the area is missing."""
+    if pd.isnull(x):
+        return None
+    for upper_bound, band in ((72, "0-72"), (97, "73-97"), (199, "98-199")):
+        if x <= upper_bound:
+            return band
+    return "200+"
+
+
+# Postal region = the part of the postcode before the first space
+sustainability_data["Postal Region"] = sustainability_data["Postcode"].str.split(" ").str.get(0)
+sustainability_data["Floor Area Band"] = sustainability_data["Total Floor Area (m2)"].apply(classify_floor_area)
+
+# One row per distinct combination of the attributes below
+archetypes = sustainability_data.loc[
+    :,
+    [
+        "Type", "Attachment", "Construction Years", "Wall Construction", "Wall Insulation",
+        "Roof Construction", "Roof Insulation", "Floor Construction", "Floor Insulation",
+        "Glazing", "Heating", "Boiler Efficiency", "Main Fuel", "Controls Adequacy", "Floor Area Band",
+    ],
+].drop_duplicates()
+
+property_type_map = {}  # Maps the property types to the format recognised by the EPC api
+built_form_map = {}  # Maps the build form to the format recognised by the OS api
+
+# Proposed data fetching
+# 1) grab properties with UPRN and fetch the associated EPC data & find my EPC data
+# Some thoughts:
+# S3 is quite cheap to query however we may incur some cost if we're making hundreds of thousands of calls
+# to S3 to fetch data out of it. It's cheap to fetch data, if we aren't taking data out of S3, but we
+# should consider this. This may influence whether or not we want to store each record individually
+# against UPRN, or store against the 10,641 postcodes. We can fetch the data and store in a single
+# large dump and then determine later if we want to split it up
+
+# TODO: Handle properties without uprn
+# TODO: I think we can json dump all of this, but check if we can load and re-use the page source
+# TODO: Create batches?
+
+# Fetch the data in batches of batch_size rows; one JSON file is written per batch so that a
+# re-run can skip the batches that are already on disk
+batch_size = 500
+batch_indexes = list(range(0, len(sustainability_data), batch_size))
+
+# TODO: SET
+working_directory = ""
+# An empty working directory means the current one, which is also where os.path.join() below ends up
+download_contents = set(os.listdir(working_directory or os.curdir))
+
+for i in batch_indexes:
+    batch_name = f"batch_{i}_to_{i + batch_size}"
+    # TODO: Check this
+    if batch_name in download_contents:
+        # Means we already have the data downloaded
+        continue
+
+    # Only fetch the rows that belong to this batch, not the whole data set
+    batch = sustainability_data.iloc[i:i + batch_size]
+    batch_data = {}
+    for _, property_data in tqdm(batch.iterrows(), total=len(batch)):
+        full_address_components = [
+            str(x) for x in [property_data["Address 1"], property_data["Address 2"], property_data["Address 3"]]
+            if not pd.isnull(x)
+        ]
+        fetched_data = get_data_for_property(
+            address1=property_data["Address 1"],
+            postcode=property_data["Postcode"],
+            full_address=", ".join(full_address_components),
+            # Unmapped (or missing) types fall back to None instead of raising a KeyError
+            property_type=property_type_map.get(property_data["Type"]),
+            built_form=built_form_map.get(property_data["Attachment"]),
+            uprn=property_data["UPRN"],
+            epc_auth_token=EPC_AUTH_TOKEN,
+            find_my_epc_return_page=True
+        )
+        # JSON object keys have to be plain str/int/float, so the reference is stored as a string
+        batch_data[str(property_data["Org Ref"])] = fetched_data
+
+        # TODO: We likely want to do something like this: to slow down
+        # TODO: We also perhaps store the data in batches
+        if len(batch_data) % 50 == 0 and len(batch_data) > 0:
+            logger.info("Sleeping for 10 seconds to avoid hitting API rate limit")
+            time.sleep(10)
+
+    # Store the batch data in the wd; json.dump() writes str, so the file is opened in text mode
+    with open(os.path.join(working_directory, batch_name), "w") as f:
+        json.dump(batch_data, f)
--- a/etl/find_my_epc/RetrieveFindMyEpc.py
+++ b/etl/find_my_epc/RetrieveFindMyEpc.py
@ -371,7 +371,7 @@ class RetrieveFindMyEpc:

        return all_find_my_epc_data

-    def retrieve_newest_find_my_epc_data(self, sap_2012_date=None):
+    def retrieve_newest_find_my_epc_data(self, sap_2012_date=None, return_page=False):
        """
        For a post code and address, we pull out all the required data from the find my epc website
        """
@ -577,6 +577,10 @@ class RetrieveFindMyEpc:
            **low_carbon_energy_sources,
        }

+        if return_page:
+            # We return the page text as well, which can be parsed again later
+            return resulting_data, postcode_response.text
+
        return resulting_data

    def format_recommendations(self, recommendations, assessment_data, sap_2012_date=None):