Adjust the SearchEpc search call to handle pydantic issues for the moment

2026-06-08 11:17:27 +00:00 · 2024-10-28 19:26:14 +00:00 · 2024-10-28 19:26:14 +00:00 · 6e8d9a025c
commit 6e8d9a025c
parent 54b09e88e1
5 changed files with 178 additions and 9 deletions
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@ -7,7 +7,7 @@
      <sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
      <sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
    </content>
-    <orderEntry type="jdk" jdkName="Stonewater-wave-3" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Engine" jdkType="Python SDK" />
    <orderEntry type="sourceFolder" forTests="false" />
  </component>
  <component name="PyNamespacePackagesService">
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@ -3,7 +3,7 @@
  <component name="Black">
    <option name="sdkName" value="Python 3.10 (backend)" />
  </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Stonewater-wave-3" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Engine" project-jdk-type="Python SDK" />
  <component name="PyCharmProfessionalAdvertiser">
    <option name="shown" value="true" />
  </component>
--- a/backend/SearchEpc.py
+++ b/backend/SearchEpc.py
@ -256,16 +256,12 @@ class SearchEpc:
            else:
                params = {"address": self.address1, "postcode": self.postcode}

+        url = os.path.join(self.client.domestic.host, "search")
+
        for retry in range(self.max_retries):
            try:

-                if "uprn" in params:
-                    # We use the direct call method inside, since we need to implement uprn as a valid
-                    # parameter for the search function
-                    url = os.path.join(self.client.domestic.host, "search")
                response = self.client.domestic.call(method="get", url=url, params=params)
-                else:
-                    response = self.client.domestic.search(params=params, size=size)

                if response:
                    self.data = response
--- a/etl/customers/livewest/route_march_2024_10_28.py
+++ b/etl/customers/livewest/route_march_2024_10_28.py
@ -0,0 +1,171 @@
+import os
+
+import pandas as pd
+from tqdm import tqdm
+
+from dotenv import load_dotenv
+from utils.s3 import read_excel_from_s3
+from backend.SearchEpc import SearchEpc
+from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes
+
+from recommendations.recommendation_utils import (
+    estimate_perimeter,
+    estimate_external_wall_area,
+    estimate_number_of_floors
+)
+
+load_dotenv(dotenv_path="backend/.env")
+EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
+
+
+def app():
+    """
+    Pull EPC data for the properties in the Livewest asset list and save the
+    asset list, enriched with that data, as an Excel file.
+
+    Data request contents:
+    Date of last EPC
+    Reason for EPC
+    SAP score on register
+    Property Type
+    Property Area
+    Property Age
+    Any Dimensions (HLP,PW,RH)
+    Property Wall Construction
+    Heating Type
+    Secondary Heating
+    Loft Insulation Depth
+
+    Additional if possible: heat loss calculations, EPC recommendations, Property UPRN
+
+    Homes with no EPC found are skipped during the pull, so after the left
+    merge their rows stay in the output with empty EPC columns.
+    """
+    asset_list = pd.read_excel(
+        "/Users/khalimconn-kowlessar/Downloads/LIVEWEST 3578 ECO4 ECO PLUS GBIS.xlsx", header=0
+    )
+
+    epc_data = []
+    for _, home in tqdm(asset_list.iterrows(), total=len(asset_list)):
+
+        postcode = home["Postcode"]
+        house_number = home["Number"]
+        full_address = home["Full Address"]
+
+        searcher = SearchEpc(
+            address1=str(house_number),
+            postcode=postcode,
+            auth_token=EPC_AUTH_TOKEN,
+            os_api_key="",
+            property_type=None,
+            fast=True,
+            full_address=full_address
+        )
+        # Force the skipping of estimating the EPC
+        searcher.ordnance_survey_client.property_type = None
+        searcher.ordnance_survey_client.built_form = None
+
+        searcher.find_property(skip_os=True)
+        if searcher.newest_epc is None:
+            continue
+
+        # Look for EPC recommendations; fall back to an empty list if the lookup fails
+        try:
+            property_recommendations = searcher.client.domestic.recommendations(searcher.newest_epc["lmk-key"])
+        except Exception:
+            property_recommendations = {"rows": []}
+
+        epc = {
+            "asset_list_address": full_address,
+            **searcher.newest_epc.copy(),
+            "recommendations": property_recommendations["rows"]
+        }
+
+        epc_data.append(epc)
+
+    epc_df = pd.DataFrame(epc_data)
+
+    # Retrieve just the data we need
+    epc_df = epc_df[
+        [
+            "asset_list_address",
+            "uprn",
+            "property-type",
+            "built-form",
+            "inspection-date",
+            "current-energy-rating",
+            "current-energy-efficiency",
+            "roof-description",
+            "walls-description",
+            "transaction-type",
+            # New fields needed
+            "secondheat-description",
+            "total-floor-area",
+            "construction-age-band",
+            "floor-height",
+            "number-habitable-rooms",
+            "mainheat-description",
+            #
+            "energy-consumption-current",  # kwh/m2
+        ]
+    ]
+    # "asset_list_address" was copied from "Full Address" above, so join on that column
+    asset_list = asset_list.merge(
+        epc_df,
+        how="left",
+        left_on=["Full Address"],
+        right_on=["asset_list_address"]
+    )
+
+    asset_list = asset_list.drop(columns=["asset_list_address"])
+
+    # Rename the columns
+    asset_list = asset_list.rename(columns={
+        "inspection-date": "Date of last EPC",
+        "current-energy-efficiency": "SAP score on register",
+        "current-energy-rating": "EPC rating on register",
+        "property-type": "Property Type",
+        "built-form": "Archetype",
+        "total-floor-area": "Property Floor Area",
+        "construction-age-band": "Property Age Band",
+        "floor-height": "Property Floor Height",
+        "number-habitable-rooms": "Number of Habitable Rooms",
+        "walls-description": "Wall Construction",
+        "roof-description": "Roof Construction",
+        "mainheat-description": "Heating Type",
+        "secondheat-description": "Secondary Heating",
+        "transaction-type": "Reason for last EPC"
+    })
+
+    asset_list["Estimated Number of Floors"] = asset_list.apply(
+        lambda x: estimate_number_of_floors(property_type=x["Property Type"]), axis=1
+    )
+
+    asset_list["Property Floor Area"] = asset_list["Property Floor Area"].astype(float)
+    asset_list["Number of Habitable Rooms"] = asset_list["Number of Habitable Rooms"].astype(float)
+
+    asset_list["Estimated Perimeter (m)"] = asset_list.apply(
+        lambda x: estimate_perimeter(
+            floor_area=x["Property Floor Area"] / x["Estimated Number of Floors"],
+            num_rooms=x["Number of Habitable Rooms"] / x["Estimated Number of Floors"],
+        ), axis=1
+    )
+    # NOTE(review): this stores estimate_external_wall_area() under a "Perimeter (m)" name - confirm
+    asset_list["Estimated Heat Loss Perimeter (m)"] = asset_list.apply(
+        lambda x: estimate_external_wall_area(
+            num_floors=x["Estimated Number of Floors"],
+            floor_height=float(x["Property Floor Height"]) if x["Property Floor Height"] else 2.5,
+            perimeter=x["Estimated Perimeter (m)"],
+            built_form=x["Archetype"]
+        ),
+        axis=1
+    )
+
+    asset_list["Roof Insulation Thickness"] = asset_list.apply(
+        lambda x: RoofAttributes(description=x["Roof Construction"]).process()["insulation_thickness"],
+        axis=1
+    )
+
+    # Store as an excel
+    filename = "LHP EPC Data pull.xlsx"
+    asset_list.to_excel(filename, index=False)
--- a/etl/customers/stonewater/Wave
+++ b/etl/customers/stonewater/Wave
@ -226,5 +226,7 @@ def main():

    extracted_data = pd.DataFrame(extracted_data)

+    missed = [f for f in survey_folders if f not in extracted_data["survey_folder"].tolist()]
+
 # if __name__ == "__main__":
 #     main()