checking additional list

2026-07-27 23:35:01 +00:00 · 2024-11-06 13:41:41 +00:00 · 2024-11-06 13:41:41 +00:00 · 7c4e32abc9
commit 7c4e32abc9
parent ba3130b1c5
4 changed files with 280 additions and 2 deletions
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@ -7,7 +7,7 @@
      <sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
      <sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
    </content>
-    <orderEntry type="jdk" jdkName="Fastapi-backend" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Stonewater-wave-3" jdkType="Python SDK" />
    <orderEntry type="sourceFolder" forTests="false" />
  </component>
  <component name="PyNamespacePackagesService">
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@ -3,7 +3,7 @@
  <component name="Black">
    <option name="sdkName" value="Python 3.10 (backend)" />
  </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Fastapi-backend" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Stonewater-wave-3" project-jdk-type="Python SDK" />
  <component name="PyCharmProfessionalAdvertiser">
    <option name="shown" value="true" />
  </component>
--- a/etl/customers/stonewater/potential_eco_properties.py
+++ b/etl/customers/stonewater/potential_eco_properties.py
@ -1,4 +1,278 @@
+import os
+import time
+import json
+import pandas as pd
+import numpy as np
+from tqdm import tqdm
+from dotenv import load_dotenv
+from backend.SearchEpc import SearchEpc
+from utils.s3 import read_from_s3, read_pickle_from_s3
+
+load_dotenv(dotenv_path="backend/.env")
+EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
+
+
+def get_data(asset_list):
+    """
+    Fetch the newest EPC (plus its recommendations) for each row of asset_list.
+    Rows need "Postcode", "Number", "Full Address" and "row_id". Returns
+    (epc_data, errors): one dict per home with an EPC, and failing row_ids.
+    """
+    epc_data = []
+    errors = []
+    for _, home in tqdm(asset_list.iterrows(), total=len(asset_list)):
+        try:
+            searcher = SearchEpc(
+                address1=str(home["Number"]),
+                postcode=home["Postcode"],
+                auth_token=EPC_AUTH_TOKEN,
+                os_api_key="",
+                property_type=None,
+                fast=True,
+                full_address=home["Full Address"],
+                max_retries=5
+            )
+            # Force the skipping of estimating the EPC
+            searcher.ordnance_survey_client.property_type = None
+            searcher.ordnance_survey_client.built_form = None
+
+            searcher.find_property(skip_os=True)
+            if searcher.newest_epc is None:
+                continue
+
+            # Best effort: keep the EPC if this fails. Exception (not bare except) lets Ctrl-C propagate.
+            try:
+                property_recommendations = searcher.client.domestic.recommendations(searcher.newest_epc["lmk-key"])
+            except Exception:
+                property_recommendations = {"rows": []}
+
+            epc_data.append({
+                "row_id": home["row_id"],
+                **searcher.newest_epc.copy(),
+                "recommendations": property_recommendations["rows"]
+            })
+        except Exception:
+            # Record the failing row; the pause presumably eases API rate limits (confirm)
+            errors.append(home["row_id"])
+            time.sleep(5)
+
+    return epc_data, errors
+
+
 def app():
    """
    This code creates a list of cavity properties, for review
    """
+
+    archetyped_properties = pd.read_excel(
+        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Stonewater SHDF_3_0_Board Triage 22.05.24 - "
+        "Archetyped V3.1.xlsx",
+        header=4  # 0-indexed: column names are read from the fifth row
+    )
+
+    cavity_descriptions = [  # "Wall Type" values treated as as-built cavity walls
+        "Cavity: AsBuilt (1983-1995)",
+        "Cavity: AsBuilt (Post 1995)",
+        "Cavity: AsBuilt (Pre 1976)",
+        "Cavity: AsBuilt (1976-1982)",
+    ]
+
+    archetyped_properties["Is Cavity Property"] = archetyped_properties["Wall Type"].isin(cavity_descriptions)
+    # Next, find the surveyed properties whose costed package calls for cavity wall insulation (CWI) work
+
+    costed_packages = pd.read_excel(
+        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Stonewater - Costed Retrofit Packages "
+        "20241030 (WIP) Single Model V2.xlsx",
+        sheet_name="Modelled Packages",
+        header=13  # 0-indexed: column names are read from the fourteenth row
+    )
+
+    needs_cwi = costed_packages[
+        costed_packages["Main Wall Insulation"].isin(
+            [
+                "Poss Extract CWI & Refill (issues identified)",
+                "CWI RdSAP Default"
+            ]
+        )
+    ][["Address ID", "Address", "Current SAP Rating", "Current EPC Band", "Postcode", "Archetype ID",
+       "Main Wall Insulation",
+       "Main Roof Type", "Main Roof Insulation", "Main Roof Insulation Thickness"]]
+
+    # Flag every archetyped property that shares an Archetype ID with one of those packages
+    archetyped_properties["Survey shows CWI needed for Archetype"] = archetyped_properties["Archetype ID"].isin(
+        needs_cwi["Archetype ID"]
+    )
+    # Drop rows with a null Address ID or the literal "Address ID" (presumably repeated header rows - confirm)
+    archetyped_properties = archetyped_properties[~pd.isnull(archetyped_properties["Address ID"])]
+    archetyped_properties = archetyped_properties[archetyped_properties["Address ID"] != "Address ID"]
+
+    # The full property feature list (Parity download master sheet), keyed by Address ID
+    features = pd.read_csv(
+        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Osmosis Reviewed - Parity Download 18.7 - "
+        "master sheet.csv",
+        encoding='latin1'
+    )
+    features["Address ID"] = features["Address ID"].astype(str)
+    # NOTE(review): the merge below joins on "Address ID" - confirm the archetyped IDs are also str
+    features_to_merge = features[
+        [
+            "Address ID", "Age", "Property Type", "Walls", "Roofs", "Glazing", "Heating", "Main Fuel", "Hot Water",
+            "Renewables", "Total Floor Area"
+        ]
+    ]
+
+    stonewater_cavity_properties = archetyped_properties[
+        ["Name", "Postcode", "Osm. ID", "Address ID", "UPRN", "UDPRN", "Archetype ID", "House no", "Street name",
+         "Address line 2", "City/Town", "Is Cavity Property", "Survey shows CWI needed for Archetype"]
+    ].merge(
+        features_to_merge, how="left", on="Address ID"
+    )
+
+    # Keep only as-built cavity properties, or those whose archetype was flagged for CWI by the survey
+    stonewater_cavity_properties = stonewater_cavity_properties[
+        stonewater_cavity_properties["Is Cavity Property"] |
+        stonewater_cavity_properties["Survey shows CWI needed for Archetype"]
+        ]
+
+    stonewater_cavity_properties["Reason Included"] = "As Built Cavity Property"  # default; later assignments win
+    stonewater_cavity_properties["Reason Included"] = np.where(
+        stonewater_cavity_properties["Survey shows CWI needed for Archetype"] &
+        ~stonewater_cavity_properties["Is Cavity Property"],
+        "Survey revealed potential need for CWI or extract and re-fill",
+        stonewater_cavity_properties["Reason Included"]
+    )
+    stonewater_cavity_properties["Reason Included"] = np.where(
+        stonewater_cavity_properties["Survey shows CWI needed for Archetype"] &
+        stonewater_cavity_properties["Is Cavity Property"],
+        "Surveyed revealed potential need for CWI or extract and re-fill and is an as built cavity property",
+        stonewater_cavity_properties["Reason Included"]
+    )
+    # Property-level survey findings (matched on Address ID) override the archetype-level reasons above
+    stonewater_cavity_properties["Reason Included"] = np.where(
+        stonewater_cavity_properties["Address ID"].isin(
+            needs_cwi[needs_cwi["Main Wall Insulation"] == "CWI RdSAP Default"]["Address ID"].astype(int).astype(
+                str).values
+        ),
+        "Survey showed this property needs CWI",
+        stonewater_cavity_properties["Reason Included"]
+    )
+
+    stonewater_cavity_properties["Reason Included"] = np.where(
+        stonewater_cavity_properties["Address ID"].isin(
+            needs_cwi[needs_cwi["Main Wall Insulation"] == "Poss Extract CWI & Refill (issues identified)"][
+                "Address ID"].astype(int).astype(str).values
+        ),
+        "Survey showed this property could need extract and re-fill",
+        stonewater_cavity_properties["Reason Included"]
+    )
+
+    # Load the previously stored EPC data from S3: a JSON batch and a pickled second batch
+    epc_data = json.loads(
+        read_from_s3(
+            bucket_name="retrofit-data-dev",
+            s3_file_name="customers/Stonewater/clustering/epc_data.json"
+        )
+    )
+    epc_data = pd.DataFrame(epc_data)
+
+    epc_data["uprn"] = np.where(  # hard-coded UPRN override for internal_id 1091 (reason not recorded here)
+        epc_data["internal_id"] == 1091,
+        83143766,
+        epc_data["uprn"]
+    )
+
+    epc_data_batch_2 = read_pickle_from_s3(
+        s3_file_name="customers/Stonewater/clustering/epc_data_batch_2.pkl",
+        bucket_name="retrofit-data-dev"
+    )
+    epc_data_batch_2 = pd.DataFrame(epc_data_batch_2)
+
+    complete_epcs = pd.concat([epc_data, epc_data_batch_2])
+
+    epcs_to_merge = complete_epcs[
+        [
+            "uprn",
+            "address",
+            "postcode",
+            "property-type",
+            "built-form",
+            "inspection-date",
+            "current-energy-rating",
+            "current-energy-efficiency",
+            "roof-description",
+            "walls-description",
+            "transaction-type",
+            "secondheat-description",
+            "total-floor-area",
+            "construction-age-band",
+            "floor-height",
+            "number-habitable-rooms",
+            "mainheat-description",
+            "energy-consumption-current"
+        ]
+    ].rename(
+        columns={
+            "address": "Address",
+            "postcode": "Postcode",
+            "inspection-date": "Date of last EPC",
+            "current-energy-efficiency": "SAP score on register",
+            "current-energy-rating": "EPC rating on register",
+            "property-type": "Property Type",
+            "built-form": "Archetype",
+            "total-floor-area": "Property Floor Area",
+            "construction-age-band": "Property Age Band",
+            "floor-height": "Property Floor Height",
+            "number-habitable-rooms": "Number of Habitable Rooms",
+            "walls-description": "Wall Construction",
+            "roof-description": "Roof Construction",
+            "mainheat-description": "Heating Type",
+            "secondheat-description": "Secondary Heating",
+            "transaction-type": "Reason for last EPC",
+            "energy-consumption-current": "Heat Demand (kWh/m2)",
+        }
+    )
+    # De-dupe on UPRN, keeping the row with the newest inspection date (sorted newest first, first row kept)
+    epcs_to_merge["Date of last EPC"] = pd.to_datetime(epcs_to_merge["Date of last EPC"])
+    epcs_to_merge = epcs_to_merge.sort_values("Date of last EPC", ascending=False)
+    epcs_to_merge = epcs_to_merge.drop_duplicates(subset="uprn")
+
+    # Prefix the Parity feature columns, then attach the EPC columns by UPRN
+    stonewater_cavity_properties = stonewater_cavity_properties.rename(
+        columns={
+            "Age": "Parity - Build Age",
+            "Property Type": "Parity - Property Type",
+            "Walls": "Parity - Wall Construction",
+            "Roofs": "Parity - Roof Construction",
+            "Glazing": "Parity - Glazing Type",
+            "Heating": "Parity - Heating Type",
+            "Main Fuel": "Parity - Main Fuel",
+            "Hot Water": "Parity - Hot Water",
+            "Renewables": "Parity - Renewables",
+            "Total Floor Area": "Parity - Total Floor Area"
+        }
+    ).merge(  # left join keeps every cavity property, with or without an EPC
+        epcs_to_merge,  # also has "Postcode", so pandas suffixes both copies as _x / _y
+        how="left",
+        left_on="UPRN",  # NOTE(review): dtypes of UPRN and uprn must match for rows to join - confirm
+        right_on="uprn"
+    )
+
+    # Select the properties in the master sheet that are not in the archetyped list
+
+    additional_properties = features[
+        ~features["Address ID"].isin(archetyped_properties["Address ID"].values)
+    ]
+
+    # Keep cavity walls: the as-built descriptions plus the FilledCavity, External and Internal descriptions
+    additional_properties = additional_properties[
+        additional_properties["Walls"].isin(
+            cavity_descriptions +
+            ["Cavity: FilledCavity", "Cavity: External", "Cavity: Internal"]
+        )
+    ]
+
+    # TODO: unfinished - the loop only derives the address parts; no EPC lookup is made in the code shown
+    for _, home in tqdm(additional_properties.iterrows()):
+        full_address = home["Address"]
+        postcode = home["Postcode"]
+        address1 = full_address.split(",")[0]
--- a/etl/customers/stonewater/requirements/requirements-wave-3-prep.txt
+++ b/etl/customers/stonewater/requirements/requirements-wave-3-prep.txt
@ -2,3 +2,7 @@ PyPDF2
 pandas
 tqdm
 openpyxl
+boto3
+epc-api-python==1.0.2
+usaddress==0.5.11
+fuzzywuzzy==0.18.0