minor

2026-07-27 23:35:01 +00:00 · 2024-11-15 14:56:00 +00:00 · 2024-11-15 14:56:00 +00:00 · 2eaf19c2bb
commit 2eaf19c2bb
parent 69b3ec7961
5 changed files with 343 additions and 1 deletions
--- a/etl/customers/aiha/bid_numbers.py
+++ b/etl/customers/aiha/bid_numbers.py
@ -0,0 +1,92 @@
+"""
+This is an adhoc script, used to pull together some of the figures that are being included in the
+Warm Homes: Social Housing Wave 3 funding application.
+
+It reads workbooks and CSVs from hard-coded local paths and builds EPC rating, wall type and property type
+breakdowns. Nothing is written back out: the results are left in module-level variables for inspection.
+"""
+
+import pandas as pd
+import numpy as np
+
+# Both measures sheets come from the same workbook, so the path is named once
+MEASURES_WORKBOOK = "/Users/khalimconn-kowlessar/Downloads/AIHA Measures Packages 2024_11_13.xlsx"
+# The columns used below to match a unit between the asset data and the measures workbook
+ADDRESS_COLUMNS = ["Address letter or number", "Street address", "Postcode"]
+
+aiha_all_units = pd.read_excel(
+    MEASURES_WORKBOOK,
+    sheet_name="All Properties - AIHA",
+    header=2
+)
+modelled_units = pd.read_excel(
+    MEASURES_WORKBOOK,
+    sheet_name="Modelled Properties - Measures",
+    header=5
+)
+aiha_all_units = aiha_all_units.drop(columns=['Unnamed: 0', 'Unnamed: 1'])
+# Loaded for reference; none of the figures below read from it
+aiha_extracted_property_data = pd.read_csv(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/AIHA/extracted_property_data.csv"
+)
+# Units with a positive expected package cost are treated as the Wave 3 units
+aiha_wave_3_units = aiha_all_units[aiha_all_units["Expected Package Cost"].astype(float) > 0]
+# TODO: The EPC C property isn't a C!
+# "D or E" is counted as an E
+aiha_epc_breakdown = aiha_wave_3_units["Expected EPC Rating"].replace({"D or E": "E"}).value_counts()
+# For CAHA
+caha_epc_breakdown = modelled_units[
+    modelled_units['Survey Key'].str.contains("CAHA")
+]['Current EPC Rating'].value_counts()
+# For Hornsey
+hornsey_epc_breakdown = modelled_units[
+    modelled_units['Survey Key'].str.contains("HORNSEY")
+]['Current EPC Rating'].value_counts()
+
+aiha_original_asset_data = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/AIHA/240924- KSQ & Domna Info Merge - AIHA - SHDF Wave 3 "
+    "bid - Supplementary information.xlsx",
+    sheet_name="Archetyping Data",
+    header=2
+)
+
+# Get the units in the bid: the inner merge keeps only the asset rows that match a Wave 3 unit
+aiha_wave_3_features = aiha_original_asset_data[
+    ADDRESS_COLUMNS + ["Wall type", "Property type", "built-form", "floor"]
+].merge(
+    aiha_wave_3_units[ADDRESS_COLUMNS],
+    how="inner",
+    on=ADDRESS_COLUMNS
+)
+
+wall_type_breakdown = aiha_wave_3_features["Wall type"].value_counts()
+property_type_breakdown = aiha_wave_3_features.groupby(["Property type", "floor"]).size().reset_index()
+
+# Hornsey data - contained in original asset list
+hornsey_asset_list = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/AIHA/SHDF - Template - EOI - Hornsey Housing "
+    "Trust.xlsx",
+    sheet_name="Ksquared-All units information",
+    header=3
+)
+
+# We don't need the first row
+hornsey_asset_list = hornsey_asset_list.iloc[1:]
+# Fill NA values with empty strings
+hornsey_asset_list = hornsey_asset_list.fillna("")
+# Tidy the address columns: cast to string, trim, then replace double spaces
+for col in ADDRESS_COLUMNS:
+    hornsey_asset_list[col] = hornsey_asset_list[col].astype(str).str.strip().str.replace("  ", " ")
+
+# Rows without an address letter or number are dropped. The copy makes the filtered frame independent, so
+# the column assignment below does not trigger a chained-assignment warning
+hornsey_asset_list = hornsey_asset_list[hornsey_asset_list["Address letter or number"] != ""].copy()
+
+# Anything that does not mention "Cavity" is counted as "Solid"
+hornsey_asset_list["Wall Type Cleaned"] = np.where(
+    hornsey_asset_list["Wall type"].str.contains("Cavity"),
+    "Cavity",
+    "Solid"
+)
+
+# Bound to a name (rather than left as a bare expression) so the figure survives a non-interactive run
+hornsey_property_type_breakdown = hornsey_asset_list["Property type"].value_counts()
+
+# CAHA
+caha_epc_data = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/AIHA/caha_extracted_property_data.xlsx"
+)
+
+caha_property_type_breakdown = caha_epc_data["property_type"].value_counts()
+caha_wall_type_breakdown = caha_epc_data["wall_type"].value_counts()
--- a/etl/customers/aiha/xml_extraction.py
+++ b/etl/customers/aiha/xml_extraction.py
@ -92,9 +92,13 @@ def main():

    # THis is the data we need for the AIHA project
    measures_data = extracted_surveys[
-        ["survey_key", "address", "postcode", "current-energy-efficiency", "current-energy-rating", "number_of_floors"]
+        ["survey_key", "address", "postcode", "current-energy-efficiency", "current-energy-rating",
+         "number_of_floors", "walls-description", "property-type", "built-form"]
    ]
    measures_data = measures_data.sort_values("survey_key", ascending=True)
+    measures_data.to_csv(
+        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/AIHA/extracted_property_data.csv",
+    )

    # Note:
    # The properties will still have "Very poor" ratings for their hot water
--- a/etl/customers/ksquared/Wave3
+++ b/etl/customers/ksquared/Wave3
@ -6,6 +6,7 @@ from etl.epc.settings import EARLIEST_EPC_DATE
 from dotenv import load_dotenv
 from tqdm import tqdm
 import pandas as pd
+import numpy as np
 from etl.find_my_epc.RetrieveFindMyEpc import RetrieveFindMyEpc
 from backend.SearchEpc import SearchEpc
 from utils.s3 import save_csv_to_s3
@ -46,6 +47,12 @@ def hornsey():

    hornsey_asset_list = hornsey_asset_list[hornsey_asset_list["Address letter or number"] != ""]

+    # `"Cavity" in series` would test the index labels rather than the values, giving every row the same
+    # label, so match on the strings instead. na=False makes a missing wall type count as "Solid".
+    hornsey_asset_list["Wall Type Cleaned"] = np.where(
+        hornsey_asset_list["Wall type"].str.contains("Cavity", na=False),
+        "Cavity",
+        "Solid"
+    )
+
    missed_uprns = {
        "Flat 13A Stowell House": 100021213098,
        "Flat 24 Stowell House": 100021213110,
@ -267,6 +274,9 @@ def caha():
                "address": address,
                "postcode": home["Postcode"],
                "property_type": newest_epc["property-type"],
+                "wall_type": newest_epc["walls-description"],
+                "built_form": newest_epc["built-form"],
+                "flat_storey_count": newest_epc['flat-storey-count'],
            }
        )

--- a/etl/customers/southend/epc_data_pull_2024_11_14.py
+++ b/etl/customers/southend/epc_data_pull_2024_11_14.py
@ -0,0 +1,235 @@
+import os
+import time
+
+import pandas as pd
+from tqdm import tqdm
+
+from dotenv import load_dotenv
+from utils.s3 import read_excel_from_s3
+from backend.SearchEpc import SearchEpc
+from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes
+
+from recommendations.recommendation_utils import (
+    estimate_perimeter,
+    estimate_external_wall_area,
+    estimate_number_of_floors
+)
+
+# Load environment variables from the backend .env file. The path is relative, so this presumably expects
+# the script to be run from the repository root - TODO confirm.
+load_dotenv(dotenv_path="backend/.env")
+# Token handed to SearchEpc in get_data; os.getenv returns None when the variable is not set.
+EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
+
+
+def get_data(asset_list):
+    """
+    Look up the newest EPC, and its recommendations, for every row of an asset list.
+
+    :param asset_list: DataFrame with "Postcode", "address1", "Address" and "row_id" columns
+    :return: tuple of (epc_data, errors). epc_data is a list of dicts, one per property for which an EPC was
+        found, holding the row_id, the fields of the newest EPC and a "recommendations" list. errors is the
+        list of row_ids whose lookup raised, so that they can be retried. Properties with no EPC appear in
+        neither list.
+    """
+    epc_data = []
+    errors = []
+    for _, home in tqdm(asset_list.iterrows(), total=len(asset_list)):
+        try:
+            postcode = home["Postcode"]
+            address1 = home["address1"].split(",")[0]
+            full_address = home["Address"]
+
+            searcher = SearchEpc(
+                address1=str(address1),
+                postcode=postcode,
+                auth_token=EPC_AUTH_TOKEN,
+                os_api_key="",
+                property_type=None,
+                fast=True,
+                full_address=full_address,
+                max_retries=5
+            )
+            # Force the skipping of estimating the EPC
+            searcher.ordnance_survey_client.property_type = None
+            searcher.ordnance_survey_client.built_form = None
+
+            searcher.find_property(skip_os=True)
+            if searcher.newest_epc is None:
+                continue
+
+            # Look for EPC recommendations; a failed lookup is treated as "no recommendations".
+            # Exception (not a bare except) so that Ctrl-C still stops the run.
+            try:
+                property_recommendations = searcher.client.domestic.recommendations(searcher.newest_epc["lmk-key"])
+            except Exception:
+                property_recommendations = {"rows": []}
+
+            epc = {
+                "row_id": home["row_id"],
+                **searcher.newest_epc.copy(),
+                "recommendations": property_recommendations["rows"]
+            }
+
+            epc_data.append(epc)
+        except Exception:
+            # Record the row so it can be retried, then pause before the next request
+            errors.append(home["row_id"])
+            time.sleep(5)
+
+    return epc_data, errors
+
+
+def app():
+    """
+    Pull EPC data for the properties in the Southend planned programme workbook and save it as a spreadsheet.
+
+    NOTE(review): this docstring previously said the properties were "owned by Livewest", which does not match
+    the Southend workbook read below - presumably copied from another script, confirm.
+
+    Data request contents:
+    Date of last EPC
+    Reason for EPC
+    SAP score on register
+    Property Type
+    Property Area
+    Property Age
+    Any Dimensions (HLP,PW,RH)
+    Property Wall Construction
+    Heating Type
+    Secondary Heating
+    Loft Insulation Depth
+
+    Additional if possible:
+    Heat loss calculations
+    EPC recommendations
+    Property UPRN
+
+    """
+    asset_list = pd.read_excel(
+        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/southend/Southend Planned programme.xlsx",
+        header=0,
+        sheet_name="Planned RM"
+    )
+    # row_id ties each EPC result back to its row in the asset list
+    asset_list["row_id"] = asset_list.index
+    asset_list["address1"] = asset_list["Address"].str.split(",").str[0]
+
+    epc_data, errors = get_data(asset_list)
+
+    # We now retry any properties whose lookup raised on the first pass
+    asset_list_failed = asset_list[asset_list["row_id"].isin(errors)]
+    epc_data_failed, _ = get_data(asset_list_failed)
+
+    # Append the retried data to the main data
+    epc_data.extend(epc_data_failed)
+
+    epc_df = pd.DataFrame(epc_data)
+
+    # We expand out the recommendations into one True/False column per distinct recommendation text
+    unique_recommendations = set()
+    for recommendations in epc_df["recommendations"]:
+        unique_recommendations.update(rec["improvement-summary-text"] for rec in recommendations)
+
+    transformed_data = []
+    for row_id, recommendations in zip(epc_df["row_id"], epc_df["recommendations"]):
+        # Start with False for every recommendation, then flag the ones present for this property
+        row_data = {"row_id": row_id, **dict.fromkeys(unique_recommendations, False)}
+        for rec in recommendations:
+            row_data[rec["improvement-summary-text"]] = True
+        transformed_data.append(row_data)
+
+    transformed_df = pd.DataFrame(transformed_data)
+    # Drop the column that is "". It only exists when some recommendation had blank summary text, so a
+    # missing column must not raise
+    transformed_df = transformed_df.drop(columns=[""], errors="ignore")
+
+    # Retrieve just the data we need
+    epc_df = epc_df[
+        [
+            "row_id",
+            "uprn",
+            "property-type",
+            "built-form",
+            "inspection-date",
+            "current-energy-rating",
+            "current-energy-efficiency",
+            "roof-description",
+            "walls-description",
+            "transaction-type",
+            # New fields needed
+            "secondheat-description",
+            "total-floor-area",
+            "construction-age-band",
+            "floor-height",
+            "number-habitable-rooms",
+            "mainheat-description",
+            #
+            "energy-consumption-current",  # kwh/m2
+            "photo-supply",
+        ]
+    ]
+
+    # Left merges: properties without an EPC stay in the output with empty EPC columns
+    asset_list = asset_list.merge(
+        epc_df,
+        how="left",
+        on="row_id"
+    ).merge(
+        transformed_df,
+        how="left",
+        on="row_id"
+    )
+
+    asset_list = asset_list.drop(columns=["row_id"])
+
+    # Rename the columns
+    asset_list = asset_list.rename(columns={
+        "inspection-date": "Date of last EPC",
+        "current-energy-efficiency": "SAP score on register",
+        "current-energy-rating": "EPC rating on register",
+        "property-type": "Property Type",
+        "built-form": "Archetype",
+        "total-floor-area": "Property Floor Area",
+        "construction-age-band": "Property Age Band",
+        "floor-height": "Property Floor Height",
+        "number-habitable-rooms": "Number of Habitable Rooms",
+        "walls-description": "Wall Construction",
+        "roof-description": "Roof Construction",
+        "mainheat-description": "Heating Type",
+        "secondheat-description": "Secondary Heating",
+        "transaction-type": "Reason for last EPC",
+        "energy-consumption-current": "Heat Demand (kWh/m2)",
+        "photo-supply": "% of the Roof with PV"
+    })
+
+    asset_list["Estimated Number of Floors"] = asset_list.apply(
+        lambda x: estimate_number_of_floors(property_type=x["Property Type"]) if not pd.isnull(
+            x["Property Type"]) else None, axis=1
+    )
+
+    asset_list["Property Floor Area"] = asset_list["Property Floor Area"].astype(float)
+    # Blank "" values become NaN. mask() is used rather than replace("", None) because older pandas releases
+    # treated an explicit value=None as "not given" and forward-filled the blanks instead
+    habitable_rooms = asset_list["Number of Habitable Rooms"]
+    asset_list["Number of Habitable Rooms"] = habitable_rooms.mask(habitable_rooms == "").astype(float)
+
+    asset_list["Estimated Perimeter (m)"] = asset_list.apply(
+        lambda x: estimate_perimeter(
+            floor_area=x["Property Floor Area"] / x["Estimated Number of Floors"],
+            num_rooms=x["Number of Habitable Rooms"] / x["Estimated Number of Floors"],
+        ), axis=1
+    )
+
+    asset_list["Estimated Heat Loss Perimeter (m2)"] = asset_list.apply(
+        lambda x: estimate_external_wall_area(
+            num_floors=x["Estimated Number of Floors"],
+            # Fall back to 2.5 when the height is missing. NaN is truthy, so it needs the explicit isnull
+            # check; a plain truthiness test would pass NaN straight through
+            floor_height=(
+                2.5 if pd.isnull(x["Property Floor Height"]) or not x["Property Floor Height"]
+                else float(x["Property Floor Height"])
+            ),
+            perimeter=x["Estimated Perimeter (m)"],
+            built_form=x["Archetype"]
+        ),
+        axis=1
+    )
+
+    asset_list["Roof Insulation Thickness"] = asset_list.apply(
+        lambda x: RoofAttributes(description=x["Roof Construction"]).process()["insulation_thickness"] if not pd.isnull(
+            x["Roof Construction"]) else None,
+        axis=1
+    )
+
+    # Store as an excel
+    filename = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/southend/southend EPC Data pull - 14 Nov "
+                "2024.xlsx")
+    asset_list.to_excel(filename, index=False)
--- a/etl/customers/stonewater/Wave
+++ b/etl/customers/stonewater/Wave
@ -117,6 +117,7 @@ def extract_summary_report(pdf_path):
    - Fuel Bill
    - Address
    """
+    
    data = {
        "Address": None,
        "Postcode": None,