adding in new features

2026-06-08 11:17:27 +00:00 · 2024-11-18 18:24:26 +00:00 · 2024-11-18 18:24:26 +00:00 · 294506853d
commit 294506853d
parent efba61c6ac
3 changed files with 71 additions and 7 deletions
--- a/etl/customers/aiha/bid_numbers.py
+++ b/etl/customers/aiha/bid_numbers.py
@ -52,6 +52,20 @@ aiha_wave_3_features = aiha_original_asset_data[
 wall_type_breakdown = aiha_wave_3_features["Wall type"].value_counts()
 property_type_breakdown = aiha_wave_3_features.groupby(["Property type", "floor"]).size().reset_index()

+aiha_wave_3_features[aiha_wave_3_features["Property type"] == "Flat"][["Street address", "Postcode"]]
+
+# 4   Yetev Lev Court   ...  Semi-Detached     mid  - Medium
+# B    86 Bethune Road  ...    Mid-Terrace     top. - Low
+# A    80 Bethune Road  ...    Mid-Terrace  ground. - Low
+# B    80 Bethune Road  ...             \n      \n  - Low
+# A   9 Clapton Common  ...  Semi-Detached  ground. - Low
+# C   9 Clapton Common  ...    End-Terrace      \n. - Low
+# B      89 Manor Road  ...             \n      \n. - Low
+# A  6 Northfield Road  ...       Detached     top. - Low
+# 13 Northfield Rd  ...  Semi-Detached      \n      - Low
+# A      73 Manor Road  ...    End-Terrace      \n  - Low
+# B      73 Manor Road  ...       Detached     top  - Low
+
 # Hornsey data - contained in original asset list
 hornsey_asset_list = pd.read_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/AIHA/SHDF - Template - EOI - Hornsey Housing "
@ -88,5 +102,5 @@ caha_epc_data = pd.read_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/AIHA/caha_extracted_property_data.xlsx"
 )

-caha_epc_data["property_type"].value_counts()
-caha_epc_data["wall_type"].value_counts()
+caha_epc_data[caha_epc_data["address"] != "33 Woodhouse Road"]["property_type"].value_counts()
+caha_epc_data[caha_epc_data["address"] != "33 Woodhouse Road"]["wall_type"].value_counts()
--- a/etl/customers/remote_assessments/app.py
+++ b/etl/customers/remote_assessments/app.py
@ -17,6 +17,7 @@ def app():
            "address": "5, Lynton Street",
            "postcode": "DE22 3RW"
        }
+
    ]
    asset_list = pd.DataFrame(asset_list)

--- a/etl/customers/stonewater/Wave
+++ b/etl/customers/stonewater/Wave
@ -6,6 +6,7 @@ import numpy as np
 from tqdm import tqdm
 from collections import Counter
 from scipy.optimize import linprog
+from utils.s3 import read_pickle_from_s3

 CUSTOMER_FOLDER_PATH = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater"
 SURVEY_FOLDERS = os.path.join(CUSTOMER_FOLDER_PATH, "StonewaterSurveys_{i}")
@ -1264,7 +1265,7 @@ def main():
        stonewater_data[c] = stonewater_data[c].astype(str)

    # Save this data to excel
-    stonewater_data.to_excel(CUSTOMER_FOLDER_PATH + "/Stonewater - costed retrofit packages V2.xlsx", index=False)
+    stonewater_data.to_excel(CUSTOMER_FOLDER_PATH + "/Stonewater - costed retrofit packages V3.xlsx", index=False)

    cost_sheet = [
        {
@ -1654,17 +1655,66 @@ def propsed_wave_3_sample():
         "Property Type", "Wall Type", "Roof Type", "Heating"]
    ]

+    # Updated packages: to_excel(CUSTOMER_FOLDER_PATH + "/Stonewater - costed retrofit packages V3.xlsx", index=False)
    survey_results = pd.read_excel(
        os.path.join(CUSTOMER_FOLDER_PATH, "Stonewater - Bid Packages WIP 14.11.24.xlsx"),
        header=13,
        sheet_name="Modelled Packages"
    )

+    additional_survey_data = pd.read_excel(
+        os.path.join(CUSTOMER_FOLDER_PATH, "Stonewater - costed retrofit packages V3.xlsx"),
+        header=0
+    )
+    survey_results = survey_results.merge(
+        additional_survey_data[
+            [
+                "Address ID",
+                "Main Wall Type", "Main Wall Insulation_x", "Main Wall Thickness",
+                "Main Building Alternative Wall Type", "Main Building Alternative Wall Insulation",
+                "Main Building Alternative Wall Thickness"
+            ]
+        ].rename(columns={"Main Wall Insulation_x": "Main Wall Insulation Type"}),
+        how="left",
+        on="Address ID"
+    )
+
    # TODO: We probably want the actual surveyed wall, roof, heating type
    survey_results = survey_results[
-        ["Address ID", "Archetype ID", "Current SAP Rating", "Current EPC Band", "Postcode"]
+        [
+            "Address ID", "Archetype ID", "Current SAP Rating", "Current EPC Band", "Postcode",
+            "Main Roof Type", "Main Roof Insulation", "Main Roof Insulation Thickness",
+            "Existing Primary Heating System",
+            "Main Wall Type", "Main Wall Insulation Type", "Main Wall Thickness",
+            "Main Building Alternative Wall Type", "Main Building Alternative Wall Insulation",
+            "Main Building Alternative Wall Thickness"
        ]
-    survey_results["Postal Region"] = survey_results["Postcode"].str.split(" ").str[0]
+    ].rename(
+        columns={
+            "Existing Primary Heating System": "Surveyed Primary Heating System"
+        }
+    )
+
+    # Concatenate the main wall type and its insulation type into one descriptor
+    survey_results["Surveyed: Wall Type"] = survey_results["Main Wall Type"] + ": " + survey_results[
+        "Main Wall Insulation Type"]
+    # Alternative wall
+    survey_results["Survey: Main Alternative Wall"] = (
+        survey_results["Main Building Alternative Wall Type"] + ": " + survey_results[
+        "Main Building Alternative Wall Insulation"]
+    )
+    # Roof information
+    survey_results["Survey: Type"] = survey_results["Main Roof Type"] + ": " + survey_results[
+        "Main Roof Insulation"] + ": " + survey_results["Main Roof Insulation Thickness"].astype(str)
+
+    # Drop the individual columns:
+    survey_results = survey_results.drop(
+        columns=[
+            "Main Roof Type", "Main Roof Insulation", "Main Roof Insulation Thickness",
+            "Main Wall Type", "Main Wall Insulation Type",
+            "Main Building Alternative Wall Type", "Main Building Alternative Wall Insulation"
+        ]
+    )

    survey_results_with_original_features = survey_results.merge(
        asset_list[["UPRN", "Address ID", "Property Type", "Wall Type", "Roof Type", "Heating"]],
@ -1676,7 +1726,6 @@ def propsed_wave_3_sample():
        raise ValueError("Something went wrong")

    # We get longitude & Latitude
-    from utils.s3 import read_pickle_from_s3
    archetyping_spatial_features = read_pickle_from_s3(
        bucket_name="retrofit-data-dev", s3_file_name="scustomers/Stonewater/clustering/spatial_data_to_uprn.pkl",
    )