diff --git a/.idea/Model.iml b/.idea/Model.iml
index df6c4faa..762580d9 100644
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@@ -7,7 +7,7 @@
-
+
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 50cad4ca..c916a158 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,7 +3,7 @@
-
+
diff --git a/etl/customers/settle/route_march_2024_11_08.py b/etl/customers/settle/route_march_2024_11_08.py
new file mode 100644
index 00000000..21b6f2df
--- /dev/null
+++ b/etl/customers/settle/route_march_2024_11_08.py
@@ -0,0 +1,226 @@
+import os
+import time
+
+import pandas as pd
+from tqdm import tqdm
+
+from dotenv import load_dotenv
+from utils.s3 import read_excel_from_s3
+from backend.SearchEpc import SearchEpc
+from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes
+
+from recommendations.recommendation_utils import (
+ estimate_perimeter,
+ estimate_external_wall_area,
+ estimate_number_of_floors
+)
+
+load_dotenv(dotenv_path="backend/.env")
+EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
+
+
+def get_data(asset_list):
+    """
+    Look up the newest EPC record, and its recommendations, for every property in the asset list.
+
+    :param asset_list: DataFrame with one row per property; the "Postcode", "AddressLine1", "AddressLine4",
+        "AddressLine5" and "row_id" columns are read
+    :return: (epc_data, errors) - epc_data is a list of dicts, one per property for which an EPC was found,
+        holding the row_id, the fields of the newest EPC and its "recommendations" rows; errors is the list
+        of row_ids whose lookup raised, so that the caller can retry them
+    """
+    epc_data = []
+    errors = []
+    for _, home in tqdm(asset_list.iterrows(), total=len(asset_list)):
+        try:
+            # Blank spreadsheet cells arrive as NaN, which str.join rejects, so only the populated address
+            # lines are joined
+            address_lines = [home["AddressLine1"], home["AddressLine4"], home["AddressLine5"]]
+            full_address = ", ".join(str(line) for line in address_lines if not pd.isnull(line))
+
+            searcher = SearchEpc(
+                address1=str(home["AddressLine1"]),
+                postcode=home["Postcode"],
+                auth_token=EPC_AUTH_TOKEN,
+                os_api_key="",
+                property_type=None,
+                fast=True,
+                full_address=full_address,
+                max_retries=5
+            )
+            # Force the skipping of estimating the EPC
+            searcher.ordnance_survey_client.property_type = None
+            searcher.ordnance_survey_client.built_form = None
+
+            searcher.find_property(skip_os=True)
+            if searcher.newest_epc is None:
+                # No EPC on the register for this property: it is neither a result nor an error
+                continue
+
+            # Look for EPC recommendations; if that lookup fails the property is kept with an empty list.
+            # Only Exception is caught so that Ctrl-C still stops the run
+            try:
+                property_recommendations = searcher.client.domestic.recommendations(
+                    searcher.newest_epc["lmk-key"]
+                )
+            except Exception:
+                property_recommendations = {"rows": []}
+
+            epc_data.append(
+                {
+                    "row_id": home["row_id"],
+                    **searcher.newest_epc,
+                    "recommendations": property_recommendations["rows"]
+                }
+            )
+        except Exception:
+            # Best effort: record the row so the caller can retry it, then pause before the next lookup
+            # (presumably to back off from the EPC API after a failure)
+            errors.append(home["row_id"])
+            time.sleep(5)
+
+    return epc_data, errors
+
+
+_DEFAULT_ASSET_LIST_PATH = (
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Settle/SETTLE FULL PROPOSED PROGRAMME.xlsx"
+)
+_DEFAULT_OUTPUT_PATH = (
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Settle/Settle EPC Data pull - 08 Nov 2024.xlsx"
+)
+
+
+def _expand_recommendations(epc_df):
+    """
+    Pivot each property's recommendation list into one boolean column per distinct summary text.
+
+    :param epc_df: DataFrame with a "row_id" column and a "recommendations" column holding lists of dicts
+        that each carry an "improvement-summary-text" key
+    :return: DataFrame with "row_id" plus one True/False column per summary text (the "" text is dropped)
+    """
+    summaries = {
+        rec["improvement-summary-text"]
+        for recommendations in epc_df["recommendations"]
+        for rec in recommendations
+    }
+    # Sorted so the output columns come out in the same order on every run
+    ordered_summaries = sorted(summaries, key=str)
+
+    rows = []
+    for row_id, recommendations in zip(epc_df["row_id"], epc_df["recommendations"]):
+        present = {rec["improvement-summary-text"] for rec in recommendations}
+        rows.append({"row_id": row_id, **{text: text in present for text in ordered_summaries}})
+
+    # Recommendations without a summary text carry no information; the column may not exist at all
+    return pd.DataFrame(rows).drop(columns=[""], errors="ignore")
+
+
+def app(asset_list_path=_DEFAULT_ASSET_LIST_PATH, output_path=_DEFAULT_OUTPUT_PATH):
+    """
+    Pull EPC data for the properties in the Settle asset list and store the enriched list as an excel
+
+    Data request contents:
+    Date of last EPC
+    Reason for EPC
+    SAP score on register
+    Property Type
+    Property Area
+    Property Age
+    Any Dimensions (HLP,PW,RH)
+    Property Wall Construction
+    Heating Type
+    Secondary Heating
+    Loft Insulation Depth
+
+    Additional if possible:
+    Heat loss calculations
+    EPC recommendations
+    Property UPRN
+
+    :param asset_list_path: path of the excel asset list to read (header on the first row)
+    :param output_path: path of the excel file to write
+    :return: None
+    :raises ValueError: if no EPC could be retrieved for any property
+    """
+    asset_list = pd.read_excel(asset_list_path, header=0)
+    asset_list["row_id"] = asset_list.index
+
+    epc_data, errors = get_data(asset_list)
+
+    # Retry the failed properties once; anything that fails a second time is left without EPC data
+    asset_list_failed = asset_list[asset_list["row_id"].isin(errors)]
+    epc_data_failed, _ = get_data(asset_list_failed)
+    epc_data.extend(epc_data_failed)
+
+    if not epc_data:
+        # Without this the column selections below fail with an unhelpful KeyError
+        raise ValueError("No EPC data was retrieved for any property in the asset list")
+
+    epc_df = pd.DataFrame(epc_data)
+
+    # We expand out the recommendations
+    recommendation_flags = _expand_recommendations(epc_df)
+
+    # Retrieve just the data we need
+    epc_df = epc_df[
+        [
+            "row_id",
+            "uprn",
+            "property-type",
+            "built-form",
+            "inspection-date",
+            "current-energy-rating",
+            "current-energy-efficiency",
+            "roof-description",
+            "walls-description",
+            "transaction-type",
+            # New fields needed
+            "secondheat-description",
+            "total-floor-area",
+            "construction-age-band",
+            "floor-height",
+            "number-habitable-rooms",
+            "mainheat-description",
+            #
+            "energy-consumption-current",  # kwh/m2
+        ]
+    ]
+
+    # Left merges keep every property of the asset list, also those without an EPC
+    asset_list = asset_list.merge(
+        epc_df,
+        how="left",
+        on="row_id"
+    ).merge(
+        recommendation_flags,
+        how="left",
+        on="row_id"
+    )
+
+    asset_list = asset_list.drop(columns=["row_id"])
+
+    # Rename the columns
+    asset_list = asset_list.rename(columns={
+        "inspection-date": "Date of last EPC",
+        "current-energy-efficiency": "SAP score on register",
+        "current-energy-rating": "EPC rating on register",
+        "property-type": "Property Type",
+        "built-form": "Archetype",
+        "total-floor-area": "Property Floor Area",
+        "construction-age-band": "Property Age Band",
+        "floor-height": "Property Floor Height",
+        "number-habitable-rooms": "Number of Habitable Rooms",
+        "walls-description": "Wall Construction",
+        "roof-description": "Roof Construction",
+        "mainheat-description": "Heating Type",
+        "secondheat-description": "Secondary Heating",
+        "transaction-type": "Reason for last EPC",
+        "energy-consumption-current": "Heat Demand (kWh/m2)"
+    })
+
+    asset_list["Estimated Number of Floors"] = asset_list.apply(
+        lambda x: estimate_number_of_floors(property_type=x["Property Type"]) if not pd.isnull(
+            x["Property Type"]) else None, axis=1
+    )
+
+    # Blank ("") values become NaN. to_numeric is used rather than replace("", None) because older pandas
+    # versions treat an explicit None value as a request to forward-fill from the previous row
+    for numeric_column in ["Property Floor Area", "Number of Habitable Rooms"]:
+        asset_list[numeric_column] = pd.to_numeric(asset_list[numeric_column], errors="coerce").astype(float)
+
+    # The perimeter is estimated per floor, hence the division by the number of floors
+    asset_list["Estimated Perimeter (m)"] = asset_list.apply(
+        lambda x: estimate_perimeter(
+            floor_area=x["Property Floor Area"] / x["Estimated Number of Floors"],
+            num_rooms=x["Number of Habitable Rooms"] / x["Estimated Number of Floors"],
+        ), axis=1
+    )
+
+    # NOTE(review): the column is labelled as a perimeter but holds the estimate_external_wall_area result
+    asset_list["Estimated Heat Loss Perimeter (m2)"] = asset_list.apply(
+        lambda x: estimate_external_wall_area(
+            num_floors=x["Estimated Number of Floors"],
+            # 2.5 is the fallback height used when the EPC holds an empty floor height
+            floor_height=float(x["Property Floor Height"]) if x["Property Floor Height"] else 2.5,
+            perimeter=x["Estimated Perimeter (m)"],
+            built_form=x["Archetype"]
+        ),
+        axis=1
+    )
+
+    asset_list["Roof Insulation Thickness"] = asset_list.apply(
+        lambda x: RoofAttributes(description=x["Roof Construction"]).process()["insulation_thickness"] if not pd.isnull(
+            x["Roof Construction"]) else None,
+        axis=1
+    )
+
+    # Store as an excel
+    asset_list.to_excel(output_path, index=False)
diff --git a/etl/customers/stonewater/Wave 3 Preparation.py b/etl/customers/stonewater/Wave 3 Preparation.py
index 9f929db1..0036a0a4 100644
--- a/etl/customers/stonewater/Wave 3 Preparation.py
+++ b/etl/customers/stonewater/Wave 3 Preparation.py
@@ -8,7 +8,7 @@ from collections import Counter
CUSTOMER_FOLDER_PATH = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater"
SURVEY_FOLDERS = os.path.join(CUSTOMER_FOLDER_PATH, "StonewaterSurveys_{i}")
-NUM_FOLDERS = 14
+NUM_FOLDERS = 15
def sap_to_epc(sap_points: int | float):
@@ -871,7 +871,10 @@ def main():
# We now merge on the coordinator data so that against each property, we can map the measures
retrofit_packages_board = pd.read_excel(
- os.path.join(CUSTOMER_FOLDER_PATH, "Stonewater 3.0 Updated SAP Pre & Modelled 29.10.24.xlsx"),
+ os.path.join(
+ CUSTOMER_FOLDER_PATH,
+ "Stonewater_SHDF_3_0_Board_work_in_progress_-_Operations_1731315080 11.11.24.xlsx"
+ ),
header=4
)
retrofit_packages_board = retrofit_packages_board[~pd.isnull(retrofit_packages_board["Name"])]
@@ -902,13 +905,24 @@ def main():
# '102 Cheaton Close': '',
# 'Flat 16 Spring Gardens': '',
# '4 Apple Close': '',
- '25 Folly Lane': '',
-
+ # '25 Folly Lane': '',
+ '2 Calshot Walk': 'StonewaterSurveys_3/156-3-2 Calshot Walk-MK41 8QS',
+ '21 Constitution Hill': 'StonewaterSurveys_1/112-11-21 Constitution Hill-BH14 0PX',
+ '22 Constitution Hill': 'StonewaterSurveys_4/185-8-22 Constitution Hill-BH14 0PX',
+ '2 Marches Cottages, School Lane, Leominster': 'StonewaterSurveys_5/224-1-2 School Lane-HR6 8AA',
+ '26, Copthorn House, Brighton Road': 'StonewaterSurveys_15/133-1-26 Brighton Road-KT20 6BQ',
+ '4, Old St Marys, Ripley Lane': "StonewaterSurveys_15/433-3-4 Ripley Lane-KT24 6JG",
+ '1 Nelson House, Short Street': 'StonewaterSurveys_15/89-2-1 Short Street-GU11 1HX',
+ "18 Nelson House, Short Street": 'StonewaterSurveys_15/25-3- 18 Short Street- GU11 1HX',
+ '3 Nelson House, Short Street': 'StonewaterSurveys_2/138-1-3 Short Street-GU11 1HX',
+ '16, Copthorn House, Brighton Road': 'StonewaterSurveys_13/78-3-16 Brighton Road-KT20 6BQ',
+ '20 Nelson House, Short Street': 'StonewaterSurveys_15/89-1-20 Short Street-GU11 1HX'
}
# We now match this retrofit packages board to the extracted data
matching_lookup = []
for _, home in tqdm(retrofit_packages_board.iterrows(), total=len(retrofit_packages_board)):
+
# Handle the case that has the wrong postcode in the asset data
if home["Name"] in manual_filters:
filtered = extracted_data[extracted_data["survey_folder"] == manual_filters[home["Name"]]].copy()
@@ -972,6 +986,10 @@ def main():
missing_ids = list(missing_ids)
if missing_ids:
# We check that the missing ids have no data yet
+ missed = retrofit_packages_board[retrofit_packages_board["Address ID"].isin(missing_ids)]
+ missed[["Name", "Postcode", "Archetype ID", "Arch. Group Rank"]].to_csv(
+ CUSTOMER_FOLDER_PATH + "/missed_debugging.csv")
+
if len(missing_ids) != 8:
raise Exception("Unacceptable number of missings")
@@ -1316,5 +1334,37 @@ def create_proposed_wave_3_bid(costed_packages_filepath, archetypes_sheet_filepa
# Save excel
proposed_sample.to_excel(CUSTOMER_FOLDER_PATH + "/Stonewater - Proposed Wave 3 Bid (WIP).xlsx", index=False)
+
+def find_remaining_surveys():
+    """
+    Compare the properties that have been surveyed against the properties that already have costed retrofit
+    packages, and write the surveyed properties without a costed package to needed_surveys.csv, so it is
+    clear which surveys still need to be downloaded from Sharepoint
+
+    :return: None
+    :raises AssertionError: if the needed and costed row counts do not add up to the surveyed row count
+    """
+
+    surveyed = pd.read_excel(
+        os.path.join(CUSTOMER_FOLDER_PATH, "Stonewater_SHDF_3_0_Board_work_in_progress_- 07.11.24.xlsx"),
+        header=4
+    )
+
+    costed = pd.read_excel(
+        os.path.join(
+            CUSTOMER_FOLDER_PATH,
+            "Stonewater - Costed Retrofit Packages 20241030 (WIP) MR Review v1.xlsx"
+        ),
+        header=13,
+        sheet_name="Modelled Packages"
+    )
+    # Rows without an Address ID are not properties
+    costed = costed[~pd.isnull(costed["Address ID"])]
+
+    # Copy the filtered slice so that adding the id column below does not write into a view of surveyed
+    needed = surveyed[~surveyed["Address ID"].isin(costed["Address ID"])].copy()
+
+    needed["id"] = needed["Archetype ID"].astype(str) + "-" + needed["Arch. Group Rank"].astype(str)
+    needed = needed.sort_values("id", ascending=True)
+    needed[["id", "Name", "Postcode"]].to_csv(
+        os.path.join(CUSTOMER_FOLDER_PATH, "needed_surveys.csv")
+    )
+
+    # Sanity check that every surveyed property is either costed or still needed; raised explicitly so the
+    # check is not stripped when python runs with -O
+    if needed.shape[0] + costed.shape[0] != surveyed.shape[0]:
+        raise AssertionError("Needed and costed properties do not add up to the surveyed properties")
+
# if __name__ == "__main__":
# main()