diff --git a/.idea/Model.iml b/.idea/Model.iml
index 762580d9..850c0cda 100644
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@@ -7,7 +7,7 @@
-
+
diff --git a/.idea/misc.xml b/.idea/misc.xml
index c916a158..e4070118 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,7 +3,7 @@
-
+
diff --git a/etl/customers/livewest/route_march_2024_10_28.py b/etl/customers/livewest/route_march_2024_10_28.py
index c19c78b1..1b259fba 100644
--- a/etl/customers/livewest/route_march_2024_10_28.py
+++ b/etl/customers/livewest/route_march_2024_10_28.py
@@ -19,6 +19,53 @@ load_dotenv(dotenv_path="backend/.env")
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
+def get_data(asset_list):
+    """Fetch the newest EPC record and its recommendations for every row of ``asset_list``.
+
+    Rows are read by the "Postcode", "Number", "Full Address" and "row_id" columns.
+    Returns ``(epc_data, errors)``: a list of EPC dicts, each tagged with its
+    ``row_id``, and the ``row_id`` of every row whose lookup raised (for a retry).
+    """
+    epc_data = []
+    errors = []
+    for _, home in tqdm(asset_list.iterrows(), total=len(asset_list)):
+        try:
+            searcher = SearchEpc(
+                address1=str(home["Number"]),
+                postcode=home["Postcode"],
+                auth_token=EPC_AUTH_TOKEN,
+                os_api_key="",
+                property_type=None,
+                fast=True,
+                full_address=home["Full Address"],
+                max_retries=5
+            )
+            # Force the skipping of estimating the EPC
+            searcher.ordnance_survey_client.property_type = None
+            searcher.ordnance_survey_client.built_form = None
+
+            searcher.find_property(skip_os=True)
+            if searcher.newest_epc is None:
+                continue
+
+            # EPC recommendations are best-effort: a failed lookup yields an empty list
+            try:
+                property_recommendations = searcher.client.domestic.recommendations(searcher.newest_epc["lmk-key"])
+            except Exception:  # not a bare except, so Ctrl-C still interrupts the run
+                property_recommendations = {"rows": []}
+
+            epc_data.append({
+                "row_id": home["row_id"],
+                **searcher.newest_epc.copy(),
+                "recommendations": property_recommendations["rows"]
+            })
+        except Exception:
+            errors.append(home["row_id"])
+            time.sleep(5)  # pause before moving on to the next row after a failure
+
+    return epc_data, errors
+
+
def app():
"""
This app is EPC pulling data for some properties owned by Livewest
@@ -45,56 +92,49 @@ def app():
asset_list = pd.read_excel(
"/Users/khalimconn-kowlessar/Downloads/LIVEWEST 3578 ECO4 ECO PLUS GBIS.xlsx", header=0
)
+ asset_list["row_id"] = asset_list.index
- epc_data = []
- errors = []
- for _, home in tqdm(asset_list.iterrows(), total=len(asset_list)):
- try:
- postcode = home["Postcode"]
- house_number = home["Number"]
- full_address = home["Full Address"]
+ epc_data, errors = get_data(asset_list)
- searcher = SearchEpc(
- address1=str(house_number),
- postcode=postcode,
- auth_token=EPC_AUTH_TOKEN,
- os_api_key="",
- property_type=None,
- fast=True,
- full_address=full_address,
- max_retries=3
- )
- # Force the skipping of estimating the EPC
- searcher.ordnance_survey_client.property_type = None
- searcher.ordnance_survey_client.built_form = None
+ # We now retrieve any failed properties
+ asset_list_failed = asset_list[asset_list["row_id"].isin(errors)]
+ epc_data_failed, _ = get_data(asset_list_failed)
- searcher.find_property(skip_os=True)
- if searcher.newest_epc is None:
- continue
-
- # Look for EPC recommendatons
- try:
- property_recommendations = searcher.client.domestic.recommendations(searcher.newest_epc["lmk-key"])
- except:
- property_recommendations = {"rows": []}
-
- epc = {
- "asset_list_address": full_address,
- **searcher.newest_epc.copy(),
- "recommendations": property_recommendations["rows"]
- }
-
- epc_data.append(epc)
- except Exception as e:
- errors.append(e)
- time.sleep(5)
+ # Append the failed data to the main data
+ epc_data.extend(epc_data_failed)
epc_df = pd.DataFrame(epc_data)
+ # We expand out the recommendations
+ recommendations_df = epc_df[["row_id", "recommendations"]]
+
+ unique_recommendations = set()
+ for _, row in recommendations_df.iterrows():
+ unique_recommendations.update([rec["improvement-summary-text"] for rec in row["recommendations"]])
+
+ columns = ["row_id"] + list(unique_recommendations)
+ transformed_data = []
+ for _, row in recommendations_df.iterrows():
+ # Initialize a dictionary for this row with False for all recommendations
+ row_data = {col: False for col in columns}
+ row_data["row_id"] = row["row_id"]
+
+ # Set True for each recommendation present in this row
+ for rec in row["recommendations"]:
+ recommendation_text = rec["improvement-summary-text"]
+ row_data[recommendation_text] = True
+
+ # Append the row data to transformed_data
+ transformed_data.append(row_data)
+
+ transformed_df = pd.DataFrame(transformed_data)
+ # Drop the column named "" (created by recommendations whose summary text is empty)
+ transformed_df = transformed_df.drop(columns=[""])
+
# Retrieve just the data we need
epc_df = epc_df[
[
- "asset_list_address",
+ "row_id",
"uprn",
"property-type",
"built-form",
@@ -110,7 +150,7 @@ def app():
"construction-age-band",
"floor-height",
"number-habitable-rooms",
- "mainheat-description"
+ "mainheat-description",
#
"energy-consumption-current", # kwh/m2
]
@@ -119,11 +159,14 @@ def app():
asset_list = asset_list.merge(
epc_df,
how="left",
- left_on=["ADDRESS"],
- right_on=["asset_list_address"]
+ on="row_id"
+ ).merge(
+ transformed_df,
+ how="left",
+ on="row_id"
)
- asset_list = asset_list.drop(columns=["asset_list_address"])
+ asset_list = asset_list.drop(columns=["row_id"])
# Rename the columns
asset_list = asset_list.rename(columns={
@@ -140,14 +183,18 @@ def app():
"roof-description": "Roof Construction",
"mainheat-description": "Heating Type",
"secondheat-description": "Secondary Heating",
- "transaction-type": "Reason for last EPC"
+ "transaction-type": "Reason for last EPC",
+ "energy-consumption-current": "Heat Demand (kWh/m2)"
})
asset_list["Estimated Number of Floors"] = asset_list.apply(
- lambda x: estimate_number_of_floors(property_type=x["Property Type"]), axis=1
+ lambda x: estimate_number_of_floors(property_type=x["Property Type"]) if not pd.isnull(
+ x["Property Type"]) else None, axis=1
)
asset_list["Property Floor Area"] = asset_list["Property Floor Area"].astype(float)
+ # Replace "" value with None
+ asset_list["Number of Habitable Rooms"] = asset_list["Number of Habitable Rooms"].replace("", None)
asset_list["Number of Habitable Rooms"] = asset_list["Number of Habitable Rooms"].astype(float)
asset_list["Estimated Perimeter (m)"] = asset_list.apply(
@@ -157,7 +204,7 @@ def app():
), axis=1
)
- asset_list["Estimated Heat Loss Perimeter (m)"] = asset_list.apply(
+ asset_list["Estimated Heat Loss Perimeter (m2)"] = asset_list.apply(
lambda x: estimate_external_wall_area(
num_floors=x["Estimated Number of Floors"],
floor_height=float(x["Property Floor Height"]) if x["Property Floor Height"] else 2.5,
@@ -168,10 +215,11 @@ def app():
)
asset_list["Roof Insulation Thickness"] = asset_list.apply(
- lambda x: RoofAttributes(description=x["Roof Construction"]).process()["insulation_thickness"],
+ lambda x: RoofAttributes(description=x["Roof Construction"]).process()["insulation_thickness"] if not pd.isnull(
+ x["Roof Construction"]) else None,
axis=1
)
# Store as an excel
- filename = "LHP EPC Data pull.xlsx"
+ filename = "livewest EPC Data pull - 29 Oct.xlsx"
asset_list.to_excel(filename, index=False)
diff --git a/etl/customers/stonewater/Wave 3 Preparation.py b/etl/customers/stonewater/Wave 3 Preparation.py
index a8e06416..d8d01b22 100644
--- a/etl/customers/stonewater/Wave 3 Preparation.py
+++ b/etl/customers/stonewater/Wave 3 Preparation.py
@@ -283,6 +283,8 @@ def main():
extracted_data.append(summary_data)
extracted_data = pd.DataFrame(extracted_data)
+ # Save this as a csv
+ # extracted_data.to_csv("Wave 3 Summary Data - first 200 files.csv", index=False)
missed = [f for f in survey_folders if f not in extracted_data["survey_folder"].tolist()]