diff --git a/.idea/Model.iml b/.idea/Model.iml
index df6c4faa..762580d9 100644
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@@ -7,7 +7,7 @@
-
+
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 50cad4ca..c916a158 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,7 +3,7 @@
-
+
diff --git a/etl/find_my_epc/RetrieveFindMyEpc.py b/etl/find_my_epc/RetrieveFindMyEpc.py
index cd76dae4..913a04b8 100644
--- a/etl/find_my_epc/RetrieveFindMyEpc.py
+++ b/etl/find_my_epc/RetrieveFindMyEpc.py
@@ -26,6 +26,20 @@ class RetrieveFindMyEpc:
self.address_cleaned = self.address.replace(",", "").replace(" ", "").lower()
@staticmethod
def extract_low_carbon_sources(soup):
    """
    Extract the entries listed under the "Low and zero carbon energy sources" heading.

    :param soup: Parsed certificate page exposing the BeautifulSoup ``find`` API.
    :return: Dict mapping each listed source name to ``True``. Empty when the heading is missing,
        when no list follows it, or when the list has no non-blank items.
    """
    section_header = soup.find("h3", string="Low and zero carbon energy sources")
    if section_header is None:
        return {}

    # NOTE(review): find_next returns the next <ul> anywhere later in the document, not
    # necessarily one belonging to this section - confirm against the page structure.
    energy_list = section_header.find_next("ul")
    if energy_list is None:
        # Heading present but no list after it: nothing to report rather than an AttributeError
        return {}

    sources = {}
    for item in energy_list.find_all("li"):
        name = item.get_text(strip=True)
        # Blank items would otherwise become an empty-string key in the caller's result dict
        if name:
            sources[name] = True
    return sources
+
def retrieve_newest_find_my_epc_data(self, sap_2012_date=None):
"""
For a post code and address, we pull out all the required data from the find my epc website
@@ -191,6 +205,9 @@ class RetrieveFindMyEpc:
# Finally, we format the recommendations
recommendations = self.format_recommendations(recommendations, assessment_data, sap_2012_date)
+ # 4) Low and zero carbon energy sources
+ low_carbon_energy_sources = self.extract_low_carbon_sources(address_res)
+
resulting_data = {
'epc_certificate': epc_certificate,
'current_epc_rating': current_rating.split(' ')[-6],
@@ -200,7 +217,8 @@ class RetrieveFindMyEpc:
"heating_text": heating_text,
"hot_water_text": hot_water_text,
"recommendations": recommendations,
- **assessment_data
+ **assessment_data,
+ **low_carbon_energy_sources
}
return resulting_data
@@ -246,6 +264,11 @@ class RetrieveFindMyEpc:
],
"Band A condensing boiler": ["boiler_upgrade"],
"Double glazing": ["double_glazing"],
+ "Flue gas heat recovery device in conjunction with boiler": ["flue_gas_heat_recovery"],
+ "Wind turbine": ["wind_turbine"],
+ "Loft insulation": ["loft_insulation"],
+ "Solar photovoltaic (PV) panels": ["solar_pv"],
+ "Party wall insulation": ["party_wall_insulation"],
}
survey = True
diff --git a/etl/route_march_data_pull/app.py b/etl/route_march_data_pull/app.py
new file mode 100644
index 00000000..060897f8
--- /dev/null
+++ b/etl/route_march_data_pull/app.py
@@ -0,0 +1,300 @@
+import os
+import time
+
+import pandas as pd
+import numpy as np
+from tqdm import tqdm
+
+from dotenv import load_dotenv
+from backend.SearchEpc import SearchEpc
+from etl.find_my_epc.RetrieveFindMyEpc import RetrieveFindMyEpc
+from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes
+
+from recommendations.recommendation_utils import (
+ estimate_perimeter,
+ estimate_external_wall_area,
+ estimate_number_of_floors
+)
+
# Load the backend's dotenv file into the process environment, then read the EPC API token
# from it (None when the variable is not set).
load_dotenv(dotenv_path="backend/.env")
EPC_AUTH_TOKEN = os.environ.get("EPC_AUTH_TOKEN")
+
+
def _fetch_home_epc(home, fulladdress_column, address1_column, postcode_column):
    """Look up one asset-list row and return its EPC record dict, or None when no EPC is found."""
    searcher = SearchEpc(
        address1=str(home[address1_column]),
        postcode=home[postcode_column],
        auth_token=EPC_AUTH_TOKEN,
        os_api_key="",
        property_type=None,
        fast=True,
        full_address=home[fulladdress_column],
        max_retries=5,
    )
    # Force the skipping of estimating the EPC
    searcher.ordnance_survey_client.property_type = None
    searcher.ordnance_survey_client.built_form = None

    searcher.find_property(skip_os=True)
    if searcher.newest_epc is None:
        return None

    # Look for EPC recommendations; a failed lookup is treated as "no recommendations"
    try:
        property_recommendations = searcher.client.domestic.recommendations(searcher.newest_epc["lmk-key"])
    except Exception:
        property_recommendations = {"rows": []}

    # Retrieve data from FindMyEPC
    find_epc_searcher = RetrieveFindMyEpc(
        address=searcher.newest_epc["address"], postcode=searcher.newest_epc["postcode"]
    )
    find_epc_data = find_epc_searcher.retrieve_newest_find_my_epc_data()

    return {
        "row_id": home["row_id"],
        **searcher.newest_epc.copy(),
        "recommendations": property_recommendations["rows"],
        "find_my_epc_data": find_epc_data,
    }


def get_data(asset_list, fulladdress_column, address1_column, postcode_column):
    """
    Pull the newest EPC, its recommendations and the FindMyEPC data for every row of an asset list.

    Each property is looked up exactly once, inside the error guard, so a failing property is
    recorded in ``errors`` instead of aborting the whole run.

    :param asset_list: DataFrame with a "row_id" column plus the three columns named below.
    :param fulladdress_column: Name of the column holding the full address.
    :param address1_column: Name of the column holding the first address line / house number.
    :param postcode_column: Name of the column holding the postcode.
    :return: Tuple ``(epc_data, errors)``: a list of per-property record dicts, and the list of
        "row_id" values whose lookup raised. Rows with no EPC appear in neither list.
    """
    epc_data = []
    errors = []
    for _, home in tqdm(asset_list.iterrows(), total=len(asset_list)):
        try:
            epc = _fetch_home_epc(home, fulladdress_column, address1_column, postcode_column)
        except Exception as error:
            errors.append(home["row_id"])
            # Report the reason without breaking the progress bar, then back off before the next call
            tqdm.write(f"Failed to retrieve EPC data for row {home['row_id']}: {error!r}")
            time.sleep(5)
            continue

        if epc is None:
            continue

        # Random short pause between successful lookups to avoid hammering the remote services
        time.sleep(np.random.uniform(0.1, 1))
        epc_data.append(epc)

    return epc_data, errors
+
+
def extract_address1(asset_list, full_address_col, method="first_two_words"):
    """
    Derive a first-address-line column from the full address.

    The result is written to a new "address1_extracted" column on ``asset_list`` itself (the
    frame is modified in place) and the same frame is returned.

    :param asset_list: DataFrame containing the full address column.
    :param full_address_col: Name of the column holding the full address strings.
    :param method: Extraction strategy; only "first_two_words" is supported.
    :return: ``asset_list`` with the "address1_extracted" column added.
    :raises ValueError: If ``method`` is not a recognised strategy.
    """
    if method != "first_two_words":
        raise ValueError(f"Method {method} not recognized")

    # Split on runs of whitespace rather than on single spaces, so repeated or leading spaces
    # do not produce empty "words" (e.g. "12  High Street" must give "12 High", not "12 ").
    words = asset_list[full_address_col].str.split()
    asset_list["address1_extracted"] = words.str[:2].str.join(" ")
    return asset_list
+
+
def _recommendation_flags(epc_df):
    """Return a frame with "row_id" plus one boolean column per distinct recommendation text."""
    per_row = [
        (row_id, {rec["improvement-summary-text"] for rec in recommendations})
        for row_id, recommendations in zip(epc_df["row_id"], epc_df["recommendations"])
    ]

    # Blank summaries never become a column (previously the "" column was built and then dropped,
    # which failed when no blank summary existed). Sorting keeps the column order stable between
    # runs, which set iteration order is not.
    texts = set()
    for _, present in per_row:
        texts.update(text for text in present if text != "")
    texts = sorted(texts, key=str)

    rows = [
        {"row_id": row_id, **{text: text in present for text in texts}}
        for row_id, present in per_row
    ]
    return pd.DataFrame(rows, columns=["row_id"] + texts)


def _estimate_row_perimeter(row):
    """Estimate the per-floor perimeter for one output row, or None when the floor count is unknown."""
    floors = row["Estimated Number of Floors"]
    if pd.isnull(floors):
        return None
    return estimate_perimeter(
        floor_area=row["Property Floor Area"] / floors,
        num_rooms=row["Number of Habitable Rooms"] / floors,
    )


def _estimate_row_wall_area(row):
    """Estimate the external wall area for one output row, or None when the perimeter is unknown."""
    if pd.isnull(row["Estimated Perimeter (m)"]):
        return None
    raw_height = row["Property Floor Height"]
    # A missing or blank floor height falls back to 2.5
    floor_height = 2.5 if pd.isnull(raw_height) or not raw_height else float(raw_height)
    return estimate_external_wall_area(
        num_floors=row["Estimated Number of Floors"],
        floor_height=floor_height,
        perimeter=row["Estimated Perimeter (m)"],
        built_form=row["Archetype"],
    )


def app(input_path=None, output_path=None):
    """
    Pull EPC data for a customer asset list and write the enriched list to an Excel file.

    NOTE(review): the default input path points at a "P&F" folder while the default output path
    points at a "Settle" folder - confirm which customer these defaults are meant for.

    Data request contents:
    Date of last EPC
    Reason for EPC
    SAP score on register
    Property Type
    Property Area
    Property Age
    Any Dimensions (HLP,PW,RH)
    Property Wall Construction
    Heating Type
    Secondary Heating
    Loft Insulation Depth

    Additional if possible:
    Heat loss calculations
    EPC recommendations
    Property UPRN

    :param input_path: Excel asset list to read; defaults to the original hard-coded file.
    :param output_path: Excel file to write; defaults to the original hard-coded file.
    :raises RuntimeError: If no EPC data could be retrieved for any property.
    """
    DATA_FOLDER = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/P&F/"
    DATA_FILENAME = "BELOW C - WFT FINDINGS ON INSPECTION PLUS SUGGESTED ACTION.xlsx"
    OUTPUT_FILENAME = (
        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Settle/Settle EPC Data pull - 08 Nov 2024.xlsx"
    )
    POSTCODE_COLUMN = "Postcode"
    FULLADDRESS_COLUMN = "Address"
    ADDRESS1_COLUMN = None
    ADDRESS1_METHOD = "first_two_words"

    if input_path is None:
        input_path = os.path.join(DATA_FOLDER, DATA_FILENAME)
    if output_path is None:
        output_path = OUTPUT_FILENAME

    asset_list = pd.read_excel(input_path, header=0)
    asset_list["row_id"] = asset_list.index

    # We clean up potential non-breaking spaces, then collapse any run of spaces to a single one
    for col in (POSTCODE_COLUMN, FULLADDRESS_COLUMN, ADDRESS1_COLUMN):
        if col is None:
            continue
        without_nbsp = asset_list[col].str.replace('\xa0', ' ', regex=False)
        asset_list[col] = without_nbsp.str.replace(r" {2,}", " ", regex=True)

    if ADDRESS1_COLUMN is None:
        ADDRESS1_COLUMN = "address1_extracted"
        asset_list = extract_address1(asset_list, FULLADDRESS_COLUMN, ADDRESS1_METHOD)

    epc_data, errors = get_data(
        asset_list=asset_list,
        fulladdress_column=FULLADDRESS_COLUMN,
        address1_column=ADDRESS1_COLUMN,
        postcode_column=POSTCODE_COLUMN
    )

    # We now retry any failed properties once; failures of the retry are not tracked further
    asset_list_failed = asset_list[asset_list["row_id"].isin(errors)]
    epc_data_failed, _ = get_data(
        asset_list=asset_list_failed,
        fulladdress_column=FULLADDRESS_COLUMN,
        address1_column=ADDRESS1_COLUMN,
        postcode_column=POSTCODE_COLUMN
    )

    # Append the retried data to the main data
    epc_data.extend(epc_data_failed)

    if not epc_data:
        # Without any records the column selections below would fail with an opaque KeyError
        raise RuntimeError("No EPC data could be retrieved for any property in the asset list")

    epc_df = pd.DataFrame(epc_data)

    # We expand out the recommendations into one boolean column per recommendation
    transformed_df = _recommendation_flags(epc_df)

    # Retrieve just the data we need
    epc_df = epc_df[
        [
            "row_id",
            "uprn",
            "property-type",
            "built-form",
            "inspection-date",
            "current-energy-rating",
            "current-energy-efficiency",
            "roof-description",
            "walls-description",
            "transaction-type",
            # New fields needed
            "secondheat-description",
            "total-floor-area",
            "construction-age-band",
            "floor-height",
            "number-habitable-rooms",
            "mainheat-description",
            #
            "energy-consumption-current",  # kwh/m2
        ]
    ]

    asset_list = asset_list.merge(
        epc_df,
        how="left",
        on="row_id"
    ).merge(
        transformed_df,
        how="left",
        on="row_id"
    )

    asset_list = asset_list.drop(columns=["row_id"])

    # Rename the columns
    asset_list = asset_list.rename(columns={
        "inspection-date": "Date of last EPC",
        "current-energy-efficiency": "SAP score on register",
        "current-energy-rating": "EPC rating on register",
        "property-type": "Property Type",
        "built-form": "Archetype",
        "total-floor-area": "Property Floor Area",
        "construction-age-band": "Property Age Band",
        "floor-height": "Property Floor Height",
        "number-habitable-rooms": "Number of Habitable Rooms",
        "walls-description": "Wall Construction",
        "roof-description": "Roof Construction",
        "mainheat-description": "Heating Type",
        "secondheat-description": "Secondary Heating",
        "transaction-type": "Reason for last EPC",
        "energy-consumption-current": "Heat Demand (kWh/m2)"
    })

    asset_list["Estimated Number of Floors"] = asset_list.apply(
        lambda x: estimate_number_of_floors(property_type=x["Property Type"]) if not pd.isnull(
            x["Property Type"]) else None, axis=1
    )

    # Blank or non-numeric values become NaN in one step. This avoids replace("", None), whose
    # meaning has differed between pandas versions (NOTE(review): confirm for the pinned version).
    for numeric_col in ("Property Floor Area", "Number of Habitable Rooms"):
        asset_list[numeric_col] = pd.to_numeric(asset_list[numeric_col], errors="coerce").astype(float)

    # Properties without an EPC match have no inputs; like the floors and roof columns, the
    # estimates below are left empty for them instead of being computed from missing values.
    asset_list["Estimated Perimeter (m)"] = asset_list.apply(_estimate_row_perimeter, axis=1)

    asset_list["Estimated Heat Loss Perimeter (m2)"] = asset_list.apply(_estimate_row_wall_area, axis=1)

    asset_list["Roof Insulation Thickness"] = asset_list.apply(
        lambda x: RoofAttributes(description=x["Roof Construction"]).process()["insulation_thickness"] if not pd.isnull(
            x["Roof Construction"]) else None,
        axis=1
    )

    # Store as an excel
    asset_list.to_excel(output_path, index=False)
diff --git a/etl/route_march_data_pull/requirements.txt b/etl/route_march_data_pull/requirements.txt
new file mode 100644
index 00000000..e69de29b