diff --git a/.idea/Model.iml b/.idea/Model.iml
index df6c4faa..762580d9 100644
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@@ -7,7 +7,7 @@
-
+
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 50cad4ca..c916a158 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,7 +3,7 @@
-
+
diff --git a/etl/access_reporting/app.py b/etl/access_reporting/app.py
index 830f4370..8a8254a1 100644
--- a/etl/access_reporting/app.py
+++ b/etl/access_reporting/app.py
@@ -83,8 +83,11 @@ def api_call_decorator(func):
results = []
page_size = kwargs.get('page_size', None)
response_data = {}
+ n_calls = 0
while url:
+ logger.info("Making call for page: " + str(n_calls + 1))
+ n_calls += 1
response = requests.request(http_method, url, headers=self.headers, json=data)
# Handle the response
@@ -93,6 +96,7 @@ def api_call_decorator(func):
if page_size:
results.extend(response_json.get('value', []))
url = response_json.get('@odata.nextLink', None)
+ logger.info(f"Next page URL: {url}")
else:
response_data = response_json # Capture the full response for consistency
break
@@ -270,6 +274,48 @@ class SharePointClient:
return file_content
+    def download_sharepoint_folder(self, drive_id, folder_path, download_dir, excluded_file_types=None):
+        """
+        Downloads all files in a SharePoint folder, including nested subfolders, to a local directory.
+
+        Items returned by ``list_folder_contents`` that carry a ``folder`` entry are recursed into and mirrored
+        under ``download_dir``; every other item is fetched via its ``@microsoft.graph.downloadUrl`` and written
+        to disk under its SharePoint name.
+
+        :param drive_id: The ID of the SharePoint drive.
+        :param folder_path: The path of the folder in SharePoint.
+        :param download_dir: The local directory to save the downloaded files (created if it does not exist).
+        :param excluded_file_types: A list of file extensions to exclude from download, e.g. ``["MOV"]``
+            (default is None, meaning nothing is excluded). Matching ignores case and any leading dot, and the
+            exclusions also apply inside subfolders.
+        """
+
+        excluded_file_types = [] if excluded_file_types is None else excluded_file_types
+        # Normalise once so that "MOV", ".mov" and "Mov" all describe the same extension
+        excluded_extensions = {str(file_type).lower().lstrip(".") for file_type in excluded_file_types}
+
+        # Ensure the download directory exists
+        os.makedirs(download_dir, exist_ok=True)
+
+        # List folder contents
+        folder_contents = self.list_folder_contents(drive_id, folder_path)
+
+        for item in folder_contents.get('value', []):
+            item_name = item['name']
+
+            if item.get('folder'):  # Check if it's a folder
+                # Recursively handle subfolders. The exclusions must be passed down explicitly, otherwise
+                # excluded file types would still be downloaded from every nested folder.
+                self.download_sharepoint_folder(
+                    drive_id,
+                    f"{folder_path}/{item_name}",
+                    os.path.join(download_dir, item_name),
+                    excluded_file_types=excluded_file_types,
+                )
+                continue
+
+            # It's a file. splitext yields "" for names without an extension, so a file that merely happens
+            # to be called e.g. "MOV" is not mistaken for an excluded type.
+            extension = os.path.splitext(item_name)[1].lstrip(".").lower()
+            if extension in excluded_extensions:
+                continue
+
+            download_url = item['@microsoft.graph.downloadUrl']
+
+            logger.info(f"Downloading file: {item_name}")
+            file_content = self.download_sharepoint_file(download_url)
+
+            # Save the file locally
+            file_path = os.path.join(download_dir, item_name)
+            with open(file_path, 'wb') as f:
+                f.write(file_content.read())
+
+            logger.info(f"File saved to: {file_path}")
+
def app():
# Customers for WC 18/11/2024
diff --git a/etl/customers/stonewater/Wave 3 Preparation.py b/etl/customers/stonewater/Wave 3 Preparation.py
index 0f757f7b..8538188b 100644
--- a/etl/customers/stonewater/Wave 3 Preparation.py
+++ b/etl/customers/stonewater/Wave 3 Preparation.py
@@ -2905,5 +2905,38 @@ def identify_incorrect_packages():
os.path.join(CUSTOMER_FOLDER_PATH, "Units with assigned packages - with flags.csv"), index=False
)
+
+def revised_model():
+    """
+    This function implements the revised model for Stonewater, where we are looking at new priority postcodes.
+    This work was undertaken in January 2025 (the inputs live in the "Jan 2025 Project" folder).
+
+    It currently loads the updated priority list and the original archetyped workbook, then checks that every
+    address in the archetyped workbook also appears in the priority list.
+
+    :raises ValueError: if any archetyped address is missing from the priority list.
+    """
+
+    # 1) Create the new list of properties
+
+    new_priority_postcodes = pd.read_excel(
+        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Jan 2025 Project/Updated 2025 to 2030 "
+        "priority list.xlsx"
+    )
+
+    original_archetypes = pd.read_excel(
+        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Stonewater SHDF_3_0_Board Triage 22.05.24 "
+        "- Archetyped V3.1.xlsx",
+        header=4
+    )
+    # Drop rows with no Address ID and rows whose Address ID cell is the literal header text, so that the
+    # column can be converted to integers. The copy avoids assigning into a filtered view.
+    has_address_id = ~pd.isnull(original_archetypes["Address ID"])
+    is_not_header_row = original_archetypes["Address ID"] != "Address ID"
+    original_archetypes = original_archetypes[has_address_id & is_not_header_row].copy()
+    original_archetypes["Address ID"] = original_archetypes["Address ID"].astype(int)
+
+    # NOTE(review): the selection previously also listed an empty column name (""), which looks like an
+    # unfinished placeholder - TODO add the intended third column here once it is known.
+    original_archetypes = original_archetypes[["Address ID", "Archetype ID"]]
+
+    # Check if we have all of the addresses
+    missed = original_archetypes[
+        ~original_archetypes["Address ID"].isin(new_priority_postcodes["Address ID"].values)
+    ]["Archetype ID"].unique()
+    # NOTE(review): this replaces a bare `assert` with no condition (a SyntaxError that stopped the module
+    # from importing). The condition is inferred from the comment above - confirm it is the intended check.
+    if len(missed) > 0:
+        raise ValueError(
+            f"Archetypes with addresses missing from the new priority list: {sorted(missed.tolist(), key=str)}"
+        )
+
# if __name__ == "__main__":
# main()
diff --git a/etl/customers/stonewater/data_cleaning.py b/etl/customers/stonewater/data_cleaning.py
new file mode 100644
index 00000000..8751960c
--- /dev/null
+++ b/etl/customers/stonewater/data_cleaning.py
@@ -0,0 +1,137 @@
+import os
+import shutil
+from tqdm import tqdm
+
+
+def delete_large_files(folder_path=None):
+    """
+    This function deletes photos, designs and other files which we don't need.
+
+    The layout walked is ``folder_path/<sub-folder>/<property folder>``. For each property folder:
+
+    * the sub-folders named in ``folders_to_delete`` below are removed together with their contents;
+    * inside the RA Coordinator Info folder, files ending in ``.MOV`` or ``.jpg`` (case-sensitive) and any
+      ``Property Pics`` folder are removed.
+
+    Deletion is permanent. Entries that are not directories are ignored at both levels.
+
+    :param folder_path: Root folder to clean up. Defaults to the local Stonewater "Wave 2.1 Surveys" folder.
+    :return: None
+    """
+
+    if folder_path is None:
+        folder_path = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Wave 2.1 Surveys"
+
+    # The same kind of folder appears under several numbering/spelling variants, so each variant is listed
+    folders_to_delete = (
+        "1. RA Property Pics", "4. Air Tightness Tests", "5. RD Design Info",
+        "1. RA Property PIcs", "Post EPC Photos", "4. RD Design Info",
+        "5. Installer Info", "6. Trustmark lodgement", "7.Post Install Inspection Photos",
+        "6. Trustmark Lodgement", "7. Post Inspection Photos",
+    )
+    # Only these exact (case-sensitive) suffixes are removed from the coordinator folder
+    file_suffixes_to_delete = (".MOV", ".jpg")
+
+    # List the contents of this folder since in each sub-folder we have the property folders
+    for subfolder in os.listdir(folder_path):
+        subfolder_path = os.path.join(folder_path, subfolder)
+        if not os.path.isdir(subfolder_path):
+            continue
+
+        for property_folder in tqdm(os.listdir(subfolder_path)):
+            property_path = os.path.join(subfolder_path, property_folder)
+            # Check if it's a directory
+            if not os.path.isdir(property_path):
+                continue
+
+            property_contents = os.listdir(property_path)
+
+            for folder_to_delete in folders_to_delete:
+                if folder_to_delete not in property_contents:
+                    continue
+                folder_to_delete_path = os.path.join(property_path, folder_to_delete)
+                if os.path.isdir(folder_to_delete_path):
+                    # Delete the folder, even if it's not empty
+                    shutil.rmtree(folder_to_delete_path)
+
+            # The coordinator folder is numbered either "2." or "1." depending on the property
+            if "2. RA Coordinator Info" in property_contents:
+                coordinator_folder = "2. RA Coordinator Info"
+            else:
+                coordinator_folder = "1. RA Coordinator Info"
+            coordinator_info_path = os.path.join(property_path, coordinator_folder)
+            if not os.path.isdir(coordinator_info_path):
+                # Neither variant exists for this property: skip it instead of letting os.listdir raise
+                # and abort the clean-up of every remaining property.
+                continue
+
+            coordinator_info_contents = os.listdir(coordinator_info_path)
+            # Look for .MOV files and .jpg files
+            for file_name in coordinator_info_contents:
+                if file_name.endswith(file_suffixes_to_delete):
+                    os.remove(os.path.join(coordinator_info_path, file_name))
+
+            if "Property Pics" in coordinator_info_contents:
+                # Delete folder and contents
+                shutil.rmtree(os.path.join(coordinator_info_path, "Property Pics"))
+
+
+def download_data_from_sharepoint():
+    """
+    Download the retrofit assessment ("RA Coordinator Info") folders for selected regions from SharePoint.
+
+    For each of the region folders "3. Wiltshire", "4. Bournemouth" and "5. Coventry" under the Stonewater
+    property ID folders, every property folder is listed and the first sub-folder whose name contains
+    "ra coordinator info" (case-insensitive) is downloaded with ``excluded_file_types=["MOV"]``. Files are
+    written to the local "Wave 2.1 Surveys" folder under the same region / property / sub-folder names.
+    Properties without such a sub-folder are skipped.
+
+    The client secret is read from the ``SHAREPOINT_CLIENT_SECRET`` environment variable so that it is not
+    committed to the repository. ``SHAREPOINT_TENANT_ID``, ``SHAREPOINT_CLIENT_ID`` and ``SHAREPOINT_SITE_ID``
+    may optionally override the identifiers used below.
+
+    :raises RuntimeError: if ``SHAREPOINT_CLIENT_SECRET`` is not set.
+    """
+    from etl.access_reporting.app import SharePointClient
+
+    # NOTE(review): the secret that was previously hard-coded here is in version control history and should
+    # be treated as compromised and rotated.
+    client_secret = os.getenv("SHAREPOINT_CLIENT_SECRET")
+    if not client_secret:
+        raise RuntimeError(
+            "SHAREPOINT_CLIENT_SECRET is not set; export it (or add it to your .env) before running"
+        )
+
+    sharepoint_client = SharePointClient(
+        tenant_id=os.getenv("SHAREPOINT_TENANT_ID", "10d5af8b-2cfd-4882-9ccd-b96e4812dacf"),
+        client_id=os.getenv("SHAREPOINT_CLIENT_ID", "6832a4c5-fb8c-4082-a746-4f51e1020f0d"),
+        client_secret=client_secret,
+        site_id=os.getenv("SHAREPOINT_SITE_ID", "bc925a9a-ad0b-4de9-9a3c-e61014cc7489"),
+    )
+
+    drive_id = sharepoint_client.document_drive["id"]
+    sharepoint_root = "Osmosis ACD/Osmosis ACD Projects/Stonewater/Stonewater Property ID Folders"
+    local_root = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Wave 2.1 Surveys"
+    regions_to_pull = ["3. Wiltshire", "4. Bournemouth", "5. Coventry"]
+
+    # Retrieve the data from Sharepoint and write to local machine
+    contents = sharepoint_client.list_folder_contents(
+        drive_id=drive_id,
+        folder_path=sharepoint_root
+    )
+
+    folders_to_pull = [folder for folder in contents["value"] if folder["name"] in regions_to_pull]
+
+    for folder_to_pull in folders_to_pull:
+        region_name = folder_to_pull["name"]
+
+        # Get the contents
+        folder_contents = sharepoint_client.list_folder_contents(
+            drive_id=drive_id,
+            folder_path=sharepoint_root + "/" + region_name,
+            page_size=100
+        )
+
+        for property_folder in folder_contents["value"]:
+            property_name = property_folder["name"]
+
+            # We go into each property folder and get the contents
+            property_folder_contents = sharepoint_client.list_folder_contents(
+                drive_id=drive_id,
+                folder_path=sharepoint_root + "/" + region_name + "/" + property_name
+            )
+            # We look for the retrofit assessment folder:
+            property_sub_folders = [
+                f for f in property_folder_contents["value"] if "ra coordinator info" in f["name"].lower()
+            ]
+
+            if not property_sub_folders:
+                continue
+
+            # If we have this, we download the folder and store it locally
+            sub_folder_name = property_sub_folders[0]["name"]
+
+            # SharePoint paths always use "/", so join explicitly rather than with the OS path separator
+            property_folder_path = "/".join([sharepoint_root, region_name, property_name, sub_folder_name])
+
+            download_dir = os.path.join(local_root, region_name, property_name, sub_folder_name)
+
+            # We download the folder
+            sharepoint_client.download_sharepoint_folder(
+                drive_id=drive_id,
+                folder_path=property_folder_path,
+                download_dir=download_dir,
+                excluded_file_types=["MOV"]
+            )
diff --git a/etl/customers/stonewater/potential_eco_properties.py b/etl/customers/stonewater/potential_eco_properties.py
index c0301e9a..bda9c30c 100644
--- a/etl/customers/stonewater/potential_eco_properties.py
+++ b/etl/customers/stonewater/potential_eco_properties.py
@@ -7,6 +7,8 @@ from tqdm import tqdm
from dotenv import load_dotenv
from backend.SearchEpc import SearchEpc
from utils.s3 import read_from_s3, read_pickle_from_s3
+import msoffcrypto
+from io import BytesIO
load_dotenv(dotenv_path="backend/.env")
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
@@ -64,6 +66,28 @@ def app():
This code creates a list of cavity properties, for review
"""
+ # Read in the password protected master
+ # TODO: This file should be deleted!
+
+ # Path to the password-protected Excel file
+ file_path = ("/Users/khalimconn-kowlessar/Downloads/STONEWATER MASTER SHEET - UPDATED 20.5.24 - K- PASSWORD "
+ "PROTECTED.xlsx")
+ password = "STONE123" # Replace with the actual password
+
+ # Open the file and decrypt it
+ with open(file_path, "rb") as f:
+ decrypted_file = BytesIO()
+ office_file = msoffcrypto.OfficeFile(f)
+ office_file.load_key(password=password)
+ office_file.decrypt(decrypted_file)
+
+ # Read the decrypted file into a DataFrame
+ eco_rolling_master = pd.read_excel(decrypted_file, sheet_name="Sheet1", engine="openpyxl")
+
+ eco_rolling_master = eco_rolling_master[
+ ~eco_rolling_master['INSTALL/CANCELLATION DATE'].str.contains("CANCELLED")
+ ]
+
archetyped_properties = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Stonewater SHDF_3_0_Board Triage 22.05.24 - "
"Archetyped V3.1.xlsx",
@@ -116,13 +140,16 @@ def app():
features_to_merge = features[
[
- "Address ID", "Age", "Property Type", "Walls", "Roofs", "Glazing", "Heating", "Main Fuel", "Hot Water",
+ "Address ID", "Organisation Reference", "Age", "Property Type", "Walls", "Roofs", "Glazing", "Heating",
+ "Main Fuel",
+ "Hot Water",
"Renewables", "Total Floor Area"
]
]
stonewater_cavity_properties = archetyped_properties[
- ["Name", "Postcode", "Osm. ID", "Address ID", "UPRN", "UDPRN", "Archetype ID", "House no", "Street name",
+ ["Name", "Postcode", "Osm. ID", "Org. ref.", "Address ID", "UPRN", "UDPRN", "Archetype ID", "House no",
+ "Street name",
"Address line 2", "City/Town", "Is Cavity Property", "Survey shows CWI needed for Archetype"]
].merge(
features_to_merge, how="left", on="Address ID"
@@ -166,77 +193,137 @@ def app():
stonewater_cavity_properties["Reason Included"]
)
+ # We flag units that were installed under ECO3
+ numeric_ids = eco_rolling_master[eco_rolling_master["STONEWATER UPRN"] != "NOT ON ASSET LIST"]
+ numeric_ids = numeric_ids[~pd.isnull(numeric_ids["STONEWATER UPRN"])]
+ numeric_ids["STONEWATER UPRN"] = numeric_ids["STONEWATER UPRN"].astype(int)
+
+ stonewater_cavity_properties["Installed under ECO3"] = stonewater_cavity_properties["Org. ref."].isin(
+ numeric_ids['STONEWATER UPRN'].values
+ )
+
+ # Which postcodes were installed under ECO3
+ priority_list_eco3 = stonewater_cavity_properties[
+ stonewater_cavity_properties["Installed under ECO3"]
+ ]["Postcode"].unique()
+
+ # These are properties that were not installed under ECO3, that have the same postcodes as properties
+ # installed under ECO3
+
+ # These are 66 properties we might want to start with as an immediate priority
+ stonewater_cavity_properties["Same Postcode as Installed under ECO3"] = (
+ ~stonewater_cavity_properties["Installed under ECO3"] & (
+ stonewater_cavity_properties["Postcode"].isin(priority_list_eco3)
+ )
+ )
+
# We get the EPC data
- epc_data = json.loads(
- read_from_s3(
- bucket_name="retrofit-data-dev",
- s3_file_name="customers/Stonewater/clustering/epc_data.json"
- )
- )
- epc_data = pd.DataFrame(epc_data)
-
- epc_data["uprn"] = np.where(
- epc_data["internal_id"] == 1091,
- 83143766,
- epc_data["uprn"]
- )
-
- epc_data_batch_2 = read_pickle_from_s3(
- s3_file_name="customers/Stonewater/clustering/epc_data_batch_2.pkl",
- bucket_name="retrofit-data-dev"
- )
- epc_data_batch_2 = pd.DataFrame(epc_data_batch_2)
-
- complete_epcs = pd.concat([epc_data, epc_data_batch_2])
-
- epcs_to_merge = complete_epcs[
- [
- "uprn",
- "address",
- "postcode",
- "property-type",
- "built-form",
- "inspection-date",
- "current-energy-rating",
- "current-energy-efficiency",
- "roof-description",
- "walls-description",
- "transaction-type",
- "secondheat-description",
- "total-floor-area",
- "construction-age-band",
- "floor-height",
- "number-habitable-rooms",
- "mainheat-description",
- "energy-consumption-current"
- ]
- ].rename(
- columns={
- "address": "Address",
- "postcode": "Postcode",
- "inspection-date": "Date of last EPC",
- "current-energy-efficiency": "SAP score on register",
- "current-energy-rating": "EPC rating on register",
- "property-type": "Property Type",
- "built-form": "Archetype",
- "total-floor-area": "Property Floor Area",
- "construction-age-band": "Property Age Band",
- "floor-height": "Property Floor Height",
- "number-habitable-rooms": "Number of Habitable Rooms",
- "walls-description": "Wall Construction",
- "roof-description": "Roof Construction",
- "mainheat-description": "Heating Type",
- "secondheat-description": "Secondary Heating",
- "transaction-type": "Reason for last EPC",
- "energy-consumption-current": "Heat Demand (kWh/m2)",
- }
- )
- # We de-dupe, taking the newest on the date the EPC was lod
- epcs_to_merge["Date of last EPC"] = pd.to_datetime(epcs_to_merge["Date of last EPC"])
- epcs_to_merge = epcs_to_merge.sort_values("Date of last EPC", ascending=False)
- epcs_to_merge = epcs_to_merge.drop_duplicates(subset="uprn")
+ # epc_data = json.loads(
+ # read_from_s3(
+ # bucket_name="retrofit-data-dev",
+ # s3_file_name="customers/Stonewater/clustering/epc_data.json"
+ # )
+ # )
+ # epc_data = pd.DataFrame(epc_data)
+ #
+ # epc_data["uprn"] = np.where(
+ # epc_data["internal_id"] == 1091,
+ # 83143766,
+ # epc_data["uprn"]
+ # )
+ #
+ # epc_data_batch_2 = read_pickle_from_s3(
+ # s3_file_name="customers/Stonewater/clustering/epc_data_batch_2.pkl",
+ # bucket_name="retrofit-data-dev"
+ # )
+ # epc_data_batch_2 = pd.DataFrame(epc_data_batch_2)
+ #
+ # complete_epcs = pd.concat([epc_data, epc_data_batch_2])
+ #
+ # epcs_to_merge = complete_epcs[
+ # [
+ # "uprn",
+ # "address",
+ # "postcode",
+ # "property-type",
+ # "built-form",
+ # "inspection-date",
+ # "current-energy-rating",
+ # "current-energy-efficiency",
+ # "roof-description",
+ # "walls-description",
+ # "transaction-type",
+ # "secondheat-description",
+ # "total-floor-area",
+ # "construction-age-band",
+ # "floor-height",
+ # "number-habitable-rooms",
+ # "mainheat-description",
+ # "energy-consumption-current"
+ # ]
+ # ].rename(
+ # columns={
+ # "address": "Address",
+ # "postcode": "Postcode",
+ # "inspection-date": "Date of last EPC",
+ # "current-energy-efficiency": "SAP score on register",
+ # "current-energy-rating": "EPC rating on register",
+ # "property-type": "Property Type",
+ # "built-form": "Archetype",
+ # "total-floor-area": "Property Floor Area",
+ # "construction-age-band": "Property Age Band",
+ # "floor-height": "Property Floor Height",
+ # "number-habitable-rooms": "Number of Habitable Rooms",
+ # "walls-description": "Wall Construction",
+ # "roof-description": "Roof Construction",
+ # "mainheat-description": "Heating Type",
+ # "secondheat-description": "Secondary Heating",
+ # "transaction-type": "Reason for last EPC",
+ # "energy-consumption-current": "Heat Demand (kWh/m2)",
+ # }
+ # )
+ # # We de-dupe, taking the newest on the date the EPC was lodged
+ # epcs_to_merge["Date of last EPC"] = pd.to_datetime(epcs_to_merge["Date of last EPC"])
+ # epcs_to_merge = epcs_to_merge.sort_values("Date of last EPC", ascending=False)
+ # epcs_to_merge = epcs_to_merge.drop_duplicates(subset="uprn")
stonewater_cavity_properties["UPRN"] = stonewater_cavity_properties["UPRN"].astype("Int64").astype(str)
+ stonewater_cavity_properties["Reason Included"].value_counts()
+ # Find the postcodes where an Osmosis survey revealed a need for CWI
+ postcodes_found_needing_cwi = stonewater_cavity_properties[
+ stonewater_cavity_properties["Reason Included"].isin(
+ [
+ "Survey revealed potential need for CWI or extract and re-fill",
+ "Surveyed revealed potential need for CWI or extract and re-fill and is an as built cavity property",
+ "Survey showed this property needs CWI",
+ "Survey showed this property could need extract and re-fill"
+ ]
+ )
+ ]["Postcode"].unique()
+
+ stonewater_cavity_properties["Suspected Needs CWI - not surveyed"] = (
+ (
+ stonewater_cavity_properties[
+ "Postcode"].isin(
+ postcodes_found_needing_cwi)
+ ) & (
+ ~stonewater_cavity_properties[
+ "Reason Included"].isin(
+ [
+ "Survey revealed potential need "
+ "for CWI or extract and re-fill",
+ "Surveyed revealed potential "
+ "need for CWI or extract and "
+ "re-fill and is an as built "
+ "cavity property",
+ "Survey showed this property "
+ "needs CWI",
+ "Survey showed this property "
+ "could need extract and re-fill"
+ ]
+ )
+ )
+ )
# Merge the EPCs on, with the data we need
stonewater_cavity_properties = stonewater_cavity_properties.rename(
@@ -252,12 +339,12 @@ def app():
"Renewables": "Parity - Renewables",
"Total Floor Area": "Parity - Total Floor Area"
}
- ).merge(
- epcs_to_merge,
- how="left",
- left_on="UPRN",
- right_on="uprn"
- )
+ ) # .merge(
+ # epcs_to_merge,
+ # how="left",
+ # left_on="UPRN",
+ # right_on="uprn"
+ # )
# We now flag the additional properties in the as built list
@@ -288,8 +375,56 @@ def app():
additional_properties = additional_properties.merge(house_numbers, how="left", on="Address ID")
additional_properties["row_id"] = additional_properties["Address ID"].copy()
+ # Flag any units in this list that were installed under ECO3
+ additional_properties["Installed under ECO3"] = additional_properties["Organisation Reference"].isin(
+ numeric_ids['STONEWATER UPRN'].values
+ )
+
+ # Additional list ECO3
+ additional_list_eco3 = additional_properties[additional_properties["Installed under ECO3"]]["Postcode"].unique()
+
+ # These are properties that were not installed under ECO3, that have the same postcodes as properties
+ # installed under ECO3
+ # These are 297 properties we might want to start with as an immediate priority
+ additional_properties["Same Postcode as Installed under ECO3"] = (
+ ~additional_properties["Installed under ECO3"] & (
+ additional_properties["Postcode"].isin(additional_list_eco3)
+ )
+ )
+
+ # We do some additional manual checks, for ECO3 properties that were installed that didn't get matched to either
+ # dataset
+ numeric_ids["In asset list"] = numeric_ids["STONEWATER UPRN"].isin(
+ stonewater_cavity_properties['Org. ref.'].astype(int).values
+ )
+ numeric_ids["In asset list"] = numeric_ids["In asset list"] | (
+ numeric_ids["STONEWATER UPRN"].isin(
+ additional_properties['Organisation Reference'].astype(int).values
+ )
+ )
+
+ # eco3_installs_not_in_asset_list = numeric_ids[~numeric_ids["In asset list"]]
+ # # We now take samples of properties randomly and manually check the ID against the asset list
+ # print(eco3_installs_not_in_asset_list.sample(1)[["STONEWATER UPRN", "Post Code", "NO ", "Street / Block Name", ]])
+ # # Checked STONEWATER UPRN
+ # # 9862, BH15 1NR, 33, THE QUAY FOYER [x]
+ # # 12785, S01 66PN, 57, SEACOLE GARDENS [x]
+ # # 26071, MK42 0TE, 51, De Havilland Avenue, Shortstown [x]
+ # # 18213, HR6 9UW, 20 Ford Street [x]
+ # # 24344, LU4 9FF, 6 SEAL CLOSE [x]
+ # # 31222, SN14 0QZ, 7 HARDBROOK COURT [x]
+ # # 9343, SP4 7XL, 10 OAK PLACE [x]
+ # # 34730, LU5 5TN, 4 TUDOR DRIVE [x]
+ # # 7021, BN27 2BZ, 32 BUTTS FIELD []
+ #
+ # stonewater_cavity_properties[stonewater_cavity_properties['Org. ref.'] == 7021]
+ # stonewater_cavity_properties[stonewater_cavity_properties['Postcode'] == "BN27 2BZ"]["Name"]
+ #
+ # additional_properties[additional_properties['Organisation Reference'] == 7021]
+ # additional_properties[additional_properties['Postcode'] == "BN27 2BZ"][["Address"]]
+
# Pull the EPCs for these properties
- additional_properties_epcs, errors = get_data(additional_properties)
+ # additional_properties_epcs, errors = get_data(additional_properties)
# Save this data as a pickle
# import pickle
@@ -297,12 +432,20 @@ def app():
# "wb") as f:
# pickle.dump(additional_properties_epcs, f)
+ additional_properties["Suspected Needs CWI - not surveyed"] = (
+ (
+ additional_properties["Postcode"].isin(postcodes_found_needing_cwi)
+ )
+ )
+
+ additional_properties["Same Postcode as Installed under ECO3"].value_counts()
+
# We drop Full Address
additional_properties = additional_properties.drop(columns=["Full Address"])
additional_properties2 = additional_properties[[
- "row_id", "Address", "Postcode", "Address ID", "SAP", "SAP Band", "Property Type", "Walls", "Roofs", "Glazing",
- "Heating", "Main Fuel", "Hot Water", "Renewables", "Total Floor Area",
-
+ "Address", "Postcode", "Address ID", "SAP", "SAP Band", "Property Type", "Walls", "Roofs", "Glazing",
+ "Heating", "Main Fuel", "Hot Water", "Renewables", "Total Floor Area", 'Installed under ECO3',
+ 'Same Postcode as Installed under ECO3'
]].rename(
columns={
"SAP": "Parity - Predicted SAP",
@@ -318,56 +461,58 @@ def app():
"Renewables": "Parity - Renewables",
"Total Floor Area": "Parity - Total Floor Area"
}
- ).merge(
- pd.DataFrame(additional_properties_epcs)[
- [
- "row_id",
- "property-type",
- "built-form",
- "inspection-date",
- "current-energy-rating",
- "current-energy-efficiency",
- "roof-description",
- "walls-description",
- "transaction-type",
- "secondheat-description",
- "total-floor-area",
- "construction-age-band",
- "floor-height",
- "number-habitable-rooms",
- "mainheat-description",
- "energy-consumption-current"
- ]
- ].rename(
- columns={
- "inspection-date": "Date of last EPC",
- "current-energy-efficiency": "SAP score on register",
- "current-energy-rating": "EPC rating on register",
- "property-type": "Property Type",
- "built-form": "Archetype",
- "total-floor-area": "Property Floor Area",
- "construction-age-band": "Property Age Band",
- "floor-height": "Property Floor Height",
- "number-habitable-rooms": "Number of Habitable Rooms",
- "walls-description": "Wall Construction",
- "roof-description": "Roof Construction",
- "mainheat-description": "Heating Type",
- "secondheat-description": "Secondary Heating",
- "transaction-type": "Reason for last EPC",
- "energy-consumption-current": "Heat Demand (kWh/m2)",
- }
- ),
- how="left",
- on="row_id"
- )
+ ) # .merge(
+ # pd.DataFrame(additional_properties_epcs)[
+ # [
+ # "row_id",
+ # "property-type",
+ # "built-form",
+ # "inspection-date",
+ # "current-energy-rating",
+ # "current-energy-efficiency",
+ # "roof-description",
+ # "walls-description",
+ # "transaction-type",
+ # "secondheat-description",
+ # "total-floor-area",
+ # "construction-age-band",
+ # "floor-height",
+ # "number-habitable-rooms",
+ # "mainheat-description",
+ # "energy-consumption-current"
+ # ]
+ # ].rename(
+ # columns={
+ # "inspection-date": "Date of last EPC",
+ # "current-energy-efficiency": "SAP score on register",
+ # "current-energy-rating": "EPC rating on register",
+ # "property-type": "Property Type",
+ # "built-form": "Archetype",
+ # "total-floor-area": "Property Floor Area",
+ # "construction-age-band": "Property Age Band",
+ # "floor-height": "Property Floor Height",
+ # "number-habitable-rooms": "Number of Habitable Rooms",
+ # "walls-description": "Wall Construction",
+ # "roof-description": "Roof Construction",
+ # "mainheat-description": "Heating Type",
+ # "secondheat-description": "Secondary Heating",
+ # "transaction-type": "Reason for last EPC",
+ # "energy-consumption-current": "Heat Demand (kWh/m2)",
+ # }
+ # ),
+ # how="left",
+ # on="row_id"
+ # )
# We save the data locally
stonewater_cavity_properties.to_csv(
- "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Stonewater Cavity Properties.csv",
+ "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Stonewater Cavity Properties - priority "
+ "postcodes.csv",
index=False
)
additional_properties2.to_csv(
- "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Stonewater Additional Cavity Properties.csv",
+ "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Stonewater Additional Cavity Properties - "
+ "non-priority postcodes.csv",
index=False
)
# Save the survey findings
diff --git a/etl/customers/waltham_forest/whlg eligibile properties.py b/etl/customers/waltham_forest/whlg eligibile properties.py
index fee988c1..9e1949f7 100644
--- a/etl/customers/waltham_forest/whlg eligibile properties.py
+++ b/etl/customers/waltham_forest/whlg eligibile properties.py
@@ -44,6 +44,10 @@ epc_data["has_conservation_restrictions"] = (
| (epc_data["is_heritage_building"] == True)
)
+whlg_eligible_postcodes["Local Authority"].value_counts()
+
+whlg_eligible_postcodes = whlg_eligible_postcodes[whlg_eligible_postcodes["Local Authority"] == "Waltham Forest"]
+
# Pathway 1:
# Match based on eligible postcodes
pathway1 = epc_data[epc_data["postcode"].isin(whlg_eligible_postcodes["Postcode"].values)]
@@ -67,6 +71,10 @@ pathway1["EPC Date"] = pd.to_datetime(pathway1["EPC Date"]).dt.strftime("%Y-%m-%
# Create a year EPC was lodged
pathway1["EPC Year"] = pd.to_datetime(pathway1["EPC Date"]).dt.year
+low_epc = pathway1[pathway1["EPC Rating"].isin(["F", "G"])]
+low_epc["EPC Rating"].value_counts()
+low_epc.tail(1)[["address", "postcode"]]
+
pathway1.to_csv(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Waltham Forest WHLG - Pathway 1 Eligibility.csv",
index=False