From 36bb4b0f275b402e7806f01cde788676e7090bd3 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 28 Jan 2025 15:10:23 +0000 Subject: [PATCH] pulled data needed for stonewater --- .idea/Model.iml | 2 +- .idea/misc.xml | 2 +- etl/access_reporting/app.py | 46 ++ .../stonewater/Wave 3 Preparation.py | 33 ++ etl/customers/stonewater/data_cleaning.py | 137 ++++++ .../stonewater/potential_eco_properties.py | 393 ++++++++++++------ .../whlg eligibile properties.py | 8 + 7 files changed, 495 insertions(+), 126 deletions(-) create mode 100644 etl/customers/stonewater/data_cleaning.py diff --git a/.idea/Model.iml b/.idea/Model.iml index df6c4faa..762580d9 100644 --- a/.idea/Model.iml +++ b/.idea/Model.iml @@ -7,7 +7,7 @@ - + diff --git a/.idea/misc.xml b/.idea/misc.xml index 50cad4ca..c916a158 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -3,7 +3,7 @@ - + diff --git a/etl/access_reporting/app.py b/etl/access_reporting/app.py index 830f4370..8a8254a1 100644 --- a/etl/access_reporting/app.py +++ b/etl/access_reporting/app.py @@ -83,8 +83,11 @@ def api_call_decorator(func): results = [] page_size = kwargs.get('page_size', None) response_data = {} + n_calls = 0 while url: + logger.info("Making call for page: " + str(n_calls + 1)) + n_calls += 1 response = requests.request(http_method, url, headers=self.headers, json=data) # Handle the response @@ -93,6 +96,7 @@ def api_call_decorator(func): if page_size: results.extend(response_json.get('value', [])) url = response_json.get('@odata.nextLink', None) + logger.info(f"Next page URL: {url}") else: response_data = response_json # Capture the full response for consistency break @@ -270,6 +274,48 @@ class SharePointClient: return file_content + def download_sharepoint_folder(self, drive_id, folder_path, download_dir, excluded_file_types=None): + """ + Downloads all files in a SharePoint folder to the specified local directory. + + :param drive_id: The ID of the SharePoint drive. + :param folder_path: The path of the folder in SharePoint. + :param download_dir: The local directory to save the downloaded files. + :param excluded_file_types: A list of file types to exclude from download (default is None). + """ + + excluded_file_types = [] if excluded_file_types is None else excluded_file_types + + # Ensure the download directory exists + os.makedirs(download_dir, exist_ok=True) + + # List folder contents + folder_contents = self.list_folder_contents(drive_id, folder_path) + files = folder_contents.get('value', []) + + for item in files: + if item.get('folder'): # Check if it's a folder + # Recursively handle subfolders + subfolder_path = f"{folder_path}/{item['name']}" + subfolder_dir = os.path.join(download_dir, item['name']) + self.download_sharepoint_folder(drive_id, subfolder_path, subfolder_dir) + else: + # It's a file, download it + file_name = item['name'] + if file_name.split(".")[-1] in excluded_file_types: + continue + download_url = item['@microsoft.graph.downloadUrl'] + + logger.info(f"Downloading file: {file_name}") + file_content = self.download_sharepoint_file(download_url) + + # Save the file locally + file_path = os.path.join(download_dir, file_name) + with open(file_path, 'wb') as f: + f.write(file_content.read()) + + logger.info(f"File saved to: {file_path}") + def app(): # Customers for WC 18/11/2024 diff --git a/etl/customers/stonewater/Wave 3 Preparation.py b/etl/customers/stonewater/Wave 3 Preparation.py index 0f757f7b..8538188b 100644 --- a/etl/customers/stonewater/Wave 3 Preparation.py +++ b/etl/customers/stonewater/Wave 3 Preparation.py @@ -2905,5 +2905,38 @@ def identify_incorrect_packages(): os.path.join(CUSTOMER_FOLDER_PATH, "Units with assigned packages - with flags.csv"), index=False ) + +def revised_model(): + """ + This function implements the revised model for Stonewater, where we are looking at new priority postcodes + This work was undertaken in January 2021. + """ + + # 1) Create the new list of properties + + new_priority_postcodes = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Jan 2025 Project/Updated 2025 to 2030 " + "priority list.xlsx" + ) + + original_archetypes = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Stonewater SHDF_3_0_Board Triage 22.05.24 " + "- Archetyped V3.1.xlsx", + header=4 + ) + original_archetypes = original_archetypes[~pd.isnull(original_archetypes["Address ID"])] + original_archetypes = original_archetypes[original_archetypes["Address ID"] != "Address ID"] + original_archetypes["Address ID"] = original_archetypes["Address ID"].astype(int) + + original_archetypes = original_archetypes[ + ["Address ID", "Archetype ID", ""] + ] + + # Check if we have all of the addresses + missed = original_archetypes[ + ~original_archetypes["Address ID"].isin(new_priority_postcodes["Address ID"].values) + ]["Archetype ID"].unique() + assert + # if __name__ == "__main__": # main() diff --git a/etl/customers/stonewater/data_cleaning.py b/etl/customers/stonewater/data_cleaning.py new file mode 100644 index 00000000..8751960c --- /dev/null +++ b/etl/customers/stonewater/data_cleaning.py @@ -0,0 +1,137 @@ +import os +import shutil +from tqdm import tqdm + + +def delete_large_files(): + """ + This function deletes photos, designs and other files which we don't need + :return: + """ + + folder_path = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Wave 2.1 Surveys" + + # List the contents of this folder since in each sub-folder we have the property folders + contents = os.listdir(folder_path) + + for subfolder in contents: + if not os.path.isdir(os.path.join(folder_path, subfolder)): + continue + subfolder_path = os.path.join(folder_path, subfolder) + # List the contents + property_folders = os.listdir(subfolder_path) + + for property in tqdm(property_folders): + # Check if it's a directory + if not os.path.isdir(os.path.join(subfolder_path, property)): + continue + + property_path = os.path.join(subfolder_path, property) + property_contents = os.listdir(property_path) + # We delete the contents of the following folders: + # '1. RA Property Pics' + # '4. Air Tightness Tests' + # '5. RD Design Info' + for folder_to_delete in ["1. RA Property Pics", "4. Air Tightness Tests", "5. RD Design Info", + "1. RA Property PIcs", "Post EPC Photos", "4. RD Design Info", + "5. Installer Info", "6. Trustmark lodgement", "7.Post Install Inspection Photos", + "6. Trustmark Lodgement", "7. Post Inspection Photos"]: + if folder_to_delete not in property_contents: + continue + folder_to_delete_path = os.path.join(property_path, folder_to_delete) + if os.path.isdir(folder_to_delete_path): + # Delete the folder, even if it's not empty + shutil.rmtree(folder_to_delete_path) + + # We now check the '2. RA Coordinator Info' folder for any .MOV files and delete them + if "2. RA Coordinator Info" not in property_contents: + coordinator_folder = "1. RA Coordinator Info" + else: + coordinator_folder = "2. RA Coordinator Info" + coordinator_info_path = os.path.join(property_path, coordinator_folder) + coordinator_info_contents = os.listdir(coordinator_info_path) + # Look for .MOV files and .jpg files + for file in coordinator_info_contents: + if file.endswith(".MOV"): + os.remove(os.path.join(coordinator_info_path, file)) + + if file.endswith(".jpg"): + os.remove(os.path.join(coordinator_info_path, file)) + + if "Property Pics" in coordinator_info_contents: + # Delete folder and contents + shutil.rmtree(os.path.join(coordinator_info_path, "Property Pics")) + + +def download_data_from_sharepoint(): + # Given a sharepoint location, this function will download the retrofit assessment folders from the locations + # specified in the sharepoint location + from etl.access_reporting.app import SharePointClient + + sharepoint_client = SharePointClient( + tenant_id="10d5af8b-2cfd-4882-9ccd-b96e4812dacf", + client_id="6832a4c5-fb8c-4082-a746-4f51e1020f0d", + client_secret="xpC8Q~Frww48SM1V-D8lGy5iOY7P_cJ7FF3jgarQ", + site_id="bc925a9a-ad0b-4de9-9a3c-e61014cc7489" + ) + + # Retrieve the data from Sharepoint and write to local machine + contents = sharepoint_client.list_folder_contents( + drive_id=sharepoint_client.document_drive["id"], + folder_path="Osmosis ACD/Osmosis ACD Projects/Stonewater/Stonewater Property ID Folders" + ) + + len(contents["value"]) + folders_to_pull = [ + folder for folder in contents["value"] if folder["name"] in ["3. Wiltshire", "4. Bournemouth", "5. Coventry"] + ] + for folder_to_pull in folders_to_pull: + # Get the contents + folder_contents = sharepoint_client.list_folder_contents( + drive_id=sharepoint_client.document_drive["id"], + folder_path="Osmosis ACD/Osmosis ACD Projects/Stonewater/Stonewater Property ID Folders" + "/" + + folder_to_pull["name"], + page_size=100 + ) + + property_folders = [f for f in folder_contents["value"]] + + for property_folder in property_folders: + # We go into each property folder and get the contents + property_folder_contents = sharepoint_client.list_folder_contents( + drive_id=sharepoint_client.document_drive["id"], + folder_path="Osmosis ACD/Osmosis ACD Projects/Stonewater/Stonewater Property ID Folders" + "/" + + folder_to_pull["name"] + "/" + property_folder["name"] + ) + # We look for the retrofit assessment folder: + property_sub_folders = [ + f for f in property_folder_contents["value"] if "ra coordinator info" in f["name"].lower() + ] + + if not property_sub_folders: + continue + + # if we have this, we download the folder and store it on my laptop! + property_sub_folder = property_sub_folders[0] + + property_folder_path = os.path.join( + "Osmosis ACD/Osmosis ACD Projects/Stonewater/Stonewater Property ID Folders", + folder_to_pull["name"], + property_folder["name"], + property_sub_folder["name"] + ) + + download_dir = os.path.join( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Wave 2.1 Surveys", + folder_to_pull["name"], + property_folder["name"], + property_sub_folder["name"] + ) + + # We download the folder + sharepoint_client.download_sharepoint_folder( + drive_id=sharepoint_client.document_drive["id"], + folder_path=property_folder_path, + download_dir=download_dir, + excluded_file_types=["MOV"] + ) diff --git a/etl/customers/stonewater/potential_eco_properties.py b/etl/customers/stonewater/potential_eco_properties.py index c0301e9a..bda9c30c 100644 --- a/etl/customers/stonewater/potential_eco_properties.py +++ b/etl/customers/stonewater/potential_eco_properties.py @@ -7,6 +7,8 @@ from tqdm import tqdm from dotenv import load_dotenv from backend.SearchEpc import SearchEpc from utils.s3 import read_from_s3, read_pickle_from_s3 +import msoffcrypto +from io import BytesIO load_dotenv(dotenv_path="backend/.env") EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN") @@ -64,6 +66,28 @@ def app(): This code creates a list of cavity properties, for review """ + # Read in the password protected master + # TODO: This file should be deleted! + + # Path to the password-protected Excel file + file_path = ("/Users/khalimconn-kowlessar/Downloads/STONEWATER MASTER SHEET - UPDATED 20.5.24 - K- PASSWORD " + "PROTECTED.xlsx") + password = "STONE123" # Replace with the actual password + + # Open the file and decrypt it + with open(file_path, "rb") as f: + decrypted_file = BytesIO() + office_file = msoffcrypto.OfficeFile(f) + office_file.load_key(password=password) + office_file.decrypt(decrypted_file) + + # Read the decrypted file into a DataFrame + eco_rolling_master = pd.read_excel(decrypted_file, sheet_name="Sheet1", engine="openpyxl") + + eco_rolling_master = eco_rolling_master[ + ~eco_rolling_master['INSTALL/CANCELLATION DATE'].str.contains("CANCELLED") + ] + archetyped_properties = pd.read_excel( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Stonewater SHDF_3_0_Board Triage 22.05.24 - " "Archetyped V3.1.xlsx", @@ -116,13 +140,16 @@ def app(): features_to_merge = features[ [ - "Address ID", "Age", "Property Type", "Walls", "Roofs", "Glazing", "Heating", "Main Fuel", "Hot Water", + "Address ID", "Organisation Reference", "Age", "Property Type", "Walls", "Roofs", "Glazing", "Heating", + "Main Fuel", + "Hot Water", "Renewables", "Total Floor Area" ] ] stonewater_cavity_properties = archetyped_properties[ - ["Name", "Postcode", "Osm. ID", "Address ID", "UPRN", "UDPRN", "Archetype ID", "House no", "Street name", + ["Name", "Postcode", "Osm. ID", "Org. ref.", "Address ID", "UPRN", "UDPRN", "Archetype ID", "House no", + "Street name", "Address line 2", "City/Town", "Is Cavity Property", "Survey shows CWI needed for Archetype"] ].merge( features_to_merge, how="left", on="Address ID" @@ -166,77 +193,137 @@ def app(): stonewater_cavity_properties["Reason Included"] ) + # We flag units that were installed under ECO3 + numeric_ids = eco_rolling_master[eco_rolling_master["STONEWATER UPRN"] != "NOT ON ASSET LIST"] + numeric_ids = numeric_ids[~pd.isnull(numeric_ids["STONEWATER UPRN"])] + numeric_ids["STONEWATER UPRN"] = numeric_ids["STONEWATER UPRN"].astype(int) + + stonewater_cavity_properties["Installed under ECO3"] = stonewater_cavity_properties["Org. ref."].isin( + numeric_ids['STONEWATER UPRN'].values + ) + + # Which postcodes were installed under ECO3 + priority_list_eco3 = stonewater_cavity_properties[ + stonewater_cavity_properties["Installed under ECO3"] + ]["Postcode"].unique() + + # These are properties that were not installed under ECO3, that have the same postcodes as properties + # installed under ECO3 + + # These are 66 properties we might want to start with as an immediate priority + stonewater_cavity_properties["Same Postcode as Installed under ECO3"] = ( + ~stonewater_cavity_properties["Installed under ECO3"] & ( + stonewater_cavity_properties["Postcode"].isin(priority_list_eco3) + ) + ) + # We get the EPC data - epc_data = json.loads( - read_from_s3( - bucket_name="retrofit-data-dev", - s3_file_name="customers/Stonewater/clustering/epc_data.json" - ) - ) - epc_data = pd.DataFrame(epc_data) - - epc_data["uprn"] = np.where( - epc_data["internal_id"] == 1091, - 83143766, - epc_data["uprn"] - ) - - epc_data_batch_2 = read_pickle_from_s3( - s3_file_name="customers/Stonewater/clustering/epc_data_batch_2.pkl", - bucket_name="retrofit-data-dev" - ) - epc_data_batch_2 = pd.DataFrame(epc_data_batch_2) - - complete_epcs = pd.concat([epc_data, epc_data_batch_2]) - - epcs_to_merge = complete_epcs[ - [ - "uprn", - "address", - "postcode", - "property-type", - "built-form", - "inspection-date", - "current-energy-rating", - "current-energy-efficiency", - "roof-description", - "walls-description", - "transaction-type", - "secondheat-description", - "total-floor-area", - "construction-age-band", - "floor-height", - "number-habitable-rooms", - "mainheat-description", - "energy-consumption-current" - ] - ].rename( - columns={ - "address": "Address", - "postcode": "Postcode", - "inspection-date": "Date of last EPC", - "current-energy-efficiency": "SAP score on register", - "current-energy-rating": "EPC rating on register", - "property-type": "Property Type", - "built-form": "Archetype", - "total-floor-area": "Property Floor Area", - "construction-age-band": "Property Age Band", - "floor-height": "Property Floor Height", - "number-habitable-rooms": "Number of Habitable Rooms", - "walls-description": "Wall Construction", - "roof-description": "Roof Construction", - "mainheat-description": "Heating Type", - "secondheat-description": "Secondary Heating", - "transaction-type": "Reason for last EPC", - "energy-consumption-current": "Heat Demand (kWh/m2)", - } - ) - # We de-dupe, taking the newest on the date the EPC was lod - epcs_to_merge["Date of last EPC"] = pd.to_datetime(epcs_to_merge["Date of last EPC"]) - epcs_to_merge = epcs_to_merge.sort_values("Date of last EPC", ascending=False) - epcs_to_merge = epcs_to_merge.drop_duplicates(subset="uprn") + # epc_data = json.loads( + # read_from_s3( + # bucket_name="retrofit-data-dev", + # s3_file_name="customers/Stonewater/clustering/epc_data.json" + # ) + # ) + # epc_data = pd.DataFrame(epc_data) + # + # epc_data["uprn"] = np.where( + # epc_data["internal_id"] == 1091, + # 83143766, + # epc_data["uprn"] + # ) + # + # epc_data_batch_2 = read_pickle_from_s3( + # s3_file_name="customers/Stonewater/clustering/epc_data_batch_2.pkl", + # bucket_name="retrofit-data-dev" + # ) + # epc_data_batch_2 = pd.DataFrame(epc_data_batch_2) + # + # complete_epcs = pd.concat([epc_data, epc_data_batch_2]) + # + # epcs_to_merge = complete_epcs[ + # [ + # "uprn", + # "address", + # "postcode", + # "property-type", + # "built-form", + # "inspection-date", + # "current-energy-rating", + # "current-energy-efficiency", + # "roof-description", + # "walls-description", + # "transaction-type", + # "secondheat-description", + # "total-floor-area", + # "construction-age-band", + # "floor-height", + # "number-habitable-rooms", + # "mainheat-description", + # "energy-consumption-current" + # ] + # ].rename( + # columns={ + # "address": "Address", + # "postcode": "Postcode", + # "inspection-date": "Date of last EPC", + # "current-energy-efficiency": "SAP score on register", + # "current-energy-rating": "EPC rating on register", + # "property-type": "Property Type", + # "built-form": "Archetype", + # "total-floor-area": "Property Floor Area", + # "construction-age-band": "Property Age Band", + # "floor-height": "Property Floor Height", + # "number-habitable-rooms": "Number of Habitable Rooms", + # "walls-description": "Wall Construction", + # "roof-description": "Roof Construction", + # "mainheat-description": "Heating Type", + # "secondheat-description": "Secondary Heating", + # "transaction-type": "Reason for last EPC", + # "energy-consumption-current": "Heat Demand (kWh/m2)", + # } + # ) + # # We de-dupe, taking the newest on the date the EPC was lod + # epcs_to_merge["Date of last EPC"] = pd.to_datetime(epcs_to_merge["Date of last EPC"]) + # epcs_to_merge = epcs_to_merge.sort_values("Date of last EPC", ascending=False) + # epcs_to_merge = epcs_to_merge.drop_duplicates(subset="uprn") stonewater_cavity_properties["UPRN"] = stonewater_cavity_properties["UPRN"].astype("Int64").astype(str) + stonewater_cavity_properties["Reason Included"].value_counts() + # Find the postcodes where an Osmosis survey revealed a need for CWI + postcodes_found_needing_cwi = stonewater_cavity_properties[ + stonewater_cavity_properties["Reason Included"].isin( + [ + "Survey revealed potential need for CWI or extract and re-fill", + "Surveyed revealed potential need for CWI or extract and re-fill and is an as built cavity property", + "Survey showed this property needs CWI", + "Survey showed this property could need extract and re-fill" + ] + ) + ]["Postcode"].unique() + + stonewater_cavity_properties["Suspected Needs CWI - not surveyed"] = ( + ( + stonewater_cavity_properties[ + "Postcode"].isin( + postcodes_found_needing_cwi) + ) & ( + ~stonewater_cavity_properties[ + "Reason Included"].isin( + [ + "Survey revealed potential need " + "for CWI or extract and re-fill", + "Surveyed revealed potential " + "need for CWI or extract and " + "re-fill and is an as built " + "cavity property", + "Survey showed this property " + "needs CWI", + "Survey showed this property " + "could need extract and re-fill" + ] + ) + ) + ) # Merge the EPCs on, with the data we need stonewater_cavity_properties = stonewater_cavity_properties.rename( @@ -252,12 +339,12 @@ def app(): "Renewables": "Parity - Renewables", "Total Floor Area": "Parity - Total Floor Area" } - ).merge( - epcs_to_merge, - how="left", - left_on="UPRN", - right_on="uprn" - ) + ) # .merge( + # epcs_to_merge, + # how="left", + # left_on="UPRN", + # right_on="uprn" + # ) # We now flag the additional properties in the as built list @@ -288,8 +375,56 @@ def app(): additional_properties = additional_properties.merge(house_numbers, how="left", on="Address ID") additional_properties["row_id"] = additional_properties["Address ID"].copy() + # Flag any units in this list that were installed under ECO3 + additional_properties["Installed under ECO3"] = additional_properties["Organisation Reference"].isin( + numeric_ids['STONEWATER UPRN'].values + ) + + # Additional list ECO3 + additional_list_eco3 = additional_properties[additional_properties["Installed under ECO3"]]["Postcode"].unique() + + # These are properties that were not installed under ECO3, that have the same postcodes as properties + # installed under ECO3 + # These are 297 properties we might want to start with as an immediate priority + additional_properties["Same Postcode as Installed under ECO3"] = ( + ~additional_properties["Installed under ECO3"] & ( + additional_properties["Postcode"].isin(additional_list_eco3) + ) + ) + + # We do some additional manual checks, for ECO3 properties that were installed that didn't get matched to either + # dataaset + numeric_ids["In asset list"] = numeric_ids["STONEWATER UPRN"].isin( + stonewater_cavity_properties['Org. ref.'].astype(int).values + ) + numeric_ids["In asset list"] = numeric_ids["In asset list"] | ( + numeric_ids["STONEWATER UPRN"].isin( + additional_properties['Organisation Reference'].astype(int).values + ) + ) + + # eco3_installs_not_in_asset_list = numeric_ids[~numeric_ids["In asset list"]] + # # We now take samples of properties randomly and manually check the ID against the asset list + # print(eco3_installs_not_in_asset_list.sample(1)[["STONEWATER UPRN", "Post Code", "NO ", "Street / Block Name", ]]) + # # Checked STONEWATER UPRN + # # 9862, BH15 1NR, 33, THE QUAY FOYER [x] + # # 12785, S01 66PN, 57, SEACOLE GARDENS [x] + # # 26071, MK42 0TE, 51, De Havilland Avenue, Shortstown [x] + # # 18213, HR6 9UW, 20 Ford Street [x] + # # 24344, LU4 9FF, 6 SEAL CLOSE [x] + # # 31222, SN14 0QZ, 7 HARDBROOK COURT [x] + # # 9343, SP4 7XL, 10 OAK PLACE [x] + # # 34730, LU5 5TN, 4 TUDOR DRIVE [x] + # # 7021, BN27 2BZ, 32 BUTTS FIELD [] + # + # stonewater_cavity_properties[stonewater_cavity_properties['Org. ref.'] == 7021] + # stonewater_cavity_properties[stonewater_cavity_properties['Postcode'] == "BN27 2BZ"]["Name"] + # + # additional_properties[additional_properties['Organisation Reference'] == 7021] + # additional_properties[additional_properties['Postcode'] == "BN27 2BZ"][["Address"]] + # Pull the EPCs for these properties - additional_properties_epcs, errors = get_data(additional_properties) + # additional_properties_epcs, errors = get_data(additional_properties) # Save this data as a pickle # import pickle @@ -297,12 +432,20 @@ def app(): # "wb") as f: # pickle.dump(additional_properties_epcs, f) + additional_properties["Suspected Needs CWI - not surveyed"] = ( + ( + additional_properties["Postcode"].isin(postcodes_found_needing_cwi) + ) + ) + + additional_properties["Same Postcode as Installed under ECO3"].value_counts() + # We drop Full Address additional_properties = additional_properties.drop(columns=["Full Address"]) additional_properties2 = additional_properties[[ - "row_id", "Address", "Postcode", "Address ID", "SAP", "SAP Band", "Property Type", "Walls", "Roofs", "Glazing", - "Heating", "Main Fuel", "Hot Water", "Renewables", "Total Floor Area", - + "Address", "Postcode", "Address ID", "SAP", "SAP Band", "Property Type", "Walls", "Roofs", "Glazing", + "Heating", "Main Fuel", "Hot Water", "Renewables", "Total Floor Area", 'Installed under ECO3', + 'Same Postcode as Installed under ECO3' ]].rename( columns={ "SAP": "Parity - Predicted SAP", @@ -318,56 +461,58 @@ def app(): "Renewables": "Parity - Renewables", "Total Floor Area": "Parity - Total Floor Area" } - ).merge( - pd.DataFrame(additional_properties_epcs)[ - [ - "row_id", - "property-type", - "built-form", - "inspection-date", - "current-energy-rating", - "current-energy-efficiency", - "roof-description", - "walls-description", - "transaction-type", - "secondheat-description", - "total-floor-area", - "construction-age-band", - "floor-height", - "number-habitable-rooms", - "mainheat-description", - "energy-consumption-current" - ] - ].rename( - columns={ - "inspection-date": "Date of last EPC", - "current-energy-efficiency": "SAP score on register", - "current-energy-rating": "EPC rating on register", - "property-type": "Property Type", - "built-form": "Archetype", - "total-floor-area": "Property Floor Area", - "construction-age-band": "Property Age Band", - "floor-height": "Property Floor Height", - "number-habitable-rooms": "Number of Habitable Rooms", - "walls-description": "Wall Construction", - "roof-description": "Roof Construction", - "mainheat-description": "Heating Type", - "secondheat-description": "Secondary Heating", - "transaction-type": "Reason for last EPC", - "energy-consumption-current": "Heat Demand (kWh/m2)", - } - ), - how="left", - on="row_id" - ) + ) # .merge( + # pd.DataFrame(additional_properties_epcs)[ + # [ + # "row_id", + # "property-type", + # "built-form", + # "inspection-date", + # "current-energy-rating", + # "current-energy-efficiency", + # "roof-description", + # "walls-description", + # "transaction-type", + # "secondheat-description", + # "total-floor-area", + # "construction-age-band", + # "floor-height", + # "number-habitable-rooms", + # "mainheat-description", + # "energy-consumption-current" + # ] + # ].rename( + # columns={ + # "inspection-date": "Date of last EPC", + # "current-energy-efficiency": "SAP score on register", + # "current-energy-rating": "EPC rating on register", + # "property-type": "Property Type", + # "built-form": "Archetype", + # "total-floor-area": "Property Floor Area", + # "construction-age-band": "Property Age Band", + # "floor-height": "Property Floor Height", + # "number-habitable-rooms": "Number of Habitable Rooms", + # "walls-description": "Wall Construction", + # "roof-description": "Roof Construction", + # "mainheat-description": "Heating Type", + # "secondheat-description": "Secondary Heating", + # "transaction-type": "Reason for last EPC", + # "energy-consumption-current": "Heat Demand (kWh/m2)", + # } + # ), + # how="left", + # on="row_id" + # ) # We save the data locally stonewater_cavity_properties.to_csv( - "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Stonewater Cavity Properties.csv", + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Stonewater Cavity Properties - priority " + "postcodes.csv", index=False ) additional_properties2.to_csv( - "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Stonewater Additional Cavity Properties.csv", + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Stonewater Additional Cavity Properties - " + "non-priority postcodes.csv", index=False ) # Save the survey findings diff --git a/etl/customers/waltham_forest/whlg eligibile properties.py b/etl/customers/waltham_forest/whlg eligibile properties.py index fee988c1..9e1949f7 100644 --- a/etl/customers/waltham_forest/whlg eligibile properties.py +++ b/etl/customers/waltham_forest/whlg eligibile properties.py @@ -44,6 +44,10 @@ epc_data["has_conservation_restrictions"] = ( | (epc_data["is_heritage_building"] == True) ) +whlg_eligible_postcodes["Local Authority"].value_counts() + +whlg_eligible_postcodes = whlg_eligible_postcodes[whlg_eligible_postcodes["Local Authority"] == "Waltham Forest"] + # Pathway 1: # Match based on eligible postcodes pathway1 = epc_data[epc_data["postcode"].isin(whlg_eligible_postcodes["Postcode"].values)] @@ -67,6 +71,10 @@ pathway1["EPC Date"] = pd.to_datetime(pathway1["EPC Date"]).dt.strftime("%Y-%m-% # Create a year EPC was lodged pathway1["EPC Year"] = pd.to_datetime(pathway1["EPC Date"]).dt.year +low_epc = pathway1[pathway1["EPC Rating"].isin(["F", "G"])] +low_epc["EPC Rating"].value_counts() +low_epc.tail(1)[["address", "postcode"]] + pathway1.to_csv( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Waltham Forest WHLG - Pathway 1 Eligibility.csv", index=False