pulled data needed for stonewater

This commit is contained in:
Khalim Conn-Kowlessar 2025-01-28 15:10:23 +00:00
parent 8922fc7b8f
commit 36bb4b0f27
7 changed files with 495 additions and 126 deletions

2
.idea/Model.iml generated
View file

@ -7,7 +7,7 @@
<sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
</content>
<orderEntry type="jdk" jdkName="Fastapi-backend" jdkType="Python SDK" />
<orderEntry type="jdk" jdkName="Stonewater-wave-3" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
<component name="PyNamespacePackagesService">

2
.idea/misc.xml generated
View file

@ -3,7 +3,7 @@
<component name="Black">
<option name="sdkName" value="Python 3.10 (backend)" />
</component>
<component name="ProjectRootManager" version="2" project-jdk-name="Fastapi-backend" project-jdk-type="Python SDK" />
<component name="ProjectRootManager" version="2" project-jdk-name="Stonewater-wave-3" project-jdk-type="Python SDK" />
<component name="PyCharmProfessionalAdvertiser">
<option name="shown" value="true" />
</component>

View file

@ -83,8 +83,11 @@ def api_call_decorator(func):
results = []
page_size = kwargs.get('page_size', None)
response_data = {}
n_calls = 0
while url:
logger.info("Making call for page: " + str(n_calls + 1))
n_calls += 1
response = requests.request(http_method, url, headers=self.headers, json=data)
# Handle the response
@ -93,6 +96,7 @@ def api_call_decorator(func):
if page_size:
results.extend(response_json.get('value', []))
url = response_json.get('@odata.nextLink', None)
logger.info(f"Next page URL: {url}")
else:
response_data = response_json # Capture the full response for consistency
break
@ -270,6 +274,48 @@ class SharePointClient:
return file_content
def download_sharepoint_folder(self, drive_id, folder_path, download_dir, excluded_file_types=None):
    """
    Recursively download a SharePoint folder into a local directory.

    Sub-folders found in the listing are mirrored underneath ``download_dir`` and processed with the
    same exclusion list as the top-level call.

    :param drive_id: The ID of the SharePoint drive.
    :param folder_path: The path of the folder in SharePoint.
    :param download_dir: The local directory to save the downloaded files (created if it does not exist).
    :param excluded_file_types: A list of file extensions, without the dot (e.g. ``["MOV"]``), to skip.
        The comparison is case-sensitive and uses the text after the last "." in the file name.
        Default is None, meaning nothing is excluded.
    """
    excluded_file_types = [] if excluded_file_types is None else excluded_file_types

    # Ensure the download directory exists
    os.makedirs(download_dir, exist_ok=True)

    # NOTE(review): no page_size is passed here; the paging loop in api_call_decorator appears to follow
    # '@odata.nextLink' only when page_size is given - confirm that large folders are not truncated.
    folder_contents = self.list_folder_contents(drive_id, folder_path)

    for item in folder_contents.get('value', []):
        item_name = item['name']

        if item.get('folder'):
            # Recurse into the sub-folder, forwarding the exclusion list so that excluded file types
            # are skipped at every depth and not only at the top level.
            self.download_sharepoint_folder(
                drive_id,
                f"{folder_path}/{item_name}",
                os.path.join(download_dir, item_name),
                excluded_file_types=excluded_file_types,
            )
            continue

        # It's a file: skip it if its extension is excluded
        if item_name.split(".")[-1] in excluded_file_types:
            continue

        download_url = item['@microsoft.graph.downloadUrl']
        logger.info(f"Downloading file: {item_name}")
        file_content = self.download_sharepoint_file(download_url)

        # Save the file locally
        file_path = os.path.join(download_dir, item_name)
        with open(file_path, 'wb') as f:
            f.write(file_content.read())
        logger.info(f"File saved to: {file_path}")
def app():
# Customers for WC 18/11/2024

View file

@ -2905,5 +2905,38 @@ def identify_incorrect_packages():
os.path.join(CUSTOMER_FOLDER_PATH, "Units with assigned packages - with flags.csv"), index=False
)
def revised_model():
"""
This function implements the revised model for Stonewater, where we are looking at new priority postcodes.
This work was undertaken in January 2025 (the input workbook lives under the "Jan 2025 Project" folder).

NOTE(review): this function is unfinished - it ends with a bare ``assert`` and selects a placeholder
column named "" (see the notes inline). Both need completing before this module can be imported.
"""
# 1) Create the new list of properties
new_priority_postcodes = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Jan 2025 Project/Updated 2025 to 2030 "
"priority list.xlsx"
)
# The archetyped workbook is read with header=4, i.e. column names are taken from the fifth row of the sheet
original_archetypes = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Stonewater SHDF_3_0_Board Triage 22.05.24 "
"- Archetyped V3.1.xlsx",
header=4
)
# Drop rows with no Address ID and rows whose Address ID cell is the literal text "Address ID"
# (presumably repeated header rows - TODO confirm), so that the column can be cast to int
original_archetypes = original_archetypes[~pd.isnull(original_archetypes["Address ID"])]
original_archetypes = original_archetypes[original_archetypes["Address ID"] != "Address ID"]
original_archetypes["Address ID"] = original_archetypes["Address ID"].astype(int)
original_archetypes = original_archetypes[
# NOTE(review): the third label is an empty-string placeholder; unless the sheet really has a column
# named "", this selection is expected to fail - fill in the intended column name
["Address ID", "Archetype ID", ""]
]
# Check if we have all of the addresses
# `missed` holds the unique Archetype IDs of archetyped addresses that do not appear in the new priority list
missed = original_archetypes[
~original_archetypes["Address ID"].isin(new_priority_postcodes["Address ID"].values)
]["Archetype ID"].unique()
# NOTE(review): bare `assert` with no condition is a SyntaxError - the intended check on `missed` is missing
assert
# if __name__ == "__main__":
# main()

View file

@ -0,0 +1,137 @@
import os
import shutil
from tqdm import tqdm
def delete_large_files(
    folder_path="/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Wave 2.1 Surveys",
):
    """
    Delete photos, designs and other bulky files which we don't need from the downloaded survey folders.

    The expected layout is ``folder_path/<sub-folder>/<property folder>/...``. For every property folder:

    1. a fixed set of sub-folders (photos, design info, installer info, ...) is removed wholesale;
    2. inside the RA Coordinator Info folder, ``.MOV`` and ``.jpg`` files and any "Property Pics" folder
       are removed.

    Deletions are permanent (``shutil.rmtree`` / ``os.remove``).

    :param folder_path: Root folder holding the sub-folders of property folders. Defaults to the local
        "Wave 2.1 Surveys" directory, so existing zero-argument calls behave as before.
    :return: None
    """
    # Sub-folders that are deleted outright. Several spellings/numberings of the same folder are listed;
    # matching against the directory listing is exact.
    folders_to_delete = (
        "1. RA Property Pics", "4. Air Tightness Tests", "5. RD Design Info",
        "1. RA Property PIcs", "Post EPC Photos", "4. RD Design Info",
        "5. Installer Info", "6. Trustmark lodgement", "7.Post Install Inspection Photos",
        "6. Trustmark Lodgement", "7. Post Inspection Photos",
    )
    # File suffixes removed from the coordinator info folder (exact-case match)
    media_suffixes = (".MOV", ".jpg")

    # Each sub-folder of the root holds the property folders
    for subfolder in os.listdir(folder_path):
        subfolder_path = os.path.join(folder_path, subfolder)
        if not os.path.isdir(subfolder_path):
            continue

        for property_name in tqdm(os.listdir(subfolder_path)):
            property_path = os.path.join(subfolder_path, property_name)
            if not os.path.isdir(property_path):
                continue

            property_contents = os.listdir(property_path)

            for folder_to_delete in folders_to_delete:
                if folder_to_delete not in property_contents:
                    continue
                folder_to_delete_path = os.path.join(property_path, folder_to_delete)
                if os.path.isdir(folder_to_delete_path):
                    # Delete the folder, even if it's not empty
                    shutil.rmtree(folder_to_delete_path)

            # The coordinator info folder is numbered "2." in some property folders and "1." in others
            if "2. RA Coordinator Info" in property_contents:
                coordinator_folder = "2. RA Coordinator Info"
            else:
                coordinator_folder = "1. RA Coordinator Info"
            coordinator_info_path = os.path.join(property_path, coordinator_folder)

            # Some property folders have neither variant; skip them instead of failing on listdir
            if not os.path.isdir(coordinator_info_path):
                continue

            coordinator_info_contents = os.listdir(coordinator_info_path)

            # Remove .MOV and .jpg files
            for file_name in coordinator_info_contents:
                if file_name.endswith(media_suffixes):
                    os.remove(os.path.join(coordinator_info_path, file_name))

            if "Property Pics" in coordinator_info_contents:
                property_pics_path = os.path.join(coordinator_info_path, "Property Pics")
                if os.path.isdir(property_pics_path):
                    # Delete folder and contents
                    shutil.rmtree(property_pics_path)
def download_data_from_sharepoint():
    """
    Download the retrofit assessment ("RA Coordinator Info") folder of each Stonewater property from
    SharePoint to the local machine.

    For each of the selected region folders under the Stonewater "Property ID Folders" location, every
    property folder is listed; the first sub-folder whose name contains "ra coordinator info"
    (case-insensitive) is downloaded, skipping ``MOV`` files, to
    ``<local root>/<region>/<property>/<sub-folder>``. Properties without such a sub-folder are skipped.

    The SharePoint client secret is read from the ``SHAREPOINT_CLIENT_SECRET`` environment variable and is
    no longer kept in source. NOTE(review): the secret that was previously committed here should be
    treated as compromised and rotated.

    :raises RuntimeError: if ``SHAREPOINT_CLIENT_SECRET`` is not set.
    """
    from etl.access_reporting.app import SharePointClient

    client_secret = os.environ.get("SHAREPOINT_CLIENT_SECRET")
    if not client_secret:
        raise RuntimeError(
            "SHAREPOINT_CLIENT_SECRET is not set; export it before running download_data_from_sharepoint()"
        )

    sharepoint_client = SharePointClient(
        tenant_id="10d5af8b-2cfd-4882-9ccd-b96e4812dacf",
        client_id="6832a4c5-fb8c-4082-a746-4f51e1020f0d",
        client_secret=client_secret,
        site_id="bc925a9a-ad0b-4de9-9a3c-e61014cc7489"
    )

    remote_root = "Osmosis ACD/Osmosis ACD Projects/Stonewater/Stonewater Property ID Folders"
    local_root = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Wave 2.1 Surveys"
    regions_to_pull = ("3. Wiltshire", "4. Bournemouth", "5. Coventry")
    drive_id = sharepoint_client.document_drive["id"]

    # List the region folders and keep only the ones we want to pull
    contents = sharepoint_client.list_folder_contents(drive_id=drive_id, folder_path=remote_root)
    region_names = [folder["name"] for folder in contents["value"] if folder["name"] in regions_to_pull]

    for region_name in region_names:
        # SharePoint paths always use "/" regardless of the local platform's separator
        region_path = f"{remote_root}/{region_name}"
        region_contents = sharepoint_client.list_folder_contents(
            drive_id=drive_id,
            folder_path=region_path,
            page_size=100
        )

        for property_folder in region_contents["value"]:
            property_name = property_folder["name"]
            property_path = f"{region_path}/{property_name}"

            # We go into each property folder and get the contents
            property_folder_contents = sharepoint_client.list_folder_contents(
                drive_id=drive_id,
                folder_path=property_path
            )

            # We look for the retrofit assessment folder:
            ra_folders = [
                f for f in property_folder_contents["value"] if "ra coordinator info" in f["name"].lower()
            ]
            if not ra_folders:
                continue

            # If we have one, download it (first match only) into the mirrored local location
            ra_folder_name = ra_folders[0]["name"]
            sharepoint_client.download_sharepoint_folder(
                drive_id=drive_id,
                folder_path=f"{property_path}/{ra_folder_name}",
                download_dir=os.path.join(local_root, region_name, property_name, ra_folder_name),
                excluded_file_types=["MOV"]
            )

View file

@ -7,6 +7,8 @@ from tqdm import tqdm
from dotenv import load_dotenv
from backend.SearchEpc import SearchEpc
from utils.s3 import read_from_s3, read_pickle_from_s3
import msoffcrypto
from io import BytesIO
load_dotenv(dotenv_path="backend/.env")
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
@ -64,6 +66,28 @@ def app():
This code creates a list of cavity properties, for review
"""
# Read in the password protected master
# TODO: This file should be deleted!
# Path to the password-protected Excel file
file_path = ("/Users/khalimconn-kowlessar/Downloads/STONEWATER MASTER SHEET - UPDATED 20.5.24 - K- PASSWORD "
"PROTECTED.xlsx")
password = "STONE123" # Replace with the actual password
# Open the file and decrypt it
with open(file_path, "rb") as f:
decrypted_file = BytesIO()
office_file = msoffcrypto.OfficeFile(f)
office_file.load_key(password=password)
office_file.decrypt(decrypted_file)
# Read the decrypted file into a DataFrame
eco_rolling_master = pd.read_excel(decrypted_file, sheet_name="Sheet1", engine="openpyxl")
eco_rolling_master = eco_rolling_master[
~eco_rolling_master['INSTALL/CANCELLATION DATE'].str.contains("CANCELLED")
]
archetyped_properties = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Stonewater SHDF_3_0_Board Triage 22.05.24 - "
"Archetyped V3.1.xlsx",
@ -116,13 +140,16 @@ def app():
features_to_merge = features[
[
"Address ID", "Age", "Property Type", "Walls", "Roofs", "Glazing", "Heating", "Main Fuel", "Hot Water",
"Address ID", "Organisation Reference", "Age", "Property Type", "Walls", "Roofs", "Glazing", "Heating",
"Main Fuel",
"Hot Water",
"Renewables", "Total Floor Area"
]
]
stonewater_cavity_properties = archetyped_properties[
["Name", "Postcode", "Osm. ID", "Address ID", "UPRN", "UDPRN", "Archetype ID", "House no", "Street name",
["Name", "Postcode", "Osm. ID", "Org. ref.", "Address ID", "UPRN", "UDPRN", "Archetype ID", "House no",
"Street name",
"Address line 2", "City/Town", "Is Cavity Property", "Survey shows CWI needed for Archetype"]
].merge(
features_to_merge, how="left", on="Address ID"
@ -166,77 +193,137 @@ def app():
stonewater_cavity_properties["Reason Included"]
)
# We flag units that were installed under ECO3
numeric_ids = eco_rolling_master[eco_rolling_master["STONEWATER UPRN"] != "NOT ON ASSET LIST"]
numeric_ids = numeric_ids[~pd.isnull(numeric_ids["STONEWATER UPRN"])]
numeric_ids["STONEWATER UPRN"] = numeric_ids["STONEWATER UPRN"].astype(int)
stonewater_cavity_properties["Installed under ECO3"] = stonewater_cavity_properties["Org. ref."].isin(
numeric_ids['STONEWATER UPRN'].values
)
# Which postcodes were installed under ECO3
priority_list_eco3 = stonewater_cavity_properties[
stonewater_cavity_properties["Installed under ECO3"]
]["Postcode"].unique()
# These are properties that were not installed under ECO3, that have the same postcodes as properties
# installed under ECO3
# These are 66 properties we might want to start with as an immediate priority
stonewater_cavity_properties["Same Postcode as Installed under ECO3"] = (
~stonewater_cavity_properties["Installed under ECO3"] & (
stonewater_cavity_properties["Postcode"].isin(priority_list_eco3)
)
)
# We get the EPC data
epc_data = json.loads(
read_from_s3(
bucket_name="retrofit-data-dev",
s3_file_name="customers/Stonewater/clustering/epc_data.json"
)
)
epc_data = pd.DataFrame(epc_data)
epc_data["uprn"] = np.where(
epc_data["internal_id"] == 1091,
83143766,
epc_data["uprn"]
)
epc_data_batch_2 = read_pickle_from_s3(
s3_file_name="customers/Stonewater/clustering/epc_data_batch_2.pkl",
bucket_name="retrofit-data-dev"
)
epc_data_batch_2 = pd.DataFrame(epc_data_batch_2)
complete_epcs = pd.concat([epc_data, epc_data_batch_2])
epcs_to_merge = complete_epcs[
[
"uprn",
"address",
"postcode",
"property-type",
"built-form",
"inspection-date",
"current-energy-rating",
"current-energy-efficiency",
"roof-description",
"walls-description",
"transaction-type",
"secondheat-description",
"total-floor-area",
"construction-age-band",
"floor-height",
"number-habitable-rooms",
"mainheat-description",
"energy-consumption-current"
]
].rename(
columns={
"address": "Address",
"postcode": "Postcode",
"inspection-date": "Date of last EPC",
"current-energy-efficiency": "SAP score on register",
"current-energy-rating": "EPC rating on register",
"property-type": "Property Type",
"built-form": "Archetype",
"total-floor-area": "Property Floor Area",
"construction-age-band": "Property Age Band",
"floor-height": "Property Floor Height",
"number-habitable-rooms": "Number of Habitable Rooms",
"walls-description": "Wall Construction",
"roof-description": "Roof Construction",
"mainheat-description": "Heating Type",
"secondheat-description": "Secondary Heating",
"transaction-type": "Reason for last EPC",
"energy-consumption-current": "Heat Demand (kWh/m2)",
}
)
# We de-dupe, taking the newest on the date the EPC was lod
epcs_to_merge["Date of last EPC"] = pd.to_datetime(epcs_to_merge["Date of last EPC"])
epcs_to_merge = epcs_to_merge.sort_values("Date of last EPC", ascending=False)
epcs_to_merge = epcs_to_merge.drop_duplicates(subset="uprn")
# epc_data = json.loads(
# read_from_s3(
# bucket_name="retrofit-data-dev",
# s3_file_name="customers/Stonewater/clustering/epc_data.json"
# )
# )
# epc_data = pd.DataFrame(epc_data)
#
# epc_data["uprn"] = np.where(
# epc_data["internal_id"] == 1091,
# 83143766,
# epc_data["uprn"]
# )
#
# epc_data_batch_2 = read_pickle_from_s3(
# s3_file_name="customers/Stonewater/clustering/epc_data_batch_2.pkl",
# bucket_name="retrofit-data-dev"
# )
# epc_data_batch_2 = pd.DataFrame(epc_data_batch_2)
#
# complete_epcs = pd.concat([epc_data, epc_data_batch_2])
#
# epcs_to_merge = complete_epcs[
# [
# "uprn",
# "address",
# "postcode",
# "property-type",
# "built-form",
# "inspection-date",
# "current-energy-rating",
# "current-energy-efficiency",
# "roof-description",
# "walls-description",
# "transaction-type",
# "secondheat-description",
# "total-floor-area",
# "construction-age-band",
# "floor-height",
# "number-habitable-rooms",
# "mainheat-description",
# "energy-consumption-current"
# ]
# ].rename(
# columns={
# "address": "Address",
# "postcode": "Postcode",
# "inspection-date": "Date of last EPC",
# "current-energy-efficiency": "SAP score on register",
# "current-energy-rating": "EPC rating on register",
# "property-type": "Property Type",
# "built-form": "Archetype",
# "total-floor-area": "Property Floor Area",
# "construction-age-band": "Property Age Band",
# "floor-height": "Property Floor Height",
# "number-habitable-rooms": "Number of Habitable Rooms",
# "walls-description": "Wall Construction",
# "roof-description": "Roof Construction",
# "mainheat-description": "Heating Type",
# "secondheat-description": "Secondary Heating",
# "transaction-type": "Reason for last EPC",
# "energy-consumption-current": "Heat Demand (kWh/m2)",
# }
# )
# # We de-dupe, taking the newest on the date the EPC was lodged
# epcs_to_merge["Date of last EPC"] = pd.to_datetime(epcs_to_merge["Date of last EPC"])
# epcs_to_merge = epcs_to_merge.sort_values("Date of last EPC", ascending=False)
# epcs_to_merge = epcs_to_merge.drop_duplicates(subset="uprn")
stonewater_cavity_properties["UPRN"] = stonewater_cavity_properties["UPRN"].astype("Int64").astype(str)
stonewater_cavity_properties["Reason Included"].value_counts()
# Find the postcodes where an Osmosis survey revealed a need for CWI
postcodes_found_needing_cwi = stonewater_cavity_properties[
stonewater_cavity_properties["Reason Included"].isin(
[
"Survey revealed potential need for CWI or extract and re-fill",
"Surveyed revealed potential need for CWI or extract and re-fill and is an as built cavity property",
"Survey showed this property needs CWI",
"Survey showed this property could need extract and re-fill"
]
)
]["Postcode"].unique()
stonewater_cavity_properties["Suspected Needs CWI - not surveyed"] = (
(
stonewater_cavity_properties[
"Postcode"].isin(
postcodes_found_needing_cwi)
) & (
~stonewater_cavity_properties[
"Reason Included"].isin(
[
"Survey revealed potential need "
"for CWI or extract and re-fill",
"Surveyed revealed potential "
"need for CWI or extract and "
"re-fill and is an as built "
"cavity property",
"Survey showed this property "
"needs CWI",
"Survey showed this property "
"could need extract and re-fill"
]
)
)
)
# Merge the EPCs on, with the data we need
stonewater_cavity_properties = stonewater_cavity_properties.rename(
@ -252,12 +339,12 @@ def app():
"Renewables": "Parity - Renewables",
"Total Floor Area": "Parity - Total Floor Area"
}
).merge(
epcs_to_merge,
how="left",
left_on="UPRN",
right_on="uprn"
)
) # .merge(
# epcs_to_merge,
# how="left",
# left_on="UPRN",
# right_on="uprn"
# )
# We now flag the additional properties in the as built list
@ -288,8 +375,56 @@ def app():
additional_properties = additional_properties.merge(house_numbers, how="left", on="Address ID")
additional_properties["row_id"] = additional_properties["Address ID"].copy()
# Flag any units in this list that were installed under ECO3
additional_properties["Installed under ECO3"] = additional_properties["Organisation Reference"].isin(
numeric_ids['STONEWATER UPRN'].values
)
# Additional list ECO3
additional_list_eco3 = additional_properties[additional_properties["Installed under ECO3"]]["Postcode"].unique()
# These are properties that were not installed under ECO3, that have the same postcodes as properties
# installed under ECO3
# These are 297 properties we might want to start with as an immediate priority
additional_properties["Same Postcode as Installed under ECO3"] = (
~additional_properties["Installed under ECO3"] & (
additional_properties["Postcode"].isin(additional_list_eco3)
)
)
# We do some additional manual checks for ECO3 properties that were installed but didn't get matched to either
# dataset
numeric_ids["In asset list"] = numeric_ids["STONEWATER UPRN"].isin(
stonewater_cavity_properties['Org. ref.'].astype(int).values
)
numeric_ids["In asset list"] = numeric_ids["In asset list"] | (
numeric_ids["STONEWATER UPRN"].isin(
additional_properties['Organisation Reference'].astype(int).values
)
)
# eco3_installs_not_in_asset_list = numeric_ids[~numeric_ids["In asset list"]]
# # We now take samples of properties randomly and manually check the ID against the asset list
# print(eco3_installs_not_in_asset_list.sample(1)[["STONEWATER UPRN", "Post Code", "NO ", "Street / Block Name", ]])
# # Checked STONEWATER UPRN
# # 9862, BH15 1NR, 33, THE QUAY FOYER [x]
# # 12785, S01 66PN, 57, SEACOLE GARDENS [x]
# # 26071, MK42 0TE, 51, De Havilland Avenue, Shortstown [x]
# # 18213, HR6 9UW, 20 Ford Street [x]
# # 24344, LU4 9FF, 6 SEAL CLOSE [x]
# # 31222, SN14 0QZ, 7 HARDBROOK COURT [x]
# # 9343, SP4 7XL, 10 OAK PLACE [x]
# # 34730, LU5 5TN, 4 TUDOR DRIVE [x]
# # 7021, BN27 2BZ, 32 BUTTS FIELD []
#
# stonewater_cavity_properties[stonewater_cavity_properties['Org. ref.'] == 7021]
# stonewater_cavity_properties[stonewater_cavity_properties['Postcode'] == "BN27 2BZ"]["Name"]
#
# additional_properties[additional_properties['Organisation Reference'] == 7021]
# additional_properties[additional_properties['Postcode'] == "BN27 2BZ"][["Address"]]
# Pull the EPCs for these properties
additional_properties_epcs, errors = get_data(additional_properties)
# additional_properties_epcs, errors = get_data(additional_properties)
# Save this data as a pickle
# import pickle
@ -297,12 +432,20 @@ def app():
# "wb") as f:
# pickle.dump(additional_properties_epcs, f)
additional_properties["Suspected Needs CWI - not surveyed"] = (
(
additional_properties["Postcode"].isin(postcodes_found_needing_cwi)
)
)
additional_properties["Same Postcode as Installed under ECO3"].value_counts()
# We drop Full Address
additional_properties = additional_properties.drop(columns=["Full Address"])
additional_properties2 = additional_properties[[
"row_id", "Address", "Postcode", "Address ID", "SAP", "SAP Band", "Property Type", "Walls", "Roofs", "Glazing",
"Heating", "Main Fuel", "Hot Water", "Renewables", "Total Floor Area",
"Address", "Postcode", "Address ID", "SAP", "SAP Band", "Property Type", "Walls", "Roofs", "Glazing",
"Heating", "Main Fuel", "Hot Water", "Renewables", "Total Floor Area", 'Installed under ECO3',
'Same Postcode as Installed under ECO3'
]].rename(
columns={
"SAP": "Parity - Predicted SAP",
@ -318,56 +461,58 @@ def app():
"Renewables": "Parity - Renewables",
"Total Floor Area": "Parity - Total Floor Area"
}
).merge(
pd.DataFrame(additional_properties_epcs)[
[
"row_id",
"property-type",
"built-form",
"inspection-date",
"current-energy-rating",
"current-energy-efficiency",
"roof-description",
"walls-description",
"transaction-type",
"secondheat-description",
"total-floor-area",
"construction-age-band",
"floor-height",
"number-habitable-rooms",
"mainheat-description",
"energy-consumption-current"
]
].rename(
columns={
"inspection-date": "Date of last EPC",
"current-energy-efficiency": "SAP score on register",
"current-energy-rating": "EPC rating on register",
"property-type": "Property Type",
"built-form": "Archetype",
"total-floor-area": "Property Floor Area",
"construction-age-band": "Property Age Band",
"floor-height": "Property Floor Height",
"number-habitable-rooms": "Number of Habitable Rooms",
"walls-description": "Wall Construction",
"roof-description": "Roof Construction",
"mainheat-description": "Heating Type",
"secondheat-description": "Secondary Heating",
"transaction-type": "Reason for last EPC",
"energy-consumption-current": "Heat Demand (kWh/m2)",
}
),
how="left",
on="row_id"
)
) # .merge(
# pd.DataFrame(additional_properties_epcs)[
# [
# "row_id",
# "property-type",
# "built-form",
# "inspection-date",
# "current-energy-rating",
# "current-energy-efficiency",
# "roof-description",
# "walls-description",
# "transaction-type",
# "secondheat-description",
# "total-floor-area",
# "construction-age-band",
# "floor-height",
# "number-habitable-rooms",
# "mainheat-description",
# "energy-consumption-current"
# ]
# ].rename(
# columns={
# "inspection-date": "Date of last EPC",
# "current-energy-efficiency": "SAP score on register",
# "current-energy-rating": "EPC rating on register",
# "property-type": "Property Type",
# "built-form": "Archetype",
# "total-floor-area": "Property Floor Area",
# "construction-age-band": "Property Age Band",
# "floor-height": "Property Floor Height",
# "number-habitable-rooms": "Number of Habitable Rooms",
# "walls-description": "Wall Construction",
# "roof-description": "Roof Construction",
# "mainheat-description": "Heating Type",
# "secondheat-description": "Secondary Heating",
# "transaction-type": "Reason for last EPC",
# "energy-consumption-current": "Heat Demand (kWh/m2)",
# }
# ),
# how="left",
# on="row_id"
# )
# We save the data locally
stonewater_cavity_properties.to_csv(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Stonewater Cavity Properties.csv",
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Stonewater Cavity Properties - priority "
"postcodes.csv",
index=False
)
additional_properties2.to_csv(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Stonewater Additional Cavity Properties.csv",
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Stonewater Additional Cavity Properties - "
"non-priority postcodes.csv",
index=False
)
# Save the survey findings

View file

@ -44,6 +44,10 @@ epc_data["has_conservation_restrictions"] = (
| (epc_data["is_heritage_building"] == True)
)
whlg_eligible_postcodes["Local Authority"].value_counts()
whlg_eligible_postcodes = whlg_eligible_postcodes[whlg_eligible_postcodes["Local Authority"] == "Waltham Forest"]
# Pathway 1:
# Match based on eligible postcodes
pathway1 = epc_data[epc_data["postcode"].isin(whlg_eligible_postcodes["Postcode"].values)]
@ -67,6 +71,10 @@ pathway1["EPC Date"] = pd.to_datetime(pathway1["EPC Date"]).dt.strftime("%Y-%m-%
# Create a year EPC was lodged
pathway1["EPC Year"] = pd.to_datetime(pathway1["EPC Date"]).dt.year
low_epc = pathway1[pathway1["EPC Rating"].isin(["F", "G"])]
low_epc["EPC Rating"].value_counts()
low_epc.tail(1)[["address", "postcode"]]
pathway1.to_csv(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Waltham Forest WHLG - Pathway 1 Eligibility.csv",
index=False