mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
pulled data needed for stonewater
This commit is contained in:
parent
8922fc7b8f
commit
36bb4b0f27
7 changed files with 495 additions and 126 deletions
2
.idea/Model.iml
generated
2
.idea/Model.iml
generated
|
|
@ -7,7 +7,7 @@
|
|||
<sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
|
||||
</content>
|
||||
<orderEntry type="jdk" jdkName="Fastapi-backend" jdkType="Python SDK" />
|
||||
<orderEntry type="jdk" jdkName="Stonewater-wave-3" jdkType="Python SDK" />
|
||||
<orderEntry type="sourceFolder" forTests="false" />
|
||||
</component>
|
||||
<component name="PyNamespacePackagesService">
|
||||
|
|
|
|||
2
.idea/misc.xml
generated
2
.idea/misc.xml
generated
|
|
@ -3,7 +3,7 @@
|
|||
<component name="Black">
|
||||
<option name="sdkName" value="Python 3.10 (backend)" />
|
||||
</component>
|
||||
<component name="ProjectRootManager" version="2" project-jdk-name="Fastapi-backend" project-jdk-type="Python SDK" />
|
||||
<component name="ProjectRootManager" version="2" project-jdk-name="Stonewater-wave-3" project-jdk-type="Python SDK" />
|
||||
<component name="PyCharmProfessionalAdvertiser">
|
||||
<option name="shown" value="true" />
|
||||
</component>
|
||||
|
|
|
|||
|
|
@ -83,8 +83,11 @@ def api_call_decorator(func):
|
|||
results = []
|
||||
page_size = kwargs.get('page_size', None)
|
||||
response_data = {}
|
||||
n_calls = 0
|
||||
|
||||
while url:
|
||||
logger.info("Making call for page: " + str(n_calls + 1))
|
||||
n_calls += 1
|
||||
response = requests.request(http_method, url, headers=self.headers, json=data)
|
||||
|
||||
# Handle the response
|
||||
|
|
@ -93,6 +96,7 @@ def api_call_decorator(func):
|
|||
if page_size:
|
||||
results.extend(response_json.get('value', []))
|
||||
url = response_json.get('@odata.nextLink', None)
|
||||
logger.info(f"Next page URL: {url}")
|
||||
else:
|
||||
response_data = response_json # Capture the full response for consistency
|
||||
break
|
||||
|
|
@ -270,6 +274,48 @@ class SharePointClient:
|
|||
|
||||
return file_content
|
||||
|
||||
def download_sharepoint_folder(self, drive_id, folder_path, download_dir, excluded_file_types=None):
    """
    Recursively downloads a SharePoint folder to the specified local directory.

    The folder is listed with ``self.list_folder_contents``. Every entry carrying a truthy ``folder`` key is
    treated as a sub-folder and mirrored under ``download_dir``; every other entry is fetched through its
    ``@microsoft.graph.downloadUrl`` with ``self.download_sharepoint_file`` and written to disk.

    :param drive_id: The ID of the SharePoint drive.
    :param folder_path: The path of the folder in SharePoint.
    :param download_dir: The local directory to save the downloaded files (created if it does not exist).
    :param excluded_file_types: A list of file extensions, without the leading dot, to skip (default is None,
        meaning nothing is skipped). Matching is case-sensitive and is applied at every depth of the tree.
    """
    excluded_file_types = [] if excluded_file_types is None else excluded_file_types

    # Ensure the download directory exists
    os.makedirs(download_dir, exist_ok=True)

    # List folder contents
    # NOTE(review): no page_size is passed here, so only what a single listing call returns is processed -
    # confirm this is sufficient for large folders.
    folder_contents = self.list_folder_contents(drive_id, folder_path)

    for item in folder_contents.get('value', []):
        item_name = item['name']

        if item.get('folder'):
            # Recurse into the sub-folder. The exclusions must be forwarded explicitly, otherwise the nested
            # call falls back to "exclude nothing" and downloads the file types the caller asked to skip.
            self.download_sharepoint_folder(
                drive_id,
                f"{folder_path}/{item_name}",
                os.path.join(download_dir, item_name),
                excluded_file_types=excluded_file_types,
            )
            continue

        # It's a file: skip it when its extension (text after the last dot) is excluded
        if item_name.split(".")[-1] in excluded_file_types:
            continue

        download_url = item['@microsoft.graph.downloadUrl']

        logger.info(f"Downloading file: {item_name}")
        file_content = self.download_sharepoint_file(download_url)

        # Save the file locally
        file_path = os.path.join(download_dir, item_name)
        with open(file_path, 'wb') as f:
            f.write(file_content.read())

        logger.info(f"File saved to: {file_path}")
|
||||
|
||||
|
||||
def app():
|
||||
# Customers for WC 18/11/2024
|
||||
|
|
|
|||
|
|
@ -2905,5 +2905,38 @@ def identify_incorrect_packages():
|
|||
os.path.join(CUSTOMER_FOLDER_PATH, "Units with assigned packages - with flags.csv"), index=False
|
||||
)
|
||||
|
||||
|
||||
def revised_model():
    """
    This function implements the revised model for Stonewater, where we are looking at new priority postcodes.

    It reads the updated priority list and the original archetyped asset list from local Excel files, cleans
    the ``Address ID`` column of the archetyped list and checks the two lists against each other.

    NOTE(review): the original docstring dated this work January 2021, but the input files sit under a
    "Jan 2025 Project" folder - confirm the date.
    """

    # 1) Create the new list of properties

    new_priority_postcodes = pd.read_excel(
        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Jan 2025 Project/Updated 2025 to 2030 "
        "priority list.xlsx"
    )

    original_archetypes = pd.read_excel(
        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Stonewater SHDF_3_0_Board Triage 22.05.24 "
        "- Archetyped V3.1.xlsx",
        header=4
    )

    # Drop rows without an Address ID and rows where the header text is repeated in the data, so that the
    # column can be cast to int. The copy avoids assigning into a filtered view of the frame.
    has_address_id = ~pd.isnull(original_archetypes["Address ID"])
    is_header_row = original_archetypes["Address ID"] == "Address ID"
    original_archetypes = original_archetypes[has_address_id & ~is_header_row].copy()
    original_archetypes["Address ID"] = original_archetypes["Address ID"].astype(int)

    # NOTE(review): the original selection listed a third, empty column name (""), which raises a KeyError.
    # It has been dropped here - add the intended column back once it is known.
    original_archetypes = original_archetypes[["Address ID", "Archetype ID"]]

    # Check if we have all of the addresses: collect the archetypes of any address in the original list that
    # does not appear in the new priority list
    missed = original_archetypes[
        ~original_archetypes["Address ID"].isin(new_priority_postcodes["Address ID"].values)
    ]["Archetype ID"].unique()

    # NOTE(review): the original left this `assert` without a condition (a syntax error). The condition below
    # is the assumed intent - that no address is missing from the new list - confirm before relying on it.
    assert len(missed) == 0, f"Archetypes with addresses missing from the new priority list: {list(missed)}"
|
||||
|
||||
# if __name__ == "__main__":
|
||||
# main()
|
||||
|
|
|
|||
137
etl/customers/stonewater/data_cleaning.py
Normal file
137
etl/customers/stonewater/data_cleaning.py
Normal file
|
|
@ -0,0 +1,137 @@
|
|||
import os
|
||||
import shutil
|
||||
from tqdm import tqdm
|
||||
|
||||
|
||||
def delete_large_files(
    folder_path="/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Wave 2.1 Surveys"
):
    """
    This function deletes photos, designs and other files which we don't need.

    The expected layout is ``folder_path/<sub-folder>/<property folder>/...``. For every property folder it:

    1. removes a fixed set of named sub-folders (photos, design info, lodgement documents, ...) together with
       their contents;
    2. inside the RA coordinator folder ("2. RA Coordinator Info", falling back to "1. RA Coordinator Info"),
       removes every entry ending in ``.MOV`` or ``.jpg`` and the "Property Pics" folder.

    Properties that have no coordinator folder are left untouched in step 2 instead of raising.

    :param folder_path: Root folder holding the survey sub-folders. Defaults to the local Wave 2.1 surveys folder.
    :return: None
    """
    # Folder names are matched exactly, so spelling/case variants seen in the data are listed separately
    folders_to_delete = (
        "1. RA Property Pics", "4. Air Tightness Tests", "5. RD Design Info",
        "1. RA Property PIcs", "Post EPC Photos", "4. RD Design Info",
        "5. Installer Info", "6. Trustmark lodgement", "7.Post Install Inspection Photos",
        "6. Trustmark Lodgement", "7. Post Inspection Photos",
    )
    # Listed in order of preference
    coordinator_folder_names = ("2. RA Coordinator Info", "1. RA Coordinator Info")

    # In each sub-folder of the root we have the property folders
    for subfolder in os.listdir(folder_path):
        subfolder_path = os.path.join(folder_path, subfolder)
        if not os.path.isdir(subfolder_path):
            continue

        for property_name in tqdm(os.listdir(subfolder_path)):
            property_path = os.path.join(subfolder_path, property_name)
            if not os.path.isdir(property_path):
                continue

            property_contents = os.listdir(property_path)

            for folder_to_delete in folders_to_delete:
                if folder_to_delete not in property_contents:
                    continue
                folder_to_delete_path = os.path.join(property_path, folder_to_delete)
                if os.path.isdir(folder_to_delete_path):
                    # Delete the folder, even if it's not empty
                    shutil.rmtree(folder_to_delete_path)

            # Locate the coordinator folder; not every property has one, in which case there is nothing
            # further to clean (previously this case raised FileNotFoundError and aborted the whole run)
            coordinator_info_path = next(
                (
                    os.path.join(property_path, name)
                    for name in coordinator_folder_names
                    if os.path.isdir(os.path.join(property_path, name))
                ),
                None,
            )
            if coordinator_info_path is None:
                continue

            coordinator_info_contents = os.listdir(coordinator_info_path)

            # Look for .MOV files and .jpg files
            for file_name in coordinator_info_contents:
                if file_name.endswith((".MOV", ".jpg")):
                    os.remove(os.path.join(coordinator_info_path, file_name))

            if "Property Pics" in coordinator_info_contents:
                # Delete folder and contents
                shutil.rmtree(os.path.join(coordinator_info_path, "Property Pics"))
|
||||
|
||||
|
||||
def download_data_from_sharepoint(
    base_folder="Osmosis ACD/Osmosis ACD Projects/Stonewater/Stonewater Property ID Folders",
    region_folders=("3. Wiltshire", "4. Bournemouth", "5. Coventry"),
    download_root="/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Wave 2.1 Surveys",
):
    """
    Download the retrofit assessment folders for Stonewater properties from SharePoint.

    For every region folder under ``base_folder`` whose name is in ``region_folders``, each property folder is
    listed and the first sub-folder whose name contains "ra coordinator info" (case-insensitive) is downloaded
    to ``download_root/<region>/<property>/<sub-folder>``. ``MOV`` files are excluded from the download.

    SharePoint credentials are read from the ``SHAREPOINT_TENANT_ID``, ``SHAREPOINT_CLIENT_ID``,
    ``SHAREPOINT_CLIENT_SECRET`` and ``SHAREPOINT_SITE_ID`` environment variables when set, falling back to the
    values previously hard-coded here.

    :param base_folder: SharePoint path of the folder that holds the region folders.
    :param region_folders: Names of the region folders to pull.
    :param download_root: Local directory under which the folders are mirrored.
    """
    # Imported here rather than at module level, as in the original code
    from etl.access_reporting.app import SharePointClient

    # NOTE(security): these fallback credentials are committed to source control. The client secret should be
    # rotated and the literals removed once the environment variables are configured everywhere this runs.
    sharepoint_client = SharePointClient(
        tenant_id=os.environ.get("SHAREPOINT_TENANT_ID", "10d5af8b-2cfd-4882-9ccd-b96e4812dacf"),
        client_id=os.environ.get("SHAREPOINT_CLIENT_ID", "6832a4c5-fb8c-4082-a746-4f51e1020f0d"),
        client_secret=os.environ.get("SHAREPOINT_CLIENT_SECRET", "xpC8Q~Frww48SM1V-D8lGy5iOY7P_cJ7FF3jgarQ"),
        site_id=os.environ.get("SHAREPOINT_SITE_ID", "bc925a9a-ad0b-4de9-9a3c-e61014cc7489")
    )
    drive_id = sharepoint_client.document_drive["id"]

    # Retrieve the region folders from SharePoint
    contents = sharepoint_client.list_folder_contents(drive_id=drive_id, folder_path=base_folder)
    folders_to_pull = [folder for folder in contents["value"] if folder["name"] in region_folders]

    for folder_to_pull in folders_to_pull:
        # SharePoint paths always use "/" as separator, so they are built with string formatting rather than
        # os.path.join (which would use the local platform's separator)
        region_path = f"{base_folder}/{folder_to_pull['name']}"

        # Get the property folders in this region
        folder_contents = sharepoint_client.list_folder_contents(
            drive_id=drive_id,
            folder_path=region_path,
            page_size=100
        )

        for property_folder in folder_contents["value"]:
            # We go into each property folder and get the contents
            property_path = f"{region_path}/{property_folder['name']}"
            property_folder_contents = sharepoint_client.list_folder_contents(
                drive_id=drive_id,
                folder_path=property_path
            )

            # We look for the retrofit assessment folder:
            property_sub_folders = [
                f for f in property_folder_contents["value"] if "ra coordinator info" in f["name"].lower()
            ]
            if not property_sub_folders:
                continue

            # If we have this, we download the folder and store it locally
            property_sub_folder = property_sub_folders[0]

            download_dir = os.path.join(
                download_root,
                folder_to_pull["name"],
                property_folder["name"],
                property_sub_folder["name"]
            )

            sharepoint_client.download_sharepoint_folder(
                drive_id=drive_id,
                folder_path=f"{property_path}/{property_sub_folder['name']}",
                download_dir=download_dir,
                excluded_file_types=["MOV"]
            )
|
||||
|
|
@ -7,6 +7,8 @@ from tqdm import tqdm
|
|||
from dotenv import load_dotenv
|
||||
from backend.SearchEpc import SearchEpc
|
||||
from utils.s3 import read_from_s3, read_pickle_from_s3
|
||||
import msoffcrypto
|
||||
from io import BytesIO
|
||||
|
||||
load_dotenv(dotenv_path="backend/.env")
|
||||
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
|
||||
|
|
@ -64,6 +66,28 @@ def app():
|
|||
This code creates a list of cavity properties, for review
|
||||
"""
|
||||
|
||||
# Read in the password protected master
|
||||
# TODO: This file should be deleted!
|
||||
|
||||
# Path to the password-protected Excel file
|
||||
file_path = ("/Users/khalimconn-kowlessar/Downloads/STONEWATER MASTER SHEET - UPDATED 20.5.24 - K- PASSWORD "
|
||||
"PROTECTED.xlsx")
|
||||
password = "STONE123" # Replace with the actual password
|
||||
|
||||
# Open the file and decrypt it
|
||||
with open(file_path, "rb") as f:
|
||||
decrypted_file = BytesIO()
|
||||
office_file = msoffcrypto.OfficeFile(f)
|
||||
office_file.load_key(password=password)
|
||||
office_file.decrypt(decrypted_file)
|
||||
|
||||
# Read the decrypted file into a DataFrame
|
||||
eco_rolling_master = pd.read_excel(decrypted_file, sheet_name="Sheet1", engine="openpyxl")
|
||||
|
||||
eco_rolling_master = eco_rolling_master[
|
||||
~eco_rolling_master['INSTALL/CANCELLATION DATE'].str.contains("CANCELLED")
|
||||
]
|
||||
|
||||
archetyped_properties = pd.read_excel(
|
||||
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Stonewater SHDF_3_0_Board Triage 22.05.24 - "
|
||||
"Archetyped V3.1.xlsx",
|
||||
|
|
@ -116,13 +140,16 @@ def app():
|
|||
|
||||
features_to_merge = features[
|
||||
[
|
||||
"Address ID", "Age", "Property Type", "Walls", "Roofs", "Glazing", "Heating", "Main Fuel", "Hot Water",
|
||||
"Address ID", "Organisation Reference", "Age", "Property Type", "Walls", "Roofs", "Glazing", "Heating",
|
||||
"Main Fuel",
|
||||
"Hot Water",
|
||||
"Renewables", "Total Floor Area"
|
||||
]
|
||||
]
|
||||
|
||||
stonewater_cavity_properties = archetyped_properties[
|
||||
["Name", "Postcode", "Osm. ID", "Address ID", "UPRN", "UDPRN", "Archetype ID", "House no", "Street name",
|
||||
["Name", "Postcode", "Osm. ID", "Org. ref.", "Address ID", "UPRN", "UDPRN", "Archetype ID", "House no",
|
||||
"Street name",
|
||||
"Address line 2", "City/Town", "Is Cavity Property", "Survey shows CWI needed for Archetype"]
|
||||
].merge(
|
||||
features_to_merge, how="left", on="Address ID"
|
||||
|
|
@ -166,77 +193,137 @@ def app():
|
|||
stonewater_cavity_properties["Reason Included"]
|
||||
)
|
||||
|
||||
# We flag units that were installed under ECO3
|
||||
numeric_ids = eco_rolling_master[eco_rolling_master["STONEWATER UPRN"] != "NOT ON ASSET LIST"]
|
||||
numeric_ids = numeric_ids[~pd.isnull(numeric_ids["STONEWATER UPRN"])]
|
||||
numeric_ids["STONEWATER UPRN"] = numeric_ids["STONEWATER UPRN"].astype(int)
|
||||
|
||||
stonewater_cavity_properties["Installed under ECO3"] = stonewater_cavity_properties["Org. ref."].isin(
|
||||
numeric_ids['STONEWATER UPRN'].values
|
||||
)
|
||||
|
||||
# Which postcodes were installed under ECO3
|
||||
priority_list_eco3 = stonewater_cavity_properties[
|
||||
stonewater_cavity_properties["Installed under ECO3"]
|
||||
]["Postcode"].unique()
|
||||
|
||||
# These are properties that were not installed under ECO3, that have the same postcodes as properties
|
||||
# installed under ECO3
|
||||
|
||||
# These are 66 properties we might want to start with as an immediate priority
|
||||
stonewater_cavity_properties["Same Postcode as Installed under ECO3"] = (
|
||||
~stonewater_cavity_properties["Installed under ECO3"] & (
|
||||
stonewater_cavity_properties["Postcode"].isin(priority_list_eco3)
|
||||
)
|
||||
)
|
||||
|
||||
# We get the EPC data
|
||||
epc_data = json.loads(
|
||||
read_from_s3(
|
||||
bucket_name="retrofit-data-dev",
|
||||
s3_file_name="customers/Stonewater/clustering/epc_data.json"
|
||||
)
|
||||
)
|
||||
epc_data = pd.DataFrame(epc_data)
|
||||
|
||||
epc_data["uprn"] = np.where(
|
||||
epc_data["internal_id"] == 1091,
|
||||
83143766,
|
||||
epc_data["uprn"]
|
||||
)
|
||||
|
||||
epc_data_batch_2 = read_pickle_from_s3(
|
||||
s3_file_name="customers/Stonewater/clustering/epc_data_batch_2.pkl",
|
||||
bucket_name="retrofit-data-dev"
|
||||
)
|
||||
epc_data_batch_2 = pd.DataFrame(epc_data_batch_2)
|
||||
|
||||
complete_epcs = pd.concat([epc_data, epc_data_batch_2])
|
||||
|
||||
epcs_to_merge = complete_epcs[
|
||||
[
|
||||
"uprn",
|
||||
"address",
|
||||
"postcode",
|
||||
"property-type",
|
||||
"built-form",
|
||||
"inspection-date",
|
||||
"current-energy-rating",
|
||||
"current-energy-efficiency",
|
||||
"roof-description",
|
||||
"walls-description",
|
||||
"transaction-type",
|
||||
"secondheat-description",
|
||||
"total-floor-area",
|
||||
"construction-age-band",
|
||||
"floor-height",
|
||||
"number-habitable-rooms",
|
||||
"mainheat-description",
|
||||
"energy-consumption-current"
|
||||
]
|
||||
].rename(
|
||||
columns={
|
||||
"address": "Address",
|
||||
"postcode": "Postcode",
|
||||
"inspection-date": "Date of last EPC",
|
||||
"current-energy-efficiency": "SAP score on register",
|
||||
"current-energy-rating": "EPC rating on register",
|
||||
"property-type": "Property Type",
|
||||
"built-form": "Archetype",
|
||||
"total-floor-area": "Property Floor Area",
|
||||
"construction-age-band": "Property Age Band",
|
||||
"floor-height": "Property Floor Height",
|
||||
"number-habitable-rooms": "Number of Habitable Rooms",
|
||||
"walls-description": "Wall Construction",
|
||||
"roof-description": "Roof Construction",
|
||||
"mainheat-description": "Heating Type",
|
||||
"secondheat-description": "Secondary Heating",
|
||||
"transaction-type": "Reason for last EPC",
|
||||
"energy-consumption-current": "Heat Demand (kWh/m2)",
|
||||
}
|
||||
)
|
||||
# We de-dupe, keeping for each UPRN the EPC with the most recent inspection date
|
||||
epcs_to_merge["Date of last EPC"] = pd.to_datetime(epcs_to_merge["Date of last EPC"])
|
||||
epcs_to_merge = epcs_to_merge.sort_values("Date of last EPC", ascending=False)
|
||||
epcs_to_merge = epcs_to_merge.drop_duplicates(subset="uprn")
|
||||
# epc_data = json.loads(
|
||||
# read_from_s3(
|
||||
# bucket_name="retrofit-data-dev",
|
||||
# s3_file_name="customers/Stonewater/clustering/epc_data.json"
|
||||
# )
|
||||
# )
|
||||
# epc_data = pd.DataFrame(epc_data)
|
||||
#
|
||||
# epc_data["uprn"] = np.where(
|
||||
# epc_data["internal_id"] == 1091,
|
||||
# 83143766,
|
||||
# epc_data["uprn"]
|
||||
# )
|
||||
#
|
||||
# epc_data_batch_2 = read_pickle_from_s3(
|
||||
# s3_file_name="customers/Stonewater/clustering/epc_data_batch_2.pkl",
|
||||
# bucket_name="retrofit-data-dev"
|
||||
# )
|
||||
# epc_data_batch_2 = pd.DataFrame(epc_data_batch_2)
|
||||
#
|
||||
# complete_epcs = pd.concat([epc_data, epc_data_batch_2])
|
||||
#
|
||||
# epcs_to_merge = complete_epcs[
|
||||
# [
|
||||
# "uprn",
|
||||
# "address",
|
||||
# "postcode",
|
||||
# "property-type",
|
||||
# "built-form",
|
||||
# "inspection-date",
|
||||
# "current-energy-rating",
|
||||
# "current-energy-efficiency",
|
||||
# "roof-description",
|
||||
# "walls-description",
|
||||
# "transaction-type",
|
||||
# "secondheat-description",
|
||||
# "total-floor-area",
|
||||
# "construction-age-band",
|
||||
# "floor-height",
|
||||
# "number-habitable-rooms",
|
||||
# "mainheat-description",
|
||||
# "energy-consumption-current"
|
||||
# ]
|
||||
# ].rename(
|
||||
# columns={
|
||||
# "address": "Address",
|
||||
# "postcode": "Postcode",
|
||||
# "inspection-date": "Date of last EPC",
|
||||
# "current-energy-efficiency": "SAP score on register",
|
||||
# "current-energy-rating": "EPC rating on register",
|
||||
# "property-type": "Property Type",
|
||||
# "built-form": "Archetype",
|
||||
# "total-floor-area": "Property Floor Area",
|
||||
# "construction-age-band": "Property Age Band",
|
||||
# "floor-height": "Property Floor Height",
|
||||
# "number-habitable-rooms": "Number of Habitable Rooms",
|
||||
# "walls-description": "Wall Construction",
|
||||
# "roof-description": "Roof Construction",
|
||||
# "mainheat-description": "Heating Type",
|
||||
# "secondheat-description": "Secondary Heating",
|
||||
# "transaction-type": "Reason for last EPC",
|
||||
# "energy-consumption-current": "Heat Demand (kWh/m2)",
|
||||
# }
|
||||
# )
|
||||
# # We de-dupe, taking the newest on the date the EPC was lod
|
||||
# epcs_to_merge["Date of last EPC"] = pd.to_datetime(epcs_to_merge["Date of last EPC"])
|
||||
# epcs_to_merge = epcs_to_merge.sort_values("Date of last EPC", ascending=False)
|
||||
# epcs_to_merge = epcs_to_merge.drop_duplicates(subset="uprn")
|
||||
|
||||
stonewater_cavity_properties["UPRN"] = stonewater_cavity_properties["UPRN"].astype("Int64").astype(str)
|
||||
stonewater_cavity_properties["Reason Included"].value_counts()
|
||||
# Find the postcodes where an Osmosis survey revealed a need for CWI
|
||||
postcodes_found_needing_cwi = stonewater_cavity_properties[
|
||||
stonewater_cavity_properties["Reason Included"].isin(
|
||||
[
|
||||
"Survey revealed potential need for CWI or extract and re-fill",
|
||||
"Surveyed revealed potential need for CWI or extract and re-fill and is an as built cavity property",
|
||||
"Survey showed this property needs CWI",
|
||||
"Survey showed this property could need extract and re-fill"
|
||||
]
|
||||
)
|
||||
]["Postcode"].unique()
|
||||
|
||||
stonewater_cavity_properties["Suspected Needs CWI - not surveyed"] = (
|
||||
(
|
||||
stonewater_cavity_properties[
|
||||
"Postcode"].isin(
|
||||
postcodes_found_needing_cwi)
|
||||
) & (
|
||||
~stonewater_cavity_properties[
|
||||
"Reason Included"].isin(
|
||||
[
|
||||
"Survey revealed potential need "
|
||||
"for CWI or extract and re-fill",
|
||||
"Surveyed revealed potential "
|
||||
"need for CWI or extract and "
|
||||
"re-fill and is an as built "
|
||||
"cavity property",
|
||||
"Survey showed this property "
|
||||
"needs CWI",
|
||||
"Survey showed this property "
|
||||
"could need extract and re-fill"
|
||||
]
|
||||
)
|
||||
)
|
||||
)
|
||||
|
||||
# Merge the EPCs on, with the data we need
|
||||
stonewater_cavity_properties = stonewater_cavity_properties.rename(
|
||||
|
|
@ -252,12 +339,12 @@ def app():
|
|||
"Renewables": "Parity - Renewables",
|
||||
"Total Floor Area": "Parity - Total Floor Area"
|
||||
}
|
||||
).merge(
|
||||
epcs_to_merge,
|
||||
how="left",
|
||||
left_on="UPRN",
|
||||
right_on="uprn"
|
||||
)
|
||||
) # .merge(
|
||||
# epcs_to_merge,
|
||||
# how="left",
|
||||
# left_on="UPRN",
|
||||
# right_on="uprn"
|
||||
# )
|
||||
|
||||
# We now flag the additional properties in the as built list
|
||||
|
||||
|
|
@ -288,8 +375,56 @@ def app():
|
|||
additional_properties = additional_properties.merge(house_numbers, how="left", on="Address ID")
|
||||
additional_properties["row_id"] = additional_properties["Address ID"].copy()
|
||||
|
||||
# Flag any units in this list that were installed under ECO3
|
||||
additional_properties["Installed under ECO3"] = additional_properties["Organisation Reference"].isin(
|
||||
numeric_ids['STONEWATER UPRN'].values
|
||||
)
|
||||
|
||||
# Additional list ECO3
|
||||
additional_list_eco3 = additional_properties[additional_properties["Installed under ECO3"]]["Postcode"].unique()
|
||||
|
||||
# These are properties that were not installed under ECO3, that have the same postcodes as properties
|
||||
# installed under ECO3
|
||||
# These are 297 properties we might want to start with as an immediate priority
|
||||
additional_properties["Same Postcode as Installed under ECO3"] = (
|
||||
~additional_properties["Installed under ECO3"] & (
|
||||
additional_properties["Postcode"].isin(additional_list_eco3)
|
||||
)
|
||||
)
|
||||
|
||||
# We do some additional manual checks, for ECO3 properties that were installed that didn't get matched to either
|
||||
# dataset
|
||||
numeric_ids["In asset list"] = numeric_ids["STONEWATER UPRN"].isin(
|
||||
stonewater_cavity_properties['Org. ref.'].astype(int).values
|
||||
)
|
||||
numeric_ids["In asset list"] = numeric_ids["In asset list"] | (
|
||||
numeric_ids["STONEWATER UPRN"].isin(
|
||||
additional_properties['Organisation Reference'].astype(int).values
|
||||
)
|
||||
)
|
||||
|
||||
# eco3_installs_not_in_asset_list = numeric_ids[~numeric_ids["In asset list"]]
|
||||
# # We now take samples of properties randomly and manually check the ID against the asset list
|
||||
# print(eco3_installs_not_in_asset_list.sample(1)[["STONEWATER UPRN", "Post Code", "NO ", "Street / Block Name", ]])
|
||||
# # Checked STONEWATER UPRN
|
||||
# # 9862, BH15 1NR, 33, THE QUAY FOYER [x]
|
||||
# # 12785, S01 66PN, 57, SEACOLE GARDENS [x]
|
||||
# # 26071, MK42 0TE, 51, De Havilland Avenue, Shortstown [x]
|
||||
# # 18213, HR6 9UW, 20 Ford Street [x]
|
||||
# # 24344, LU4 9FF, 6 SEAL CLOSE [x]
|
||||
# # 31222, SN14 0QZ, 7 HARDBROOK COURT [x]
|
||||
# # 9343, SP4 7XL, 10 OAK PLACE [x]
|
||||
# # 34730, LU5 5TN, 4 TUDOR DRIVE [x]
|
||||
# # 7021, BN27 2BZ, 32 BUTTS FIELD []
|
||||
#
|
||||
# stonewater_cavity_properties[stonewater_cavity_properties['Org. ref.'] == 7021]
|
||||
# stonewater_cavity_properties[stonewater_cavity_properties['Postcode'] == "BN27 2BZ"]["Name"]
|
||||
#
|
||||
# additional_properties[additional_properties['Organisation Reference'] == 7021]
|
||||
# additional_properties[additional_properties['Postcode'] == "BN27 2BZ"][["Address"]]
|
||||
|
||||
# Pull the EPCs for these properties
|
||||
additional_properties_epcs, errors = get_data(additional_properties)
|
||||
# additional_properties_epcs, errors = get_data(additional_properties)
|
||||
|
||||
# Save this data as a pickle
|
||||
# import pickle
|
||||
|
|
@ -297,12 +432,20 @@ def app():
|
|||
# "wb") as f:
|
||||
# pickle.dump(additional_properties_epcs, f)
|
||||
|
||||
additional_properties["Suspected Needs CWI - not surveyed"] = (
|
||||
(
|
||||
additional_properties["Postcode"].isin(postcodes_found_needing_cwi)
|
||||
)
|
||||
)
|
||||
|
||||
additional_properties["Same Postcode as Installed under ECO3"].value_counts()
|
||||
|
||||
# We drop Full Address
|
||||
additional_properties = additional_properties.drop(columns=["Full Address"])
|
||||
additional_properties2 = additional_properties[[
|
||||
"row_id", "Address", "Postcode", "Address ID", "SAP", "SAP Band", "Property Type", "Walls", "Roofs", "Glazing",
|
||||
"Heating", "Main Fuel", "Hot Water", "Renewables", "Total Floor Area",
|
||||
|
||||
"Address", "Postcode", "Address ID", "SAP", "SAP Band", "Property Type", "Walls", "Roofs", "Glazing",
|
||||
"Heating", "Main Fuel", "Hot Water", "Renewables", "Total Floor Area", 'Installed under ECO3',
|
||||
'Same Postcode as Installed under ECO3'
|
||||
]].rename(
|
||||
columns={
|
||||
"SAP": "Parity - Predicted SAP",
|
||||
|
|
@ -318,56 +461,58 @@ def app():
|
|||
"Renewables": "Parity - Renewables",
|
||||
"Total Floor Area": "Parity - Total Floor Area"
|
||||
}
|
||||
).merge(
|
||||
pd.DataFrame(additional_properties_epcs)[
|
||||
[
|
||||
"row_id",
|
||||
"property-type",
|
||||
"built-form",
|
||||
"inspection-date",
|
||||
"current-energy-rating",
|
||||
"current-energy-efficiency",
|
||||
"roof-description",
|
||||
"walls-description",
|
||||
"transaction-type",
|
||||
"secondheat-description",
|
||||
"total-floor-area",
|
||||
"construction-age-band",
|
||||
"floor-height",
|
||||
"number-habitable-rooms",
|
||||
"mainheat-description",
|
||||
"energy-consumption-current"
|
||||
]
|
||||
].rename(
|
||||
columns={
|
||||
"inspection-date": "Date of last EPC",
|
||||
"current-energy-efficiency": "SAP score on register",
|
||||
"current-energy-rating": "EPC rating on register",
|
||||
"property-type": "Property Type",
|
||||
"built-form": "Archetype",
|
||||
"total-floor-area": "Property Floor Area",
|
||||
"construction-age-band": "Property Age Band",
|
||||
"floor-height": "Property Floor Height",
|
||||
"number-habitable-rooms": "Number of Habitable Rooms",
|
||||
"walls-description": "Wall Construction",
|
||||
"roof-description": "Roof Construction",
|
||||
"mainheat-description": "Heating Type",
|
||||
"secondheat-description": "Secondary Heating",
|
||||
"transaction-type": "Reason for last EPC",
|
||||
"energy-consumption-current": "Heat Demand (kWh/m2)",
|
||||
}
|
||||
),
|
||||
how="left",
|
||||
on="row_id"
|
||||
)
|
||||
) # .merge(
|
||||
# pd.DataFrame(additional_properties_epcs)[
|
||||
# [
|
||||
# "row_id",
|
||||
# "property-type",
|
||||
# "built-form",
|
||||
# "inspection-date",
|
||||
# "current-energy-rating",
|
||||
# "current-energy-efficiency",
|
||||
# "roof-description",
|
||||
# "walls-description",
|
||||
# "transaction-type",
|
||||
# "secondheat-description",
|
||||
# "total-floor-area",
|
||||
# "construction-age-band",
|
||||
# "floor-height",
|
||||
# "number-habitable-rooms",
|
||||
# "mainheat-description",
|
||||
# "energy-consumption-current"
|
||||
# ]
|
||||
# ].rename(
|
||||
# columns={
|
||||
# "inspection-date": "Date of last EPC",
|
||||
# "current-energy-efficiency": "SAP score on register",
|
||||
# "current-energy-rating": "EPC rating on register",
|
||||
# "property-type": "Property Type",
|
||||
# "built-form": "Archetype",
|
||||
# "total-floor-area": "Property Floor Area",
|
||||
# "construction-age-band": "Property Age Band",
|
||||
# "floor-height": "Property Floor Height",
|
||||
# "number-habitable-rooms": "Number of Habitable Rooms",
|
||||
# "walls-description": "Wall Construction",
|
||||
# "roof-description": "Roof Construction",
|
||||
# "mainheat-description": "Heating Type",
|
||||
# "secondheat-description": "Secondary Heating",
|
||||
# "transaction-type": "Reason for last EPC",
|
||||
# "energy-consumption-current": "Heat Demand (kWh/m2)",
|
||||
# }
|
||||
# ),
|
||||
# how="left",
|
||||
# on="row_id"
|
||||
# )
|
||||
|
||||
# We save the data locally
|
||||
stonewater_cavity_properties.to_csv(
|
||||
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Stonewater Cavity Properties.csv",
|
||||
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Stonewater Cavity Properties - priority "
|
||||
"postcodes.csv",
|
||||
index=False
|
||||
)
|
||||
additional_properties2.to_csv(
|
||||
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Stonewater Additional Cavity Properties.csv",
|
||||
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Stonewater Additional Cavity Properties - "
|
||||
"non-priority postcodes.csv",
|
||||
index=False
|
||||
)
|
||||
# Save the survey findings
|
||||
|
|
|
|||
|
|
@ -44,6 +44,10 @@ epc_data["has_conservation_restrictions"] = (
|
|||
| (epc_data["is_heritage_building"] == True)
|
||||
)
|
||||
|
||||
whlg_eligible_postcodes["Local Authority"].value_counts()
|
||||
|
||||
whlg_eligible_postcodes = whlg_eligible_postcodes[whlg_eligible_postcodes["Local Authority"] == "Waltham Forest"]
|
||||
|
||||
# Pathway 1:
|
||||
# Match based on eligible postcodes
|
||||
pathway1 = epc_data[epc_data["postcode"].isin(whlg_eligible_postcodes["Postcode"].values)]
|
||||
|
|
@ -67,6 +71,10 @@ pathway1["EPC Date"] = pd.to_datetime(pathway1["EPC Date"]).dt.strftime("%Y-%m-%
|
|||
# Create a year EPC was lodged
|
||||
pathway1["EPC Year"] = pd.to_datetime(pathway1["EPC Date"]).dt.year
|
||||
|
||||
low_epc = pathway1[pathway1["EPC Rating"].isin(["F", "G"])]
|
||||
low_epc["EPC Rating"].value_counts()
|
||||
low_epc.tail(1)[["address", "postcode"]]
|
||||
|
||||
pathway1.to_csv(
|
||||
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Waltham Forest WHLG - Pathway 1 Eligibility.csv",
|
||||
index=False
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue