This commit is contained in:
Khalim Conn-Kowlessar 2024-11-15 14:56:00 +00:00
parent 69b3ec7961
commit 2eaf19c2bb
5 changed files with 343 additions and 1 deletions

View file

@ -0,0 +1,92 @@
"""
This is an adhoc script, used to pull together some of the figures that are being included in the
Warm Homes: Social Housing Wave 3 funding application
"""
import pandas as pd
import numpy as np
aiha_all_units = pd.read_excel(
"/Users/khalimconn-kowlessar/Downloads/AIHA Measures Packages 2024_11_13.xlsx",
sheet_name="All Properties - AIHA",
header=2
)
modelled_units = pd.read_excel(
"/Users/khalimconn-kowlessar/Downloads/AIHA Measures Packages 2024_11_13.xlsx",
sheet_name="Modelled Properties - Measures",
header=5
)
aiha_all_units = aiha_all_units.drop(columns=['Unnamed: 0', 'Unnamed: 1'])
aiha_extracted_property_data = pd.read_csv(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/AIHA/extracted_property_data.csv"
)
aiha_wave_3_units = aiha_all_units[aiha_all_units["Expected Package Cost"].astype(float) > 0]
# TODO: The EPC C property isn't a C!
aiha_epc_breakdown = aiha_wave_3_units["Expected EPC Rating"].replace({"D or E": "E"}).value_counts()
# For CAHA
caha_epc_breakdown = modelled_units[
modelled_units['Survey Key'].str.contains("CAHA")
]['Current EPC Rating'].value_counts()
# For Hornsey
hornsey_epc_breakdown = modelled_units[
modelled_units['Survey Key'].str.contains("HORNSEY")
]['Current EPC Rating'].value_counts()
aiha_original_asset_data = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/AIHA/240924- KSQ & Domna Info Merge - AIHA - SHDF Wave 3 "
"bid - Supplementary information.xlsx",
sheet_name="Archetyping Data",
header=2
)
# Get the units in the bid:
aiha_wave_3_features = aiha_original_asset_data[
['Address letter or number', 'Street address', 'Postcode', "Wall type",
"Property type", "built-form", "floor"]
].merge(
aiha_wave_3_units[['Address letter or number', 'Street address', 'Postcode']],
how="inner",
on=["Address letter or number", "Street address", "Postcode"]
)
wall_type_breakdown = aiha_wave_3_features["Wall type"].value_counts()
property_type_breakdown = aiha_wave_3_features.groupby(["Property type", "floor"]).size().reset_index()
# Hornsey data - contained in original asset list
hornsey_asset_list = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/AIHA/SHDF - Template - EOI - Hornsey Housing "
"Trust.xlsx",
sheet_name="Ksquared-All units information",
header=3
)
# We don't need the first row
hornsey_asset_list = hornsey_asset_list.iloc[1:]
# Fill NA values with empty strings
hornsey_asset_list = hornsey_asset_list.fillna("")
hornsey_asset_list["Address letter or number"] = hornsey_asset_list["Address letter or number"].astype(
str
).str.strip()
hornsey_asset_list["Postcode"] = hornsey_asset_list["Postcode"].astype(str).str.strip()
hornsey_asset_list["Street address"] = hornsey_asset_list["Street address"].astype(str).str.strip()
# Replace double spaces
for col in ["Address letter or number", "Street address", "Postcode"]:
hornsey_asset_list[col] = hornsey_asset_list[col].str.replace(" ", " ")
hornsey_asset_list = hornsey_asset_list[hornsey_asset_list["Address letter or number"] != ""]
hornsey_asset_list["Wall Type Cleaned"] = np.where(
hornsey_asset_list["Wall type"].str.contains("Cavity"),
"Cavity",
"Solid"
)
hornsey_asset_list["Property type"].value_counts()
# CAHA
caha_epc_data = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/AIHA/caha_extracted_property_data.xlsx"
)
caha_epc_data["property_type"].value_counts()
caha_epc_data["wall_type"].value_counts()

View file

@ -92,9 +92,13 @@ def main():
# THis is the data we need for the AIHA project
measures_data = extracted_surveys[
["survey_key", "address", "postcode", "current-energy-efficiency", "current-energy-rating", "number_of_floors"]
["survey_key", "address", "postcode", "current-energy-efficiency", "current-energy-rating",
"number_of_floors", "walls-description", "property-type", "built-form"]
]
measures_data = measures_data.sort_values("survey_key", ascending=True)
measures_data.to_csv(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/AIHA/extracted_property_data.csv",
)
# Note:
# The properties will still have "Very poor" ratings for their hot water

View file

@ -6,6 +6,7 @@ from etl.epc.settings import EARLIEST_EPC_DATE
from dotenv import load_dotenv
from tqdm import tqdm
import pandas as pd
import numpy as np
from etl.find_my_epc.RetrieveFindMyEpc import RetrieveFindMyEpc
from backend.SearchEpc import SearchEpc
from utils.s3 import save_csv_to_s3
@ -46,6 +47,12 @@ def hornsey():
hornsey_asset_list = hornsey_asset_list[hornsey_asset_list["Address letter or number"] != ""]
hornsey_asset_list["Wall Type Cleaned"] = np.where(
"Cavity" in hornsey_asset_list["Wall type"],
"Cavity",
"Solid"
)
missed_uprns = {
"Flat 13A Stowell House": 100021213098,
"Flat 24 Stowell House": 100021213110,
@ -267,6 +274,9 @@ def caha():
"address": address,
"postcode": home["Postcode"],
"property_type": newest_epc["property-type"],
"wall_type": newest_epc["walls-description"],
"built_form": newest_epc["built-form"],
"flat_storey_count": newest_epc['flat-storey-count'],
}
)

View file

@ -0,0 +1,235 @@
import os
import time
import pandas as pd
from tqdm import tqdm
from dotenv import load_dotenv
from utils.s3 import read_excel_from_s3
from backend.SearchEpc import SearchEpc
from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes
from recommendations.recommendation_utils import (
estimate_perimeter,
estimate_external_wall_area,
estimate_number_of_floors
)
load_dotenv(dotenv_path="backend/.env")
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
def get_data(asset_list):
epc_data = []
errors = []
for _, home in tqdm(asset_list.iterrows(), total=len(asset_list)):
try:
postcode = home["Postcode"]
address1 = home["address1"].split(",")[0]
full_address = home["Address"]
searcher = SearchEpc(
address1=str(address1),
postcode=postcode,
auth_token=EPC_AUTH_TOKEN,
os_api_key="",
property_type=None,
fast=True,
full_address=full_address,
max_retries=5
)
# Force the skipping of estimating the EPC
searcher.ordnance_survey_client.property_type = None
searcher.ordnance_survey_client.built_form = None
searcher.find_property(skip_os=True)
if searcher.newest_epc is None:
continue
# Look for EPC recommendatons
try:
property_recommendations = searcher.client.domestic.recommendations(searcher.newest_epc["lmk-key"])
except:
property_recommendations = {"rows": []}
epc = {
"row_id": home["row_id"],
**searcher.newest_epc.copy(),
"recommendations": property_recommendations["rows"]
}
epc_data.append(epc)
except Exception as e:
errors.append(home["row_id"])
time.sleep(5)
return epc_data, errors
def app():
"""
This app is EPC pulling data for some properties owned by Livewest
Data request contents:
Date of last EPC
Reason for EPC
SAP score on register
Property Type
Property Area
Property Age
Any Dimensions (HLP,PW,RH)
Property Wall Construction
Heating Type
Secondary Heating
Loft Insulation Depth
Additional if possible:
Heat loss calculations
EPC recommendations
Property UPRN
"""
asset_list = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/southend/Southend Planned programme.xlsx",
header=0,
sheet_name="Planned RM"
)
asset_list["row_id"] = asset_list.index
asset_list["address1"] = asset_list["Address"].str.split(",").str[0]
epc_data, errors = get_data(asset_list)
# We now retrieve any failed properties
asset_list_failed = asset_list[asset_list["row_id"].isin(errors)]
epc_data_failed, _ = get_data(asset_list_failed)
# Append the failed data to the main data
epc_data.extend(epc_data_failed)
epc_df = pd.DataFrame(epc_data)
# We expand out the recommendations
recommendations_df = epc_df[["row_id", "recommendations"]]
unique_recommendations = set()
for _, row in recommendations_df.iterrows():
unique_recommendations.update([rec["improvement-summary-text"] for rec in row["recommendations"]])
columns = ["row_id"] + list(unique_recommendations)
transformed_data = []
for _, row in recommendations_df.iterrows():
# Initialize a dictionary for this row with False for all recommendations
row_data = {col: False for col in columns}
row_data["row_id"] = row["row_id"]
# Set True for each recommendation present in this row
for rec in row["recommendations"]:
recommendation_text = rec["improvement-summary-text"]
row_data[recommendation_text] = True
# Append the row data to transformed_data
transformed_data.append(row_data)
transformed_df = pd.DataFrame(transformed_data)
# Drop the column that is ""
transformed_df = transformed_df.drop(columns=[""])
# Retrieve just the data we need
epc_df = epc_df[
[
"row_id",
"uprn",
"property-type",
"built-form",
"inspection-date",
"current-energy-rating",
"current-energy-efficiency",
"roof-description",
"walls-description",
"transaction-type",
# New fields needed
"secondheat-description",
"total-floor-area",
"construction-age-band",
"floor-height",
"number-habitable-rooms",
"mainheat-description",
#
"energy-consumption-current", # kwh/m2
"photo-supply",
]
]
asset_list = asset_list.merge(
epc_df,
how="left",
on="row_id"
).merge(
transformed_df,
how="left",
on="row_id"
)
asset_list = asset_list.drop(columns=["row_id"])
# Rename the columns
asset_list = asset_list.rename(columns={
"inspection-date": "Date of last EPC",
"current-energy-efficiency": "SAP score on register",
"current-energy-rating": "EPC rating on register",
"property-type": "Property Type",
"built-form": "Archetype",
"total-floor-area": "Property Floor Area",
"construction-age-band": "Property Age Band",
"floor-height": "Property Floor Height",
"number-habitable-rooms": "Number of Habitable Rooms",
"walls-description": "Wall Construction",
"roof-description": "Roof Construction",
"mainheat-description": "Heating Type",
"secondheat-description": "Secondary Heating",
"transaction-type": "Reason for last EPC",
"energy-consumption-current": "Heat Demand (kWh/m2)",
"photo-supply": "% of the Roof with PV"
})
asset_list["Estimated Number of Floors"] = asset_list.apply(
lambda x: estimate_number_of_floors(property_type=x["Property Type"]) if not pd.isnull(
x["Property Type"]) else None, axis=1
)
asset_list["Property Floor Area"] = asset_list["Property Floor Area"].astype(float)
# Replace "" value with None
asset_list["Number of Habitable Rooms"] = asset_list["Number of Habitable Rooms"].replace("", None)
asset_list["Number of Habitable Rooms"] = asset_list["Number of Habitable Rooms"].astype(float)
asset_list["Estimated Perimeter (m)"] = asset_list.apply(
lambda x: estimate_perimeter(
floor_area=x["Property Floor Area"] / x["Estimated Number of Floors"],
num_rooms=x["Number of Habitable Rooms"] / x["Estimated Number of Floors"],
), axis=1
)
asset_list["Estimated Heat Loss Perimeter (m2)"] = asset_list.apply(
lambda x: estimate_external_wall_area(
num_floors=x["Estimated Number of Floors"],
floor_height=float(x["Property Floor Height"]) if x["Property Floor Height"] else 2.5,
perimeter=x["Estimated Perimeter (m)"],
built_form=x["Archetype"]
),
axis=1
)
asset_list["Roof Insulation Thickness"] = asset_list.apply(
lambda x: RoofAttributes(description=x["Roof Construction"]).process()["insulation_thickness"] if not pd.isnull(
x["Roof Construction"]) else None,
axis=1
)
# Store as an excel
filename = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/southend/southend EPC Data pull - 14 Nov "
"2024.xlsx")
asset_list.to_excel(filename, index=False)
asset_list["% of the Roof with PV"].value_counts()
asset_list[asset_list["% of the Roof with PV"] == "50.0"][["Address", "Postcode"]]

View file

@ -117,6 +117,7 @@ def extract_summary_report(pdf_path):
- Fuel Bill
- Address
"""
data = {
"Address": None,
"Postcode": None,