mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
minor
This commit is contained in:
parent
69b3ec7961
commit
2eaf19c2bb
5 changed files with 343 additions and 1 deletions
92
etl/customers/aiha/bid_numbers.py
Normal file
92
etl/customers/aiha/bid_numbers.py
Normal file
|
|
@ -0,0 +1,92 @@
|
|||
"""
|
||||
This is an adhoc script, used to pull together some of the figures that are being included in the
|
||||
Warm Homes: Social Housing Wave 3 funding application
|
||||
"""
|
||||
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
|
||||
# Workbook holding both the full AIHA unit list and the modelled measures; read twice (one sheet each) below
MEASURES_PACKAGES_PATH = "/Users/khalimconn-kowlessar/Downloads/AIHA Measures Packages 2024_11_13.xlsx"
# Columns that together identify a unit; used both for cleaning and as the merge key
ADDRESS_COLUMNS = ["Address letter or number", "Street address", "Postcode"]

aiha_all_units = pd.read_excel(
    MEASURES_PACKAGES_PATH,
    sheet_name="All Properties - AIHA",
    header=2
)
modelled_units = pd.read_excel(
    MEASURES_PACKAGES_PATH,
    sheet_name="Modelled Properties - Measures",
    header=5
)
aiha_all_units = aiha_all_units.drop(columns=['Unnamed: 0', 'Unnamed: 1'])
aiha_extracted_property_data = pd.read_csv(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/AIHA/extracted_property_data.csv"
)
# Units in the bid are the ones with a positive expected package cost
aiha_wave_3_units = aiha_all_units[aiha_all_units["Expected Package Cost"].astype(float) > 0]
# TODO: The EPC C property isn't a C!
aiha_epc_breakdown = aiha_wave_3_units["Expected EPC Rating"].replace({"D or E": "E"}).value_counts()
# For CAHA
# na=False: rows without a survey key are excluded rather than producing a NaN in the boolean mask
caha_epc_breakdown = modelled_units[
    modelled_units['Survey Key'].str.contains("CAHA", na=False)
]['Current EPC Rating'].value_counts()
# For Hornsey
hornsey_epc_breakdown = modelled_units[
    modelled_units['Survey Key'].str.contains("HORNSEY", na=False)
]['Current EPC Rating'].value_counts()

aiha_original_asset_data = pd.read_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/AIHA/240924- KSQ & Domna Info Merge - AIHA - SHDF Wave 3 "
    "bid - Supplementary information.xlsx",
    sheet_name="Archetyping Data",
    header=2
)

# Get the units in the bid:
aiha_wave_3_features = aiha_original_asset_data[
    ADDRESS_COLUMNS + ["Wall type", "Property type", "built-form", "floor"]
].merge(
    aiha_wave_3_units[ADDRESS_COLUMNS],
    how="inner",
    on=ADDRESS_COLUMNS
)

wall_type_breakdown = aiha_wave_3_features["Wall type"].value_counts()
property_type_breakdown = aiha_wave_3_features.groupby(["Property type", "floor"]).size().reset_index()

# Hornsey data - contained in original asset list
hornsey_asset_list = pd.read_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/AIHA/SHDF - Template - EOI - Hornsey Housing "
    "Trust.xlsx",
    sheet_name="Ksquared-All units information",
    header=3
)

# We don't need the first row
hornsey_asset_list = hornsey_asset_list.iloc[1:]
# Fill NA values with empty strings
hornsey_asset_list = hornsey_asset_list.fillna("")
# Normalise the address columns: cast to string, trim, and collapse any run of whitespace to a single space
for col in ADDRESS_COLUMNS:
    hornsey_asset_list[col] = (
        hornsey_asset_list[col].astype(str).str.strip().str.replace(r"\s{2,}", " ", regex=True)
    )

# Drop rows without an address number; copy so the column assignment below does not write to a slice
hornsey_asset_list = hornsey_asset_list[hornsey_asset_list["Address letter or number"] != ""].copy()

# Anything whose wall type does not mention "Cavity" is labelled "Solid"
hornsey_asset_list["Wall Type Cleaned"] = np.where(
    hornsey_asset_list["Wall type"].astype(str).str.contains("Cavity"),
    "Cavity",
    "Solid"
)

# Exploratory: result is only visible when run interactively
hornsey_asset_list["Property type"].value_counts()

# CAHA
caha_epc_data = pd.read_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/AIHA/caha_extracted_property_data.xlsx"
)

# Exploratory: results are only visible when run interactively
caha_epc_data["property_type"].value_counts()
caha_epc_data["wall_type"].value_counts()
|
||||
|
|
@ -92,9 +92,13 @@ def main():
|
|||
|
||||
# This is the data we need for the AIHA project
|
||||
measures_data = extracted_surveys[
|
||||
["survey_key", "address", "postcode", "current-energy-efficiency", "current-energy-rating", "number_of_floors"]
|
||||
["survey_key", "address", "postcode", "current-energy-efficiency", "current-energy-rating",
|
||||
"number_of_floors", "walls-description", "property-type", "built-form"]
|
||||
]
|
||||
measures_data = measures_data.sort_values("survey_key", ascending=True)
|
||||
measures_data.to_csv(
|
||||
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/AIHA/extracted_property_data.csv",
|
||||
)
|
||||
|
||||
# Note:
|
||||
# The properties will still have "Very poor" ratings for their hot water
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@ from etl.epc.settings import EARLIEST_EPC_DATE
|
|||
from dotenv import load_dotenv
|
||||
from tqdm import tqdm
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from etl.find_my_epc.RetrieveFindMyEpc import RetrieveFindMyEpc
|
||||
from backend.SearchEpc import SearchEpc
|
||||
from utils.s3 import save_csv_to_s3
|
||||
|
|
@ -46,6 +47,12 @@ def hornsey():
|
|||
|
||||
hornsey_asset_list = hornsey_asset_list[hornsey_asset_list["Address letter or number"] != ""]
|
||||
|
||||
# Label each unit by whether its wall type mentions "Cavity"; everything else is treated as "Solid".
# The test must be element-wise: `"Cavity" in series` checks the index labels, not the values, and
# would hand np.where a single scalar so every row got the same label.
hornsey_asset_list["Wall Type Cleaned"] = np.where(
    hornsey_asset_list["Wall type"].astype(str).str.contains("Cavity"),
    "Cavity",
    "Solid"
)
|
||||
|
||||
missed_uprns = {
|
||||
"Flat 13A Stowell House": 100021213098,
|
||||
"Flat 24 Stowell House": 100021213110,
|
||||
|
|
@ -267,6 +274,9 @@ def caha():
|
|||
"address": address,
|
||||
"postcode": home["Postcode"],
|
||||
"property_type": newest_epc["property-type"],
|
||||
"wall_type": newest_epc["walls-description"],
|
||||
"built_form": newest_epc["built-form"],
|
||||
"flat_storey_count": newest_epc['flat-storey-count'],
|
||||
}
|
||||
)
|
||||
|
||||
|
|
|
|||
235
etl/customers/southend/epc_data_pull_2024_11_14.py
Normal file
235
etl/customers/southend/epc_data_pull_2024_11_14.py
Normal file
|
|
@ -0,0 +1,235 @@
|
|||
import os
|
||||
import time
|
||||
|
||||
import pandas as pd
|
||||
from tqdm import tqdm
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from utils.s3 import read_excel_from_s3
|
||||
from backend.SearchEpc import SearchEpc
|
||||
from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes
|
||||
|
||||
from recommendations.recommendation_utils import (
|
||||
estimate_perimeter,
|
||||
estimate_external_wall_area,
|
||||
estimate_number_of_floors
|
||||
)
|
||||
|
||||
# Load environment variables from backend/.env before the token is read below
load_dotenv(dotenv_path="backend/.env")
|
||||
# Auth token handed to SearchEpc as `auth_token`; os.getenv returns None when the variable is not set
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
|
||||
|
||||
|
||||
def get_data(asset_list):
    """
    Look up the newest EPC, plus its recommendations, for every row of the asset list.

    :param asset_list: DataFrame with "Postcode", "address1", "Address" and "row_id" columns
    :return: tuple (epc_data, errors). epc_data is a list of dicts, one per property for which an EPC was
        found, holding the row_id, the fields of the newest EPC and a "recommendations" list. errors is the
        list of row_ids whose lookup raised, so the caller can retry them.
    """
    epc_data = []
    errors = []
    for _, home in tqdm(asset_list.iterrows(), total=len(asset_list)):
        try:
            postcode = home["Postcode"]
            address1 = home["address1"].split(",")[0]
            full_address = home["Address"]

            searcher = SearchEpc(
                address1=str(address1),
                postcode=postcode,
                auth_token=EPC_AUTH_TOKEN,
                os_api_key="",
                property_type=None,
                fast=True,
                full_address=full_address,
                max_retries=5
            )
            # Force the skipping of estimating the EPC
            searcher.ordnance_survey_client.property_type = None
            searcher.ordnance_survey_client.built_form = None

            searcher.find_property(skip_os=True)
            if searcher.newest_epc is None:
                # No EPC found for this property: nothing to record, and not an error
                continue

            # Look for EPC recommendations; a failure here is treated as "no recommendations" rather than
            # failing the whole property. Exception (not a bare except) so Ctrl-C still stops the run.
            try:
                property_recommendations = searcher.client.domestic.recommendations(searcher.newest_epc["lmk-key"])
            except Exception:
                property_recommendations = {"rows": []}

            epc_data.append(
                {
                    "row_id": home["row_id"],
                    **searcher.newest_epc.copy(),
                    "recommendations": property_recommendations["rows"]
                }
            )
        except Exception:
            # Record the row for a later retry and pause before the next request
            errors.append(home["row_id"])
            time.sleep(5)

    return epc_data, errors
|
||||
|
||||
|
||||
def app():
    """
    Pull EPC data for the properties in the Southend "Planned RM" asset list and store the enriched
    list as an Excel file.

    NOTE(review): this docstring previously named Livewest as the owner, but the input and output paths
    below are Southend files - confirm which customer this is for.

    Data request contents:
    Date of last EPC
    Reason for EPC
    SAP score on register
    Property Type
    Property Area
    Property Age
    Any Dimensions (HLP,PW,RH)
    Property Wall Construction
    Heating Type
    Secondary Heating
    Loft Insulation Depth

    Additional if possible:
    Heat loss calculations
    EPC recommendations
    Property UPRN

    """
    asset_list = pd.read_excel(
        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/southend/Southend Planned programme.xlsx",
        header=0,
        sheet_name="Planned RM"
    )
    # row_id is a temporary join key; it is dropped again before the results are written
    asset_list["row_id"] = asset_list.index
    asset_list["address1"] = asset_list["Address"].str.split(",").str[0]

    epc_data, errors = get_data(asset_list)

    # We now retry any failed properties once and append whatever was found to the main data
    asset_list_failed = asset_list[asset_list["row_id"].isin(errors)]
    epc_data_failed, _ = get_data(asset_list_failed)
    epc_data.extend(epc_data_failed)

    epc_df = pd.DataFrame(epc_data)

    # We expand out the recommendations into one boolean column per distinct recommendation text
    recommendations_df = epc_df[["row_id", "recommendations"]]

    unique_recommendations = set()
    for recommendations in recommendations_df["recommendations"]:
        unique_recommendations.update(rec["improvement-summary-text"] for rec in recommendations)

    # Sorted so the column order of the output is the same on every run (set order is not)
    recommendation_columns = sorted(unique_recommendations, key=str)

    transformed_data = []
    for row_id, recommendations in zip(recommendations_df["row_id"], recommendations_df["recommendations"]):
        # Start with False for every recommendation, then set True for each one present on this row
        row_data = {"row_id": row_id, **{col: False for col in recommendation_columns}}
        for rec in recommendations:
            row_data[rec["improvement-summary-text"]] = True
        transformed_data.append(row_data)

    transformed_df = pd.DataFrame(transformed_data)
    # Drop the column that is "" (blank recommendation text); tolerate it not being there at all
    transformed_df = transformed_df.drop(columns=[""], errors="ignore")

    # Retrieve just the data we need
    epc_df = epc_df[
        [
            "row_id",
            "uprn",
            "property-type",
            "built-form",
            "inspection-date",
            "current-energy-rating",
            "current-energy-efficiency",
            "roof-description",
            "walls-description",
            "transaction-type",
            # New fields needed
            "secondheat-description",
            "total-floor-area",
            "construction-age-band",
            "floor-height",
            "number-habitable-rooms",
            "mainheat-description",
            #
            "energy-consumption-current",  # kwh/m2
            "photo-supply",
        ]
    ]

    asset_list = asset_list.merge(
        epc_df,
        how="left",
        on="row_id"
    ).merge(
        transformed_df,
        how="left",
        on="row_id"
    )

    asset_list = asset_list.drop(columns=["row_id"])

    # Rename the columns
    asset_list = asset_list.rename(columns={
        "inspection-date": "Date of last EPC",
        "current-energy-efficiency": "SAP score on register",
        "current-energy-rating": "EPC rating on register",
        "property-type": "Property Type",
        "built-form": "Archetype",
        "total-floor-area": "Property Floor Area",
        "construction-age-band": "Property Age Band",
        "floor-height": "Property Floor Height",
        "number-habitable-rooms": "Number of Habitable Rooms",
        "walls-description": "Wall Construction",
        "roof-description": "Roof Construction",
        "mainheat-description": "Heating Type",
        "secondheat-description": "Secondary Heating",
        "transaction-type": "Reason for last EPC",
        "energy-consumption-current": "Heat Demand (kWh/m2)",
        "photo-supply": "% of the Roof with PV"
    })

    # Properties without an EPC have no property type, so no floor estimate can be made for them
    asset_list["Estimated Number of Floors"] = asset_list.apply(
        lambda x: estimate_number_of_floors(property_type=x["Property Type"]) if not pd.isnull(
            x["Property Type"]) else None, axis=1
    )

    asset_list["Property Floor Area"] = asset_list["Property Floor Area"].astype(float)
    # Replace "" with NaN explicitly. Series.replace("", None) must not be used here: on older pandas a
    # None value makes replace forward-fill from the previous row instead of blanking the cell.
    asset_list["Number of Habitable Rooms"] = asset_list["Number of Habitable Rooms"].replace("", float("nan"))
    asset_list["Number of Habitable Rooms"] = asset_list["Number of Habitable Rooms"].astype(float)

    # Floor area and room count are divided by the floor estimate to get per-floor figures
    asset_list["Estimated Perimeter (m)"] = asset_list.apply(
        lambda x: estimate_perimeter(
            floor_area=x["Property Floor Area"] / x["Estimated Number of Floors"],
            num_rooms=x["Number of Habitable Rooms"] / x["Estimated Number of Floors"],
        ), axis=1
    )

    asset_list["Estimated Heat Loss Perimeter (m2)"] = asset_list.apply(
        lambda x: estimate_external_wall_area(
            num_floors=x["Estimated Number of Floors"],
            # Fall back to 2.5 when the floor height is falsy (e.g. an empty string)
            floor_height=float(x["Property Floor Height"]) if x["Property Floor Height"] else 2.5,
            perimeter=x["Estimated Perimeter (m)"],
            built_form=x["Archetype"]
        ),
        axis=1
    )

    asset_list["Roof Insulation Thickness"] = asset_list.apply(
        lambda x: RoofAttributes(description=x["Roof Construction"]).process()["insulation_thickness"] if not pd.isnull(
            x["Roof Construction"]) else None,
        axis=1
    )

    # Store as an excel
    filename = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/southend/southend EPC Data pull - 14 Nov "
                "2024.xlsx")
    asset_list.to_excel(filename, index=False)

    # Exploratory checks on the PV column: results are only visible when run interactively
    asset_list["% of the Roof with PV"].value_counts()

    asset_list[asset_list["% of the Roof with PV"] == "50.0"][["Address", "Postcode"]]
|
||||
|
|
@ -117,6 +117,7 @@ def extract_summary_report(pdf_path):
|
|||
- Fuel Bill
|
||||
- Address
|
||||
"""
|
||||
|
||||
data = {
|
||||
"Address": None,
|
||||
"Postcode": None,
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue