Data pulled for cavity abs estimates

This commit is contained in:
Khalim Conn-Kowlessar 2025-06-25 20:43:40 +01:00
parent e7eb9b7aed
commit ba0d5e1473
4 changed files with 183 additions and 11 deletions

145
asset_list/abs_estimates.py Normal file
View file

@ -0,0 +1,145 @@
"""
Simple script that takes a standardised asset list and estimates the ABS (annual bill savings) for each
property. We'll use this code to estimate the ABS for properties going forward.
"""
import os
import pandas as pd
import numpy as np
from dotenv import load_dotenv
from etl.find_my_epc.AssetListEpcData import AssetListEpcData
from backend.Funding import Funding
from backend.app.utils import sap_to_epc
# Read the EPC API token from the backend's dotenv file.
load_dotenv(dotenv_path="backend/.env")
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")

# Input workbooks. These are absolute paths on the author's machine, so the script only runs where
# these files exist.
_ASSET_LIST_PATH = (
    "/Users/khalimconn-kowlessar/Documents/hestia/Instagroup Review/Thrive Programme - reconciled.xlsx"
)
_FULL_PROJECT_SCORES_PATH = "/Users/khalimconn-kowlessar/Downloads/ECO4 Full Project Scores Matrix.csv"
_PARTIAL_PROJECT_SCORES_PATH = (
    "/Users/khalimconn-kowlessar/Downloads/ECO4 Partial Project Scores Matrix v5.xlsx"
)

# The standardised asset list: one sheet of the reconciled programme workbook.
asset_list = pd.read_excel(_ASSET_LIST_PATH, sheet_name="Cavity properties - for review")

# Full project scores matrix (joined later on starting band, finishing band and floor area segment).
abs_matrix = pd.read_csv(_FULL_PROJECT_SCORES_PATH)

# Partial project scores matrix; the column headers sit on the second row of the sheet and carry
# stray surrounding whitespace, which is stripped so they can be referenced by name.
pps_matrix = pd.read_excel(_PARTIAL_PROJECT_SCORES_PATH, header=1)
pps_matrix = pps_matrix.rename(columns=str.strip)
# To estimate the finishing band we need the SAP points the works will add. Those points are read
# from each home's EPC recommendations later in this script (with postcode-level and overall
# fallbacks), so here we only build the address frame the EPC client needs.
# NOTE(review): "epc_os_uprn" is renamed to "upr" - this looks like a typo for "uprn"; confirm which
# column name AssetListEpcData expects before changing it.
route_columns = {
    "domna_address_1": "address",
    "domna_postcode": "postcode",
    "epc_os_uprn": "upr",
}
cavity_route = (
    asset_list[list(route_columns)]
    .rename(columns=route_columns)
    # Addresses are forced to strings before being handed to the client.
    .assign(address=lambda frame: frame["address"].astype(str))
)

# Fetch the EPC data for every home, then derive the non-invasive recommendations from it.
asset_list_epc_client = AssetListEpcData(asset_list=cavity_route, epc_auth_token=EPC_AUTH_TOKEN)
asset_list_epc_client.get_data()
asset_list_epc_client.get_non_invasive_recommendations()
# Collect, per home, the SAP points of its first cavity wall insulation (CWI) recommendation.
cwi_rows = []
for r in asset_list_epc_client.non_invasive_recommendations:
    # Homes with missing/empty recommendations, or with no CWI recommendation, contribute no row.
    cwi_recommendation = next(
        (x for x in (r.get("recommendations") or []) if "cavity_wall_insulation" in x["type"]),
        None,
    )
    if cwi_recommendation is None:
        continue
    cwi_rows.append(
        {
            "address": r["address"],
            "postcode": r["postcode"],
            "sap_points": cwi_recommendation["sap_points"],
        }
    )

# Name the columns explicitly: with no rows at all a bare DataFrame would have no columns, and the
# later groupby on "postcode" would fail with a KeyError.
cwi_sap_points = pd.DataFrame(cwi_rows, columns=["address", "postcode", "sap_points"])
if cwi_sap_points.empty:
    # An empty frame defaults to object columns; keep the points numeric so mean/median still work.
    cwi_sap_points = cwi_sap_points.astype({"sap_points": float})
# Store the sap points in the cavity route to csv
# cwi_sap_points.to_csv(
# "/Users/khalimconn-kowlessar/Documents/hestia/Instagroup Review/cwi_sap_points_livewest_sw.csv",
# index=False
# )
# cwi_sap_points = pd.read_csv(
# "/Users/khalimconn-kowlessar/Documents/hestia/Instagroup Review/cwi_sap_points_livewest_sw.csv"
# )
# Fallback SAP uplifts for homes without their own CWI recommendation:
#   1. the mean uplift of homes in the same postcode, then
#   2. the median uplift across all homes (note: a median, despite the variable name).
avg_cwi_points_by_postcode = cwi_sap_points.groupby(["postcode"]).agg({"sap_points": "mean"}).reset_index()
avg_cwi_points = cwi_sap_points["sap_points"].median()

# Attach each home's own uplift. The lookup is reduced to one row per address/postcode first;
# otherwise a repeated address would multiply the matching asset rows in this left merge.
asset_list = asset_list.merge(
    cwi_sap_points.drop_duplicates(subset=["address", "postcode"]),
    how="left",
    left_on=["domna_address_1", "domna_postcode"],
    right_on=["address", "postcode"],
).drop(
    columns=["address", "postcode"]
)

# Bring in the postcode mean alongside as "sap_points_avg" ...
asset_list = asset_list.merge(
    avg_cwi_points_by_postcode.rename(columns={"postcode": "domna_postcode"}),
    how="left",
    on=["domna_postcode"],
    suffixes=("", "_avg"),
)
# ... and fill the gaps: postcode mean first, overall median for whatever is still missing.
asset_list["sap_points"] = (
    asset_list["sap_points"].fillna(asset_list["sap_points_avg"]).fillna(avg_cwi_points)
)
asset_list = asset_list.drop(columns=["sap_points_avg"])
# Predicted SAP score once the works are done: the score on the register plus the estimated uplift.
asset_list["post_works_sap"] = asset_list["epc_sap_score_on_register"] + asset_list["sap_points"]

# Columns derived by passing an existing column through a project helper, value by value:
# (new column, source column, helper). Created in this order.
derived_columns = (
    ("post_works_epc", "post_works_sap", sap_to_epc),
    ("starting_half_band", "epc_sap_score_on_register", Funding.get_sap_band),
    ("ending_half_band", "post_works_sap", Funding.get_sap_band),
    ("floor_area_band", "epc_total_floor_area", Funding.get_floor_area_band),
)
for target_column, source_column, converter in derived_columns:
    asset_list[target_column] = asset_list[source_column].apply(converter)
# A home whose predicted post-works EPC band equals the band on the register is labelled "GBIS";
# every other home is labelled "ECO4".
band_unchanged = asset_list["post_works_epc"] == asset_list["epc_rating_on_register"]
asset_list["funding_scheme"] = np.where(band_unchanged, "GBIS", "ECO4")

# Look up the full project score row for each home's (starting band, finishing band, floor area)
# combination, then discard the matrix's own key columns, which duplicate ours.
abs_matrix_keys = ['Starting Band', 'Finishing Band', 'Floor Area Segment']
asset_list = asset_list.merge(
    abs_matrix,
    how="left",
    left_on=["starting_half_band", "ending_half_band", "floor_area_band"],
    right_on=abs_matrix_keys,
)
asset_list = asset_list.drop(columns=abs_matrix_keys)
# Partial project scores: only the "CWI_0.033" rows of the matrix are used.
# NOTE(review): the previous comment here read "CWI solid 1.7 -> 0.3 rates", which does not match
# the measure type filtered below - confirm "CWI_0.033" is the intended measure.
cwi_pps_matrix = pps_matrix[
    pps_matrix["Measure_Type"].isin(["CWI_0.033"])
]

# Attach the partial project score for each home's starting band and floor area band.
# NOTE(review): this assumes one matrix row per (starting band, floor area band) for the measure;
# if the matrix holds several, this merge will repeat asset rows - verify against the workbook.
asset_list = asset_list.merge(
    cwi_pps_matrix[['Starting Band', 'Total Floor Area Band', 'Cost Savings']].rename(
        columns={
            "Cost Savings": "partial_project_score",
            "Starting Band": "starting_half_band",
            "Total Floor Area Band": "floor_area_band"
        }
    ),
    how="left",
    on=["starting_half_band", "floor_area_band"],
)

# Homes whose SAP score on the register is above 69 get no partial project score. Series.mask
# blanks them with NaN, keeping the column numeric (np.where with None made it object-dtype).
# NOTE(review): "> 69" keeps a score for homes at exactly 69 - confirm that boundary is intended.
asset_list["partial_project_score"] = asset_list["partial_project_score"].mask(
    asset_list["epc_sap_score_on_register"] > 69
)

# The funding ABS is the partial project score for GBIS homes and the full project score
# ("Cost Savings", from the full project scores merge) for everything else.
asset_list["funding_abs"] = np.where(
    asset_list["funding_scheme"] == "GBIS",
    asset_list["partial_project_score"],
    asset_list["Cost Savings"]
)
# Write the enriched asset list (bands, funding scheme and ABS estimates) to CSV, without the index.
_ABS_ESTIMATES_OUTPUT_PATH = (
    "/Users/khalimconn-kowlessar/Documents/hestia/Instagroup Review/thrive_abs_estimates.csv"
)
asset_list.to_csv(_ABS_ESTIMATES_OUTPUT_PATH, index=False)

View file

@ -1,3 +1,4 @@
import random
import time
import pandas as pd
from tqdm import tqdm
@ -50,7 +51,7 @@ class AssetListEpcData:
"uprn": r.get("uprn"),
"address": r["address"],
"postcode": r["postcode"],
"recommendations": r["recommendations"]
"recommendations": r.get("recommendations")
} for r in self.extracted_data
]
@ -106,12 +107,31 @@ class AssetListEpcData:
logger.error(f"Error retrieving find my epc data: {e}")
if not pd.isnull(home.get("patch")):
epc_searcher.newest_epc["address1"] = add1
find_epc_searcher = RetrieveFindMyEpc(
address=epc_searcher.newest_epc["address1"],
postcode=epc_searcher.newest_epc["postcode"]
)
find_epc_data = find_epc_searcher.retrieve_newest_find_my_epc_data()
time.sleep(0.5)
try:
find_epc_searcher = RetrieveFindMyEpc(
address=epc_searcher.newest_epc["address1"],
postcode=epc_searcher.newest_epc["postcode"]
)
find_epc_data = find_epc_searcher.retrieve_newest_find_my_epc_data()
except Exception as e:
logger.error("Error retrieving find my epc data with alternative address format: {e}")
find_epc_data = {
"current_epc_rating": epc_searcher.newest_epc["current-energy-rating"],
"current_epc_efficiency": epc_searcher.newest_epc["current-energy-efficiency"],
"potential_epc_rating": None,
"potential_epc_efficiency": None,
"epc_data": {}
}
# Sleep for a random amount of time between 0.5 and 1 seconds to avoid hitting the API rate limit
time.sleep(random.sample(range(50, 100), 1)[0] / 100)
# Every 50 requests, we sleep for 10 seconds to avoid hitting the API rate limit
if len(extracted_data) % 50 == 0 and len(extracted_data) > 0:
logger.info("Sleeping for 10 seconds to avoid hitting API rate limit")
time.sleep(10)
# We need uprn
to_append = {

View file

@ -56,9 +56,11 @@ class RetrieveFindMyEpc:
results = {}
# 1. Total floor area
results['total-floor-area'] = int(self.get_text(
# We have some instances of very old EPCs where the total floor area is not available
tfa = self.get_text(
soup.find("dt", string="Total floor area").find_next_sibling("dd")
).split(" ")[0])
).split(" ")[0]
results['total-floor-area'] = int(tfa) if tfa != "Not" else None
# Table with features
rows = soup.select("table.govuk-table tbody tr")
@ -387,7 +389,9 @@ class RetrieveFindMyEpc:
extracted_address = address_tag.text.strip()
extracted_address_url = address_tag['href']
extracted_address_cleaned = extracted_address.replace(",", "").replace(" ", "").lower()
extracted_address_cleaned = (
extracted_address.replace(",", "").replace(" ", "").lower()
)
if not extracted_address_cleaned.startswith(self.address_cleaned):
continue
@ -667,6 +671,9 @@ class RetrieveFindMyEpc:
"Condensing boiler (separate from the range cooker)": ["boiler_upgrade"],
"Heating controls (programmer and thermostatic radiator valves)": [
"roomstat_programmer_trvs", "time_temperature_zone_control"
],
'Heating controls (programmer room thermostat and thermostatic radiator valves)': [
"roomstat_programmer_trvs", "time_temperature_zone_control"
]
}

View file

@ -21,7 +21,7 @@ birmingham_epcs = birmingham_epcs.sort_values(
birmingham_epcs["postal_region"] = birmingham_epcs["POSTCODE"].str.split(" ").str[0]
addressable_market = birmingham_epcs[
(birmingham_epcs['CURRENT_ENERGY_RATING'].isin(['F', 'G', 'E'])) &
(birmingham_epcs['CURRENT_ENERGY_RATING'].isin(['F', 'G', 'E', 'D'])) &
(birmingham_epcs['LODGEMENT_DATE'] >= '2020-01-01') &
(birmingham_epcs['PROPERTY_TYPE'].isin(['House', 'Bungalow'])) &
(birmingham_epcs['TENURE'].isin(