mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
Data pulled for cavity abs estimates
This commit is contained in:
parent
e7eb9b7aed
commit
ba0d5e1473
4 changed files with 183 additions and 11 deletions
145
asset_list/abs_estimates.py
Normal file
145
asset_list/abs_estimates.py
Normal file
|
|
@ -0,0 +1,145 @@
|
|||
"""
|
||||
Simple script to take a standardised asset list and calculate the ABS. We'll use this code to estimate
|
||||
the ABS for properties, going forward
|
||||
"""
|
||||
import os
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from dotenv import load_dotenv
|
||||
from etl.find_my_epc.AssetListEpcData import AssetListEpcData
|
||||
from backend.Funding import Funding
|
||||
from backend.app.utils import sap_to_epc
|
||||
|
||||
# Read the EPC API token from the backend's .env file.
load_dotenv(dotenv_path="backend/.env")
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")

# Locations of the three inputs: the asset list under review, the full
# project scores matrix and the partial project scores matrix.
ASSET_LIST_PATH = "/Users/khalimconn-kowlessar/Documents/hestia/Instagroup Review/Thrive Programme - reconciled.xlsx"
ASSET_LIST_SHEET = "Cavity properties - for review"
ABS_MATRIX_PATH = "/Users/khalimconn-kowlessar/Downloads/ECO4 Full Project Scores Matrix.csv"
PPS_MATRIX_PATH = "/Users/khalimconn-kowlessar/Downloads/ECO4 Partial Project Scores Matrix v5.xlsx"

asset_list = pd.read_excel(ASSET_LIST_PATH, sheet_name=ASSET_LIST_SHEET)
abs_matrix = pd.read_csv(ABS_MATRIX_PATH)

# header=1: the column names of this workbook are taken from its second row.
pps_matrix = pd.read_excel(PPS_MATRIX_PATH, header=1)
# Strip surrounding whitespace from every column name.
pps_matrix = pps_matrix.rename(columns=lambda name: name.strip())
|
||||
|
||||
# Build the frame handed to the EPC client: address, postcode and UPRN of each
# property in the asset list, under shorter column names.
# NOTE(review): an earlier comment here spoke of assuming 7 SAP points for
# cavity and 15 for solar; nothing in this script uses those constants.
# NOTE(review): the UPRN column is renamed to "upr", which looks like a typo
# for "uprn" - confirm against the column name AssetListEpcData expects.
epc_lookup_columns = {
    "domna_address_1": "address",
    "domna_postcode": "postcode",
    "epc_os_uprn": "upr",
}
cavity_route = (
    asset_list[list(epc_lookup_columns)]
    .rename(columns=epc_lookup_columns)
    # Addresses are passed to the client as strings.
    .astype({"address": str})
)

asset_list_epc_client = AssetListEpcData(asset_list=cavity_route, epc_auth_token=EPC_AUTH_TOKEN)

# Populate the client: EPC data first, then the non-invasive recommendations.
asset_list_epc_client.get_data()
asset_list_epc_client.get_non_invasive_recommendations()
|
||||
|
||||
# For every property, record the SAP points of its first cavity wall
# insulation recommendation. Properties with no recommendations at all, or
# with none of that type, are skipped.
cwi_sap_points = []
for r in asset_list_epc_client.non_invasive_recommendations:
    # "recommendations" can be missing, None or empty, hence the `or []`.
    cwi_recommendation = next(
        (x for x in (r.get("recommendations") or []) if "cavity_wall_insulation" in x["type"]),
        None,
    )
    if cwi_recommendation is None:
        continue

    cwi_sap_points.append(
        {
            "address": r["address"],
            "postcode": r["postcode"],
            "sap_points": cwi_recommendation["sap_points"],
        }
    )

# Name the columns explicitly so that "address", "postcode" and "sap_points"
# exist even when the list is empty; a column-less frame would raise a
# KeyError as soon as it is grouped by postcode.
cwi_sap_points = pd.DataFrame(cwi_sap_points, columns=["address", "postcode", "sap_points"])
|
||||
# Store the sap points in the cavity route to csv
|
||||
# cwi_sap_points.to_csv(
|
||||
# "/Users/khalimconn-kowlessar/Documents/hestia/Instagroup Review/cwi_sap_points_livewest_sw.csv",
|
||||
# index=False
|
||||
# )
|
||||
# cwi_sap_points = pd.read_csv(
|
||||
# "/Users/khalimconn-kowlessar/Documents/hestia/Instagroup Review/cwi_sap_points_livewest_sw.csv"
|
||||
# )
|
||||
# Fallback values for properties without a cavity wall recommendation of their
# own: the mean SAP points within each postcode and the median over all
# properties (despite its name, avg_cwi_points holds the median).
avg_cwi_points_by_postcode = cwi_sap_points.groupby(["postcode"]).agg({"sap_points": "mean"}).reset_index()
avg_cwi_points = cwi_sap_points["sap_points"].median()

# Attach each property's own SAP points. The right-hand side is de-duplicated
# on the join keys first: a repeated (address, postcode) pair would otherwise
# multiply the matching asset list rows in this left merge.
asset_list = asset_list.merge(
    cwi_sap_points.drop_duplicates(subset=["address", "postcode"]),
    how="left",
    left_on=["domna_address_1", "domna_postcode"],
    right_on=["address", "postcode"],
).drop(columns=["address", "postcode"])

# Bring in the per-postcode mean as a temporary "sap_points_avg" column.
asset_list = asset_list.merge(
    avg_cwi_points_by_postcode.rename(columns={"postcode": "domna_postcode"}),
    how="left",
    on=["domna_postcode"],
    suffixes=("", "_avg"),
)

# Fill gaps with the postcode mean first, then with the overall median.
asset_list["sap_points"] = (
    asset_list["sap_points"]
    .fillna(asset_list["sap_points_avg"])
    .fillna(avg_cwi_points)
)
asset_list = asset_list.drop(columns=["sap_points_avg"])
|
||||
# Post-works SAP score = score on the register + estimated SAP points, then
# converted to an EPC rating.
register_sap = asset_list["epc_sap_score_on_register"]
asset_list["post_works_sap"] = register_sap + asset_list["sap_points"]
asset_list["post_works_epc"] = asset_list["post_works_sap"].apply(sap_to_epc)

# Band columns used as lookup keys into the score matrices.
asset_list["starting_half_band"] = register_sap.apply(Funding.get_sap_band)
asset_list["ending_half_band"] = asset_list["post_works_sap"].apply(Funding.get_sap_band)
asset_list["floor_area_band"] = asset_list["epc_total_floor_area"].apply(Funding.get_floor_area_band)

# A property whose post-works EPC rating equals its rating on the register is
# labelled GBIS; every other property is labelled ECO4.
epc_rating_unchanged = asset_list["post_works_epc"] == asset_list["epc_rating_on_register"]
asset_list["funding_scheme"] = np.where(epc_rating_unchanged, "GBIS", "ECO4")
|
||||
# Full project scores: match each property on its starting band, finishing
# band and floor area band, then drop the matrix's own key columns. The
# remaining matrix columns stay on the asset list (presumably including the
# "Cost Savings" column read when funding_abs is computed).
abs_matrix_keys = ["Starting Band", "Finishing Band", "Floor Area Segment"]
asset_list = asset_list.merge(
    abs_matrix,
    how="left",
    left_on=["starting_half_band", "ending_half_band", "floor_area_band"],
    right_on=abs_matrix_keys,
).drop(columns=abs_matrix_keys)

# Partial project scores: keep only the "CWI_0.033" measure type.
# NOTE(review): the previous comment here read "Using CWI solid 1.7 -> 0.3
# rates", which does not match the measure type selected - confirm which one
# is intended.
is_cwi_measure = pps_matrix["Measure_Type"].isin(["CWI_0.033"])
cwi_pps_matrix = pps_matrix[is_cwi_measure]

# Rename the lookup columns to the asset list's names and expose the matrix's
# "Cost Savings" as partial_project_score before merging on band + floor area.
cwi_pps_lookup = cwi_pps_matrix[["Starting Band", "Total Floor Area Band", "Cost Savings"]].rename(
    columns={
        "Cost Savings": "partial_project_score",
        "Starting Band": "starting_half_band",
        "Total Floor Area Band": "floor_area_band",
    }
)
asset_list = asset_list.merge(
    cwi_pps_lookup,
    how="left",
    on=["starting_half_band", "floor_area_band"],
)
|
||||
# Properties whose SAP score on the register is above 69 get no partial
# project score. Series.mask blanks those rows with NaN and leaves the column's
# dtype numeric; np.where(cond, None, ...) would instead produce an
# object-dtype column, which then leaks into funding_abs below.
asset_list["partial_project_score"] = asset_list["partial_project_score"].mask(
    asset_list["epc_sap_score_on_register"] > 69
)

# GBIS rows take the partial project score; all other rows take the
# "Cost Savings" column already present on the asset list.
asset_list["funding_abs"] = np.where(
    asset_list["funding_scheme"] == "GBIS",
    asset_list["partial_project_score"],
    asset_list["Cost Savings"],
)

# Store this data
asset_list.to_csv(
    "/Users/khalimconn-kowlessar/Documents/hestia/Instagroup Review/thrive_abs_estimates.csv",
    index=False,
)
|
||||
|
|
@ -1,3 +1,4 @@
|
|||
import random
|
||||
import time
|
||||
import pandas as pd
|
||||
from tqdm import tqdm
|
||||
|
|
@ -50,7 +51,7 @@ class AssetListEpcData:
|
|||
"uprn": r.get("uprn"),
|
||||
"address": r["address"],
|
||||
"postcode": r["postcode"],
|
||||
"recommendations": r["recommendations"]
|
||||
"recommendations": r.get("recommendations")
|
||||
} for r in self.extracted_data
|
||||
]
|
||||
|
||||
|
|
@ -106,12 +107,31 @@ class AssetListEpcData:
|
|||
logger.error(f"Error retrieving find my epc data: {e}")
|
||||
if not pd.isnull(home.get("patch")):
|
||||
epc_searcher.newest_epc["address1"] = add1
|
||||
find_epc_searcher = RetrieveFindMyEpc(
|
||||
address=epc_searcher.newest_epc["address1"],
|
||||
postcode=epc_searcher.newest_epc["postcode"]
|
||||
)
|
||||
find_epc_data = find_epc_searcher.retrieve_newest_find_my_epc_data()
|
||||
time.sleep(0.5)
|
||||
|
||||
try:
|
||||
find_epc_searcher = RetrieveFindMyEpc(
|
||||
address=epc_searcher.newest_epc["address1"],
|
||||
postcode=epc_searcher.newest_epc["postcode"]
|
||||
)
|
||||
find_epc_data = find_epc_searcher.retrieve_newest_find_my_epc_data()
|
||||
except Exception as e:
|
||||
# Log the caught exception itself; without the f prefix the text "{e}" was emitted literally.
logger.error(f"Error retrieving find my epc data with alternative address format: {e}")
|
||||
find_epc_data = {
|
||||
"current_epc_rating": epc_searcher.newest_epc["current-energy-rating"],
|
||||
"current_epc_efficiency": epc_searcher.newest_epc["current-energy-efficiency"],
|
||||
"potential_epc_rating": None,
|
||||
"potential_epc_efficiency": None,
|
||||
"epc_data": {}
|
||||
}
|
||||
|
||||
# Sleep for a random amount of time between 0.5 and 1 seconds to avoid hitting the API rate limit
|
||||
time.sleep(random.sample(range(50, 100), 1)[0] / 100)
|
||||
|
||||
# Every 50 requests, we sleep for 10 seconds to avoid hitting the API rate limit
|
||||
if len(extracted_data) % 50 == 0 and len(extracted_data) > 0:
|
||||
logger.info("Sleeping for 10 seconds to avoid hitting API rate limit")
|
||||
time.sleep(10)
|
||||
|
||||
# We need uprn
|
||||
|
||||
to_append = {
|
||||
|
|
|
|||
|
|
@ -56,9 +56,11 @@ class RetrieveFindMyEpc:
|
|||
results = {}
|
||||
|
||||
# 1. Total floor area
|
||||
results['total-floor-area'] = int(self.get_text(
|
||||
# We have some instances of very old EPCs where the total floor area is not available
|
||||
tfa = self.get_text(
|
||||
soup.find("dt", string="Total floor area").find_next_sibling("dd")
|
||||
).split(" ")[0])
|
||||
).split(" ")[0]
|
||||
results['total-floor-area'] = int(tfa) if tfa != "Not" else None
|
||||
|
||||
# Table with features
|
||||
rows = soup.select("table.govuk-table tbody tr")
|
||||
|
|
@ -387,7 +389,9 @@ class RetrieveFindMyEpc:
|
|||
extracted_address = address_tag.text.strip()
|
||||
extracted_address_url = address_tag['href']
|
||||
|
||||
extracted_address_cleaned = extracted_address.replace(",", "").replace(" ", "").lower()
|
||||
extracted_address_cleaned = (
|
||||
extracted_address.replace(",", "").replace(" ", "").lower()
|
||||
)
|
||||
if not extracted_address_cleaned.startswith(self.address_cleaned):
|
||||
continue
|
||||
|
||||
|
|
@ -667,6 +671,9 @@ class RetrieveFindMyEpc:
|
|||
"Condensing boiler (separate from the range cooker)": ["boiler_upgrade"],
|
||||
"Heating controls (programmer and thermostatic radiator valves)": [
|
||||
"roomstat_programmer_trvs", "time_temperature_zone_control"
|
||||
],
|
||||
'Heating controls (programmer room thermostat and thermostatic radiator valves)': [
|
||||
"roomstat_programmer_trvs", "time_temperature_zone_control"
|
||||
]
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -21,7 +21,7 @@ birmingham_epcs = birmingham_epcs.sort_values(
|
|||
birmingham_epcs["postal_region"] = birmingham_epcs["POSTCODE"].str.split(" ").str[0]
|
||||
|
||||
addressable_market = birmingham_epcs[
|
||||
(birmingham_epcs['CURRENT_ENERGY_RATING'].isin(['F', 'G', 'E'])) &
|
||||
(birmingham_epcs['CURRENT_ENERGY_RATING'].isin(['F', 'G', 'E', 'D'])) &
|
||||
(birmingham_epcs['LODGEMENT_DATE'] >= '2020-01-01') &
|
||||
(birmingham_epcs['PROPERTY_TYPE'].isin(['House', 'Bungalow'])) &
|
||||
(birmingham_epcs['TENURE'].isin(
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue