adding in new kwh client

This commit is contained in:
Khalim Conn-Kowlessar 2024-08-09 12:29:20 +01:00
parent c9720cd78c
commit f3c53847bf
4 changed files with 91 additions and 43 deletions

View file

@ -585,14 +585,14 @@ class Property:
def get_components(
self,
cleaned,
energy_consumption_client,
kwh_client,
kwh_predictions
):
"""
Given the cleaning that has been performed, we'll use this to identify the property
components, from roof to walls to windows, heating and hot water
:param cleaned: This is the dictionary of components found in cleaner.cleaned
:param energy_consumption_client: The client that will be used to convert the energy costs to today's costs
:param kwh_client: The client that will be used to convert the energy costs to today's costs
:param kwh_predictions: Contains the kwh predictions for heating and hot water
:return:
"""
@ -658,7 +658,7 @@ class Property:
self.set_windows_count()
self.set_energy_source()
self.find_energy_sources()
self.set_current_energy_bill(energy_consumption_client, kwh_predictions)
self.set_current_energy_bill(kwh_client, kwh_predictions)
def set_solar_panel_configuration(
self, solar_panel_configuration, roof_area
@ -671,7 +671,7 @@ class Property:
# We also set the roof area
self.roof_area = roof_area
def set_current_energy_bill(self, energy_consumption_client, kwh_predictions):
def set_current_energy_bill(self, kwh_client, kwh_predictions):
"""
Given what we know about the property now, estimates the current energy consumption using the UCL paper
https://www.sciencedirect.com/science/article/pii/S0378778823002542
@ -683,7 +683,7 @@ class Property:
# 2) Predicted KwH
# Today's costs
todays_lighting_cost = energy_consumption_client.convert_cost_to_today(
todays_lighting_cost = kwh_client.convert_cost_to_today(
original_cost=float(self.data["lighting-cost-current"]),
lodgement_date=pd.Timestamp(self.epc_record.prepared_epc["lodgement_date"]).tz_localize(None)
)

View file

@ -431,9 +431,11 @@ async def trigger_plan(body: PlanTriggerRequest):
environment=get_settings().ENVIRONMENT
)
kwh_client = KwhData(bucket=get_settings().DATA_BUCKET, read_consumption_data=True)
model_api = ModelApi(portfolio_id=body.portfolio_id, timestamp=created_at)
epcs_for_scoring = KwhData().transform(data=KwhData().prepare_epc(input_properties), cleaned=cleaned)
epcs_for_scoring = kwh_client.transform(data=kwh_client.prepare_epc(input_properties), cleaned=cleaned)
kwh_predictions = model_api.predict_all(
df=epcs_for_scoring,
@ -444,14 +446,13 @@ async def trigger_plan(body: PlanTriggerRequest):
)
# Insert the spatial data
logger.info("Getting spatial data")
input_properties = OpenUprnClient.set_spatial_data(input_properties, bucket_name=get_settings().DATA_BUCKET)
logger.info("Getting spatial data")
logger.info("Setting property components")
for p in tqdm(input_properties):
p.get_components(
cleaned=cleaned,
energy_consumption_client=energy_consumption_client,
kwh_predictions=kwh_predictions
cleaned=cleaned, kwh_client=kwh_client, kwh_predictions=kwh_predictions
)
logger.info("Performing solar analysis")

View file

@ -126,37 +126,6 @@ class EnergyConsumptionModel:
self.retail_price_comparison = pd.DataFrame(data_rows, columns=header)
self.retail_price_comparison['Date'] = pd.to_datetime(self.retail_price_comparison['Date'], errors='coerce')
def convert_cost_to_today(self, original_cost, lodgement_date):
    """
    Given energy costs in an EPC, this function converts that energy cost to a figure based on today's energy costs
    (or as close to today as possible). The cost is scaled by the ratio of the last tariff in the retail price
    comparison table to the tariff on the date closest to the lodgement date.
    :param original_cost: The original energy cost
    :param lodgement_date: The date the EPC was lodged. It is subtracted from the 'Date' column, so it must be
        compatible with it (e.g. a timezone-naive pd.Timestamp)
    :return: original_cost multiplied by (last tariff / tariff closest to the lodgement date)
    :raises ValueError: If the retail price comparison data is missing or contains no usable dates
    """
    tariff_column = 'Average standard variable tariff (Large legacy suppliers)'
    prices = self.retail_price_comparison
    if prices is None:
        raise ValueError("Retail price comparison data has not been loaded")

    # Absolute distance between every row's date and the lodgement date. The index is reset so that the
    # label returned by idxmin below is also the row position, whatever index the table carries.
    gaps = (prices['Date'] - lodgement_date).abs().reset_index(drop=True)
    if gaps.isna().all():
        raise ValueError("Retail price comparison data contains no usable dates")

    # idxmin skips missing values, so rows whose date could not be parsed (NaT) can never be selected.
    # Sorting with argsort instead can report such rows as position -1, which silently picks the last row.
    closest_position = gaps.idxmin()

    # Extract the tariff price on the date closest to the lodgement date
    tariff_at_lodgement = prices[tariff_column].iloc[closest_position]

    # Extract the latest available tariff price
    # NOTE(review): this takes the last row, so it assumes the table is in ascending date order - confirm
    latest_tariff = prices[tariff_column].iloc[-1]

    # Scale the original cost by how much the tariff has moved since lodgement
    ratio = float(latest_tariff) / float(tariff_at_lodgement)
    return original_cost * ratio
def read_dataset(self, file_path):
"""Reads the dataset from the specified file path."""
logger.info(f"Reading dataset from {file_path}")

View file

@ -4,7 +4,10 @@ import numpy as np
from datetime import datetime
from tqdm import tqdm
from utils.logger import setup_logger
from utils.s3 import list_files_in_s3_folder, read_pickle_from_s3, save_dataframe_to_s3_parquet
from utils.s3 import (
list_files_in_s3_folder, read_pickle_from_s3, save_dataframe_to_s3_parquet, read_dataframe_from_s3_parquet,
read_csv_from_s3
)
from backend.Property import Property
logger = setup_logger()
@ -30,7 +33,7 @@ class KwhData:
'heating-cost-potential', 'hot-water-cost-current', 'current-energy-efficiency'
]
def __init__(self, bucket=None):
def __init__(self, bucket=None, read_consumption_data=False):
self.run_date = datetime.now().strftime("%Y-%m-%d")
self.bucket = bucket
self.data = None
@ -39,6 +42,50 @@ class KwhData:
self.consumption_averages_filepath = None
self.model_training_data_filepath = None
self.consumption_averages = None
self.retail_price_comparison = None
if read_consumption_data:
self.get_consumption_data()
self.read_retail_price_comparison()
def get_consumption_data(self):
    """
    Finds the consumption averages parquet file with the latest run date under "energy_consumption/" in the
    bucket and loads it into self.consumption_averages.
    :raises ValueError: If no consumption averages file is found
    """
    bucket_keys = list_files_in_s3_folder(bucket_name=self.bucket, folder_name="energy_consumption/")

    # Keep only the consumption averages files; the run date is parsed from the second segment of each key
    candidates = []
    for key in bucket_keys:
        if "consumption_averages.parquet" not in key:
            continue
        candidates.append({"run_date": pd.to_datetime(key.split("/")[1]), "filepath": key})

    # Order by run date, oldest first, so the most recent file ends up last
    candidates.sort(key=lambda candidate: candidate["run_date"])

    if not candidates:
        raise ValueError("No consumption averages data found, something went wrong")

    most_recent = candidates[-1]
    self.consumption_averages = read_dataframe_from_s3_parquet(
        bucket_name=self.bucket,
        file_key=most_recent["filepath"]
    )
def read_retail_price_comparison(self):
    """
    Reads the retail price comparison csv from the bucket and stores it on self.retail_price_comparison as a
    dataframe, with the 'Date' column converted to datetimes (unparseable dates become NaT).
    """
    raw_records = read_csv_from_s3(
        bucket_name=self.bucket,
        filepath="energy_consumption/retail-price-comparison.csv"
    )

    column_names = [
        'Date',
        'Average standard variable tariff (Large legacy suppliers)',
        'Average standard variable tariff (Other suppliers)',
        'Average fixed tariff',
        'Cheapest tariff (Large legacy suppliers)',
        'Cheapest tariff (All suppliers)',
        'Cheapest tariff (Basket)',
        'Default tariff cap level',
    ]

    # The first record is skipped. In every other record the date is stored under the '\ufeff"' key and the
    # remaining values are the list stored under the None key.
    # NOTE(review): presumably these odd keys come from a BOM-prefixed, quoted header line - confirm
    table_rows = [[record['\ufeff"']] + record[None] for record in raw_records[1:]]

    frame = self.retail_price_comparison = pd.DataFrame(table_rows, columns=column_names)
    frame['Date'] = pd.to_datetime(frame['Date'], errors='coerce')
@staticmethod
def extract_kwh_value(text: str):
"""
@ -282,3 +329,34 @@ class KwhData:
scoring_data["id"] = scoring_data["uprn"].copy()
return scoring_data
def convert_cost_to_today(self, original_cost, lodgement_date):
    """
    Given energy costs in an EPC, this function converts that energy cost to a figure based on today's energy costs
    (or as close to today as possible). The cost is scaled by the ratio of the last tariff in the retail price
    comparison table to the tariff on the date closest to the lodgement date.
    :param original_cost: The original energy cost
    :param lodgement_date: The date the EPC was lodged. It is subtracted from the 'Date' column, so it must be
        compatible with it (e.g. a timezone-naive pd.Timestamp)
    :return: original_cost multiplied by (last tariff / tariff closest to the lodgement date)
    :raises ValueError: If the retail price comparison data has not been loaded (it is only read when the
        client is created with read_consumption_data=True) or contains no usable dates
    """
    tariff_column = 'Average standard variable tariff (Large legacy suppliers)'
    prices = self.retail_price_comparison
    if prices is None:
        raise ValueError("Retail price comparison data has not been loaded")

    # Absolute distance between every row's date and the lodgement date. The index is reset so that the
    # label returned by idxmin below is also the row position, whatever index the table carries.
    gaps = (prices['Date'] - lodgement_date).abs().reset_index(drop=True)
    if gaps.isna().all():
        raise ValueError("Retail price comparison data contains no usable dates")

    # idxmin skips missing values, so rows whose date could not be parsed (NaT) can never be selected.
    # Sorting with argsort instead can report such rows as position -1, which silently picks the last row.
    closest_position = gaps.idxmin()

    # Extract the tariff price on the date closest to the lodgement date
    tariff_at_lodgement = prices[tariff_column].iloc[closest_position]

    # Extract the latest available tariff price
    # NOTE(review): this takes the last row, so it assumes the table is in ascending date order - confirm
    latest_tariff = prices[tariff_column].iloc[-1]

    # Scale the original cost by how much the tariff has moved since lodgement
    ratio = float(latest_tariff) / float(tariff_at_lodgement)
    return original_cost * ratio