Completed set up of bills costs

This commit is contained in:
Khalim Conn-Kowlessar 2024-07-09 14:03:21 +01:00
parent 5274b9b713
commit 4c79abe512
3 changed files with 115 additions and 44 deletions

View file

@ -643,14 +643,24 @@ class Property:
https://www.sciencedirect.com/science/article/pii/S0378778823002542
:return:
"""
# We should adjust the costs first and then calculate the energy consumption
# These are the costs at the time that the EPC was created - we need a cost based on today's prices so
# we do the following:
# 1) Score the models to get kwh for heating and hot water
# 2) Adjust the kwh values with the UCL paper
# 3) Convert the kwh values to costs today using current prices
#
# For the moment, we'll just use the same cost for lighting, since the value is quite low
# We get the following things:
# 1) Today's cost. This gives us a baseline figure for what the cost is today
# 2) Predicted KwH
# Today's costs
todays_heating_cost = energy_consumption_client.convert_cost_to_today(
original_cost=float(self.data["heating-cost-current"]),
lodgement_date=pd.Timestamp(self.epc_record.prepared_epc["lodgement_date"])
)
todays_hot_water_cost = energy_consumption_client.convert_cost_to_today(
original_cost=float(self.data["hot-water-cost-current"]),
lodgement_date=pd.Timestamp(self.epc_record.prepared_epc["lodgement_date"])
)
todays_lighting_cost = energy_consumption_client.convert_cost_to_today(
original_cost=float(self.data["lighting-cost-current"]),
lodgement_date=pd.Timestamp(self.epc_record.prepared_epc["lodgement_date"])
)
scoring_df = pd.DataFrame([self.epc_record.prepared_epc])
# Change columns from underscores to hyphens
@ -694,33 +704,34 @@ class Property:
current_epc_rating=self.data["current-energy-rating"],
)
# Convert to cost
if self.heating_energy_source == "Electricity":
adjusted_heating_cost = adjusted_heating_kwh * AnnualBillSavings.ELECTRICITY_PRICE_CAP
unadjusted_heating_cost = heating_prediction * AnnualBillSavings.ELECTRICITY_PRICE_CAP
elif self.heating_energy_source == "Natural Gas":
adjusted_heating_cost = adjusted_heating_kwh * AnnualBillSavings.GAS_PRICE_CAP
unadjusted_heating_cost = heating_prediction * AnnualBillSavings.GAS_PRICE_CAP
else:
raise NotImplementedError("Not implemented cost for this fuel type")
# Adjust today's cost figures with the UCL model
adjusted_heating_cost = AnnualBillSavings.adjust_energy_cost_to_metered(
epc_energy_cost=todays_heating_cost,
current_epc_rating=self.data["current-energy-rating"],
)
if self.hot_water_energy_source == "Electricity":
adjusted_hot_water_cost = adjusted_hot_water_kwh * AnnualBillSavings.ELECTRICITY_PRICE_CAP
unadjusted_hot_water_cost = hot_water_prediction * AnnualBillSavings.ELECTRICITY_PRICE_CAP
elif self.hot_water_energy_source == "Natural Gas":
adjusted_hot_water_cost = adjusted_hot_water_kwh * AnnualBillSavings.GAS_PRICE_CAP
unadjusted_hot_water_cost = hot_water_prediction * AnnualBillSavings.GAS_PRICE_CAP
else:
raise NotImplementedError("Not implemented cost for this fuel type")
adjusted_hot_water_cost = AnnualBillSavings.adjust_energy_cost_to_metered(
epc_energy_cost=todays_hot_water_cost,
current_epc_rating=self.data["current-energy-rating"],
)
adjusted_lighting_cost = adjusted_lighting_kwh * AnnualBillSavings.ELECTRICITY_PRICE_CAP
adjusted_lighting_cost = AnnualBillSavings.adjust_energy_cost_to_metered(
epc_energy_cost=todays_lighting_cost,
current_epc_rating=self.data["current-energy-rating"],
)
adjusted_appliances_cost = adjusted_applicances_kwh * AnnualBillSavings.ELECTRICITY_PRICE_CAP
adjusted_appliances_cost = AnnualBillSavings.adjust_energy_cost_to_metered(
epc_energy_cost=appliances_kwh * AnnualBillSavings.ELECTRICITY_PRICE_CAP,
current_epc_rating=self.data["current-energy-rating"],
)
# Sum up the adjusted kwh figures
self.current_adjusted_energy = (
adjusted_heating_kwh + adjusted_hot_water_kwh + adjusted_lighting_kwh + adjusted_applicances_kwh
)
self.expected_energy_bill = (
adjusted_heating_cost + adjusted_hot_water_cost + adjusted_lighting_cost + adjusted_appliances_cost
)
self.energy_cost_estimates = {
"adjusted": {
@ -730,9 +741,9 @@ class Property:
"appliances": adjusted_appliances_cost
},
"unadjusted": {
"heating": unadjusted_heating_cost,
"hot_water": unadjusted_hot_water_cost,
"lighting": float(self.data["lighting-cost-current"]),
"heating": todays_heating_cost,
"hot_water": todays_hot_water_cost,
"lighting": todays_lighting_cost,
"appliances": appliances_kwh * AnnualBillSavings.ELECTRICITY_PRICE_CAP
},
"epc": {
@ -757,10 +768,6 @@ class Property:
}
}
self.expected_energy_bill = (
adjusted_heating_cost + adjusted_hot_water_cost + adjusted_lighting_cost + adjusted_appliances_cost
)
def set_spatial(self, spatial: pd.DataFrame):
"""
Sets whether the property is in a conservation area given the output of the ConservationAreaClient

View file

@ -359,7 +359,8 @@ async def trigger_plan(body: PlanTriggerRequest):
},
dummy_schema_path=f"model_directory/energy_consumption_model/{dataset_version}_dummy_schema.pkl",
consumption_average_path=f"energy_consumption/{dataset_version}/consumption_averages.parquet",
cleaned=cleaned
cleaned=cleaned,
environment=get_settings().ENVIRONMENT
)
logger.info("Getting spatial data")

View file

@ -1,11 +1,10 @@
import pandas as pd
import numpy as np
from xgboost import XGBRegressor
from datetime import datetime
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_percentage_error
from sklearn.feature_selection import RFECV
from utils.s3 import save_pickle_to_s3, read_pickle_from_s3, read_dataframe_from_s3_parquet
from utils.s3 import save_pickle_to_s3, read_pickle_from_s3, read_dataframe_from_s3_parquet, read_csv_from_s3
from utils.logger import setup_logger
logger = setup_logger()
@ -46,11 +45,17 @@ class EnergyConsumptionModel:
"low-energy-lighting", "environment-impact-current", "energy-tariff", "current-energy-rating"
]
def __init__(self, cleaned, model_paths=None, dummy_schema_path=None, consumption_average_path=None, n_jobs=1):
retail_price_comparison = None
def __init__(
self, cleaned, model_paths=None, dummy_schema_path=None, consumption_average_path=None, n_jobs=1,
environment="dev"
):
self.cleaned = cleaned
self.models = {}
self.model_paths = model_paths or {}
self.n_jobs = n_jobs
self.environment = environment
self.data = None
self.input_data = None
@ -76,26 +81,84 @@ class EnergyConsumptionModel:
if model_paths:
for target, path in model_paths.items():
# Read model
self.models[target] = read_pickle_from_s3(bucket_name="retrofit-model-directory-dev", s3_file_name=path)
self.models[target] = read_pickle_from_s3(
bucket_name=f"retrofit-model-directory-{environment}", s3_file_name=path
)
# Read dummy schema
if dummy_schema_path:
self.dummy_schema = read_pickle_from_s3(
bucket_name="retrofit-model-directory-dev",
bucket_name=f"retrofit-model-directory-{environment}",
s3_file_name=dummy_schema_path
)
self.consumption_averages = None
if consumption_average_path:
self.consumption_averages = read_dataframe_from_s3_parquet(
bucket_name="retrofit-data-dev",
bucket_name=f"retrofit-data-{environment}",
file_key=consumption_average_path
)
# We also retrieve the newest retail price comparison data which comes from Ofgem:
# https://www.ofgem.gov.uk/energy-data-and-research/data-portal/retail-market-indicators
# We use the retail price comparison by company and tariff type data
self.read_retail_price_comparison()
def read_retail_price_comparison(self):
    """
    Load the retail price comparison CSV from S3 and store it on ``self.retail_price_comparison``.

    The file is fetched from the ``retrofit-data-<environment>`` bucket. The first entry returned by
    ``read_csv_from_s3`` is skipped and every remaining entry is flattened into one row of the
    resulting DataFrame, whose ``Date`` column is parsed to datetimes (unparseable values become NaT).

    :return: None - the DataFrame is stored on the instance
    """
    column_names = ['Date', 'Average standard variable tariff (Large legacy suppliers)',
                    'Average standard variable tariff (Other suppliers)', 'Average fixed tariff',
                    'Cheapest tariff (Large legacy suppliers)', 'Cheapest tariff (All suppliers)',
                    'Cheapest tariff (Basket)', 'Default tariff cap level']

    raw_entries = read_csv_from_s3(
        bucket_name=f"retrofit-data-{self.environment}",
        filepath="energy_consumption/retail-price-comparison.csv"
    )

    # NOTE(review): presumably each entry is a csv.DictReader-style mapping where the date sits under
    # the BOM-prefixed first header and the remaining fields are collected under the None key -
    # confirm against read_csv_from_s3.
    flattened = [[entry['\ufeff"']] + entry[None] for entry in raw_entries[1:]]

    price_table = pd.DataFrame(flattened, columns=column_names)
    price_table['Date'] = pd.to_datetime(price_table['Date'], errors='coerce')
    self.retail_price_comparison = price_table
def convert_cost_to_today(self, original_cost, lodgement_date):
    """
    Given an energy cost in an EPC, convert it to a figure based on today's energy prices
    (or as close to today as the price data allows).

    The cost is scaled by the ratio between the most recent
    'Average standard variable tariff (Large legacy suppliers)' value in
    ``self.retail_price_comparison`` and the value on the date closest to the EPC lodgement date.

    :param original_cost: The original energy cost (heating, hot water, lighting, ...)
    :param lodgement_date: The date the EPC was lodged, comparable with the table's 'Date' column
    :return: The original cost multiplied by (latest tariff / tariff at lodgement)
    :raises IndexError: If the price table contains no rows with a usable date
    """
    tariff_column = 'Average standard variable tariff (Large legacy suppliers)'

    # Dates that failed to parse are NaT; such rows can neither be matched against the lodgement
    # date nor be trusted as the "latest" entry, so they are ignored.
    prices = self.retail_price_comparison.dropna(subset=['Date'])
    if prices.empty:
        raise IndexError("No dated rows available in the retail price comparison data")

    dates = prices['Date']
    tariffs = prices[tariff_column]

    # Tariff on the date closest to when the EPC was lodged
    closest_position = (dates - lodgement_date).abs().to_numpy().argmin()
    tariff_at_lodgement = tariffs.iloc[closest_position]

    # Latest available tariff, selected by date rather than row position so that the result does
    # not depend on how the source file happens to be ordered
    latest_position = dates.to_numpy().argmax()
    latest_tariff = tariffs.iloc[latest_position]

    # Scale the original cost by how much the tariff has moved since lodgement
    ratio = float(latest_tariff) / float(tariff_at_lodgement)
    return original_cost * ratio
def read_dataset(self, file_path):
    """
    Reads the dataset from the specified file path.

    The parquet file is read from the ``retrofit-data-<environment>`` bucket and stored on
    ``self.data``; an independent copy is kept on ``self.input_data``.

    :param file_path: Key of the parquet file within the data bucket
    :return: None - results are stored on the instance
    """
    logger.info(f"Reading dataset from {file_path}")
    # Read once, from the bucket for this instance's environment. The earlier read from the
    # hard-coded "retrofit-data-dev" bucket was immediately overwritten, so it has been removed.
    self.data = read_dataframe_from_s3_parquet(
        bucket_name=f"retrofit-data-{self.environment}", file_key=file_path
    )
    self.input_data = self.data.copy()
def feature_engineering(self, drop_first=False):
@ -392,7 +455,7 @@ class EnergyConsumptionModel:
logger.info(f"Saving model for target {target}")
save_pickle_to_s3(
self.models[target],
bucket_name="retrofit-model-directory-dev",
bucket_name=f"retrofit-model-directory-{self.environment}",
s3_file_name=f"model_directory/energy_consumption_model/{target}_{dataset_version}.pkl"
)
@ -400,7 +463,7 @@ class EnergyConsumptionModel:
logger.info("Saving dummy schema for target {target}")
save_pickle_to_s3(
self.dummy_schema,
bucket_name="retrofit-model-directory-dev",
bucket_name=f"retrofit-model-directory-{self.environment}",
s3_file_name=f"model_directory/energy_consumption_model/{dataset_version}_dummy_schema.pkl"
)