simplified extraction of costs and kwh predictions

This commit is contained in:
Khalim Conn-Kowlessar 2024-08-06 17:04:58 +01:00
parent a3e119001c
commit 891545804e
7 changed files with 174 additions and 134 deletions

View file

@ -235,12 +235,9 @@ class Property:
def parse_kwargs(self, kwargs):
# We extract the elements from kwargs that we recognise. Anything additional is ignored
self.n_bathrooms = kwargs.get("n_bathrooms", None)
self.n_bedrooms = kwargs.get("n_bedrooms", None)
self.building_id = kwargs.get("building_id", None)
self.number_of_floors = kwargs.get("number_of_floors", None)
self.insulation_floor_area = kwargs.get("insulation_floor_area", None)
self.insulation_wall_area = kwargs.get("insulation_wall_area", None)
for arg, val in kwargs.items():
if val is not None:
setattr(self, arg, val)
def create_base_difference_epc_record(self, cleaned_lookup: dict):
"""
@ -574,7 +571,8 @@ class Property:
def get_components(
self,
cleaned,
energy_consumption_client
energy_consumption_client,
kwh_predictions
):
"""
Given the cleaning that has been performed, we'll use this to identify the property
@ -582,6 +580,7 @@ class Property:
:param cleaned: This is the dictionary of components found in cleaner.cleaned
:param energy_consumption_client: Contains the heating and hot water kwh models - used to predict current
energy annual consumption in kWh
:param kwh_predictions: Contains the kwh predictions for heating and hot water
:return:
"""
@ -646,7 +645,7 @@ class Property:
self.set_windows_count()
self.set_energy_source()
self.find_energy_sources()
self.set_current_energy_bill(energy_consumption_client)
self.set_current_energy_bill(energy_consumption_client, kwh_predictions)
def set_solar_panel_configuration(
self, solar_panel_configuration, roof_area
@ -659,7 +658,7 @@ class Property:
# We also set the roof area
self.roof_area = roof_area
def set_current_energy_bill(self, energy_consumption_client):
def set_current_energy_bill(self, energy_consumption_client, kwh_predictions):
"""
Given what we know about the property now, estimates the current energy consumption using the UCL paper
https://www.sciencedirect.com/science/article/pii/S0378778823002542
@ -687,97 +686,86 @@ class Property:
# If we have the kwh figures, we don't need to predict them
condition_data = self.energy_assessment_condition_data.copy()
scoring_df = pd.DataFrame([self.epc_record.prepared_epc])
# Change columns from underscores to hyphens
scoring_df.columns = [
x.lower().replace("_", "-") for x in scoring_df.columns
]
for col in ["heating_kwh", "hot_water_kwh"]:
scoring_df[col] = None
energy_consumption_client.data = None
# scoring_df = pd.DataFrame([self.epc_record.prepared_epc])
# # Change columns from underscores to hyphens
# scoring_df.columns = [
# x.lower().replace("_", "-") for x in scoring_df.columns
# ]
# for col in ["heating_kwh", "hot_water_kwh"]:
# scoring_df[col] = None
#
# energy_consumption_client.data = None
heating_kwh_predictions = kwh_predictions["heating_kwh_predictions"]
hotwater_kwh_predictions = kwh_predictions["hotwater_kwh_predictions"]
heating_prediction = (
float(condition_data["space_heating_kwh"]) if condition_data.get("space_heating_kwh") is not None
else energy_consumption_client.score_new_data(
new_data=scoring_df, target="heating_kwh"
)[0]
condition_data.get("space_heating_kwh") if condition_data.get("space_heating_kwh") is not None else
heating_kwh_predictions[
heating_kwh_predictions["id"].astype(int) == self.uprn
]["predictions"].values[0]
)
# heating_prediction = (
# float(condition_data["space_heating_kwh"]) if condition_data.get("space_heating_kwh") is not None
# else energy_consumption_client.score_new_data(
# new_data=scoring_df, target="heating_kwh"
# )[0]
# )
hot_water_prediction = (
float(condition_data["water_heating_kwh"]) if condition_data.get("water_heating_kwh") is not None
else energy_consumption_client.score_new_data(
new_data=scoring_df, target="hot_water_kwh"
)[0]
condition_data.get("water_heating_kwh") if condition_data.get("water_heating_kwh") is not None else
hotwater_kwh_predictions[
hotwater_kwh_predictions["id"].astype(int) == self.uprn
]["predictions"].values[0]
)
# hot_water_prediction = (
# float(condition_data["water_heating_kwh"]) if condition_data.get("water_heating_kwh") is not None
# else energy_consumption_client.score_new_data(
# new_data=scoring_df, target="hot_water_kwh"
# )[0]
# )
# We convert the lighting cost into kwh, just using the price cap
lighting_kwh = float(self.data["lighting-cost-current"]) / AnnualBillSavings.ELECTRICITY_PRICE_CAP
lighting_kwh = todays_lighting_cost / AnnualBillSavings.ELECTRICITY_PRICE_CAP
appliances_kwh = AnnualBillSavings.estimate_appliances_energy_use(total_floor_area=self.floor_area)
adjusted_heating_kwh = AnnualBillSavings.adjust_energy_to_metered(
epc_energy=heating_prediction,
current_epc_rating=self.data["current-energy-rating"],
)
unadjusted_kwh_estimates = {
"heating": heating_prediction,
"hot_water": hot_water_prediction,
"lighting": lighting_kwh,
"appliances": appliances_kwh
}
adjusted_hot_water_kwh = AnnualBillSavings.adjust_energy_to_metered(
epc_energy=hot_water_prediction,
current_epc_rating=self.data["current-energy-rating"],
)
adjusted_kwh_estimates = {
k: AnnualBillSavings.adjust_energy_to_metered(
epc_energy=v,
current_epc_rating=self.data["current-energy-rating"],
) for k, v in unadjusted_kwh_estimates.items()
}
adjusted_lighting_kwh = AnnualBillSavings.adjust_energy_to_metered(
epc_energy=lighting_kwh,
current_epc_rating=self.data["current-energy-rating"],
)
unadjusted_heating_costs = {
"heating": todays_heating_cost,
"hot_water": todays_hot_water_cost,
"lighting": todays_lighting_cost,
"appliances": appliances_kwh * AnnualBillSavings.ELECTRICITY_PRICE_CAP
}
adjusted_applicances_kwh = AnnualBillSavings.adjust_energy_to_metered(
epc_energy=appliances_kwh,
current_epc_rating=self.data["current-energy-rating"],
)
# Adjust today's cost figures with the UCL model
adjusted_heating_cost = AnnualBillSavings.adjust_energy_to_metered(
epc_energy=todays_heating_cost,
current_epc_rating=self.data["current-energy-rating"],
)
adjusted_hot_water_cost = AnnualBillSavings.adjust_energy_to_metered(
epc_energy=todays_hot_water_cost,
current_epc_rating=self.data["current-energy-rating"],
)
adjusted_lighting_cost = AnnualBillSavings.adjust_energy_to_metered(
epc_energy=todays_lighting_cost,
current_epc_rating=self.data["current-energy-rating"],
)
adjusted_appliances_cost = AnnualBillSavings.adjust_energy_to_metered(
epc_energy=appliances_kwh * AnnualBillSavings.ELECTRICITY_PRICE_CAP,
current_epc_rating=self.data["current-energy-rating"],
)
adjusted_heating_costs = {
k: AnnualBillSavings.adjust_energy_to_metered(
epc_energy=v,
current_epc_rating=self.data["current-energy-rating"],
) for k, v in unadjusted_heating_costs.items()
}
# Sum up the adjusted kwh figures
self.current_adjusted_energy = (
adjusted_heating_kwh + adjusted_hot_water_kwh + adjusted_lighting_kwh + adjusted_applicances_kwh
)
self.current_energy_bill = (
adjusted_heating_cost + adjusted_hot_water_cost + adjusted_lighting_cost + adjusted_appliances_cost
)
self.current_adjusted_energy = sum(list(adjusted_kwh_estimates.values()))
self.current_energy_bill = sum(list(adjusted_heating_costs.values()))
self.energy_cost_estimates = {
"adjusted": {
"heating": adjusted_heating_cost,
"hot_water": adjusted_hot_water_cost,
"lighting": adjusted_lighting_cost,
"appliances": adjusted_appliances_cost
},
"unadjusted": {
"heating": todays_heating_cost,
"hot_water": todays_hot_water_cost,
"lighting": todays_lighting_cost,
"appliances": appliances_kwh * AnnualBillSavings.ELECTRICITY_PRICE_CAP
},
"adjusted": adjusted_heating_costs,
"unadjusted": unadjusted_heating_costs,
"epc": {
"heating": float(self.data["heating-cost-current"]),
"hot_water": float(self.data["hot-water-cost-current"]),
@ -786,18 +774,8 @@ class Property:
}
self.energy_consumption_estimates = {
"adjusted": {
"heating": adjusted_heating_kwh,
"hot_water": adjusted_hot_water_kwh,
"lighting": adjusted_lighting_kwh,
"appliances": adjusted_applicances_kwh
},
"unadjusted": {
"heating": heating_prediction,
"hot_water": hot_water_prediction,
"lighting": lighting_kwh,
"appliances": appliances_kwh
}
"adjusted": adjusted_kwh_estimates,
"unadjusted": unadjusted_kwh_estimates
}
def set_spatial(self, spatial: pd.DataFrame):

View file

@ -30,6 +30,8 @@ class Settings(BaseSettings):
LIGHTING_COST_PREDICTIONS_BUCKET: str
HEATING_COST_PREDICTIONS_BUCKET: str
HOT_WATER_COST_PREDICTIONS_BUCKET: str
HEATING_KWH_PREDICTIONS_BUCKET: str
HOTWATER_KWH_PREDICTIONS_BUCKET: str
class Config:
env_file = "backend/.env"
@ -48,5 +50,7 @@ def get_prediction_buckets():
"carbon_change_predictions": get_settings().CARBON_PREDICTIONS_BUCKET,
"lighting_cost_predictions": get_settings().LIGHTING_COST_PREDICTIONS_BUCKET,
"heating_cost_predictions": get_settings().HEATING_COST_PREDICTIONS_BUCKET,
"hot_water_cost_predictions": get_settings().HOT_WATER_COST_PREDICTIONS_BUCKET
"hot_water_cost_predictions": get_settings().HOT_WATER_COST_PREDICTIONS_BUCKET,
"heating_kwh_predictions": get_settings().HEATING_KWH_PREDICTIONS_BUCKET,
"hotwater_kwh_predictions": get_settings().HOTWATER_KWH_PREDICTIONS_BUCKET,
}

View file

@ -432,24 +432,20 @@ async def trigger_plan(body: PlanTriggerRequest):
environment=get_settings().ENVIRONMENT
)
epcs_for_scoring = pd.DataFrame([energy_consumption_client.prepare_new_data(p) for p in input_properties])
# What do we need?
# We need an estimate of each properties energy consumption now, as well as the cost of heating and hot water
# The newest EPC may have been done quite some time ago, and so we should take this into consideration when
# producing the estimate for cost. With that said, we already have a methodology which will re-map the cost
# when the EPC was produced to a cost for today, however could we use the ML models.
# In theory, we could just score the kwh models via the API, pass the results into the get_components function
# and insert the kwh figures into the property and we're done
# TODO: Need to check if we need to re-map when scoring new data or not
model_api = ModelApi(portfolio_id=body.portfolio_id, timestamp=created_at)
# We need to prepare the EPC so it's in the same format as the training data
# TODO: DELETE ME
# from utils.s3 import read_dataframe_from_s3_parquet
# train = read_dataframe_from_s3_parquet(
# bucket_name="retrofit-data-dev",
# file_key="energy_consumption/2024-07-08/energy_consumption_dataset.parquet"
# )
# We need to prepare the EPC so it's in the same format as the training data
epcs_for_scoring = energy_consumption_client.prepare_new_data(input_properties)
# prepare the data
# TODO: Some junk is being returned by the heating kwh model!
kwh_predictions = model_api.predict_all(
df=epcs_for_scoring,
bucket=get_settings().DATA_BUCKET,
prediction_buckets=get_prediction_buckets(),
model_prefixes=["heating_kwh_predictions", "hotwater_kwh_predictions"],
extract_ids=False
)
# TODO: Move this/tidy it up
uprn_map = {}
@ -478,7 +474,11 @@ async def trigger_plan(body: PlanTriggerRequest):
for p in tqdm(input_properties):
if p.spatial is None:
raise Exception("Missed setting of spatial data for a property")
p.get_components(cleaned=cleaned, energy_consumption_client=energy_consumption_client)
p.get_components(
cleaned=cleaned,
# energy_consumption_client=energy_consumption_client # TODO: Full remove me
kwh_predictions=kwh_predictions
)
logger.info("Performing solar analysis")
# TODO: Tidy this up
@ -663,8 +663,6 @@ async def trigger_plan(body: PlanTriggerRequest):
"carbon_ending"]
)
model_api = ModelApi(portfolio_id=body.portfolio_id, timestamp=created_at)
all_predictions = model_api.predictions_template()
to_loop_over = range(0, recommendations_scoring_data.shape[0], SCORING_BATCH_SIZE)
for chunk in tqdm(to_loop_over, total=len(to_loop_over)):

View file

@ -15,6 +15,8 @@ class ModelApi:
"lighting_cost_predictions",
"heating_cost_predictions",
"hot_water_cost_predictions",
"hotwater_kwh_predictions",
"heating_kwh_predictions",
]
MODEL_URLS = {
@ -24,6 +26,8 @@ class ModelApi:
"lighting_cost_predictions": "lightingmodel",
"heating_cost_predictions": "heatingmodel",
"hot_water_cost_predictions": "hotwatermodel",
"hotwater_kwh_predictions": "hotwaterkwhmodel",
"heating_kwh_predictions": "heatingkwhmodel",
}
def __init__(
@ -123,7 +127,7 @@ class ModelApi:
else:
return None
def predict_all(self, df, bucket, prediction_buckets) -> dict:
def predict_all(self, df, bucket, prediction_buckets, model_prefixes=None, extract_ids=True) -> dict:
"""
For each model prefix, this method will upload the scoring data to s3 and then make a request to the
@ -133,11 +137,17 @@ class ModelApi:
:param df: Pandas dataframe with scoring data to be uploaded to s3
:param bucket: Name of the bucket in s3 to upload to
:param prediction_buckets: Dictionary containing the prediction buckets for each model prefix
:param model_prefixes: List of model prefixes to generate predictions for. If None, all model prefixes will be
used
:param extract_ids: Boolean to determine if the property_id and recommendation_id should be extracted from the
id column
:return:
"""
model_prefixes = self.MODEL_PREFIXES if model_prefixes is None else model_prefixes
predictions = {}
for model_prefix in self.MODEL_PREFIXES:
for model_prefix in model_prefixes:
logger.info(f"Scoring for model prefix: {model_prefix}")
file_location = self.upload_scoring_data(df, bucket, model_prefix)
response = self.predict(
@ -155,15 +165,17 @@ class ModelApi:
)
predictions_df['predictions'] = predictions_df["predictions"].astype(float).round(1)
predictions_df[['property_id', 'recommendation_id']] = predictions_df['id'].str.split('+', expand=True)
# To grab the phase, we pull the integer after "phase=" in the recommendation_id. We can do this with a
# string split on phase= and then grab the second element of the resulting list. We could also use a
# regular expression to do this but we use the string split method here, for safety.
# We may not always have a phase to split on, so we need to handle this case. We can do this by using the
# str[1] method to grab the second element of the resulting list. We then grab the first character of this
# string to get the phase. We then convert this to an integer.
# Convert back to int
predictions_df['phase'] = predictions_df['recommendation_id'].apply(self.extract_phase)
if extract_ids:
predictions_df[['property_id', 'recommendation_id']] = predictions_df['id'].str.split('+', expand=True)
# To grab the phase, we pull the integer after "phase=" in the recommendation_id. We can do this with a
# string split on phase= and then grab the second element of the resulting list. We could also use a
# regular expression to do this but we use the string split method here, for safety.
# We may not always have a phase to split on, so we need to handle this case. We can do this by using
# the str[1] method to grab the second element of the resulting list. We then grab the first
# character of this
# string to get the phase. We then convert this to an integer.
# Convert back to int
predictions_df['phase'] = predictions_df['recommendation_id'].apply(self.extract_phase)
predictions[model_prefix] = predictions_df

View file

@ -508,7 +508,7 @@ class EnergyConsumptionModel:
return prediction
@staticmethod
def prepare_new_data(p: Property):
def _prepare_new_data(p: Property):
"""
Given an instance of the property class, this method will ensure that the EPC is ready for scoring with the
kwh models. In the backend, we perform some cleaning and transformation on an EPC so we just ensure that the
@ -558,6 +558,15 @@ class EnergyConsumptionModel:
return epc
def prepare_new_data(self, input_properties: list[Property]):
scoring_data = pd.DataFrame([self._prepare_new_data(p) for p in input_properties])
scoring_data["lodgement-year"] = pd.to_datetime(scoring_data["lodgement-date"]).dt.year
scoring_data["lodgement-month"] = pd.to_datetime(scoring_data["lodgement-date"]).dt.month
scoring_data["id"] = scoring_data["uprn"].copy()
return scoring_data
@staticmethod
def calculate_percentage_decrease(start_efficiency, end_efficiency, consumption_averages):

View file

@ -30,6 +30,8 @@ def make_asset_list():
epc_data = epc_data[~pd.isnull(epc_data["uprn"])]
epc_data["uprn"] = epc_data["uprn"].astype(int).astype(str)
# Take the newest EPC per uprn
epc_data = epc_data.sort_values("lodgement-date").groupby("uprn").last().reset_index()
# /Users/khalimconn-kowlessar/Documents/hestia/Customers/Newhaven/Data/
# We read in the multiple data sources
address_base = pd.read_csv(
@ -72,7 +74,7 @@ def make_asset_list():
].merge(
epc_data[
["uprn", "current-energy-efficiency", "current-energy-rating", "address1", "postcode", "floor-height",
"property-type", "built-form"]],
"property-type", "built-form", "co2-emissions-current"]],
how="left",
left_on="UPRN",
right_on="uprn"
@ -86,6 +88,21 @@ def make_asset_list():
columns={"Wall Area [m^2]": "insulation_wall_area", "Building Area [m^2]": "floor_area"}
)
had_an_epc = asset_list[~pd.isnull(asset_list["current-energy-efficiency"])]
below_b = asset_list[asset_list["current-energy-efficiency"].astype(float) <= 80].shape
below_c = asset_list[asset_list["current-energy-efficiency"].astype(float) <= 69].shape
had_an_epc["energy-efficiency-rating"].value_counts()
asset_list["current-energy-rating"].value_counts()
asset_list["co2-emissions-current"].mean()
# Get the underlying data of a histograme
import matplotlib.pyplot as plt
n, bins, patches = plt.hist(asset_list["co2-emissions-current"], bins=100, color="blue", alpha=0.7)
bins = np.arange(0, asset_list["co2-emissions-current"].max(), 1) # Bins from 50 to 150 with a step of 10
# Step 3: Calculate the frequency of data in each bin
hist, bin_edges = np.histogram(asset_list["co2-emissions-current"], bins=bins)
# Take properties below a B - there are 2844 units
asset_list = asset_list[asset_list["current-energy-efficiency"].astype(float) <= 80]
# Drop caravans
@ -235,8 +252,7 @@ def make_asset_list():
file_name=non_invasive_recommendations_filename
)
# Create two scenarios
# Scenario A
# Create three scenarios
body1 = {
"portfolio_id": str(PORTFOLIO_ID),
"housing_type": "Private",
@ -246,14 +262,16 @@ def make_asset_list():
"already_installed_file_path": "",
"patches_file_path": "",
"non_invasive_recommendations_file_path": non_invasive_recommendations_filename,
"scenario_name": "Fabric - no solid wall",
"scenario_name": "Demand Reduction - no solid wall",
"multi_plan": True,
"exclusions": ["internal_wall_insulation", "external_wall_insulation", "floor_insulation"],
"exclusions": [
"internal_wall_insulation", "external_wall_insulation", "floor_insulation", "heating", "solar_pv"
],
"budget": None,
}
print(body1)
# Scenario B - deep fabric, no exclusions
# Scenario B
body2 = {
"portfolio_id": str(PORTFOLIO_ID),
"housing_type": "Private",
@ -263,8 +281,25 @@ def make_asset_list():
"already_installed_file_path": "",
"patches_file_path": "",
"non_invasive_recommendations_file_path": non_invasive_recommendations_filename,
"scenario_name": "Deep Fabric",
"scenario_name": "Demand Reduction, Heating Systems, Solar PV - no solid wall",
"multi_plan": True,
"exclusions": ["internal_wall_insulation", "external_wall_insulation", "floor_insulation"],
"budget": None,
}
print(body2)
# Scenario C - deep fabric, no exclusions
body3 = {
"portfolio_id": str(PORTFOLIO_ID),
"housing_type": "Private",
"goal": "Increasing EPC",
"goal_value": "A",
"trigger_file_path": filename,
"already_installed_file_path": "",
"patches_file_path": "",
"non_invasive_recommendations_file_path": non_invasive_recommendations_filename,
"scenario_name": "Whole House",
"multi_plan": True,
"budget": None,
}
print(body3)

View file

@ -61,10 +61,12 @@ class WallRecommendations(Definitions):
"Cavity wall, as built, insulated": "Cavity wall, filled cavity and external insulation",
"Solid brick, as built, no insulation": "Solid brick, with external insulation",
"Solid brick, as built, insulated": "Solid brick, with external insulation",
"Solid brick, as built, partial insulation": "Solid brick, with external insulation",
"Cob, as built": "Cob, with external insulation",
"System built, as built, no insulation": "System built, with external insulation",
"Granite or whinstone, as built, no insulation": 'Granite or whinstone, with external insulation',
"Timber frame, as built, no insulation": "Timber frame, with external insulation",
'Timber frame, as built, partial insulation': 'Timber frame, with external insulation',
}
# These are the ending descriptions we consider for walls with internal insulation
@ -72,10 +74,12 @@ class WallRecommendations(Definitions):
"Cavity wall, as built, insulated": "Cavity wall, filled cavity and internal insulation",
"Solid brick, as built, no insulation": "Solid brick, with internal insulation",
"Solid brick, as built, insulated": "Solid brick, with internal insulation",
"Solid brick, as built, partial insulation": "Solid brick, with internal insulation",
"Cob, as built": "Cob, with internal insulation",
"System built, as built, no insulation": "System built, with internal insulation",
"Granite or whinstone, as built, no insulation": 'Granite or whinstone, with internal insulation',
"Timber frame, as built, no insulation": "Timber frame, with internal insulation",
'Timber frame, as built, partial insulation': 'Timber frame, with internal insulation',
}
def __init__(