mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
scrappy testing
This commit is contained in:
parent
c77f642861
commit
58374e7a6d
6 changed files with 217 additions and 31 deletions
|
|
@ -721,13 +721,6 @@ class Property:
|
|||
]["predictions"].values[0]
|
||||
)
|
||||
|
||||
# heating_prediction = (
|
||||
# float(condition_data["space_heating_kwh"]) if condition_data.get("space_heating_kwh") is not None
|
||||
# else energy_consumption_client.score_new_data(
|
||||
# new_data=scoring_df, target="heating_kwh"
|
||||
# )[0]
|
||||
# )
|
||||
|
||||
hot_water_prediction = (
|
||||
condition_data.get("water_heating_kwh") if condition_data.get("water_heating_kwh") is not None else
|
||||
hotwater_kwh_predictions[
|
||||
|
|
@ -735,23 +728,16 @@ class Property:
|
|||
]["predictions"].values[0]
|
||||
)
|
||||
|
||||
# hot_water_prediction = (
|
||||
# float(condition_data["water_heating_kwh"]) if condition_data.get("water_heating_kwh") is not None
|
||||
# else energy_consumption_client.score_new_data(
|
||||
# new_data=scoring_df, target="hot_water_kwh"
|
||||
# )[0]
|
||||
# )
|
||||
|
||||
# We convert the lighting cost into kwh, just using the price cap
|
||||
lighting_kwh = todays_lighting_cost / AnnualBillSavings.ELECTRICITY_PRICE_CAP
|
||||
|
||||
appliances_kwh = AnnualBillSavings.estimate_appliances_energy_use(total_floor_area=self.floor_area)
|
||||
|
||||
unadjusted_kwh_estimates = {
|
||||
"heating": heating_prediction,
|
||||
"hot_water": hot_water_prediction,
|
||||
"lighting": lighting_kwh,
|
||||
"appliances": appliances_kwh
|
||||
"heating": float(heating_prediction),
|
||||
"hot_water": float(hot_water_prediction),
|
||||
"lighting": float(lighting_kwh),
|
||||
"appliances": float(appliances_kwh)
|
||||
}
|
||||
|
||||
adjusted_kwh_estimates = {
|
||||
|
|
@ -762,10 +748,10 @@ class Property:
|
|||
}
|
||||
|
||||
unadjusted_heating_costs = {
|
||||
"heating": todays_heating_cost,
|
||||
"hot_water": todays_hot_water_cost,
|
||||
"lighting": todays_lighting_cost,
|
||||
"appliances": appliances_kwh * AnnualBillSavings.ELECTRICITY_PRICE_CAP
|
||||
"heating": float(todays_heating_cost),
|
||||
"hot_water": float(todays_hot_water_cost),
|
||||
"lighting": float(todays_lighting_cost),
|
||||
"appliances": float(appliances_kwh) * AnnualBillSavings.ELECTRICITY_PRICE_CAP
|
||||
}
|
||||
|
||||
adjusted_heating_costs = {
|
||||
|
|
|
|||
|
|
@ -326,7 +326,6 @@ async def trigger_plan(body: PlanTriggerRequest):
|
|||
|
||||
input_properties = []
|
||||
for config in tqdm(plan_input):
|
||||
|
||||
# We validate each record in the file. If the record is NOT valid, we need to handle this accordingly
|
||||
uprn = config.get("uprn", None)
|
||||
if uprn:
|
||||
|
|
@ -782,7 +781,7 @@ async def trigger_plan(body: PlanTriggerRequest):
|
|||
predictions_dict = model_api.predict_all(
|
||||
df=recommendations_scoring_data.iloc[chunk:chunk + SCORING_BATCH_SIZE],
|
||||
bucket=get_settings().DATA_BUCKET,
|
||||
prediction_buckets=get_prediction_buckets()
|
||||
prediction_buckets=get_prediction_buckets(),
|
||||
)
|
||||
|
||||
# Append the predictions to the predictions dictionary
|
||||
|
|
@ -791,10 +790,6 @@ async def trigger_plan(body: PlanTriggerRequest):
|
|||
|
||||
# We now produce predictions for the kwh models
|
||||
|
||||
# TODO!!!!! In order to score the kwh models, we need to insert the new SAP, heat demand, carbon, cost
|
||||
# etc values, into the simulated EPC, otherwise it won't work. We might also want to drop all potential
|
||||
# columns and env-efficiency columns (POTENTIAL COLUMNS ALREADY GONE, JUST NEED TO DROP ENV EFFICIENCY)
|
||||
|
||||
# Insert the predictions into the recommendations and run the optimiser
|
||||
# TODO: If a recommendation has a negative impact on SAP, we should remove it - this seems to have become a
|
||||
# possibility with heating system
|
||||
|
|
|
|||
|
|
@ -131,7 +131,6 @@ def app():
|
|||
sample_size = 500
|
||||
|
||||
energy_consumption_data = []
|
||||
cavity_walls_data = []
|
||||
for i, directory in tqdm(enumerate(epc_directories), total=len(epc_directories)):
|
||||
|
||||
# Skip the first 50
|
||||
|
|
|
|||
|
|
@ -58,3 +58,208 @@ def app():
|
|||
"budget": None,
|
||||
}
|
||||
print(body)
|
||||
|
||||
|
||||
# This is some temp code, which is for diagnosing the issues with the bills models
|
||||
heating_training_data_filepath = "sap_change_model/2024-08-06-11-19-49/dataset_rooms.parquet"
|
||||
|
||||
# For the heating model:
|
||||
heating_drop_columns = [
|
||||
"sap_ending", "heat_demand_change", "carbon_change", "rdsap_change", "heat_demand_ending", "carbon_ending",
|
||||
"lighting_cost_ending", "hot_water_cost_ending",
|
||||
# "days_to_ending", "days_to_starting", # TODO This is in the live version
|
||||
'number_habitable_rooms_starting', 'number_habitable_rooms_ending', 'number_heated_rooms_starting',
|
||||
'number_heated_rooms_ending',
|
||||
'number_habitable_rooms', 'number_heated_rooms'
|
||||
]
|
||||
|
||||
heating_response = "heating_cost_ending"
|
||||
|
||||
# for the hot water model (older dataset)
|
||||
hot_water_training_data_filepath = "sap_change_model/2024-07-10-20-28-54/dataset_rooms.parquet"
|
||||
|
||||
hot_water_drop_columns = [
|
||||
"sap_ending", "heat_demand_change", "carbon_change", "rdsap_change", "heat_demand_ending", "carbon_ending",
|
||||
"lighting_cost_ending", "heating_cost_ending",
|
||||
"days_to_starting", "days_to_ending",
|
||||
'number_habitable_rooms_starting', 'number_habitable_rooms_ending', 'number_heated_rooms_starting',
|
||||
'number_heated_rooms_ending',
|
||||
'number_habitable_rooms', 'number_heated_rooms'
|
||||
]
|
||||
|
||||
# Diagnose heating
|
||||
from utils.s3 import read_dataframe_from_s3_parquet
|
||||
|
||||
train = read_dataframe_from_s3_parquet(
|
||||
bucket_name="retrofit-data-dev",
|
||||
file_key=heating_training_data_filepath
|
||||
)
|
||||
|
||||
# Drop the columns that aren't used
|
||||
train = train.drop(columns=heating_drop_columns)
|
||||
|
||||
# If the value is positive, the ending cost is bigger than the starting cost (i.e. heating got more expensive)
|
||||
train["cost_diference"] = (train["heating_cost_ending"] - train["heating_cost_starting"])
|
||||
change_direction = train["cost_diference"] > 0
|
||||
change_direction.value_counts(normalize=True)
|
||||
|
||||
average_costs_by_time_starting = train.groupby(
|
||||
["lodgement_year_starting", "lodgement_month_starting"]
|
||||
)["heating_cost_starting"].mean().reset_index().sort_values(["lodgement_year_starting", "lodgement_month_starting"])
|
||||
|
||||
average_costs_by_time_ending = train.groupby(
|
||||
["lodgement_year_ending", "lodgement_month_ending"]
|
||||
)["heating_cost_ending"].mean().reset_index().sort_values(["lodgement_year_ending", "lodgement_month_ending"])
|
||||
|
||||
# Check by photo supply values - if the property is gas, solar panels won't have an effect on the heating or hot
|
||||
# water so let's look for electric homes
|
||||
# Across the entire dataset, there is no correlation
|
||||
# Even for electric properties, there is no correlation
|
||||
photo_supply_averages = train[
|
||||
train["fuel_type_ending"] == "electricity"
|
||||
].groupby(["photo_supply_ending"])["heating_cost_ending"].mean().reset_index()
|
||||
|
||||
photo_supply_to_size = train.groupby("photo_supply_ending")["total_floor_area_ending"].mean().reset_index()
|
||||
photo_supply_to_size[["photo_supply_ending", "total_floor_area_ending"]].corr()
|
||||
train[["total_floor_area_ending", "heating_cost_ending"]].corr()
|
||||
# Bigger properties end up with smaller photo_supply values. This will be because the array size likely remains fairly
|
||||
# consistent but takes up a smaller proportion of the roof. Typically, the bigger the floor area, the higher the heating
|
||||
# costs, but bigger units also have smaller photo_supply
|
||||
adding_solar = train[
|
||||
(train["photo_supply_ending"] > 0) & (train["photo_supply_starting"] == 0)
|
||||
]
|
||||
is_positive = (adding_solar["cost_diference"] > 0)
|
||||
is_positive.value_counts(normalize=True)
|
||||
|
||||
photo_supply_by_time = (
|
||||
train[
|
||||
train["fuel_type_ending"] == "electricity"
|
||||
].groupby(
|
||||
["lodgement_year_ending", "photo_supply_ending"]
|
||||
)["heating_cost_ending"].mean().reset_index().sort_values(
|
||||
["lodgement_year_ending", "photo_supply_ending"], ascending=True)
|
||||
)
|
||||
# Plot
|
||||
photo_supply_by_time[["photo_supply_ending", "heating_cost_ending"]].corr()
|
||||
photo_supply_by_time.plot()
|
||||
|
||||
# Observations
|
||||
# 1) We retain all of the potential columns, however they are just based on the starting EPC
|
||||
# 2) 21% of the time, the ending heating cost is more than the starting cost, but this is clearly a minority
|
||||
# 3) Let's get rid of estimated perimeter starting and ending
|
||||
|
||||
# Things I should check
|
||||
# 1) Do we update lodgement_year_ending and lodgement_month_ending?
|
||||
# 2) Should we adjust costs to now, as well as lodgement_dates to today? Since 2023, costs have increased a lot so
|
||||
# any savings should be benchmarked against what a customer is paying now
|
||||
# 3) It might make sense to create a feature between floor area and photo supply, to give a more consistent estimate
|
||||
# of a panel size for the property
|
||||
|
||||
# Get an example and score with the models
|
||||
example = train[
|
||||
(train["photo_supply_starting"] == 0) &
|
||||
(train["photo_supply_ending"] > 0) &
|
||||
(train["heating_cost_starting"] > train["heating_cost_ending"])
|
||||
].sample(1)
|
||||
|
||||
# example["lodgement_month_starting"]
|
||||
# example["lodgement_year_starting"]
|
||||
# example["lodgement_month_ending"]
|
||||
# example["lodgement_year_ending"].values[0]
|
||||
#
|
||||
# example["lodgement_year_ending"] = 2023
|
||||
# example["days_to_ending"] = 3500
|
||||
# example["days_to_starting"]
|
||||
|
||||
# {'heating_cost_predictions': predictions
|
||||
# 0 378.5}
|
||||
resp = model_api.predict_all(
|
||||
df=example,
|
||||
bucket="retrofit-data-dev",
|
||||
prediction_buckets=get_prediction_buckets(),
|
||||
model_prefixes=["heating_cost_predictions"],
|
||||
extract_ids=False
|
||||
)
|
||||
|
||||
# Step 1: get a cost for today
|
||||
p.create_base_difference_epc_record(cleaned)
|
||||
cwi_impact = p.base_difference_record.df.copy()
|
||||
for k in property_recommendations[0][0]["simulation_config"]:
|
||||
cwi_impact[k] = property_recommendations[0][0]["simulation_config"][k]
|
||||
|
||||
# 2212.4 - Baseline
|
||||
today = model_api.predict_all(
|
||||
df=p.base_difference_record.df.copy(),
|
||||
bucket="retrofit-data-dev",
|
||||
prediction_buckets=get_prediction_buckets(),
|
||||
model_prefixes=["heating_cost_predictions"],
|
||||
extract_ids=False
|
||||
)
|
||||
|
||||
# impact of CWI - 1908
|
||||
cwi_response = model_api.predict_all(
|
||||
df=cwi_impact,
|
||||
bucket="retrofit-data-dev",
|
||||
prediction_buckets=get_prediction_buckets(),
|
||||
model_prefixes=["heating_cost_predictions"],
|
||||
extract_ids=False
|
||||
)
|
||||
|
||||
pv_impact = cwi_impact.copy()
|
||||
pv_impact["photo_supply_ending"] = 50
|
||||
pv_impact["heating_cost_starting"] = 2212.4
|
||||
|
||||
pv_response = model_api.predict_all(
|
||||
df=pv_impact,
|
||||
bucket="retrofit-data-dev",
|
||||
prediction_buckets=get_prediction_buckets(),
|
||||
model_prefixes=["heating_cost_predictions"],
|
||||
extract_ids=False
|
||||
)
|
||||
|
||||
# Testing kwh for vde
|
||||
base_prediction = model_api.predict_all(
|
||||
df=epcs_for_scoring,
|
||||
bucket=get_settings().DATA_BUCKET,
|
||||
prediction_buckets=get_prediction_buckets(),
|
||||
model_prefixes=["heating_kwh_predictions"],
|
||||
extract_ids=False
|
||||
)
|
||||
|
||||
cwi_epc = epcs_for_scoring.copy()
|
||||
cwi_epc["walls-description"] = "Cavity wall, filled cavity"
|
||||
cwi_epc["walls-energy-eff"] = "Good"
|
||||
cwi_epc["heating-cost-current"] = 1650
|
||||
cwi_epc["current-energy-efficiency"] = 72
|
||||
cwi_epc["current-energy-rating"] = "C"
|
||||
cwi_epc["co2-emissions-current"] = 3.7
|
||||
cwi_epc["energy-consumption-current"] = 121
|
||||
cwi_epc["co2-emiss-curr-per-floor-area"] = 19
|
||||
cwi_epc["photo-supply"] = 0
|
||||
# cwi_epc["energy-consumption-current"] =
|
||||
# cwi_epc["roof-description"] = "Pitched, 300 mm loft insulation"
|
||||
# cwi_epc["roof-energy-eff"] = "Very Good"
|
||||
# cwi_epc["heating-cost-current"] = 1264
|
||||
|
||||
# "heating-cost-current": rec_impact["epc_heating_cost"],
|
||||
# "hot-water-cost-current": rec_impact["epc_hot_water_cost"],
|
||||
# # CO₂ emissions per square metre floor area per year in kg/m². Since CO₂ emissions are in tonnes
|
||||
# # per year, we multiply by 1000 to get kg/m²
|
||||
# "co2-emiss-curr-per-floor-area": round(
|
||||
# 1000 * (rec_impact["carbon"] / self.data["total-floor-area"])
|
||||
# ),
|
||||
# "co2-emissions-current": rec_impact["carbon"],
|
||||
# "current-energy-rating": sap_to_epc(rec_impact["sap"]),
|
||||
# "current-energy-efficiency": int(np.floor(rec_impact["sap"])),
|
||||
# "energy-consumption-current": rec_impact["heat_demand"],
|
||||
# "lighting-cost-current": rec_impact["epc_lighting_cost"],
|
||||
# "id": "+".join([str(self.id), rec_id])
|
||||
|
||||
cwi_prediction = model_api.predict_all(
|
||||
df=cwi_epc,
|
||||
bucket=get_settings().DATA_BUCKET,
|
||||
prediction_buckets=get_prediction_buckets(),
|
||||
model_prefixes=["heating_kwh_predictions"],
|
||||
extract_ids=False
|
||||
)
|
||||
2344 - 2060
|
||||
|
|
|
|||
|
|
@ -166,6 +166,7 @@ def main():
|
|||
# For each property, we download the xmls and extract the data
|
||||
database_data = []
|
||||
for uprn, xmls in assessments_map.items():
|
||||
|
||||
extracted_data = {}
|
||||
for xml in xmls:
|
||||
xml_data = read_from_s3(bucket_name=BUCKET, s3_file_name=xml)
|
||||
|
|
|
|||
|
|
@ -160,7 +160,7 @@ class SolarPvRecommendations:
|
|||
if not non_invasive_recommendation["suitable"]:
|
||||
return
|
||||
|
||||
if non_invasive_recommendation:
|
||||
if non_invasive_recommendation.get("array_wattage") is not None:
|
||||
|
||||
roof_area = esimtate_pitched_roof_area(
|
||||
floor_area=self.property.insulation_floor_area, floor_height=self.property.data["floor-height"]
|
||||
|
|
@ -186,7 +186,7 @@ class SolarPvRecommendations:
|
|||
cost_result = self.costs.solar_pv(
|
||||
wattage=recommendation_config["array_wattage"],
|
||||
has_battery=has_battery,
|
||||
array_cost=non_invasive_recommendation["cost"] if non_invasive_recommendation else None
|
||||
array_cost=non_invasive_recommendation.get("cost", None)
|
||||
)
|
||||
kw = np.floor(recommendation_config["array_wattage"] / 100) / 10
|
||||
if has_battery:
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue