mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
integrated scoring new data
This commit is contained in:
parent
96235ed3a9
commit
eb65ff538e
4 changed files with 24 additions and 25 deletions
|
|
@ -596,7 +596,7 @@ class Property:
|
|||
)
|
||||
self.set_energy_source()
|
||||
self.find_energy_sources()
|
||||
self.set_current_energy_bill()
|
||||
self.set_current_energy_bill(energy_consumption_client)
|
||||
|
||||
def set_current_energy_bill(self, energy_consumption_client):
|
||||
"""
|
||||
|
|
@ -611,6 +611,7 @@ class Property:
|
|||
]
|
||||
for col in ["heating_kwh", "hot_water_kwh"]:
|
||||
scoring_df[col] = None
|
||||
|
||||
energy_consumption_client.data = None
|
||||
heating_prediction = energy_consumption_client.score_new_data(
|
||||
new_data=scoring_df, target="heating_kwh"
|
||||
|
|
|
|||
|
|
@ -339,11 +339,6 @@ async def trigger_plan(body: PlanTriggerRequest):
|
|||
if not input_properties:
|
||||
return Response(status_code=204)
|
||||
|
||||
# TODO: TEMP - store locally as pickle
|
||||
# import pickle
|
||||
# with open("input_properties.pkl", "wb") as f:
|
||||
# pickle.dump(input_properties, f)
|
||||
|
||||
# The materials data could be cached or local so we don't need to make
|
||||
# repeated requests to the backend for
|
||||
# the same data
|
||||
|
|
@ -363,21 +358,10 @@ async def trigger_plan(body: PlanTriggerRequest):
|
|||
"heating_kwh": f"model_directory/energy_consumption_model/heating_kwh_{dataset_version}.pkl",
|
||||
"hot_water_kwh": f"model_directory/energy_consumption_model/hot_water_kwh_{dataset_version}.pkl"
|
||||
},
|
||||
dummy_schema_path=f"model_directory/energy_consumption_model/dummy_schema_{dataset_version}.pkl",
|
||||
cleaned=cleaned
|
||||
)
|
||||
|
||||
# Store all of these locally
|
||||
# with open("temp_inputs.pkl", "wb") as f:
|
||||
# pickle.dump({
|
||||
# "input_properties": input_properties,
|
||||
# "materials": materials,
|
||||
# "cleaned": cleaned,
|
||||
# "uprn_filenames": uprn_filenames,
|
||||
# "photo_supply_lookup": photo_supply_lookup,
|
||||
# "floor_area_decile_thresholds": floor_area_decile_thresholds,
|
||||
# "model_client": model_client
|
||||
# }, f)
|
||||
|
||||
logger.info("Getting spatial data")
|
||||
for p in input_properties:
|
||||
p.get_components(cleaned, photo_supply_lookup, floor_area_decile_thresholds, energy_consumption_client)
|
||||
|
|
|
|||
|
|
@ -46,7 +46,7 @@ class EnergyConsumptionModel:
|
|||
"low-energy-lighting", "environment-impact-current", "energy-tariff", "current-energy-rating"
|
||||
]
|
||||
|
||||
def __init__(self, cleaned, model_paths=None, n_jobs=1):
|
||||
def __init__(self, cleaned, model_paths=None, dummy_schema_path=None, n_jobs=1):
|
||||
self.cleaned = cleaned
|
||||
self.models = {}
|
||||
self.model_paths = model_paths or {}
|
||||
|
|
@ -75,7 +75,15 @@ class EnergyConsumptionModel:
|
|||
|
||||
if model_paths:
|
||||
for target, path in model_paths.items():
|
||||
# Read model
|
||||
self.models[target] = read_pickle_from_s3(bucket_name="retrofit-model-directory-dev", s3_file_name=path)
|
||||
# Read dummy schema
|
||||
|
||||
if dummy_schema_path:
|
||||
self.dummy_schema = read_pickle_from_s3(
|
||||
bucket_name="retrofit-model-directory-dev",
|
||||
s3_file_name=dummy_schema_path
|
||||
)
|
||||
|
||||
def read_dataset(self, file_path):
|
||||
"""Reads the dataset from the specified file path."""
|
||||
|
|
@ -380,11 +388,13 @@ class EnergyConsumptionModel:
|
|||
bucket_name="retrofit-model-directory-dev",
|
||||
s3_file_name=f"model_directory/energy_consumption_model/{target}_{dataset_version}.pkl"
|
||||
)
|
||||
|
||||
def save_dummy_schema(self, dataset_version):
|
||||
logger.info("Saving dummy schema for target {target}")
|
||||
save_pickle_to_s3(
|
||||
self.dummy_schema,
|
||||
bucket_name="retrofit-model-directory-dev",
|
||||
s3_file_name=f"model_directory/energy_consumption_model/{target}_{dataset_version}_dummy_schema.pkl"
|
||||
s3_file_name=f"model_directory/energy_consumption_model/{dataset_version}_dummy_schema.pkl"
|
||||
)
|
||||
|
||||
def score_new_data(self, new_data, target):
|
||||
|
|
@ -400,16 +410,19 @@ class EnergyConsumptionModel:
|
|||
self.data = new_data.copy()
|
||||
|
||||
# Run feature engineering
|
||||
# TODO: This needs to be dummied out according to the training data
|
||||
self.feature_engineering(drop_first=False)
|
||||
|
||||
# Select the transformed data
|
||||
new_data_transformed = self.data[self.dummy_columns[target]].copy()
|
||||
new_data_transformed = self.data.copy()
|
||||
|
||||
for col in self.dummy_schema:
|
||||
if col not in new_data_transformed.columns:
|
||||
new_data_transformed[col] = 0
|
||||
|
||||
new_data_transformed = new_data_transformed[self.dummy_schema]
|
||||
missed_dummies = [c for c in self.models[target].feature_names_in_ if c not in new_data_transformed.columns]
|
||||
zero_df = pd.DataFrame([dict(zip(missed_dummies, [0, ] * len(missed_dummies)))])
|
||||
|
||||
new_data_transformed = pd.concat([new_data_transformed, zero_df], axis=1)
|
||||
|
||||
# When we dummy in this case, we run with drop_first = False so we may end up with some of those
|
||||
# first columns, so we'll need to drop them
|
||||
new_data_transformed = new_data_transformed[self.models[target].feature_names_in_]
|
||||
|
|
|
|||
|
|
@ -10,7 +10,7 @@ def handler():
|
|||
:return:
|
||||
"""
|
||||
|
||||
dataset_version = "2024-07-05"
|
||||
dataset_version = "2024-07-08"
|
||||
|
||||
# Usage:
|
||||
cleaned = read_from_s3(
|
||||
|
|
@ -23,6 +23,7 @@ def handler():
|
|||
model = EnergyConsumptionModel(cleaned=cleaned, n_jobs=2)
|
||||
model.read_dataset(f'energy_consumption/{dataset_version}/energy_consumption_dataset.parquet')
|
||||
model.feature_engineering()
|
||||
model.save_dummy_schema(dataset_version=dataset_version)
|
||||
|
||||
# For heating_kwh
|
||||
model.split_dataset(target='heating_kwh')
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue