mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
created distributed scoring for prediction
This commit is contained in:
parent
0fbf004512
commit
7b080094fd
1 changed files with 30 additions and 16 deletions
|
|
@ -1166,10 +1166,9 @@ def get_epc_data(
|
|||
]
|
||||
|
||||
# condition 1 - identified for gbis and not eligible
|
||||
condition_1 = (
|
||||
identified_for_gbis and not eligibility.gbis_warmfront and not eligibility.eco4_warmfront[
|
||||
"eligible"]
|
||||
) & consider_penultimate_epc
|
||||
condition_1 = (identified_for_gbis and not eligibility.gbis_warmfront
|
||||
and not eligibility.eco4_warmfront["eligible"]
|
||||
) & consider_penultimate_epc
|
||||
|
||||
# condition 2 - identified for eco4 and not eligible
|
||||
condition_2 = (identified_for_eco4 and not eligibility.eco4_warmfront[
|
||||
|
|
@ -1246,10 +1245,12 @@ def get_epc_data(
|
|||
"uprn": eligibility.epc["uprn"],
|
||||
"is_estimated": searcher.newest_epc.get("estimated") is not None,
|
||||
"property_type": eligibility.epc["property-type"],
|
||||
"gbis_eligible": eligibility.gbis_warmfront,
|
||||
"eco4_eligible": eligibility.eco4_warmfront["eligible"],
|
||||
"eco4_message": eligibility.eco4_warmfront["message"],
|
||||
"eco4_strict": eligibility.eco4_warmfront["strict"],
|
||||
"gbis_eligible": eligibility.gbis_warmfront["eligible"],
|
||||
"gbis_message": eligibility.gbis_warmfront["message"],
|
||||
"gbis_strict": eligibility.gbis_warmfront["strict"],
|
||||
"sap": float(eligibility.epc["current-energy-efficiency"]),
|
||||
# Property components
|
||||
"roof": eligibility.roof["clean_description"],
|
||||
|
|
@ -1279,24 +1280,32 @@ def get_epc_data(
|
|||
)
|
||||
|
||||
model_api = ModelApi(portfolio_id="-".join([ha_name, "eligibility"]), timestamp=created_at)
|
||||
model_api.MODEL_PREFIXES = ["sap_change_predictions"]
|
||||
|
||||
all_predictions = model_api.predict_all(
|
||||
df=scoring_df,
|
||||
bucket="retrofit-data-dev",
|
||||
prediction_buckets={
|
||||
"sap_change_predictions": "retrofit-sap-predictions-dev",
|
||||
"heat_demand_predictions": "retrofit-heat-predictions-dev",
|
||||
"carbon_change_predictions": "retrofit-carbon-predictions-dev"
|
||||
}
|
||||
)
|
||||
scoring_df["id"] = scoring_df["id"] + "phase=0"
|
||||
# We split up the scoring_df and score
|
||||
predictions = []
|
||||
to_loop_over = range(0, scoring_df.shape[0], 400)
|
||||
for chunk in tqdm(to_loop_over, total=len(to_loop_over)):
|
||||
predictions_dict = model_api.predict_all(
|
||||
df=scoring_df.iloc[chunk:chunk + 400],
|
||||
bucket="retrofit-data-dev",
|
||||
prediction_buckets={
|
||||
"sap_change_predictions": "retrofit-sap-predictions-dev",
|
||||
}
|
||||
)
|
||||
|
||||
predictions = all_predictions["sap_change_predictions"].copy()
|
||||
predictions.append(predictions_dict["sap_change_predictions"])
|
||||
|
||||
predictions = pd.concat(predictions)
|
||||
predictions_size = predictions.shape[0]
|
||||
|
||||
predictions = predictions.rename(columns={"property_id": "row_id"}).merge(
|
||||
results_df[["row_id", "sap"]], how="left", on="row_id"
|
||||
)
|
||||
if predictions.shape[0] != predictions_size:
|
||||
raise ValueError("Predictions size has changed")
|
||||
predictions["sap_uplift"] = predictions["predictions"] - predictions["sap"]
|
||||
predictions = predictions.groupby("row_id")["sap_uplift"].sum().reset_index()
|
||||
|
||||
results_df = results_df.merge(
|
||||
predictions[["sap_uplift", "row_id"]],
|
||||
|
|
@ -1339,9 +1348,12 @@ def get_epc_data(
|
|||
|
||||
eligibility_assessment = pd.DataFrame(eligibility_assessment)
|
||||
|
||||
# Make sure the results haven't changed in size
|
||||
results_df = results_df.merge(
|
||||
eligibility_assessment, how="left", on="row_id"
|
||||
)
|
||||
if results_df.shape[0] != len(results):
|
||||
raise ValueError("results has changed size")
|
||||
|
||||
# We store the results in S3 as a pickle
|
||||
save_pickle_to_s3(
|
||||
|
|
@ -1809,6 +1821,8 @@ def app():
|
|||
loader.load()
|
||||
loader.ha_facts_and_figures()
|
||||
|
||||
loader.facts_and_figures.to_csv("facts_and_figures.csv", index=False)
|
||||
|
||||
# We load in the additional data required to perform the analysis
|
||||
cleaned = read_from_s3(
|
||||
s3_file_name="cleaned_epc_data/cleaned.bson",
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue