Implementing sap model into the backend, almost complete

This commit is contained in:
Khalim Conn-Kowlessar 2023-09-05 18:29:38 +01:00
parent 02208cbf4a
commit ee15ae8466
8 changed files with 136 additions and 81 deletions

View file

@ -0,0 +1,32 @@
columntypes = {
'UPRN': 'object', 'TOTAL_FLOOR_AREA': 'float64', 'FLOOR_HEIGHT': 'float64', 'PROPERTY_TYPE': 'object',
'BUILT_FORM': 'object', 'CONSTITUENCY': 'object', 'NUMBER_HABITABLE_ROOMS': 'float64',
'NUMBER_HEATED_ROOMS': 'float64', 'FIXED_LIGHTING_OUTLETS_COUNT': 'float64', 'FLOOR_LEVEL': 'float64',
'CONSTRUCTION_AGE_BAND': 'object', 'TRANSACTION_TYPE_STARTING': 'object',
'WALLS_DESCRIPTION_STARTING': 'object',
'FLOOR_DESCRIPTION_STARTING': 'object', 'LIGHTING_DESCRIPTION_STARTING': 'object',
'ROOF_DESCRIPTION_STARTING': 'object', 'MAINHEAT_DESCRIPTION_STARTING': 'object',
'HOTWATER_DESCRIPTION_STARTING': 'object', 'MAIN_FUEL_STARTING': 'object',
'MECHANICAL_VENTILATION_STARTING': 'object',
'SECONDHEAT_DESCRIPTION_STARTING': 'object', 'ENERGY_TARIFF_STARTING': 'object',
'SOLAR_WATER_HEATING_FLAG_STARTING': 'object', 'PHOTO_SUPPLY_STARTING': 'float64',
'WINDOWS_DESCRIPTION_STARTING': 'object', 'GLAZED_TYPE_STARTING': 'object',
'MULTI_GLAZE_PROPORTION_STARTING': 'float64', 'LOW_ENERGY_LIGHTING_STARTING': 'float64',
'NUMBER_OPEN_FIREPLACES_STARTING': 'float64', 'MAINHEATCONT_DESCRIPTION_STARTING': 'object',
'EXTENSION_COUNT_STARTING': 'float64', 'LODGEMENT_DATE_STARTING': 'object',
'TRANSACTION_TYPE_ENDING': 'object',
'WALLS_DESCRIPTION_ENDING': 'object', 'FLOOR_DESCRIPTION_ENDING': 'object',
'LIGHTING_DESCRIPTION_ENDING': 'object',
'ROOF_DESCRIPTION_ENDING': 'object', 'MAINHEAT_DESCRIPTION_ENDING': 'object',
'HOTWATER_DESCRIPTION_ENDING': 'object',
'MAIN_FUEL_ENDING': 'object', 'MECHANICAL_VENTILATION_ENDING': 'object',
'SECONDHEAT_DESCRIPTION_ENDING': 'object',
'ENERGY_TARIFF_ENDING': 'object', 'SOLAR_WATER_HEATING_FLAG_ENDING': 'object',
'PHOTO_SUPPLY_ENDING': 'float64',
'WINDOWS_DESCRIPTION_ENDING': 'object', 'GLAZED_TYPE_ENDING': 'object',
'MULTI_GLAZE_PROPORTION_ENDING': 'float64',
'LOW_ENERGY_LIGHTING_ENDING': 'float64', 'NUMBER_OPEN_FIREPLACES_ENDING': 'float64',
'MAINHEATCONT_DESCRIPTION_ENDING': 'object', 'EXTENSION_COUNT_ENDING': 'float64',
'LODGEMENT_DATE_ENDING': 'object',
'id': 'object'
}

View file

@ -17,8 +17,7 @@ from sqlalchemy.orm import sessionmaker
from sqlalchemy.exc import IntegrityError, OperationalError
from datetime import datetime
import pandas as pd
from io import BytesIO
import boto3
import requests
# database interaction functions
from backend.app.db.functions.property_functions import (
@ -26,16 +25,20 @@ from backend.app.db.functions.property_functions import (
)
from backend.app.db.functions.materials_functions import get_materials
from backend.app.db.functions.recommendations_functions import (
create_plan, create_plan_recommendations, upload_recommendations
create_plan, create_plan_recommendations, upload_recommendations
)
from backend.app.db.functions.portfolio_functions import aggregate_portfolio_recommendations
from backend.app.db.connection import db_engine
from backend.app.plan.columntypes import columntypes
from model_data.optimiser.GainOptimiser import GainOptimiser
from model_data.optimiser.CostOptimiser import CostOptimiser
from backend.app.utils import epc_to_sap_lower_bound
from backend.app.utils import epc_to_sap_lower_bound, save_dataframe_to_s3_parquet, read_parquet_from_s3
from model_data.optimiser.optimiser_functions import prepare_input_measures
from model_data.simulation_system.core.DataProcessor import DataProcessor
from model_data.simulation_system.core.Settings import (
FIXED_FEATURES, COMPONENT_FEATURES, COLUMNS_TO_MERGE_ON
)
# TODO: This is placeholder until data is stored in DB
from backend.app.plan.uvalue_estimates_walls import uvalue_estimates_walls
@ -133,19 +136,6 @@ def insert_temp_recommendation_id(property_recommendations):
return property_recommendations
def read_parquet_from_s3(bucket_name, file_key):
client = boto3.client('s3')
# Get the object
s3_object = client.get_object(Bucket=bucket_name, Key=file_key)
# Read the CSV body into a DataFrame
csv_body = s3_object["Body"].read()
df = pd.read_parquet(BytesIO(csv_body))
return df
@router.post("/trigger")
async def trigger_plan(body: PlanTriggerRequest):
logger.info("Connecting to db")
@ -343,7 +333,6 @@ async def trigger_plan(body: PlanTriggerRequest):
recommendations[p.id] = property_recommendations
# Finally, we'll prepare data for predicting the impact on SAP
from model_data.simulation_system.core.Settings import FIXED_FEATURES, COMPONENT_FEATURES, COLUMNS_TO_MERGE_ON
epc_data = p.data.copy()
epc_data = pd.DataFrame([epc_data])
epc_data.columns = [col.upper().replace("-", "_") for col in epc_data.columns]
@ -407,59 +396,43 @@ async def trigger_plan(body: PlanTriggerRequest):
data_processor.remap_columns()
recommendations_scoring_data = data_processor.data
column_types = data.dtypes.to_dict()
columntypes = {}
for k, v in column_types.items():
if k not in recommendations_scoring_data.columns:
continue
columntypes[k] = v.name
columntypes["id"] = "object"
for col in recommendations_scoring_data.columns:
recommendations_scoring_data[col] = recommendations_scoring_data[col].astype(columntypes[col])
# Remap column types
recommendations_scoring_data = recommendations_scoring_data.astype(columntypes)
column_types = {'UPRN': dtype('O'), 'RDSAP_CHANGE': dtype('int64'), 'HEAT_DEMAND_CHANGE': dtype('int64'),
'TOTAL_FLOOR_AREA': dtype('float64'), 'FLOOR_HEIGHT': dtype('float64'), 'PROPERTY_TYPE': dtype('O'),
'BUILT_FORM': dtype('O'), 'CONSTITUENCY': dtype('O'), 'NUMBER_HABITABLE_ROOMS': dtype('float64'),
'NUMBER_HEATED_ROOMS': dtype('float64'), 'FIXED_LIGHTING_OUTLETS_COUNT': dtype('float64'),
'FLOOR_LEVEL': dtype('float64'), 'CONSTRUCTION_AGE_BAND': dtype('O'), 'TRANSACTION_TYPE_STARTING': dtype('O'),
'WALLS_DESCRIPTION_STARTING': dtype('O'), 'FLOOR_DESCRIPTION_STARTING': dtype('O'),
'LIGHTING_DESCRIPTION_STARTING': dtype('O'), 'ROOF_DESCRIPTION_STARTING': dtype('O'),
'MAINHEAT_DESCRIPTION_STARTING': dtype('O'), 'HOTWATER_DESCRIPTION_STARTING': dtype('O'),
'MAIN_FUEL_STARTING': dtype('O'), 'MECHANICAL_VENTILATION_STARTING': dtype('O'),
'SECONDHEAT_DESCRIPTION_STARTING': dtype('O'), 'ENERGY_TARIFF_STARTING': dtype('O'),
'SOLAR_WATER_HEATING_FLAG_STARTING': dtype('O'), 'PHOTO_SUPPLY_STARTING': dtype('float64'),
'WINDOWS_DESCRIPTION_STARTING': dtype('O'), 'GLAZED_TYPE_STARTING': dtype('O'),
'MULTI_GLAZE_PROPORTION_STARTING': dtype('float64'), 'LOW_ENERGY_LIGHTING_STARTING': dtype('float64'),
'NUMBER_OPEN_FIREPLACES_STARTING': dtype('float64'), 'MAINHEATCONT_DESCRIPTION_STARTING': dtype('O'),
'EXTENSION_COUNT_STARTING': dtype('float64'), 'LODGEMENT_DATE_STARTING': dtype('O'),
'TRANSACTION_TYPE_ENDING': dtype('O'), 'WALLS_DESCRIPTION_ENDING': dtype('O'),
'FLOOR_DESCRIPTION_ENDING': dtype('O'), 'LIGHTING_DESCRIPTION_ENDING': dtype('O'),
'ROOF_DESCRIPTION_ENDING': dtype('O'), 'MAINHEAT_DESCRIPTION_ENDING': dtype('O'),
'HOTWATER_DESCRIPTION_ENDING': dtype('O'), 'MAIN_FUEL_ENDING': dtype('O'),
'MECHANICAL_VENTILATION_ENDING': dtype('O'), 'SECONDHEAT_DESCRIPTION_ENDING': dtype('O'),
'ENERGY_TARIFF_ENDING': dtype('O'), 'SOLAR_WATER_HEATING_FLAG_ENDING': dtype('O'),
'PHOTO_SUPPLY_ENDING': dtype('float64'), 'WINDOWS_DESCRIPTION_ENDING': dtype('O'),
'GLAZED_TYPE_ENDING': dtype('O'), 'MULTI_GLAZE_PROPORTION_ENDING': dtype('float64'),
'LOW_ENERGY_LIGHTING_ENDING': dtype('float64'), 'NUMBER_OPEN_FIREPLACES_ENDING': dtype('float64'),
'MAINHEATCONT_DESCRIPTION_ENDING': dtype('O'), 'EXTENSION_COUNT_ENDING': dtype('float64'),
'LODGEMENT_DATE_ENDING': dtype('O'), 'id': dtype('int64')}
# Example data file
data = read_parquet_from_s3(
bucket_name="retrofit-data-dev", file_key="model_build_data/change_data/rdsap_full/test_data_with_id.parquet"
# Store parquet file in s3 for scoring
created_at = datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
file_location = "sap_change_predictions/{portfolio_id}/{timestamp}.parquet".format(
portfolio_id=body.portfolio_id,
timestamp=created_at
)
data = data.head(5)
save_dataframe_to_s3_parquet(
df=recommendations_scoring_data,
bucket_name="retrofit-data-dev",
file_key=file_location
)
# We query the sap difference model api to get the estimated impact on sap
for property_id, recommendations in recommendations.items():
# Call the sap change model
response = requests.post(
url="https://api.dev.hestia.homes/sapmodel/predict",
json={
"file_location": "s3://retrofit-data-dev/" + file_location,
"property_id": 999,
"portfolio_id": 4,
"created_at": created_at
}
)
# TODO: Handle the response depending on response code
# Retrieve the predictions
predictions = read_csv_from_s3(
bucket_name="retrofit-sap-predictions-dev",
filepath=f"{body.portfolio_id}/999/{created_at}.csv"
)
predictions = pd.DataFrame(predictions)
# We round the predictions
predictions["RDSAP_CHANGE"] = predictions["RDSAP_CHANGE"].astype(float).round(0)
# Extract property_id and recommendation_id
predictions[['property_id', 'recommendation_id']] = predictions['id'].str.split('+', expand=True)
# 1) the property data
# 2) the property details (epc)
@ -481,6 +454,16 @@ async def trigger_plan(body: PlanTriggerRequest):
if not recommendations_to_upload:
continue
property_predictions = predictions[predictions["property_id"] == str(p.id)]
for rec in recommendations_to_upload:
# Insert the prediction for sap points
rec["sap_points"] = property_predictions[property_predictions["recommendation_id"] == str(
rec["recommendation_id"]
)]["RDSAP_CHANGE"].values[0]
if not rec["sap_points"]:
raise ValueError("Sap points missing")
# Create a plan
new_plan_id = create_plan(
session,

View file

@ -4,6 +4,8 @@ from io import StringIO
import string
import secrets
import logging
import pandas as pd
from io import BytesIO
def setup_logger(log_file=None, level=logging.INFO, overwrite_handler=False):
@ -117,3 +119,36 @@ def epc_to_sap_lower_bound(epc: str):
return 1
else:
raise ValueError("EPC rating should be between A and G")
def read_parquet_from_s3(bucket_name, file_key):
client = boto3.client('s3')
# Get the object
s3_object = client.get_object(Bucket=bucket_name, Key=file_key)
# Read the CSV body into a DataFrame
csv_body = s3_object["Body"].read()
df = pd.read_parquet(BytesIO(csv_body))
return df
def save_dataframe_to_s3_parquet(df, bucket_name, file_key):
"""
Save a pandas DataFrame to S3 as a Parquet file.
:param df: The pandas DataFrame.
:param bucket_name: Name of the S3 bucket.
:param file_key: Key of the file (including directory path within the bucket)
"""
# Convert the DataFrame to a Parquet format in memory
parquet_buffer = BytesIO()
df.to_parquet(parquet_buffer)
# Create the boto3 client
s3 = boto3.resource('s3')
# Upload the Parquet file to S3
s3.Object(bucket_name, file_key).put(Body=parquet_buffer.getvalue())

View file

@ -40,6 +40,7 @@ COPY ./model_data/config.py ./model_data/config.py
COPY ./model_data/optimiser/ ./model_data/optimiser/
COPY ./model_data/__init__.py ./model_data/__init__.py
COPY ./model_data/EpcClean.py ./model_data/EpcClean.py
COPT ./model_data/simulation_system/core/ ./model_data/simulation_system/core/
COPY ./model_data/utils.py ./model_data/utils.py
COPY ./model_data/epc_attributes/ ./model_data/epc_attributes/
COPY ./datatypes/ ./datatypes/

View file

@ -70,11 +70,24 @@ def handler(event, context):
s3_file_name=storage_filepath
)
return storage_filepath
return {
"statusCode": 200,
"body": json.dumps({
"message": "Successfully processed input",
"storage_filepath": storage_filepath
})
}
except (Exception, KeyError, ValueError) as e:
logger.info("Prediction failed")
logger.info(e)
return {
"statusCode": 500,
"body": json.dumps({
"message": "Prediction failed",
"error": str(e)
})
}
if __name__ == "__main__":

View file

@ -6,7 +6,7 @@ from backend.Property import Property
from recommendations.rdsap_tables import default_wall_thickness, age_band_data
from recommendations.recommendation_utils import (
r_value_per_mm_to_u_value, calculate_u_value_uplift, is_diminishing_returns, update_lowest_selected_u_value,
get_recommended_part, get_uvalue_estimate, estimate_sap_points
get_recommended_part, get_uvalue_estimate
)
suspended_floor_insulation_parts = [
@ -323,7 +323,7 @@ class FloorRecommendations(Definitions):
"description": self._make_floor_description(part, depth),
"starting_u_value": u_value,
"new_u_value": new_u_value,
"sap_points": estimate_sap_points(),
"sap_points": None,
"cost": estimated_cost,
}
)

View file

@ -6,7 +6,7 @@ from backend.Property import Property
from model_data.BaseUtility import Definitions
from recommendations.recommendation_utils import (
r_value_per_mm_to_u_value, calculate_u_value_uplift, is_diminishing_returns, update_lowest_selected_u_value,
get_recommended_part, get_uvalue_estimate, estimate_sap_points
get_recommended_part, get_uvalue_estimate
)
external_wall_insulation_parts = [
@ -354,7 +354,7 @@ class WallRecommendations(Definitions):
"description": "Install " + self._make_description(part, depth),
"starting_u_value": u_value,
"new_u_value": new_u_value,
"sap_points": estimate_sap_points(),
"sap_points": None,
"cost": estimated_cost,
}
)
@ -436,7 +436,7 @@ class WallRecommendations(Definitions):
),
"starting_u_value": u_value,
"new_u_value": combined_new_u_value,
"sap_points": estimate_sap_points(),
"sap_points": None,
"cost": ewi_esimtated_cost + iwi_esimtated_cost,
}
self.recommendations.append(recommendation)

View file

@ -4,15 +4,6 @@ from statistics import mean
import random
def estimate_sap_points():
"""
This is a placeholder function. We will implement the proper version soon
:return:
"""
return random.sample(range(4, 12), 1)[0]
def r_value_per_mm_to_u_value(depth_mm: int, r_value_per_mm: float):
"""
Converts R-value per mm to U-value in W/m²K.