diff --git a/backend/app/plan/columntypes.py b/backend/app/plan/columntypes.py new file mode 100644 index 00000000..5c859300 --- /dev/null +++ b/backend/app/plan/columntypes.py @@ -0,0 +1,32 @@ +columntypes = { + 'UPRN': 'object', 'TOTAL_FLOOR_AREA': 'float64', 'FLOOR_HEIGHT': 'float64', 'PROPERTY_TYPE': 'object', + 'BUILT_FORM': 'object', 'CONSTITUENCY': 'object', 'NUMBER_HABITABLE_ROOMS': 'float64', + 'NUMBER_HEATED_ROOMS': 'float64', 'FIXED_LIGHTING_OUTLETS_COUNT': 'float64', 'FLOOR_LEVEL': 'float64', + 'CONSTRUCTION_AGE_BAND': 'object', 'TRANSACTION_TYPE_STARTING': 'object', + 'WALLS_DESCRIPTION_STARTING': 'object', + 'FLOOR_DESCRIPTION_STARTING': 'object', 'LIGHTING_DESCRIPTION_STARTING': 'object', + 'ROOF_DESCRIPTION_STARTING': 'object', 'MAINHEAT_DESCRIPTION_STARTING': 'object', + 'HOTWATER_DESCRIPTION_STARTING': 'object', 'MAIN_FUEL_STARTING': 'object', + 'MECHANICAL_VENTILATION_STARTING': 'object', + 'SECONDHEAT_DESCRIPTION_STARTING': 'object', 'ENERGY_TARIFF_STARTING': 'object', + 'SOLAR_WATER_HEATING_FLAG_STARTING': 'object', 'PHOTO_SUPPLY_STARTING': 'float64', + 'WINDOWS_DESCRIPTION_STARTING': 'object', 'GLAZED_TYPE_STARTING': 'object', + 'MULTI_GLAZE_PROPORTION_STARTING': 'float64', 'LOW_ENERGY_LIGHTING_STARTING': 'float64', + 'NUMBER_OPEN_FIREPLACES_STARTING': 'float64', 'MAINHEATCONT_DESCRIPTION_STARTING': 'object', + 'EXTENSION_COUNT_STARTING': 'float64', 'LODGEMENT_DATE_STARTING': 'object', + 'TRANSACTION_TYPE_ENDING': 'object', + 'WALLS_DESCRIPTION_ENDING': 'object', 'FLOOR_DESCRIPTION_ENDING': 'object', + 'LIGHTING_DESCRIPTION_ENDING': 'object', + 'ROOF_DESCRIPTION_ENDING': 'object', 'MAINHEAT_DESCRIPTION_ENDING': 'object', + 'HOTWATER_DESCRIPTION_ENDING': 'object', + 'MAIN_FUEL_ENDING': 'object', 'MECHANICAL_VENTILATION_ENDING': 'object', + 'SECONDHEAT_DESCRIPTION_ENDING': 'object', + 'ENERGY_TARIFF_ENDING': 'object', 'SOLAR_WATER_HEATING_FLAG_ENDING': 'object', + 'PHOTO_SUPPLY_ENDING': 'float64', + 'WINDOWS_DESCRIPTION_ENDING': 'object', 'GLAZED_TYPE_ENDING': 'object', + 'MULTI_GLAZE_PROPORTION_ENDING': 'float64', + 'LOW_ENERGY_LIGHTING_ENDING': 'float64', 'NUMBER_OPEN_FIREPLACES_ENDING': 'float64', + 'MAINHEATCONT_DESCRIPTION_ENDING': 'object', 'EXTENSION_COUNT_ENDING': 'float64', + 'LODGEMENT_DATE_ENDING': 'object', + 'id': 'object' +} diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index 4669ddfa..bd15525d 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -17,8 +17,7 @@ from sqlalchemy.orm import sessionmaker from sqlalchemy.exc import IntegrityError, OperationalError from datetime import datetime import pandas as pd -from io import BytesIO -import boto3 +import requests # database interaction functions from backend.app.db.functions.property_functions import ( @@ -26,16 +25,20 @@ from backend.app.db.functions.property_functions import ( ) from backend.app.db.functions.materials_functions import get_materials from backend.app.db.functions.recommendations_functions import ( -create_plan, create_plan_recommendations, upload_recommendations + create_plan, create_plan_recommendations, upload_recommendations ) from backend.app.db.functions.portfolio_functions import aggregate_portfolio_recommendations from backend.app.db.connection import db_engine +from backend.app.plan.columntypes import columntypes from model_data.optimiser.GainOptimiser import GainOptimiser from model_data.optimiser.CostOptimiser import CostOptimiser -from backend.app.utils import epc_to_sap_lower_bound +from backend.app.utils import epc_to_sap_lower_bound, save_dataframe_to_s3_parquet, read_parquet_from_s3 from model_data.optimiser.optimiser_functions import prepare_input_measures from model_data.simulation_system.core.DataProcessor import DataProcessor +from model_data.simulation_system.core.Settings import ( + FIXED_FEATURES, COMPONENT_FEATURES, COLUMNS_TO_MERGE_ON +) # TODO: This is placeholder until data is stored in DB from backend.app.plan.uvalue_estimates_walls import uvalue_estimates_walls @@ -133,19 +136,6 @@ def insert_temp_recommendation_id(property_recommendations): return property_recommendations -def read_parquet_from_s3(bucket_name, file_key): - client = boto3.client('s3') - - # Get the object - s3_object = client.get_object(Bucket=bucket_name, Key=file_key) - - # Read the CSV body into a DataFrame - csv_body = s3_object["Body"].read() - df = pd.read_parquet(BytesIO(csv_body)) - - return df - - @router.post("/trigger") async def trigger_plan(body: PlanTriggerRequest): logger.info("Connecting to db") @@ -343,7 +333,6 @@ async def trigger_plan(body: PlanTriggerRequest): recommendations[p.id] = property_recommendations # Finally, we'll prepare data for predicting the impact on SAP - from model_data.simulation_system.core.Settings import FIXED_FEATURES, COMPONENT_FEATURES, COLUMNS_TO_MERGE_ON epc_data = p.data.copy() epc_data = pd.DataFrame([epc_data]) epc_data.columns = [col.upper().replace("-", "_") for col in epc_data.columns] @@ -407,59 +396,43 @@ async def trigger_plan(body: PlanTriggerRequest): data_processor.remap_columns() recommendations_scoring_data = data_processor.data - column_types = data.dtypes.to_dict() - columntypes = {} - for k, v in column_types.items(): - if k not in recommendations_scoring_data.columns: - continue - columntypes[k] = v.name - - columntypes["id"] = "object" - for col in recommendations_scoring_data.columns: - recommendations_scoring_data[col] = recommendations_scoring_data[col].astype(columntypes[col]) - + # Remap column types recommendations_scoring_data = recommendations_scoring_data.astype(columntypes) - - - column_types = {'UPRN': dtype('O'), 'RDSAP_CHANGE': dtype('int64'), 'HEAT_DEMAND_CHANGE': dtype('int64'), - 'TOTAL_FLOOR_AREA': dtype('float64'), 'FLOOR_HEIGHT': dtype('float64'), 'PROPERTY_TYPE': dtype('O'), - 'BUILT_FORM': dtype('O'), 'CONSTITUENCY': dtype('O'), 'NUMBER_HABITABLE_ROOMS': dtype('float64'), - 'NUMBER_HEATED_ROOMS': dtype('float64'), 'FIXED_LIGHTING_OUTLETS_COUNT': dtype('float64'), - 'FLOOR_LEVEL': dtype('float64'), 'CONSTRUCTION_AGE_BAND': dtype('O'), 'TRANSACTION_TYPE_STARTING': dtype('O'), - 'WALLS_DESCRIPTION_STARTING': dtype('O'), 'FLOOR_DESCRIPTION_STARTING': dtype('O'), - 'LIGHTING_DESCRIPTION_STARTING': dtype('O'), 'ROOF_DESCRIPTION_STARTING': dtype('O'), - 'MAINHEAT_DESCRIPTION_STARTING': dtype('O'), 'HOTWATER_DESCRIPTION_STARTING': dtype('O'), - 'MAIN_FUEL_STARTING': dtype('O'), 'MECHANICAL_VENTILATION_STARTING': dtype('O'), - 'SECONDHEAT_DESCRIPTION_STARTING': dtype('O'), 'ENERGY_TARIFF_STARTING': dtype('O'), - 'SOLAR_WATER_HEATING_FLAG_STARTING': dtype('O'), 'PHOTO_SUPPLY_STARTING': dtype('float64'), - 'WINDOWS_DESCRIPTION_STARTING': dtype('O'), 'GLAZED_TYPE_STARTING': dtype('O'), - 'MULTI_GLAZE_PROPORTION_STARTING': dtype('float64'), 'LOW_ENERGY_LIGHTING_STARTING': dtype('float64'), - 'NUMBER_OPEN_FIREPLACES_STARTING': dtype('float64'), 'MAINHEATCONT_DESCRIPTION_STARTING': dtype('O'), - 'EXTENSION_COUNT_STARTING': dtype('float64'), 'LODGEMENT_DATE_STARTING': dtype('O'), - 'TRANSACTION_TYPE_ENDING': dtype('O'), 'WALLS_DESCRIPTION_ENDING': dtype('O'), - 'FLOOR_DESCRIPTION_ENDING': dtype('O'), 'LIGHTING_DESCRIPTION_ENDING': dtype('O'), - 'ROOF_DESCRIPTION_ENDING': dtype('O'), 'MAINHEAT_DESCRIPTION_ENDING': dtype('O'), - 'HOTWATER_DESCRIPTION_ENDING': dtype('O'), 'MAIN_FUEL_ENDING': dtype('O'), - 'MECHANICAL_VENTILATION_ENDING': dtype('O'), 'SECONDHEAT_DESCRIPTION_ENDING': dtype('O'), - 'ENERGY_TARIFF_ENDING': dtype('O'), 'SOLAR_WATER_HEATING_FLAG_ENDING': dtype('O'), - 'PHOTO_SUPPLY_ENDING': dtype('float64'), 'WINDOWS_DESCRIPTION_ENDING': dtype('O'), - 'GLAZED_TYPE_ENDING': dtype('O'), 'MULTI_GLAZE_PROPORTION_ENDING': dtype('float64'), - 'LOW_ENERGY_LIGHTING_ENDING': dtype('float64'), 'NUMBER_OPEN_FIREPLACES_ENDING': dtype('float64'), - 'MAINHEATCONT_DESCRIPTION_ENDING': dtype('O'), 'EXTENSION_COUNT_ENDING': dtype('float64'), - 'LODGEMENT_DATE_ENDING': dtype('O'), 'id': dtype('int64')} - - # Example data file - - - data = read_parquet_from_s3( - bucket_name="retrofit-data-dev", file_key="model_build_data/change_data/rdsap_full/test_data_with_id.parquet" + # Store parquet file in s3 for scoring + created_at = datetime.now().strftime("%Y-%m-%d-%H-%M-%S") + file_location = "sap_change_predictions/{portfolio_id}/{timestamp}.parquet".format( + portfolio_id=body.portfolio_id, + timestamp=created_at ) - data = data.head(5) + save_dataframe_to_s3_parquet( + df=recommendations_scoring_data, + bucket_name="retrofit-data-dev", + file_key=file_location + ) - # We query the sap difference model api to get the estimated impact on sap - for property_id, recommendations in recommendations.items(): + # Call the sap change model + response = requests.post( + url="https://api.dev.hestia.homes/sapmodel/predict", + json={ + "file_location": "s3://retrofit-data-dev/" + file_location, + "property_id": 999, + "portfolio_id": 4, + "created_at": created_at + } + ) + # TODO: Handle the response depending on response code + # Retrieve the predictions + predictions = read_csv_from_s3( + bucket_name="retrofit-sap-predictions-dev", + filepath=f"{body.portfolio_id}/999/{created_at}.csv" + ) + predictions = pd.DataFrame(predictions) + # We round the predictions + predictions["RDSAP_CHANGE"] = predictions["RDSAP_CHANGE"].astype(float).round(0) + # Extract property_id and recommendation_id + predictions[['property_id', 'recommendation_id']] = predictions['id'].str.split('+', expand=True) # 1) the property data # 2) the property details (epc) @@ -481,6 +454,16 @@ async def trigger_plan(body: PlanTriggerRequest): if not recommendations_to_upload: continue + property_predictions = predictions[predictions["property_id"] == str(p.id)] + for rec in recommendations_to_upload: + # Insert the prediction for sap points + rec["sap_points"] = property_predictions[property_predictions["recommendation_id"] == str( + rec["recommendation_id"] + )]["RDSAP_CHANGE"].values[0] + + if not rec["sap_points"]: + raise ValueError("Sap points missing") + # Create a plan new_plan_id = create_plan( session, diff --git a/backend/app/utils.py b/backend/app/utils.py index bc052e74..7099eba1 100644 --- a/backend/app/utils.py +++ b/backend/app/utils.py @@ -4,6 +4,8 @@ from io import StringIO import string import secrets import logging +import pandas as pd +from io import BytesIO def setup_logger(log_file=None, level=logging.INFO, overwrite_handler=False): @@ -117,3 +119,36 @@ def epc_to_sap_lower_bound(epc: str): return 1 else: raise ValueError("EPC rating should be between A and G") + + +def read_parquet_from_s3(bucket_name, file_key): + client = boto3.client('s3') + + # Get the object + s3_object = client.get_object(Bucket=bucket_name, Key=file_key) + + # Read the CSV body into a DataFrame + csv_body = s3_object["Body"].read() + df = pd.read_parquet(BytesIO(csv_body)) + + return df + + +def save_dataframe_to_s3_parquet(df, bucket_name, file_key): + """ + Save a pandas DataFrame to S3 as a Parquet file. + + :param df: The pandas DataFrame. + :param bucket_name: Name of the S3 bucket. + :param file_key: Key of the file (including directory path within the bucket) + """ + + # Convert the DataFrame to a Parquet format in memory + parquet_buffer = BytesIO() + df.to_parquet(parquet_buffer) + + # Create the boto3 client + s3 = boto3.resource('s3') + + # Upload the Parquet file to S3 + s3.Object(bucket_name, file_key).put(Body=parquet_buffer.getvalue()) diff --git a/backend/docker/Dockerfile b/backend/docker/Dockerfile index f23435a0..fd498cdb 100644 --- a/backend/docker/Dockerfile +++ b/backend/docker/Dockerfile @@ -40,6 +40,7 @@ COPY ./model_data/config.py ./model_data/config.py COPY ./model_data/optimiser/ ./model_data/optimiser/ COPY ./model_data/__init__.py ./model_data/__init__.py COPY ./model_data/EpcClean.py ./model_data/EpcClean.py +COPT ./model_data/simulation_system/core/ ./model_data/simulation_system/core/ COPY ./model_data/utils.py ./model_data/utils.py COPY ./model_data/epc_attributes/ ./model_data/epc_attributes/ COPY ./datatypes/ ./datatypes/ diff --git a/model_data/simulation_system/handlers/predictions_app.py b/model_data/simulation_system/handlers/predictions_app.py index 5ea0d997..75d2aff6 100644 --- a/model_data/simulation_system/handlers/predictions_app.py +++ b/model_data/simulation_system/handlers/predictions_app.py @@ -70,11 +70,24 @@ def handler(event, context): s3_file_name=storage_filepath ) - return storage_filepath + return { + "statusCode": 200, + "body": json.dumps({ + "message": "Successfully processed input", + "storage_filepath": storage_filepath + }) + } except (Exception, KeyError, ValueError) as e: logger.info("Prediction failed") logger.info(e) + return { + "statusCode": 500, + "body": json.dumps({ + "message": "Prediction failed", + "error": str(e) + }) + } if __name__ == "__main__": diff --git a/recommendations/FloorRecommendations.py b/recommendations/FloorRecommendations.py index 3d53da69..b111090f 100644 --- a/recommendations/FloorRecommendations.py +++ b/recommendations/FloorRecommendations.py @@ -6,7 +6,7 @@ from backend.Property import Property from recommendations.rdsap_tables import default_wall_thickness, age_band_data from recommendations.recommendation_utils import ( r_value_per_mm_to_u_value, calculate_u_value_uplift, is_diminishing_returns, update_lowest_selected_u_value, - get_recommended_part, get_uvalue_estimate, estimate_sap_points + get_recommended_part, get_uvalue_estimate ) suspended_floor_insulation_parts = [ @@ -323,7 +323,7 @@ class FloorRecommendations(Definitions): "description": self._make_floor_description(part, depth), "starting_u_value": u_value, "new_u_value": new_u_value, - "sap_points": estimate_sap_points(), + "sap_points": None, "cost": estimated_cost, } ) diff --git a/recommendations/WallRecommendations.py b/recommendations/WallRecommendations.py index 570a46d7..9edbe969 100644 --- a/recommendations/WallRecommendations.py +++ b/recommendations/WallRecommendations.py @@ -6,7 +6,7 @@ from backend.Property import Property from model_data.BaseUtility import Definitions from recommendations.recommendation_utils import ( r_value_per_mm_to_u_value, calculate_u_value_uplift, is_diminishing_returns, update_lowest_selected_u_value, - get_recommended_part, get_uvalue_estimate, estimate_sap_points + get_recommended_part, get_uvalue_estimate ) external_wall_insulation_parts = [ @@ -354,7 +354,7 @@ class WallRecommendations(Definitions): "description": "Install " + self._make_description(part, depth), "starting_u_value": u_value, "new_u_value": new_u_value, - "sap_points": estimate_sap_points(), + "sap_points": None, "cost": estimated_cost, } ) @@ -436,7 +436,7 @@ class WallRecommendations(Definitions): ), "starting_u_value": u_value, "new_u_value": combined_new_u_value, - "sap_points": estimate_sap_points(), + "sap_points": None, "cost": ewi_esimtated_cost + iwi_esimtated_cost, } self.recommendations.append(recommendation) diff --git a/recommendations/recommendation_utils.py b/recommendations/recommendation_utils.py index d35befd7..e53aeb17 100644 --- a/recommendations/recommendation_utils.py +++ b/recommendations/recommendation_utils.py @@ -4,15 +4,6 @@ from statistics import mean import random -def estimate_sap_points(): - """ - This is a placeholder function. We will implement the proper version soon - :return: - """ - - return random.sample(range(4, 12), 1)[0] - - def r_value_per_mm_to_u_value(depth_mm: int, r_value_per_mm: float): """ Converts R-value per mm to U-value in W/m²K.