Model/backend/app/plan/router.py

from datetime import datetime

import numpy as np
import pandas as pd
from backend.SearchEpc import SearchEpc
from fastapi import APIRouter, Depends
from sqlalchemy.exc import IntegrityError, OperationalError
from sqlalchemy.orm import sessionmaker
from starlette.responses import Response

from backend.app.config import get_settings
from backend.app.db.connection import db_engine
from backend.app.db.functions.materials_functions import get_materials
from backend.app.db.functions.portfolio_functions import aggregate_portfolio_recommendations
from backend.app.db.functions.property_functions import (
    create_property, create_property_details_epc, create_property_targets, update_property_data,
    update_or_create_property_spatial_details
)
from backend.app.db.functions.recommendations_functions import (
    create_plan, create_plan_recommendations, upload_recommendations
)
from backend.app.db.models.portfolio import rating_lookup
from backend.app.dependencies import validate_token
from backend.app.plan.schemas import PlanTriggerRequest
from backend.app.plan.utils import create_recommendation_scoring_data, get_cleaned
from backend.app.utils import epc_to_sap_lower_bound, read_csv_from_s3, read_parquet_from_s3, sap_to_epc

from backend.ml_models.api import ModelApi
from backend.Property import Property
from etl.epc.DataProcessor import DataProcessor
from etl.epc.settings import COLUMNS_TO_MERGE_ON
from recommendations.optimiser.CostOptimiser import CostOptimiser
from recommendations.optimiser.GainOptimiser import GainOptimiser
from recommendations.optimiser.optimiser_functions import prepare_input_measures
from recommendations.Recommendations import Recommendations
from utils.logger import setup_logger
from utils.s3 import read_dataframe_from_s3_parquet
from backend.ml_models.Valuation import PropertyValuation
from backend.ml_models.AnnualBillSavings import AnnualBillSavings

logger = setup_logger()

BATCH_SIZE = 5

router = APIRouter(
    prefix="/plan",
    tags=["plan"],
    dependencies=[Depends(validate_token)],
    responses={404: {"description": "Not found"}}
)


@router.post("/trigger")
async def trigger_plan(body: PlanTriggerRequest):
    logger.info("Connecting to db")
    session = sessionmaker(bind=db_engine)()
    created_at = datetime.now().isoformat()

    try:
        session.begin()
        logger.info("Getting the inputs")
        plan_input = read_csv_from_s3(bucket_name=get_settings().PLAN_TRIGGER_BUCKET, filepath=body.trigger_file_path)

        uprn_filenames = read_dataframe_from_s3_parquet(
            bucket_name=get_settings().DATA_BUCKET, file_key="spatial/filename_meta.parquet"
        )
        cleaning_data = read_parquet_from_s3(
            bucket_name=get_settings().DATA_BUCKET, file_key="sap_change_model/cleaning_dataset.parquet",
        )

        input_properties = []
        for config in plan_input:
            # We validate each record in the file. If the record is NOT valid, we need to handle this accordingly

            epc_searcher = SearchEpc(
                address1=config["address"],
                postcode=config["postcode"],
                auth_token=get_settings().EPC_AUTH_TOKEN,
                os_api_key=get_settings().ORDNANCE_SURVEY_API_KEY
            )
            epc_searcher.find_property()
            # Create a record in db
            property_id, is_new = create_property(
                session, body.portfolio_id, epc_searcher.address_clean, epc_searcher.postcode_clean, epc_searcher.uprn
            )
            # if a new record was not created, we don't produduce recommendations
            if not is_new:
                continue

            create_property_targets(
                session,
                property_id=property_id,
                portfolio_id=body.portfolio_id,
                epc_target=body.goal_value,
                heat_demand_target=None
            )

            input_properties.append(
                Property(
                    id=property_id,
                    address=epc_searcher.address_clean,
                    postcode=epc_searcher.postcode_clean,
                    data=epc_searcher.newest_epc,
                    old_data=epc_searcher.older_epcs,
                    full_sap_epc=epc_searcher.full_sap_epc,
                )
            )

        if not input_properties:
            return Response(status_code=204)

        logger.info("Getting spatial data")
        for p in input_properties:
            p.get_spatial_data(uprn_filenames)

        # The materials data could be cached or local so we don't need to make
        # consistent requests to the backend for
        # the same data
        logger.info("Reading in materials and cleaned datasets")
        materials = get_materials(session)
        cleaned = get_cleaned()

        logger.info("Getting components and epc recommendations")

        recommendations = {}
        recommendations_scoring_data = []
        property_scoring_data = {}

        for p in input_properties:

            # Property recommendations
            p.get_components(cleaned)

            # This is temp - this should happen after scoring
            cleaned_property_data = DataProcessor.apply_averages_cleaning(
                data_to_clean=pd.DataFrame([dict(**p.get_model_data(), LOCAL_AUTHORITY=p.data["local-authority"])]),
                cleaning_data=cleaning_data,
                cols_to_merge_on=['PROPERTY_TYPE', 'BUILT_FORM', 'CONSTRUCTION_AGE_BAND', 'LOCAL_AUTHORITY'],
            )
            p.set_number_lighting_outlets(cleaned_property_data)

            recommender = Recommendations(property_instance=p, materials=materials)
            property_recommendations = recommender.recommend()

            if not property_recommendations:
                continue

            recommendations[p.id] = property_recommendations

            # Finally, we'll prepare data for predicting the impact on SAP
            data_processor = DataProcessor(None, newdata=True)
            data_processor.insert_data(pd.DataFrame([p.get_model_data()]))

            data_processor.pre_process()

            starting_epc_data = data_processor.get_component_features(suffix="_STARTING")
            ending_epc_data = data_processor.get_component_features(suffix="_ENDING")
            fixed_data = data_processor.get_fixed_features()

            # We update the ending record with the recommended updates and we set lodgement date to today
            ending_epc_data["DAYS_TO_ENDING"] = data_processor.calculate_days_to(created_at)

            property_scoring_data[p.id] = {
                "starting_epc_data": starting_epc_data,
                "ending_epc_data": ending_epc_data,
                "fixed_data": fixed_data
            }

            for recommendations_by_type in property_recommendations:
                for i, rec in enumerate(recommendations_by_type):
                    scoring_dict = create_recommendation_scoring_data(
                        property=p,
                        recommendation=rec,
                        starting_epc_data=starting_epc_data,
                        ending_epc_data=ending_epc_data,
                        fixed_data=fixed_data,
                    )

                    recommendations_scoring_data.append(scoring_dict)

        # cleanup
        del data_processor

        logger.info("Preparing data for scoring in sap change api")
        recommendations_scoring_data = pd.DataFrame(recommendations_scoring_data)

        # Perform the same cleaning as in the model - first clean number of room variables though
        recommendations_scoring_data = DataProcessor.apply_averages_cleaning(
            data_to_clean=recommendations_scoring_data,
            cleaning_data=cleaning_data,
            cols_to_merge_on=['PROPERTY_TYPE', 'BUILT_FORM', 'CONSTRUCTION_AGE_BAND', 'LOCAL_AUTHORITY'],
            colnames=["NUMBER_HABITABLE_ROOMS", "NUMBER_HEATED_ROOMS"],
        )

        recommendations_scoring_data = DataProcessor.apply_averages_cleaning(
            data_to_clean=recommendations_scoring_data,
            cleaning_data=cleaning_data,
            cols_to_merge_on=COLUMNS_TO_MERGE_ON + ["LOCAL_AUTHORITY"],
        ).drop(columns=["LOCAL_AUTHORITY"])

        recommendations_scoring_data = DataProcessor.clean_missings_after_description_process(
            recommendations_scoring_data,
            ignore_cols=[c for c in recommendations_scoring_data.columns if ("thermal_transmittance" in c) or (
                "insulation_thickness" in c) or ("ENERGY_EFF" in c)]
        )

        recommendations_scoring_data = DataProcessor.clean_efficiency_variables(recommendations_scoring_data)

        model_api = ModelApi(portfolio_id=body.portfolio_id, timestamp=created_at)
        all_predictions = model_api.predict_all(
            df=recommendations_scoring_data,
            bucket=get_settings().DATA_BUCKET,
            prediction_buckets={
                "sap_change_predictions": get_settings().SAP_PREDICTIONS_BUCKET,
                "heat_demand_predictions": get_settings().HEAT_PREDICTIONS_BUCKET,
                "carbon_change_predictions": get_settings().CARBON_PREDICTIONS_BUCKET
            }
        )

        # Insert the predictions into the recommendations and run the optimiser
        logger.info("Optimising recommendations")
        for property_id in recommendations.keys():

            property_instance = [p for p in input_properties if p.id == property_id][0]

            recommendations_with_impact = Recommendations.calculate_recommendation_impact(
                property_instance=property_instance,
                all_predictions=all_predictions,
                recommendations=recommendations
            )

            input_measures = prepare_input_measures(recommendations_with_impact, body.goal)

            if body.budget:
                optimiser = GainOptimiser(input_measures, max_cost=body.budget)
            else:
                # The minimum gain is the minimum number of SAP points required to get to the target SAP band
                current_sap_points = int(property_instance.data["current-energy-efficiency"])
                target_sap_points = epc_to_sap_lower_bound(body.goal_value)

                # If the gain is negative, the optimiser will return an empty solution
                optimiser = CostOptimiser(
                    input_measures,
                    min_gain=CostOptimiser.calculate_sap_gain_with_slack(target_sap_points - current_sap_points)
                )

            optimiser.setup()
            optimiser.solve()
            solution = optimiser.solution

            selected_recommendations = {r["id"] for r in solution}

            # If wall ventilation is selected, we also include mechanical ventilation as a best practice measure
            if any(x in [r["type"] for r in solution] for x in [
                "internal_wall_insulation", "external_wall_insulation", "cavity_wall_insulation"
            ]):
                ventilation_rec = [
                    r for r in recommendations_with_impact if r[0]["type"] == "mechanical_ventilation"
                ][0]

                selected_recommendations = set(
                    list(selected_recommendations) + [ventilation_rec[0]["recommendation_id"]]
                )

            # We check if the selected recommendation is wall ventilation and if so, we make sure
            # mechanical ventilation is selected

            # We'll use the set of selected recommendations to filter the recommendations to upload
            final_recommendations = [
                [
                    {**rec, "default": True if rec["recommendation_id"] in selected_recommendations else False}
                    for rec in recommendations_by_type
                ]
                for recommendations_by_type in recommendations_with_impact
            ]

            # We'll also unlist the recommendations so they're a bit easier to handle from here onwards
            final_recommendations = [
                rec for recommendations_by_type in final_recommendations for rec in recommendations_by_type
            ]
            recommendations[property_id] = final_recommendations

        # This is a temporary step, to estimate the impact of the measured on heat demand and carbon
        # TODO: This needs to be cleaned up, if it happens to be kept
        combined_recommendations_scoring_data = []
        representative_recs = {}
        for property_id, property_recommendations in recommendations.items():
            default_recommendations = [r for r in property_recommendations if r["default"]]
            default_types = {x["type"] for x in default_recommendations}

            # Missing types
            missing_types = list(set([r["type"] for r in property_recommendations if r["type"] not in default_types]))
            # We might have a missing type as one of the solid wall options because for a solid wall, you might
            # have ewi or iwi but only one of them will be a default
            if ("internal_wall_insulation" in default_types) or ("external_wall_insaultion" in default_types):
                missing_types = [
                    t for t in missing_types if t not in ["internal_wall_insulation", "external_wall_insulation"]
                ]

            # We check if NO wall insulation was selected but iwi and ewi are available
            # This condition will check
            # 1) iwi and ewi are both in missing_types
            # 2) iwi and ewi are not in default_types
            # If both of these are true, it means that no wall insulation was selected via the optimisation routine
            # but both are possible, so we need to select a default. We default to iwi because it's usually cheaper
            if (("internal_wall_insulation" in missing_types) and ("external_wall_insulation" in missing_types)) and (
                ("internal_wall_insulation" not in default_types) and ("external_wall_insulation" not in default_types)
            ):
                missing_types = [t for t in missing_types if t != "external_wall_insulation"]

            if missing_types:
                for missed_type in missing_types:
                    missed = [r for r in property_recommendations if r["type"] == missed_type]
                    min_cost = min([r["total"] for r in missed])
                    # Grab a representative, based on cheapest cost

                    representative_rec = [r for r in property_recommendations if np.isclose(r["total"], min_cost)]
                    default_recommendations.append(representative_rec[0])

            representative_recs[property_id] = default_recommendations

            property_instance = [p for p in input_properties if p.id == property_id][0]

            property_scoring_datasets = property_scoring_data[property_id]
            starting_epc_data = property_scoring_datasets["starting_epc_data"].copy()
            ending_epc_data = property_scoring_datasets["ending_epc_data"].copy()
            fixed_data = property_scoring_datasets["fixed_data"].copy()

            scoring_dict = {}
            for rec in default_recommendations:
                scoring_dict = create_recommendation_scoring_data(
                    property=property_instance,
                    recommendation=rec,
                    starting_epc_data=starting_epc_data,
                    ending_epc_data=ending_epc_data,
                    fixed_data=fixed_data,
                )
                # At each iteration, we want to update the ending_epc_data, so in the end, ending_epc_data contains
                # all of the updates
                for k in scoring_dict.keys():
                    if k in ending_epc_data.columns:
                        ending_epc_data[k] = scoring_dict[k]

            combined_recommendations_scoring_data.append(scoring_dict)

        # PERFORM SAME STEPS AGAIN - TODO: TO BE REMOVED
        combined_recommendations_scoring_data = pd.DataFrame(combined_recommendations_scoring_data)

        # Perform the same cleaning as in the model - first clean number of room variables though
        combined_recommendations_scoring_data = DataProcessor.apply_averages_cleaning(
            data_to_clean=combined_recommendations_scoring_data,
            cleaning_data=cleaning_data,
            cols_to_merge_on=['PROPERTY_TYPE', 'BUILT_FORM', 'CONSTRUCTION_AGE_BAND', 'LOCAL_AUTHORITY'],
            colnames=["NUMBER_HABITABLE_ROOMS", "NUMBER_HEATED_ROOMS"],
        )

        combined_recommendations_scoring_data = DataProcessor.apply_averages_cleaning(
            data_to_clean=combined_recommendations_scoring_data,
            cleaning_data=cleaning_data,
            cols_to_merge_on=COLUMNS_TO_MERGE_ON + ["LOCAL_AUTHORITY"],
        ).drop(columns=["LOCAL_AUTHORITY"])

        combined_recommendations_scoring_data = DataProcessor.clean_missings_after_description_process(
            combined_recommendations_scoring_data,
            ignore_cols=[
                c for c in combined_recommendations_scoring_data.columns if ("thermal_transmittance" in c) or (
                    "insulation_thickness" in c) or ("ENERGY_EFF" in c)
            ]
        )

        combined_recommendations_scoring_data = DataProcessor.clean_efficiency_variables(
            combined_recommendations_scoring_data
        )

        model_api = ModelApi(portfolio_id=body.portfolio_id, timestamp=created_at)
        all_combined_predictions = model_api.predict_all(
            df=combined_recommendations_scoring_data,
            bucket=get_settings().DATA_BUCKET,
            prediction_buckets={
                "sap_change_predictions": get_settings().SAP_PREDICTIONS_BUCKET,
                "heat_demand_predictions": get_settings().HEAT_PREDICTIONS_BUCKET,
                "carbon_change_predictions": get_settings().CARBON_PREDICTIONS_BUCKET
            }
        )

        # We update the carbon and heat demand predictions
        for property_id, property_recommendations in recommendations.items():
            combined_heat_demand = all_combined_predictions["heat_demand_predictions"]
            combined_heat_demand = combined_heat_demand[combined_heat_demand["property_id"] == str(property_id)]

            combined_carbon = all_combined_predictions["carbon_change_predictions"]
            combined_carbon = combined_carbon[combined_carbon["property_id"] == str(property_id)]

            property_instance = [p for p in input_properties if p.id == property_id][0]

            carbon_change = float(
                property_instance.data["co2-emissions-current"]
            ) - combined_carbon["predictions"].values[0]

            starting_heat_demand = (
                float(property_instance.data["energy-consumption-current"]) * property_instance.floor_area
            )
            expected_heat_demand = starting_heat_demand - (
                combined_heat_demand["predictions"].values[0] * property_instance.floor_area
            )

            # We don't want to adjust the heat demand for mechanical ventilation so we add it back on

            # We adjust the heat demand figures to align to the UCL paper
            current_adjusted_energy = AnnualBillSavings.adjust_energy_to_metered(
                epc_energy_consumption=starting_heat_demand,
                current_epc_rating=property_instance.data["current-energy-rating"],
            )

            # We sum up the SAP points of the default recommendations and calculate a new EPC category. This
            # category is then used to produce adjusted energy figures

            expected_adjusted_energy = AnnualBillSavings.adjust_energy_to_metered(
                epc_energy_consumption=expected_heat_demand,
                current_epc_rating=property_instance.data["current-energy-rating"],
            )

            heat_demand_change = (
                current_adjusted_energy - expected_adjusted_energy
            )

            # update the recommendations
            # We need to totals for the representative recommendations
            representative_rec_data = [
                {
                    "recommendation_id": r["recommendation_id"],
                    "co2_equivalent_savings": r.get("co2_equivalent_savings"),
                    "heat_demand": r.get("heat_demand"),
                    "type": r["type"]
                } for r
                in representative_recs[property_id]
            ]
            representative_rec_data = pd.DataFrame(representative_rec_data)
            # Convert co2 and heat demand to proportions of their column sums
            representative_rec_data["co2_equivalent_savings_percent"] = (
                representative_rec_data["co2_equivalent_savings"] /
                representative_rec_data["co2_equivalent_savings"].sum()
            )

            representative_rec_data["heat_demand_percent"] = (
                representative_rec_data["heat_demand"] / representative_rec_data["heat_demand"].sum()
            )

            # We'll use the proportions to update the carbon and heat demand
            representative_rec_data["co2_equivalent_savings"] = (
                carbon_change * representative_rec_data["co2_equivalent_savings_percent"]
            )

            representative_rec_data["heat_demand"] = (
                heat_demand_change * representative_rec_data["heat_demand_percent"]
            )

            # Finally, insert these values into the final recommendations
            for rec in property_recommendations:
                if rec["type"] in ["external_wall_insulation", "internal_wall_insulation"]:
                    change_data = representative_rec_data[
                        representative_rec_data["type"].isin(["external_wall_insulation", "internal_wall_insulation"])
                    ]
                else:
                    change_data = representative_rec_data[representative_rec_data["type"] == rec["type"]]

                if rec["type"] == "mechanical_ventilation":
                    rec["co2_equivalent_savings"] = 0
                    rec["heat_demand"] = 0
                    rec["energy_cost_savings"] = 0
                else:
                    rec["co2_equivalent_savings"] = change_data["co2_equivalent_savings"].values[0]
                    rec["heat_demand"] = change_data["heat_demand"].values[0]
                    rec["energy_cost_savings"] = AnnualBillSavings.estimate(rec["heat_demand"])

            # Update recommendations
            recommendations[property_id] = property_recommendations

            # For expected adjust energy, we don't include mechanical ventilation so we'll add it back on
            mechanical_ventilation_rec = representative_rec_data[
                representative_rec_data["type"] == "mechanical_ventilation"
                ]
            if not mechanical_ventilation_rec.empty:
                expected_adjusted_energy = (
                    expected_adjusted_energy + mechanical_ventilation_rec["heat_demand"].values[0]
                )

            property_instance.set_adjusted_energy(
                current_adjusted_energy=current_adjusted_energy,
                expected_adjusted_energy=expected_adjusted_energy
            )

        # 1) the property data
        # 2) the property details (epc)
        # 3) the recommendations

        logger.info("Uploading recommendations to the database")
        property_valuation_increases = []
        session.commit()
        for i in range(0, len(input_properties), BATCH_SIZE):
            try:
                # Take a slice of the input_properties list to make a batch
                batch_properties = input_properties[i:i + BATCH_SIZE]

                for p in batch_properties:
                    # Your existing operations
                    property_details_epc = p.get_property_details_epc(
                        portfolio_id=body.portfolio_id, rating_lookup=rating_lookup
                    )
                    create_property_details_epc(session, property_details_epc)

                    update_or_create_property_spatial_details(session, p.uprn, p.spatial)

                    property_data = p.get_full_property_data()
                    update_property_data(
                        session, property_id=p.id, portfolio_id=body.portfolio_id, property_data=property_data
                    )

                    recommendations_to_upload = recommendations.get(p.id, [])
                    if not recommendations_to_upload:
                        continue

                    new_plan_id = create_plan(session, {
                        "portfolio_id": body.portfolio_id,
                        "property_id": p.id,
                        "is_default": True
                    })

                    uploaded_recommendation_ids = upload_recommendations(session, recommendations_to_upload, p.id)

                    create_plan_recommendations(
                        session, plan_id=new_plan_id, recommendation_ids=uploaded_recommendation_ids
                    )

                    # Get defaults
                    default_recommendations = [r for r in recommendations_to_upload if r["default"]]
                    total_sap_points = sum([r["sap_points"] for r in default_recommendations])
                    new_sap_points = float(p.data["current-energy-efficiency"]) + total_sap_points
                    new_epc = sap_to_epc(new_sap_points)

                    valuations = PropertyValuation.estimate(property_instance=p, target_epc=new_epc)

                    property_valuation_increases.append(valuations["average_increase"])

                # Commit the session after each batch
                session.commit()

            except Exception as e:
                # Rollback the session if an error occurs
                session.rollback()
                print("Failed i = %s" % str(i))
                logger.error(f"An error occurred during batch starting at index {i}: {e}")

        logger.info("Creating portfolio aggregations")
        # We implement this in the simplest way possible which will be just to query the database for all
        # recommendations associated to the portfolio and then aggregate them. This is not the most efficient
        # way to do this, but it's the simplest and will be a process that we can re-use since when we change a
        # recommendation from being default to not default, we'll need to re-run this process to re-calculate the
        # the portfolion level impact

        total_valuation_increase = sum([v for v in property_valuation_increases if v is not None])
        labour_days = round(max(
            [sum(r["labour_days"] for r in rec_group if r["default"]) for p_id, rec_group in recommendations.items()]
        ))

        aggregate_portfolio_recommendations(
            session,
            portfolio_id=body.portfolio_id,
            total_valuation_increase=total_valuation_increase,
            labour_days=labour_days
        )

        # Commit final changes
        session.commit()
    except IntegrityError:
        logger.error("Database integrity error occurred", exc_info=True)
        session.rollback()
        return Response(status_code=500, content="Database integrity error.")
    except OperationalError:
        logger.error("Database operational error occurred", exc_info=True)
        session.rollback()
        return Response(status_code=500, content="Database operational error.")
    except ValueError:
        logger.error("Value error - possibly due to malformed data", exc_info=True)
        session.rollback()
        return Response(status_code=400, content="Bad request: malformed data.")
    except Exception as e:  # General exception handling
        logger.error(f"An error occurred: {e}")
        session.rollback()
        return Response(status_code=500, content="An unexpected error occurred.")
    finally:
        session.close()

    return Response(status_code=200)