Model/backend/tests/test_integration.py
Khalim Conn-Kowlessar ef934f6b7c debugged funding test
2025-10-28 19:21:53 +00:00

532 lines
24 KiB
Python

# import ast
# import json
# from copy import deepcopy
# from dataclasses import replace
# from datetime import datetime
#
# import random
# from tqdm import tqdm
# import pandas as pd
# import numpy as np
# from etl.epc.Record import EPCRecord
# from backend.SearchEpc import SearchEpc
# from sqlalchemy.exc import IntegrityError, OperationalError
# from sqlalchemy.orm import sessionmaker
# from starlette.responses import Response
#
# from backend.app.config import get_settings, get_prediction_buckets
# from backend.app.db.connection import db_engine
# from backend.app.db.functions.materials_functions import get_materials
# from backend.app.db.functions.portfolio_functions import aggregate_portfolio_recommendations
# from backend.app.db.functions.property_functions import (
# create_property, create_property_details_epc, create_property_targets, update_property_data,
# update_or_create_property_spatial_details
# )
# from backend.app.db.functions.recommendations_functions import (
# create_plan, upload_recommendations, create_scenario
# )
# from backend.app.db.functions.funding_functions import upload_funding
# from backend.app.db.functions.energy_assessment_functions import get_latest_assessment_by_uprn
# from backend.app.db.models.portfolio import rating_lookup
# from backend.app.plan.schemas import PlanTriggerRequest, WALL_INSULATION_MEASURES, ROOF_INSULATION_MEASURES
# from backend.app.plan.utils import get_cleaned
# from backend.app.utils import sap_to_epc
# import backend.app.assumptions as assumptions
#
# from backend.ml_models.api import ModelApi
# from backend.Property import Property
# from backend.apis.GoogleSolarApi import GoogleSolarApi
#
# from recommendations.optimiser.CostOptimiser import CostOptimiser
# from recommendations.optimiser.GainOptimiser import GainOptimiser
# import recommendations.optimiser.optimiser_functions as optimiser_functions
# from recommendations.Recommendations import Recommendations
# from utils.logger import setup_logger
# from utils.s3 import read_dataframe_from_s3_parquet, read_csv_from_s3, read_excel_from_s3
# from backend.ml_models.Valuation import PropertyValuation
#
# from etl.bill_savings.KwhData import KwhData
# from etl.spatial.OpenUprnClient import OpenUprnClient
# from etl.find_my_epc.RetrieveFindMyEpc import RetrieveFindMyEpc
#
# from backend.Funding import Funding
# from recommendations.optimiser.funding_optimiser import optimise_with_funding_paths
# from recommendations.recommendation_utils import convert_thickness_to_numeric, get_wall_u_value
#
# # Input data (temp)
# import pickle
#
# import pandas as pd
#
# with open("local_data_for_deletion.pkl", 'rb') as f:
# local_data = pickle.load(f)
#
# cleaning_data = local_data["cleaning_data"]
# materials = local_data["materials"]
# cleaned = local_data["cleaned"]
# project_scores_matrix = local_data["project_scores_matrix"]
# partial_project_scores_matrix = local_data["partial_project_scores_matrix"]
# whlg_eligible_postcodes = local_data["whlg_eligible_postcodes"]
#
# with open("kwh_client_for_deletion.pkl", "rb") as f:
# kwh_client = pickle.load(f)
#
# epc_data = pd.read_csv(
# "/Users/khalimconn-kowlessar/Downloads/all-domestic-certificates/domestic-E06000002-Middlesbrough/certificates.csv",
# low_memory=False
# )
#
# # TODO: Store this for cleaning
# costs_by_floor_area = epc_data[
# pd.to_datetime(epc_data["LODGEMENT_DATE"]) >= "2024-01-01"
# ][["TOTAL_FLOOR_AREA", "CURRENT_ENERGY_EFFICIENCY", "LIGHTING_COST_CURRENT", "HEATING_COST_CURRENT",
# "HOT_WATER_COST_CURRENT"]].copy()
#
# costs_by_floor_area.columns = [c.lower().replace("_", "-") for c in costs_by_floor_area.columns]
# for c in ["lighting-cost-current", "heating-cost-current", "hot-water-cost-current"]:
# costs_by_floor_area[c + "_scaled"] = costs_by_floor_area[c] / costs_by_floor_area["total-floor-area"]
#
# costs_by_floor_area = costs_by_floor_area.groupby("current-energy-efficiency")[
# ["lighting-cost-current_scaled", "heating-cost-current_scaled", "hot-water-cost-current_scaled"]
# ].mean().reset_index()
#
# sample_epc_data = epc_data[pd.to_datetime(epc_data["LODGEMENT_DATE"]) >= "2015-01-01"].drop_duplicates("UPRN").sample(
# 1000).reset_index(drop=True)
#
# # TODO: In Property find_energy_sources, sort out biomass community heating - what fuel type
# # TODO: We might be able to remove find_energy_sources entirely and remove estimate_electrical_consumption. It's used
# # in the google solar api but is it really needed? I don't think it's super accurate. It might be better to
# # just use an average energy consumption by floor area for UK households?
# # Load the input properties
# input_properties = []
# for row_id, config in tqdm(sample_epc_data.iterrows(), total=len(sample_epc_data)):
# epc = {
# k.lower().replace("_", "-"): v if not pd.isnull(v) else None for k, v in config.items()
# }
# # Avoid the data load inside of EPCRecord - something we should pull out
# for x in ["number-habitable-rooms", "floor-height", "number-heated-rooms"]:
# if pd.isnull(epc[x]):
# if x == "floor-height":
# epc[x] = 2.4
# if x == "number-habitable-rooms":
# epc[x] = 3
# if x == "number-heated-rooms":
# epc[x] = 3
#
# epc_records = {'original_epc': epc, 'full_sap_epc': {}, 'old_data': []}
#
# prepared_epc = EPCRecord(
# epc_records=epc_records,
# run_mode="newdata",
# cleaning_data=cleaning_data,
# )
#
# input_properties.append(
# Property(
# id=row_id,
# is_new=True,
# address=epc["address"],
# postcode=epc["postcode"],
# epc_record=prepared_epc,
# already_installed={},
# property_valuation={},
# non_invasive_recommendations=[],
# energy_assessment=None,
# **Property.extract_kwargs(config), # TODO: Deprecate this
# )
# )
#
# # For each property, insert the default solar configuration
# for p in tqdm(input_properties):
# solar_api = GoogleSolarApi(
# api_key=None, solar_materials=[m for m in materials if m["type"] == "solar_pv"], max_retries=5
# )
# panel_performance = solar_api.default_panel_performance(property_instance=p)
# p.set_solar_panel_configuration(
# solar_panel_configuration={
# "insights_data": None, "panel_performance": panel_performance, "unit_share_of_energy": 1
# },
# )
#
# # We mock kwh preds
# mocked_kwh_predictions = {"heating_kwh_predictions": [], "hotwater_kwh_predictions": []}
# for p in tqdm(input_properties):
# mocked_kwh_predictions["heating_kwh_predictions"].append({
# "id": p.uprn, "predictions": random.sample(range(100, 3000), 1)[0]
# })
# mocked_kwh_predictions["hotwater_kwh_predictions"].append({
# "id": p.uprn, "predictions": random.sample(range(100, 3000), 1)[0]
# })
# mocked_kwh_predictions["heating_kwh_predictions"] = pd.DataFrame(mocked_kwh_predictions["heating_kwh_predictions"])
# mocked_kwh_predictions["hotwater_kwh_predictions"] = pd.DataFrame(mocked_kwh_predictions["hotwater_kwh_predictions"])
#
# # TODO: We might want to implement this generally, via an ETL process
# for p in input_properties:
# for col in ["lighting-cost-current", "heating-cost-current", "hot-water-cost-current"]:
# if pd.isnull(p.data[col]):
# min_diff = abs(
# (costs_by_floor_area["current-energy-efficiency"] - p.data["current-energy-efficiency"])
# ).min()
# df = costs_by_floor_area[
# abs((costs_by_floor_area["current-energy-efficiency"] - p.data[
# "current-energy-efficiency"])) == min_diff
# ]
# if df.shape[0] > 1:
# df = df.head(1)
# p.data[col] = (df[col + "_scaled"] * p.data["total-floor-area"]).values[0]
#
# [
# p.set_features(cleaned=cleaned, kwh_client=kwh_client, kwh_predictions=mocked_kwh_predictions) for p in
# input_properties
# ]
# # for p in input_properties:
# # p.set_features(cleaned=cleaned, kwh_client=kwh_client, kwh_predictions=mocked_kwh_predictions)
#
# # Run the recommendations
# recommendations = {}
# recommendations_scoring_data = []
# representative_recommendations = {}
# for p in tqdm(input_properties):
# if p.data["property-type"] == "House" and pd.isnull(p.data["built-form"]):
# p.data["built-form"] = "Semi-Detached"
# recommender = Recommendations(
# property_instance=p,
# materials=materials,
# exclusions=[],
# inclusions=[],
# default_u_values=True
# )
# property_recommendations, property_representative_recommendations = recommender.recommend()
#
# if not property_recommendations:
# continue
#
# recommendations[p.id] = property_recommendations
# representative_recommendations[p.id] = property_representative_recommendations
#
# p.create_base_difference_epc_record(cleaned_lookup=cleaned)
# p.adjust_difference_record_with_recommendations(
# property_recommendations, property_representative_recommendations
# )
#
# recommendations_scoring_data.extend(p.recommendations_scoring_data)
#
# recommendations_scoring_data = pd.DataFrame(recommendations_scoring_data)
# recommendations_scoring_data = recommendations_scoring_data.drop(
# columns=[
# "rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending",
# "carbon_ending"
# ]
# )
#
# model_predictions_mocked = {
# "sap_change_predictions": None,
# "heat_demand_predictions": None,
# "carbon_change_predictions": None,
# "heating_kwh_predictions": None,
# "hotwater_kwh_predictions": None,
# }
#
# for k in model_predictions_mocked.keys():
# model_predictions_mocked[k] = recommendations_scoring_data[["id"]].copy()
# model_predictions_mocked[k][['property_id', 'recommendation_id']] = (
# model_predictions_mocked[k]['id'].str.split('+', expand=True)
# )
# model_predictions_mocked[k]['phase'] = model_predictions_mocked[k]['recommendation_id'].apply(
# ModelApi.extract_phase)
#
# if k in ["heating_kwh_predictions", "hotwater_kwh_predictions"]:
# model_predictions_mocked[k]["predictions"] = random.choices(range(100, 3000),
# k=len(recommendations_scoring_data))
# continue
#
# model_predictions_mocked[k] = model_predictions_mocked[k].sort_values(["property_id", "phase"], ascending=True)
# preds = []
# for p_id in model_predictions_mocked[k]["property_id"].unique():
# # We add some amount each time
# p = [p for p in input_properties if str(p.id) == p_id][0]
# if k == "sap_change_predictions":
# start = p.data["current-energy-efficiency"]
# elif k == "heat_demand_predictions":
# start = p.data["energy-consumption-current"]
# else:
# start = p.data["co2-emissions-current"]
# df = model_predictions_mocked[k][model_predictions_mocked[k]["property_id"] == p_id].copy()
# # Add some amount each time
# to_add = random.choices(range(0, 15), k=len(df))
# to_add = np.cumsum(to_add)
# df["predictions"] = start + to_add
# preds.append(df)
# preds = pd.concat(preds)
# model_predictions_mocked[k] = preds
#
# for property_id in tqdm(recommendations.keys(), total=len(recommendations)):
# property_instance = [p for p in input_properties if p.id == property_id][0]
#
# recommendations_with_impact, impact_summary = (
# Recommendations.calculate_recommendation_impact(
# property_instance=property_instance,
# all_predictions=model_predictions_mocked,
# recommendations=recommendations,
# representative_recommendations=representative_recommendations
# )
# )
#
# # We use the impact_summary to update the simulation_epcs with the new SAP, heat demand, carbon, cost etc
# # at each phase
# property_instance.update_simulation_epcs(impact_summary)
# recommendations[property_id] = recommendations_with_impact
#
# for property_id in tqdm([p.id for p in input_properties]):
# property_recommendations = recommendations.get(property_id, [])
# property_instance = [p for p in input_properties if p.id == property_id][0]
#
# property_current_energy_bill = (
# Recommendations.calculate_recommendation_tenant_savings(
# property_instance=property_instance,
# kwh_simulation_predictions=model_predictions_mocked,
# property_recommendations=property_recommendations,
# ashp_cop=2.8
# )
# )
# property_instance.current_energy_bill = property_current_energy_bill
#
# body = PlanTriggerRequest(
# **{'budget': None, 'goal': 'Increasing EPC', 'housing_type': 'Social', 'goal_value': 'B', 'portfolio_id': 0,
# 'trigger_file_path': '', 'already_installed_file_path': '',
# 'patches_file_path': None, 'non_invasive_recommendations_file_path': None,
# 'valuation_file_path': '',
# 'required_measures': [], 'scenario_name': 'EPC B', 'scenario_id': None,
# 'multi_plan': True, 'optimise': True, 'default_u_values': True, 'ashp_cop': 2.8,
# 'event_type': 'remote_assessment', 'simulate_sap_10': False, 'file_type': None, 'file_format': None,
# 'sheet_name': None, 'sheet_count': None, 'index_start': None, 'index_end': None}
# )
#
# for p in tqdm(input_properties):
# if not recommendations.get(p.id):
# continue
#
# # we need to double unlist because we have a list of lists
# property_measure_types = {rec["type"] for recs in recommendations[p.id] for rec in recs}
# property_required_measures = [m for m in recommendations[p.id] if m[0]["type"] in body.required_measures]
# measures_to_optimise = [m for m in recommendations[p.id] if m[0]["type"] not in body.required_measures]
#
# # If a measure requiring ventilation is selected, and the property does not have ventilation, we enforce
# # its inclusion
# needs_ventilation = any(
# x in property_measure_types for x in assumptions.measures_needing_ventilation
# ) and not p.has_ventilation
#
# if not measures_to_optimise:
# # Nothing to do, we just reshape the recommendations
# recommendations[p.id] = optimiser_functions.flatten_recommendations_with_defaults(
# p.id, recommendations, set()
# )
# continue
#
# fixed_gain = optimiser_functions.calculate_fixed_gain(
# property_required_measures, recommendations, p, needs_ventilation
# )
# gain = optimiser_functions.calculate_gain(body=body, p=p, fixed_gain=fixed_gain)
#
# funding = Funding(
# tenure="Social",
# project_scores_matrix=project_scores_matrix,
# partial_project_scores_matrix=partial_project_scores_matrix,
# whlg_eligible_postcodes=whlg_eligible_postcodes,
# eco4_social_cavity_abs_rate=12.5,
# eco4_social_solid_abs_rate=17,
# eco4_private_cavity_abs_rate=12.5,
# eco4_private_solid_abs_rate=17,
# gbis_social_cavity_abs_rate=21,
# gbis_social_solid_abs_rate=25,
# gbis_private_cavity_abs_rate=21,
# gbis_private_solid_abs_rate=28,
# )
#
# li_thickness = convert_thickness_to_numeric(
# p.roof["insulation_thickness"], p.roof["is_pitched"], p.roof["is_flat"]
# )
# current_wall_u_value = p.walls["thermal_transmittance"]
# if current_wall_u_value is None:
# current_wall_u_value = get_wall_u_value(
# clean_description=p.walls["clean_description"],
# age_band=p.age_band,
# is_granite_or_whinstone=p.walls["is_granite_or_whinstone"],
# is_sandstone_or_limestone=p.walls["is_sandstone_or_limestone"],
# )
#
# # We insert the innovation uplift
# measures_to_optimise_with_uplift = deepcopy(measures_to_optimise)
#
# # TODO: Turn this into a function and store the innovation uplift
# for group in measures_to_optimise_with_uplift:
# for r in group:
#
# if r["type"] in ["mechanical_ventilation", "low_energy_lighting", "secondary_heating",
# "extension_cavity_wall_insulation", "draught_proofing", "sealing_open_fireplace"]:
# (
# r["partial_project_score"],
# r["partial_project_funding"],
# r["innovation_uplift"],
# r["uplift_project_score"],
# ) = (
# 0, 0, 0, 0
# )
# continue
#
# (
# r["partial_project_score"], r["partial_project_funding"], r["innovation_uplift"],
# r["uplift_project_score"]
# ) = funding.get_innovation_uplift(
# measure=r,
# starting_sap=p.data["current-energy-efficiency"],
# floor_area=p.floor_area,
# is_cavity=p.walls["is_cavity_wall"],
# current_wall_uvalue=current_wall_u_value,
# is_partial="partial" in p.walls["clean_description"].lower(),
# existing_li_thickness=li_thickness,
# mainheating=p.main_heating,
# main_fuel=p.main_fuel,
# mainheat_energy_eff=p.data["mainheat-energy-eff"],
# )
#
# input_measures = optimiser_functions.prepare_input_measures(
# measures_to_optimise_with_uplift, body.goal, needs_ventilation, funding=True
# )
#
# # When the goal is Increasing EPC, we can run the funding optimiser
# if body.goal == "Increasing EPC":
#
# solutions = optimise_with_funding_paths(
# p=p,
# input_measures=input_measures,
# housing_type=body.housing_type,
# budget=body.budget,
# target_gain=gain,
# funding=funding
# )
#
# # Given the solutions we select the optimal one
# solutions["cost_less_full_project_funding"] = np.where(
# solutions["scheme"] == "eco4",
# solutions["total_cost"] - solutions["full_project_funding"] - solutions["total_uplift"],
# solutions["total_cost"] - solutions["partial_project_funding"] - solutions["total_uplift"]
# )
#
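# # NOTE(review): this assignment overwrites the scheme-dependent np.where(...) value computed just above, so the
# # eco4 / non-eco4 split currently has no effect on the sort below - confirm which of the two is intended.
# solutions["cost_less_full_project_funding"] = (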
# solutions["total_cost"] - solutions["full_project_funding"] - solutions["total_uplift"]
# )
# solutions = solutions.sort_values("cost_less_full_project_funding", ascending=True)
#
# if solutions["meets_upgrade_target"].any():
# # If we have a solution that meets the upgrade target, we select that one
# optimal_solution = solutions[solutions["meets_upgrade_target"]].iloc[0]
# else:
# # Pick the cheapest
# optimal_solution = solutions.iloc[0]
#
# # This is the list of measures that we will recommend
# scheme = optimal_solution["scheme"]
# funded_measures = optimal_solution["items"] if scheme != "none" else []
# solution = optimal_solution["items"] + optimal_solution["unfunded_items"]
# # This is the total amount of funding that the project will produce (including uplifts) (£)
# project_funding = optimal_solution["full_project_funding"] if scheme == "eco4" else \
# optimal_solution["partial_project_funding"]
# # This is the total amount of funding associated to the uplift (£)
# total_uplift = optimal_solution["total_uplift"]
# # This is the funding scheme selected
# # This is the full project ABS
# full_project_score = optimal_solution["project_score"]
# # This is the partial project ABS
# partial_project_score = optimal_solution["partial_project_score"]
# # This is the uplift score ABS
# uplift_project_score = optimal_solution["total_uplift_score"]
# else:
# # We optimise and then we determine eligibility for funding, based on the measures selected
# optimiser = (
# GainOptimiser(
# input_measures, max_cost=body.budget, max_gain=gain, allow_slack=False
# ) if body.budget else CostOptimiser(input_measures, min_gain=gain)
# )
# optimiser.setup()
# optimiser.solve()
# solution = optimiser.solution
#
# recommendation_types = []
# for measures in input_measures:
# for measure in measures:
# recommendation_types.append(measure["type"])
# recommendation_types = set(recommendation_types)
#
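# # NOTE(review): in both any(...) checks below, "+".join([m, "mechanical_ventilation"]) is a non-empty string and
# # therefore always truthy, so each check is True whenever its measure list is non-empty. The joined name was
# # presumably meant to be tested with `in recommendation_types` as well - confirm.
# has_wall_insulation_recommendation = any(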
# (m in recommendation_types or "+".join([m, "mechanical_ventilation"])) for m in
# WALL_INSULATION_MEASURES
# )
# has_roof_insulation_recommendation = any(
# (m in recommendation_types or "+".join([m, "mechanical_ventilation"])) for m in
# ROOF_INSULATION_MEASURES
# )
#
# funding.check_funding(
# measures=solution,
# starting_sap=p.data["current-energy-efficiency"],
# ending_sap=p.data["current-energy-efficiency"] + sum([x["gain"] for x in solution]),
# floor_area=p.floor_area,
# mainheat_description=p.main_heating["clean_description"],
# heating_control_description=p.main_heating_controls["clean_description"],
# is_cavity=p.walls["is_cavity_wall"],
# current_wall_uvalue=current_wall_u_value,
# is_partial="partial" in p.walls["clean_description"].lower(),
# existing_li_thickness=li_thickness,
# mainheating=p.main_heating,
# main_fuel=p.main_fuel,
# mainheat_energy_eff=p.data["mainheat-energy-eff"],
# has_wall_insulation_recommendation=has_wall_insulation_recommendation,
# has_roof_insulation_recommendation=has_roof_insulation_recommendation,
# )
#
# # Determine the scheme
# scheme = "none"
# if funding.eco4_eligible:
# scheme = "eco4"
# if scheme == "none" and funding.gbis_eligible:
# scheme = "gbis"
#
# funded_measures = solution if scheme in ["gbis", "eco4"] else []
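# # NOTE(review): the `0 if ... is not None else ...` expressions used for project_funding and full_project_score
# # below yield 0 when full_project_abs is set and None when it is not, which looks inverted (`is None` was
# # presumably intended) - confirm.
# project_funding = 0 if funding.full_project_abs is not None else funding.full_project_abs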
# total_uplift = funding.eco4_uplift
# full_project_score = 0 if funding.full_project_abs is not None else funding.full_project_abs
# partial_project_score = funding.partial_project_abs
# uplift_project_score = funding.eco4_uplift if scheme == "eco4" else funding.gbis_uplift
#
# selected = {r["id"] for r in solution}
#
# if property_required_measures:
# solution = optimiser_functions.add_required_measures(
# property_id=p.id, property_required_measures=property_required_measures,
# recommendations=recommendations, selected=selected,
# )
#
# # Add best practice measures (ventilation/trickle vents)
# selected = optimiser_functions.add_best_practice_measures(p.id, solution, recommendations, selected)
# # Final flattening - Don't do this!
# # recommendations[p.id] = optimiser_functions.flatten_recommendations_with_defaults(
# # p.id, recommendations, selected
# # )
#
# # TODO: functionise
# for measure in funded_measures:
# if "+mechanical_ventilation" in measure["type"]:
# measure["type"] = measure["type"].split("+mechanical_ventilation")[0]
#
# p.insert_funding(
# scheme=scheme,
# funded_measures=funded_measures,
# project_funding=project_funding,
# total_uplift=total_uplift,
# full_project_score=full_project_score,
# partial_project_score=partial_project_score,
# uplift_project_score=uplift_project_score
# )