From 888357d797837d47f547af759305c2ec64dad296 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 21 Oct 2024 12:36:01 +0100 Subject: [PATCH] removed old mds app and updating dockerfile --- backend/app/plan/router.py | 588 +------------------------------ backend/docker/lambda.Dockerfile | 15 +- 2 files changed, 6 insertions(+), 597 deletions(-) diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index 925fb05b..51b03d01 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -24,7 +24,7 @@ from backend.app.db.functions.recommendations_functions import ( from backend.app.db.functions.energy_assessment_functions import get_latest_assessment_by_uprn from backend.app.db.models.portfolio import rating_lookup from backend.app.dependencies import validate_token -from backend.app.plan.schemas import PlanTriggerRequest, MdsRequest +from backend.app.plan.schemas import PlanTriggerRequest from backend.app.plan.utils import get_cleaned from backend.app.utils import epc_to_sap_lower_bound, sap_to_epc @@ -36,7 +36,6 @@ from recommendations.optimiser.CostOptimiser import CostOptimiser from recommendations.optimiser.GainOptimiser import GainOptimiser from recommendations.optimiser.optimiser_functions import prepare_input_measures from recommendations.Recommendations import Recommendations -from recommendations.Mds import Mds from utils.logger import setup_logger from utils.s3 import read_dataframe_from_s3_parquet, read_csv_from_s3 from backend.ml_models.Valuation import PropertyValuation @@ -44,7 +43,6 @@ from backend.ml_models.Valuation import PropertyValuation from etl.bill_savings.EnergyConsumptionModel import EnergyConsumptionModel from etl.bill_savings.KwhData import KwhData from etl.spatial.OpenUprnClient import OpenUprnClient -from etl.solar.SolarPhotoSupply import SolarPhotoSupply logger = setup_logger() @@ -868,587 +866,3 @@ async def trigger_plan(body: PlanTriggerRequest): session.close() return Response(status_code=200) - - -@router.post("/mds") -async def build_mds(body: MdsRequest): - # TODO: This is a placeholder location for the MDS endpoint, which this is being assembled - - logger.info("Connecting to db") - session = sessionmaker(bind=db_engine)() - created_at = datetime.now().isoformat() - - try: - session.begin() - logger.info("Getting the inputs") - plan_input = read_csv_from_s3(bucket_name=get_settings().PLAN_TRIGGER_BUCKET, filepath=body.trigger_file_path) - measure_set = body.measures - optimise_measures = measure_set is not None - - cleaning_data = read_dataframe_from_s3_parquet( - bucket_name=get_settings().DATA_BUCKET, file_key="sap_change_model/cleaning_dataset.parquet", - ) - - input_properties = [] - for property_id, config in tqdm(enumerate(plan_input), total=len(plan_input)): - # We validate each record in the file. If the record is NOT valid, we need to handle this accordingly - uprn = config.get("uprn", None) - uprn = None if uprn == "" else uprn - if uprn: - uprn = int(float(uprn)) - - epc_searcher = SearchEpc( - address1=config["address"], - postcode=config["postcode"], - uprn=uprn, - auth_token=get_settings().EPC_AUTH_TOKEN, - os_api_key=get_settings().ORDNANCE_SURVEY_API_KEY, - ) - epc_searcher.ordnance_survey_client.built_form = config.get("built_form", None) - epc_searcher.ordnance_survey_client.property_type = config.get("property_type", None) - # For the moment, our OS API access is unavailable, so we skip and interpolate - epc_searcher.find_property(skip_os=True) - - if config["address"] == "35b High Street": - print("Performing temporary patch on 35b High Street") - epc_searcher.newest_epc["uprn"] = 10002911892 - epc_searcher.full_sap_epc["uprn"] = 10002911892 - - if config["address"] == "Cobnut Barn": - print("Performing temporary patch on Cobnut Barn") - epc_searcher.newest_epc["uprn"] = 10013924689 - - # Create a record in db - # TODO: If we productionise the creation of this mds report, we will need to store this in the db - # property_id, is_new = create_property( - # session, body.portfolio_id, epc_searcher.address_clean, epc_searcher.postcode_clean, epc_searcher.uprn - # ) - # if not is_new: - # continue - # - # create_property_targets( - # session, - # property_id=property_id, - # portfolio_id=body.portfolio_id, - # epc_target=body.goal_value, - # heat_demand_target=None - # ) - - epc_records = { - 'original_epc': epc_searcher.newest_epc.copy(), - 'full_sap_epc': epc_searcher.full_sap_epc.copy(), - 'old_data': epc_searcher.older_epcs.copy(), - } - - # patch = next(( - # x for x in patches if (x["address"] == config["address"]) and (x["postcode"] == config["postcode"]) - # ), {}) - # epc_records = patch_epc(patch, epc_records) - - prepared_epc = EPCRecord( - epc_records=epc_records, - run_mode="newdata", - cleaning_data=cleaning_data - ) - - # property_already_installed = next(( - # x for x in already_installed if - # (x["address"] == config["address"]) and (x["postcode"] == config["postcode"]) - # ), {}) - # - # property_non_invasive_recommendations = next(( - # x for x in non_invasive_recommendations if - # (x["address"] == config["address"]) and (x["postcode"] == config["postcode"]) - # ), {}) - - if measure_set is None: - measures = config["measures"] if "measures" in config else None - else: - measures = measure_set - - input_properties.append( - Property( - id=property_id, - address=epc_searcher.address_clean, - postcode=epc_searcher.postcode_clean, - epc_record=prepared_epc, - # already_installed=property_already_installed, - # non_invasive_recommendations=property_non_invasive_recommendations, - measures=measures, - is_new=is_new, - **Property.extract_kwargs(config) - ) - ) - - logger.info("Reading in materials and cleaned datasets") - materials = get_materials(session) - cleaned = get_cleaned() - - uprn_filenames = read_dataframe_from_s3_parquet( - bucket_name=get_settings().DATA_BUCKET, file_key="spatial/filename_meta.parquet" - ) - photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket=get_settings().DATA_BUCKET) - - logger.info("Getting spatial data") - for p in tqdm(input_properties): - p.get_spatial_data(uprn_filenames) - - logger.info("Getting components and epc recommendations") - recommendations_scoring_data = [] - representative_recommendations = {} - recommendations = {} - - for p in tqdm(input_properties): - p.set_features(cleaned, photo_supply_lookup, floor_area_decile_thresholds) - - mds = Mds(property_instance=p, materials=materials, optimise_measures=optimise_measures) - mds_recommendations, property_representative_recommendations, errors = mds.build() - - if isinstance(errors, list): - if errors: - raise Exception("Errors occurred during MDS build") - else: - if any([len(x) for x in errors.values()]): - raise Exception("Errors occurred during MDS build") - - recommendations[p.id] = mds_recommendations - representative_recommendations[p.id] = property_representative_recommendations - - # Build the scoring data - p.create_base_difference_epc_record(cleaned_lookup=cleaned) - if optimise_measures: - for _id, mds_recs in mds_recommendations.items(): - representative_ids = [r["recommendation_id"] for r in property_representative_recommendations[_id]] - simulation_mds_recs = [] - for recs in mds_recs: - simulation_mds_recs.append( - [r for r in recs if r["recommendation_id"] in representative_ids] - ) - - p.adjust_difference_record_with_recommendations( - simulation_mds_recs, property_representative_recommendations[_id] - ) - - data = p.recommendations_scoring_data.copy() - for d in data: - d["id"] = d["id"] + "*" + _id - - recommendations_scoring_data.extend(data) - - else: - recommendations_scoring_data.append( - p.simulate_all_representative_recommendations(property_representative_recommendations) - ) - - logger.info("Preparing data for scoring in sap change api") - recommendations_scoring_data = pd.DataFrame(recommendations_scoring_data) - - recommendations_scoring_data = recommendations_scoring_data.drop( - columns=["rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending", - "carbon_ending"] - ) - - model_api = ModelApi( - portfolio_id=body.portfolio_id, timestamp=created_at, prediction_buckets=get_prediction_buckets() - ) - - all_predictions = { - "sap_change_predictions": pd.DataFrame(), - "heat_demand_predictions": pd.DataFrame(), - "carbon_change_predictions": pd.DataFrame() - } - to_loop_over = range(0, recommendations_scoring_data.shape[0], SCORING_BATCH_SIZE) - for chunk in tqdm(to_loop_over, total=len(to_loop_over)): - predictions_dict = model_api.predict_all( - df=recommendations_scoring_data.iloc[chunk:chunk + SCORING_BATCH_SIZE], - ) - - # Append the predictions to the predictions dictionary - for key, scored in predictions_dict.items(): - all_predictions[key] = pd.concat([all_predictions[key], scored]) - - # TODO: 1) walls_insulation_thickness_ending is not being set in the recommendations_scoring_data, - # insulation_thickness_ending is being set instead - # 2) - - # TODO: TEMP - for p in plan_input: - if p["uprn"]: - p["uprn"] = str(int(float(p["uprn"]))) - - import re - from backend.ml_models.AnnualBillSavings import AnnualBillSavings - - if optimise_measures: - results = [] - for p in input_properties: - - sap_before = int(p.data["current-energy-efficiency"]) - epc_before = p.data["current-energy-rating"] - heat_demand_before = p.data["energy-consumption-current"] - carbon_before = p.data["co2-emissions-current"] - current_adjusted_energy = AnnualBillSavings.adjust_energy_to_metered( - epc_energy_consumption=heat_demand_before * p.floor_area, - current_epc_rating=epc_before, - ) - current_energy_bill = AnnualBillSavings.calculate_annual_bill(current_adjusted_energy) - - package_comparison = [] - for _id in recommendations[p.id].keys(): - - sap_prediction = all_predictions["sap_change_predictions"][ - (all_predictions["sap_change_predictions"]["property_id"] == str(p.id)) & - (all_predictions["sap_change_predictions"]["recommendation_id"].str.contains(re.escape(_id))) - ].copy().reset_index(drop=True) - sap_prediction["row_id"] = sap_prediction.index - - heat_demand_prediction = all_predictions["heat_demand_predictions"][ - (all_predictions["heat_demand_predictions"]["property_id"] == str(p.id)) & - (all_predictions["heat_demand_predictions"]["recommendation_id"].str.contains(re.escape(_id))) - ].copy().reset_index(drop=True) - heat_demand_prediction["row_id"] = heat_demand_prediction.index - - carbon_prediction = all_predictions["carbon_change_predictions"][ - (all_predictions["carbon_change_predictions"]["property_id"] == str(p.id)) & - (all_predictions["carbon_change_predictions"]["recommendation_id"].str.contains(re.escape(_id))) - ].copy().reset_index(drop=True) - carbon_prediction["row_id"] = carbon_prediction.index - - epc_target = body.goal_value - if epc_before == epc_target: - continue - - sap_target = epc_to_sap_lower_bound(epc_target) - # Define the measures - sap_threshold_barrier = sap_prediction[sap_prediction["predictions"] >= sap_target] - meets_threshold = True - if sap_threshold_barrier.empty: - sap_threshold_barrier = sap_prediction.tail(1) - meets_threshold = False - sap_threshold_barrier = sap_threshold_barrier.head(1) - - sap_prediction = sap_prediction[ - sap_prediction["row_id"] <= sap_threshold_barrier["row_id"].values[0] - ] - heat_demand_prediction = heat_demand_prediction[ - heat_demand_prediction["row_id"] <= sap_threshold_barrier["row_id"].values[0] - ] - carbon_prediction = carbon_prediction[ - carbon_prediction["row_id"] <= sap_threshold_barrier["row_id"].values[0] - ] - - reverse_map = {v: k for k, v in Mds.format_map.items()} - - selected_measures = [ - reverse_map[x.split("-")[0]] for x in sap_prediction["recommendation_id"].values - ] - selected_measure_ids = [x.split("*")[0] for x in sap_prediction["recommendation_id"].values] - - costs = [ - r["total"] for r in representative_recommendations[p.id][_id] if - r["recommendation_id"] in selected_measure_ids - ] - costs = sum(costs) - - sap_after = sap_prediction["predictions"].values[-1] - epc_after = sap_to_epc(sap_after) - heat_demand_after = heat_demand_prediction["predictions"].values[-1] - carbon_after = carbon_prediction["predictions"].values[-1] - - expected_adjusted_energy = AnnualBillSavings.adjust_energy_to_metered( - epc_energy_consumption=heat_demand_after * p.floor_area, - current_epc_rating=epc_before, - ) - - expected_energy_bill = AnnualBillSavings.calculate_annual_bill(expected_adjusted_energy) - - bill_savings = current_energy_bill - expected_energy_bill - energy_savings = current_adjusted_energy - expected_adjusted_energy - - package_comparison.append( - { - "id": _id, - "cost": costs, - "measures": selected_measures, - "sap_before": sap_before, - "sap_after": sap_after, - "epc_before": epc_before, - "epc_after": epc_after, - "heat_demand_before": heat_demand_before, - "heat_demand_after": heat_demand_after, - "carbon_before": carbon_before, - "carbon_after": carbon_after, - "bill_savings": bill_savings, - "energy_savings": energy_savings, - "current_energy_bill": current_energy_bill, - "meets_threshold": meets_threshold - } - ) - - package_comparison = pd.DataFrame(package_comparison) - # Find the smallest cost package - if not package_comparison.empty: - - # We check if any of the packages meet the threshold - # If none of them do, take the one that gets closest to the target - if package_comparison["meets_threshold"].any(): - package_comparison = package_comparison[package_comparison["meets_threshold"]] - package_comparison = package_comparison.sort_values("cost") - else: - package_comparison = package_comparison.sort_values("sap_after", ascending=False) - - package_comparison = package_comparison.head(1).to_dict("records")[0] - else: - package_comparison = { - "measures": [], - "sap_before": sap_before, - "sap_after": sap_before, - "epc_before": epc_before, - "epc_after": epc_before, - "heat_demand_before": heat_demand_before, - "heat_demand_after": heat_demand_before, - "carbon_before": carbon_before, - "carbon_after": carbon_before, - "bill_savings": 0, - "energy_savings": 0, - "current_energy_bill": current_energy_bill, - "meets_threshold": False - } - - config = [c for c in plan_input if c["uprn"] == str(p.uprn)] - if not config: - config = {"address": None, "postcode": None} - else: - config = config[0] - - results.append({ - "config_address": config["address"], - "config_postcode": config["postcode"], - "uprn": p.uprn, - "address": p.address, - "postcode": p.postcode, - "measures": package_comparison["measures"], - "year_of_epc": p.data['lodgement-date'], - "sap_before": package_comparison["sap_before"], - "sap_after": package_comparison["sap_after"], - "epc_before": package_comparison["epc_before"], - "epc_after": package_comparison["epc_after"], - "heat_demand_before": package_comparison["heat_demand_before"], - "heat_demand_after": package_comparison["heat_demand_after"], - "carbon_before": package_comparison["carbon_before"], - "carbon_after": package_comparison["carbon_after"], - "bill_savings": round(package_comparison["bill_savings"], 2), - "energy_savings": round(package_comparison["energy_savings"], 2), - "current_energy_bill": round(package_comparison["current_energy_bill"], 2), - "EWI": "EWI" if "external_wall_insulation" in package_comparison["measures"] else None, - "CWI": "CWI" if "cavity_wall_insulation" in package_comparison["measures"] else None, - "LI": "LI" if "loft_insulation" in package_comparison["measures"] else None, - "ASHP Htg": "ASHP Htg" if "air_source_heat_pump" in package_comparison["measures"] else None, - "Elec Storage": ( - "Elec Storage Htrs (Out of scope -Prov sum only)" if "high_heat_retention_storage_heaters" in - package_comparison["measures"] else None - ), - "Solar PV": "Solar PV" if "solar_pv" in package_comparison["measures"] else None, - }) - - results = pd.DataFrame(results) - - # For the different measures, we check the impact with a few debugging functions - - walls_check, hhr_check = check_mds(results, input_properties, recommendations, optimise_measures) - - results.to_excel("optimised mds_results 5th June.xlsx") - - results = [] - for p in input_properties: - measures = p.measures - property_recommendations = [r['type'] for r in representative_recommendations[p.id]] - - # TODO: Check high heat retention storage heaters - looks like it's excluded controls! - - sap_prediction = all_predictions["sap_change_predictions"][ - all_predictions["sap_change_predictions"]["property_id"] == str(p.id) - ] - - heat_demand_prediction = all_predictions["heat_demand_predictions"][ - all_predictions["heat_demand_predictions"]["property_id"] == str(p.id) - ] - - carbon_prediction = all_predictions["carbon_change_predictions"][ - all_predictions["carbon_change_predictions"]["property_id"] == str(p.id) - ] - - # Get a before and after for SAP, heat demand, CO2 and also calculate energy bill and energy savings - sap_before = int(p.data["current-energy-efficiency"]) - sap_after = sap_prediction["predictions"].values[0] if measures else sap_before - - epc_before = p.data["current-energy-rating"] - epc_after = sap_to_epc(sap_after) if measures else epc_before - - heat_demand_before = p.data["energy-consumption-current"] - heat_demand_after = heat_demand_prediction["predictions"].values[0] if measures else heat_demand_before - - carbon_before = p.data["co2-emissions-current"] - carbon_after = carbon_prediction["predictions"].values[0] if measures else carbon_before - - # Estimate bill savings - - from backend.ml_models.AnnualBillSavings import AnnualBillSavings - current_adjusted_energy = AnnualBillSavings.adjust_energy_to_metered( - epc_energy_consumption=heat_demand_before * p.floor_area, - current_epc_rating=epc_before, - ) - - # TODO: This isn't quite right as this is based on EVERY possible measure, not just the ones that are - # actually implemented - expected_adjusted_energy = AnnualBillSavings.adjust_energy_to_metered( - epc_energy_consumption=heat_demand_after * p.floor_area, - current_epc_rating=epc_before, - ) - - # TODO: We should determine if the home is gas & electricity or just electricity - - # Determine if the heating and hotwater was previously electric only or both - - current_energy_bill = AnnualBillSavings.calculate_annual_bill( - kwh=current_adjusted_energy, - ) - expected_energy_bill = AnnualBillSavings.calculate_annual_bill( - kwh=expected_adjusted_energy, - ) - - bill_savings = current_energy_bill - expected_energy_bill - energy_savings = current_adjusted_energy - expected_adjusted_energy - - config = [c for c in plan_input if c["uprn"] == str(p.uprn)] - if not config: - config = {"address": None, "postcode": None} - else: - config = config[0] - - to_append = { - "config_address": config["address"], - "config_postcode": config["postcode"], - "uprn": p.uprn, - "address": p.address, - "postcode": p.postcode, - "measures": measures, - "property_recommendations": property_recommendations, - "year_of_epc": p.data['lodgement-date'], - "sap_before": sap_before, - "sap_after": sap_after, - "epc_before": epc_before, - "epc_after": epc_after, - "heat_demand_before": heat_demand_before, - "heat_demand_after": heat_demand_after, - "carbon_before": carbon_before, - "carbon_after": carbon_after, - "bill_savings": round(bill_savings, 2), - "energy_savings": round(energy_savings, 2), - "current_energy_bill": round(current_energy_bill, 2), - "fuel_type": p.main_fuel["fuel_type"], - } - results.append(to_append) - - results = pd.DataFrame(results) - results["sap_uplift"] = results["sap_after"] - results["sap_before"] - - # results.to_excel("mds_results 5th June.xlsx") - - walls_check, hhr_check = check_mds(results, input_properties, recommendations, optimise_measures) - - except IntegrityError: - logger.error("Database integrity error occurred", exc_info=True) - session.rollback() - return Response(status_code=500, content="Database integrity error.") - except OperationalError: - logger.error("Database operational error occurred", exc_info=True) - session.rollback() - return Response(status_code=500, content="Database operational error.") - except ValueError: - logger.error("Value error - possibly due to malformed data", exc_info=True) - session.rollback() - return Response(status_code=400, content="Bad request: malformed data.") - except Exception as e: # General exception handling - logger.error(f"An error occurred: {e}") - session.rollback() - return Response(status_code=500, content="An unexpected error occurred.") - finally: - session.close() - - -def check_mds(results, input_properties, recommendations, optimise_measures): - import ast - walls_check = [] - hhr_check = [] - for p in input_properties: - res = results[results["uprn"] == p.uprn] - wall = p.walls - heating = p.main_heating - heating_controls = p.main_heating_controls - - if optimise_measures: - measures = res["measures"].values[0] - else: - measures = [list(z.keys())[0] for z in res["measures"].values[0]] - - wall_recommendation = [ - x for x in measures if - x in ["internal_wall_insulation", "external_wall_insulation", "cavity_wall_insulation"] - ] - - hhr_recommendation = [ - x for x in measures if - x in ["high_heat_retention_storage_heaters"] - ] - - if optimise_measures: - possible_measures = [ast.literal_eval(x) for x in list(recommendations[p.id].keys())] - # Unlist them - possible_measures = [x for sublist in possible_measures for x in sublist] - possible_measures = list(set(possible_measures)) - else: - possible_measures = p.measures - - if wall_recommendation: - if len(wall_recommendation) > 1: - raise Exception("something went wrong") - wall_recommendation = wall_recommendation[0] - else: - wall_recommendation = None - - hhr_recommendation = hhr_recommendation[0] if hhr_recommendation else None - - walls_check.append( - { - "uprn": p.uprn, - "address": p.address, - "postcode": p.postcode, - "property_type": p.data['property-type'], - "conservation_status": p.spatial["conservation_status"], - "is_listed_building": p.spatial["is_listed_building"], - "is_heritage_building": p.spatial["is_heritage_building"], - "wall": wall["clean_description"], - "recommendation": wall_recommendation, - "possible_measures": possible_measures, - "selected_measures": res["measures"].values[0], - } - ) - - hhr_check.append( - { - "uprn": p.uprn, - "address": p.address, - "postcode": p.postcode, - "heating": heating["clean_description"], - "heating_controls": heating_controls["clean_description"], - "recommendation": hhr_recommendation, - "possible_measures": possible_measures, - "selected_measures": res["measures"].values[0], - } - ) - - walls_check = pd.DataFrame(walls_check) - hhr_check = pd.DataFrame(hhr_check) - - return walls_check, hhr_check diff --git a/backend/docker/lambda.Dockerfile b/backend/docker/lambda.Dockerfile index 13c30b88..50773fd5 100644 --- a/backend/docker/lambda.Dockerfile +++ b/backend/docker/lambda.Dockerfile @@ -35,17 +35,12 @@ COPY --from=build-image /usr/local/lib/python3.10/site-packages/ /usr/local/lib/ # Copy project files COPY ./backend/ ./backend COPY ./recommendations/ ./recommendations -COPY ./model_data/BaseUtility.py ./model_data/BaseUtility.py -COPY ./model_data/config.py ./model_data/config.py -COPY ./model_data/optimiser/ ./model_data/optimiser/ -COPY ./model_data/__init__.py ./model_data/__init__.py -COPY ./model_data/EpcClean.py ./model_data/EpcClean.py -COPY ./model_data/utils.py ./model_data/utils.py -COPY ./model_data/epc_attributes/ ./model_data/epc_attributes/ -COPY ./model_data/simulation_system/core/DataProcessor.py ./model_data/simulation_system/core/DataProcessor.py -COPY ./model_data/simulation_system/core/Settings.py ./model_data/simulation_system/core/Settings.py -COPY ./datatypes/ ./datatypes/ COPY ./utils/ ./utils/ +COPY ./etl/epc/ ./etl/epc/ +COPY ./etl/epc_clean/ ./etl/epc_clean/ +COPY ./etl/bill_savings/ ./etl/bill_savings/ +COPY ./etl/spatial/ ./etl/spatial/ + # Set the ENTRYPOINT to the AWS Lambda RIC and CMD to your function handler ENTRYPOINT [ "/usr/local/bin/python", "-m", "awslambdaric" ]