diff --git a/.devcontainer/backend/devcontainer.json b/.devcontainer/backend/devcontainer.json index c672b1bf..3727d8a3 100644 --- a/.devcontainer/backend/devcontainer.json +++ b/.devcontainer/backend/devcontainer.json @@ -6,7 +6,7 @@ "workspaceFolder": "/workspaces/model", "postStartCommand": "bash .devcontainer/backend/post-install.sh", "mounts": [ - "source=${localEnv:HOME},target=/workspaces/home,type=bind" + "source=${localEnv:HOME},target=/home/vscode,type=bind" ], "customizations": { "vscode": { @@ -22,7 +22,11 @@ "corentinartaud.pdfpreview", "ms-python.vscode-python-envs", "ms-python.black-formatter", - "waderyan.gitblame" + "waderyan.gitblame", + "GrapeCity.gc-excelviewer", + "jakobhoeg.vscode-pokemon", + "github.vscode-github-actions", + "me-dutour-mathieu.vscode-github-actions" ], "settings": { "files.defaultWorkspace": "/workspaces/model", @@ -38,3 +42,4 @@ "PYTHONFLAGS": "-Xfrozen_modules=off" } } + \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json index 3d4c6b42..b294c736 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -9,12 +9,14 @@ "path": "/bin/bash" } }, -<<<<<<< HEAD -======= "python.testing.unittestEnabled": false, "python.testing.pytestEnabled": true, - "python.testing.pytestArgs": ["-s", "-q", "--no-cov"] ->>>>>>> 11b482838efcf46f376fd3ecbf2c1bb0be6d097d + "python.testing.pytestArgs": ["-s", "-q", "--no-cov"], + + "python.languageServer": "Pylance", + "python.analysis.typeCheckingMode": "strict", + "python.analysis.autoSearchPaths": true, + "python.analysis.extraPaths": ["./src"] // Hot reload setting that needs to be in user settings // "jupyter.runStartupCommands": [ diff --git a/backend/Outputs.py b/backend/Outputs.py index f9538709..7111e4d3 100644 --- a/backend/Outputs.py +++ b/backend/Outputs.py @@ -8,7 +8,11 @@ from utils.s3 import read_from_s3, save_excel_to_s3 from backend.app.utils import sap_to_epc from backend.app.db.connection import db_engine from 
backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel -from backend.app.db.models.recommendations import Recommendation, Plan, PlanRecommendations +from backend.app.db.models.recommendations import ( + Recommendation, + PlanModel, + PlanRecommendations, +) class Outputs: @@ -42,7 +46,7 @@ class Outputs: "flat_roof_insulation": "Flat roof (Out of scope - prov sum only)", "room_in_roof_insulation": "RIR (POA - Prov sum only)", "ev_charging": "EV Charging", - "battery": "Battery" + "battery": "Battery", } def __init__(self, format, portfolio_id): @@ -67,28 +71,38 @@ class Outputs: # Download cleaned data self.cleaned_epc_lookup = read_from_s3( s3_file_name="cleaned_epc_data/cleaned.bson", - bucket_name="retrofit-data-dev" + bucket_name="retrofit-data-dev", ) self.cleaned_epc_lookup = msgpack.unpackb(self.cleaned_epc_lookup, raw=False) def get_properties_from_db(self): # Get properties and their details for a specific portfolio - properties_query = self.session.query( - PropertyModel, - PropertyDetailsEpcModel - ).join( - PropertyDetailsEpcModel, - PropertyModel.id == PropertyDetailsEpcModel.property_id - ).filter( - PropertyModel.portfolio_id == self.portfolio_id # Filter by portfolio ID - ).all() + properties_query = ( + self.session.query(PropertyModel, PropertyDetailsEpcModel) + .join( + PropertyDetailsEpcModel, + PropertyModel.id == PropertyDetailsEpcModel.property_id, + ) + .filter( + PropertyModel.portfolio_id + == self.portfolio_id # Filter by portfolio ID + ) + .all() + ) # Transform properties data to include all fields dynamically properties_data = [ - {**{col.name: getattr(prop.PropertyModel, col.name) for col in PropertyModel.__table__.columns}, - **{col.name: getattr(prop.PropertyDetailsEpcModel, col.name) for col in - PropertyDetailsEpcModel.__table__.columns}} + { + **{ + col.name: getattr(prop.PropertyModel, col.name) + for col in PropertyModel.__table__.columns + }, + **{ + col.name: getattr(prop.PropertyDetailsEpcModel, 
col.name) + for col in PropertyDetailsEpcModel.__table__.columns + }, + } for prop in properties_query ] @@ -96,10 +110,14 @@ class Outputs: def get_plans_from_db(self): - plans_query = self.session.query(Plan).filter(Plan.portfolio_id == self.portfolio_id).all() + plans_query = ( + self.session.query(PlanModel) + .filter(PlanModel.portfolio_id == self.portfolio_id) + .all() + ) # Transform plans data to include all fields dynamically plans_data = [ - {col.name: getattr(plan, col.name) for col in Plan.__table__.columns} + {col.name: getattr(plan, col.name) for col in PlanModel.__table__.columns} for plan in plans_query ] @@ -107,28 +125,38 @@ class Outputs: def get_recommendations_from_db(self, plan_ids): # Get recommendations through PlanRecommendations for those plans and that are default - recommendations_query = self.session.query( - Recommendation, - Plan.scenario_id - ).join( - PlanRecommendations, Recommendation.id == PlanRecommendations.recommendation_id - ).join( - Plan, Plan.id == PlanRecommendations.plan_id # Join with Plan to access scenario_id - ).filter( - PlanRecommendations.plan_id.in_(plan_ids), - Recommendation.default == True # Filtering for default recommendations - ).all() + recommendations_query = ( + self.session.query(Recommendation, PlanModel.scenario_id) + .join( + PlanRecommendations, + Recommendation.id == PlanRecommendations.recommendation_id, + ) + .join( + PlanModel, + PlanModel.id + == PlanRecommendations.plan_id, # Join with Plan to access scenario_id + ) + .filter( + PlanRecommendations.plan_id.in_(plan_ids), + Recommendation.default == True, # Filtering for default recommendations + ) + .all() + ) # Transform recommendations data to include all fields dynamically and include scenario_id recommendations_data = [ { **{ - col.name: getattr(rec.Recommendation, col.name) if - hasattr(rec, 'Recommendation') else getattr(rec, col.name) + col.name: ( + getattr(rec.Recommendation, col.name) + if hasattr(rec, "Recommendation") + else 
getattr(rec, col.name) + ) for col in Recommendation.__table__.columns }, - "Scenario ID": rec.scenario_id - } for rec in recommendations_query + "Scenario ID": rec.scenario_id, + } + for rec in recommendations_query ] return recommendations_data @@ -148,7 +176,9 @@ class Outputs: measure_label = self.MDS_MEASURE_MAPPING.get(measure_type, None) # If the property_id already exists in the collected rows, update it - existing_row = next((item for item in rows if item["property_id"] == property_id), None) + existing_row = next( + (item for item in rows if item["property_id"] == property_id), None + ) if existing_row is None: # Create a new row if the property_id doesn't exist new_row = {measure: None for measure in all_measures} @@ -196,7 +226,7 @@ class Outputs: properties_data = self.get_properties_from_db() plans_data = self.get_plans_from_db() - plan_ids = [plan['id'] for plan in plans_data] + plan_ids = [plan["id"] for plan in plans_data] recommendations_data = self.get_recommendations_from_db(plan_ids) self.session.close() @@ -209,50 +239,54 @@ class Outputs: scenario_ids = plans_df["scenario_id"].unique() # We start to create the MDS sheet - mds = properties_df[ - [ - "property_id", - "address", - "postcode", - "uprn", - "current_epc_rating", - "current_sap_points", - "primary_energy_consumption", - "property_type", - "built_form", - "total_floor_area", - "walls", - "tenure", - "mainfuel", - # The bills columns are split out - we include them and aggregate, without appliances - "heating_cost_current", - "hot_water_cost_current", - "lighting_cost_current", - "gas_standing_charge", - "electricity_standing_charge" + mds = ( + properties_df[ + [ + "property_id", + "address", + "postcode", + "uprn", + "current_epc_rating", + "current_sap_points", + "primary_energy_consumption", + "property_type", + "built_form", + "total_floor_area", + "walls", + "tenure", + "mainfuel", + # The bills columns are split out - we include them and aggregate, without appliances + 
"heating_cost_current", + "hot_water_cost_current", + "lighting_cost_current", + "gas_standing_charge", + "electricity_standing_charge", + ] ] - ].copy().rename( - columns={ - "address": "Address", - "postcode": "Postcode", - "uprn": "UPRN", - "current_epc_rating": "Pre EPC", - "current_sap_points": "EPC Source", - "primary_energy_consumption": "Existing Heating Demand Kwh/m2/y", - "property_type": "Property Type", - "built_form": "Built Form", - "total_floor_area": "Floor area m2 (If known)", - "walls": "Wall Type (Mandatory field)", - "tenure": "Tenure", - } + .copy() + .rename( + columns={ + "address": "Address", + "postcode": "Postcode", + "uprn": "UPRN", + "current_epc_rating": "Pre EPC", + "current_sap_points": "EPC Source", + "primary_energy_consumption": "Existing Heating Demand Kwh/m2/y", + "property_type": "Property Type", + "built_form": "Built Form", + "total_floor_area": "Floor area m2 (If known)", + "walls": "Wall Type (Mandatory field)", + "tenure": "Tenure", + } + ) ) mds["Estimated bill (£ per year)"] = ( - mds["heating_cost_current"] + - mds["hot_water_cost_current"] + - mds["lighting_cost_current"] + - mds["gas_standing_charge"] + - mds["electricity_standing_charge"] + mds["heating_cost_current"] + + mds["hot_water_cost_current"] + + mds["lighting_cost_current"] + + mds["gas_standing_charge"] + + mds["electricity_standing_charge"] ) mds = mds.drop( @@ -261,65 +295,84 @@ class Outputs: "hot_water_cost_current", "lighting_cost_current", "gas_standing_charge", - "electricity_standing_charge" + "electricity_standing_charge", ] ) # Formatting - Pre EPC is an enum mds["Pre EPC"] = [x.value for x in mds["Pre EPC"].values] - mds["Wall Type (Mandatory field)"] = mds["Wall Type (Mandatory field)"].str.split(",").str[0] + mds["Wall Type (Mandatory field)"] = ( + mds["Wall Type (Mandatory field)"].str.split(",").str[0] + ) # Remove average thermal transmittance field mds["Wall Type (Mandatory field)"] = np.where( - mds["Wall Type (Mandatory 
field)"].str.contains("Average thermal transmittance"), + mds["Wall Type (Mandatory field)"].str.contains( + "Average thermal transmittance" + ), "", - mds["Wall Type (Mandatory field)"] + mds["Wall Type (Mandatory field)"], ) mds = mds.merge( - pd.DataFrame(self.cleaned_epc_lookup["main-fuel"])[["clean_description", "fuel_type"]], + pd.DataFrame(self.cleaned_epc_lookup["main-fuel"])[ + ["clean_description", "fuel_type"] + ], left_on="mainfuel", right_on="clean_description", - how="left" + how="left", + ) + mds = mds.rename(columns={"fuel_type": "Existing Fuel Type"}).drop( + columns=["clean_description", "mainfuel"] ) - mds = mds.rename(columns={"fuel_type": "Existing Fuel Type"}).drop(columns=["clean_description", "mainfuel"]) mds["Existing Fuel Type"].value_counts() mds_output_by_scenario = {} for scenario_id in scenario_ids: - scenario_recommendations = recommendations_df[recommendations_df["Scenario ID"] == scenario_id] + scenario_recommendations = recommendations_df[ + recommendations_df["Scenario ID"] == scenario_id + ] # For each measure, we create the measure matrix - scenario_measure_matrix = self.make_mds_measure_matrix(scenario_recommendations) + scenario_measure_matrix = self.make_mds_measure_matrix( + scenario_recommendations + ) # Calculate the predicted impact on: SAP, heat demand, bills, kwh - recommendation_impacts = scenario_recommendations.groupby("property_id")[ - ["sap_points", "heat_demand", "kwh_savings", "energy_cost_savings"] - ].sum().reset_index() + recommendation_impacts = ( + scenario_recommendations.groupby("property_id")[ + ["sap_points", "heat_demand", "kwh_savings", "energy_cost_savings"] + ] + .sum() + .reset_index() + ) scenario_mds = mds.merge( scenario_measure_matrix, how="left", on="property_id" - ).merge( - recommendation_impacts, how="left", on="property_id" - ) + ).merge(recommendation_impacts, how="left", on="property_id") # If we have no recommendations, sap_points, kwh_savings, head_demand will be NaN to_clean = [c for c 
in recommendation_impacts.columns if c != "property_id"] for col in to_clean: scenario_mds[col].fillna(0, inplace=True) scenario_mds.fillna(0, inplace=True) - scenario_mds["Post SAP"] = scenario_mds["EPC Source"] + scenario_mds["sap_points"] + scenario_mds["Post SAP"] = ( + scenario_mds["EPC Source"] + scenario_mds["sap_points"] + ) # Round Post SAP down to the nearest integer scenario_mds["Post SAP"] = scenario_mds["Post SAP"].apply(lambda x: int(x)) - scenario_mds["Post EPC"] = scenario_mds["Post SAP"].apply(lambda x: sap_to_epc(x)) + scenario_mds["Post EPC"] = scenario_mds["Post SAP"].apply( + lambda x: sap_to_epc(x) + ) scenario_mds["Heating Demand Kwh/m2/y"] = ( - scenario_mds["Existing Heating Demand Kwh/m2/y"] - scenario_mds["heat_demand"] + scenario_mds["Existing Heating Demand Kwh/m2/y"] + - scenario_mds["heat_demand"] ) scenario_mds = scenario_mds.rename( columns={ "sap_points": "Predicted SAP Points", "kwh_savings": "Energy Saving (Kwh)", - "energy_cost_savings": "Bill Reduction (£ per yr)" + "energy_cost_savings": "Bill Reduction (£ per yr)", } ) @@ -330,7 +383,7 @@ class Outputs: save_excel_to_s3( df=scenario_mds, file_key=f"engine_outputs/{self.format}/{self.today}_scenario_id={scenario_id}.xlsx", - bucket_name="retrofit-data-dev" + bucket_name="retrofit-data-dev", ) def export(self): diff --git a/backend/app/db/functions/portfolio_functions.py b/backend/app/db/functions/portfolio_functions.py index fa97c206..ae48afed 100644 --- a/backend/app/db/functions/portfolio_functions.py +++ b/backend/app/db/functions/portfolio_functions.py @@ -1,5 +1,10 @@ from sqlalchemy import func -from backend.app.db.models.recommendations import Plan, PlanRecommendations, Recommendation, Scenario +from backend.app.db.models.recommendations import ( + PlanModel, + PlanRecommendations, + Recommendation, + ScenarioModel, +) def aggregate_portfolio_recommendations( @@ -8,7 +13,7 @@ def aggregate_portfolio_recommendations( scenario_id: int, total_valuation_increase: float, 
labour_days: float, - aggregated_data: dict + aggregated_data: dict, ): # Aggregate multiple fields aggregates = ( @@ -16,15 +21,20 @@ def aggregate_portfolio_recommendations( func.sum(Recommendation.estimated_cost).label("cost"), func.sum(Recommendation.total_work_hours).label("total_work_hours"), func.sum(Recommendation.kwh_savings).label("energy_savings"), - func.sum(Recommendation.co2_equivalent_savings).label("co2_equivalent_savings"), + func.sum(Recommendation.co2_equivalent_savings).label( + "co2_equivalent_savings" + ), func.sum(Recommendation.energy_cost_savings).label("energy_cost_savings"), ) - .join(PlanRecommendations, PlanRecommendations.recommendation_id == Recommendation.id) - .join(Plan, Plan.id == PlanRecommendations.plan_id) + .join( + PlanRecommendations, + PlanRecommendations.recommendation_id == Recommendation.id, + ) + .join(PlanModel, PlanModel.id == PlanRecommendations.plan_id) .filter( - Plan.portfolio_id == portfolio_id, - Plan.scenario_id == scenario_id, - Recommendation.default == True + PlanModel.portfolio_id == portfolio_id, + PlanModel.scenario_id == scenario_id, + Recommendation.default == True, ) .one() ) @@ -36,11 +46,11 @@ def aggregate_portfolio_recommendations( "energy_savings": aggregates.energy_savings or 0, "co2_equivalent_savings": aggregates.co2_equivalent_savings or 0, "energy_cost_savings": aggregates.energy_cost_savings or 0, - **aggregated_data + **aggregated_data, } # Get the scenario and update the fields. 
This data needs to be stored against the scenario, not the portfolio - portfolio_scenario = session.query(Scenario).filter_by(id=scenario_id).one() + portfolio_scenario = session.query(ScenarioModel).filter_by(id=scenario_id).one() # Update the data for key, value in aggregates_dict.items(): diff --git a/backend/app/db/functions/recommendations_functions.py b/backend/app/db/functions/recommendations_functions.py index 51562f55..e690991a 100644 --- a/backend/app/db/functions/recommendations_functions.py +++ b/backend/app/db/functions/recommendations_functions.py @@ -1,17 +1,33 @@ -from sqlalchemy import text -from sqlalchemy import insert, delete -from sqlalchemy.orm import Session +from typing import Any, Dict, List, Optional +from sqlalchemy import inspect, text, insert, delete, select, update +from sqlalchemy.orm import Session, Mapper from sqlalchemy.exc import SQLAlchemyError +from sqlmodel import Session + from backend.app.db.models.recommendations import ( - Plan, Recommendation, RecommendationMaterials, PlanRecommendations, Scenario + PlanModel, + Recommendation, + RecommendationMaterials, + PlanRecommendations, + ScenarioModel, ) from backend.app.db.models.portfolio import PropertyModel from backend.app.db.connection import db_session, db_read_session def prepare_plan_data( - p, body, scenario_id, eco_packages, valuations, new_sap_points, new_epc, default_recommendations, - rebaselining_carbon=0, rebaselining_heat_demand=0, rebaselining_kwh=0, rebaselining_bills=0, + p, + body, + scenario_id, + eco_packages, + valuations, + new_sap_points, + new_epc, + default_recommendations, + rebaselining_carbon=0, + rebaselining_heat_demand=0, + rebaselining_kwh=0, + rebaselining_bills=0, ): """ Utility function to prepare the data that goes into the production of a plan. 
Is a fairly rough and unstructured @@ -32,21 +48,37 @@ def prepare_plan_data( """ # Plan carbon savings co2_savings = sum( - [r["co2_equivalent_savings"] for r in default_recommendations if not r.get("already_installed", False)] + [ + r["co2_equivalent_savings"] + for r in default_recommendations + if not r.get("already_installed", False) + ] ) post_co2_emissions = p.energy["co2_emissions"] - rebaselining_carbon - co2_savings # Plan bill savings energy_bill_savings = sum( - [r["energy_cost_savings"] for r in default_recommendations if not r.get("already_installed", False)] + [ + r["energy_cost_savings"] + for r in default_recommendations + if not r.get("already_installed", False) + ] + ) + post_energy_bill = ( + sum(p.current_energy_bill.values()) - rebaselining_bills - energy_bill_savings ) - post_energy_bill = sum(p.current_energy_bill.values()) - rebaselining_bills - energy_bill_savings # energy consumption energy_consumption_savings = sum( - [r["kwh_savings"] for r in default_recommendations if not r.get("already_installed", False)] + [ + r["kwh_savings"] + for r in default_recommendations + if not r.get("already_installed", False) + ] + ) + post_energy_consumption = ( + p.current_energy_consumption - rebaselining_kwh - energy_consumption_savings ) - post_energy_consumption = p.current_energy_consumption - rebaselining_kwh - energy_consumption_savings valuation_post_retrofit, valuation_increase = None, None if valuations["current_value"]: @@ -54,9 +86,19 @@ def prepare_plan_data( valuation_post_retrofit = valuations["average_increased_value"] # plan costing data - cost_of_works = sum([r["total"] for r in default_recommendations if not r.get("already_installed", False)]) + cost_of_works = sum( + [ + r["total"] + for r in default_recommendations + if not r.get("already_installed", False) + ] + ) contingency_cost = sum( - [r.get("contingency", 0) for r in default_recommendations if not r.get("already_installed", False)] + [ + r.get("contingency", 0) + for r in 
default_recommendations + if not r.get("already_installed", False) + ] ) return { @@ -86,7 +128,7 @@ def prepare_plan_data( "valuation_increase": valuation_increase, "cost_of_works": float(cost_of_works), "contingency_cost": float(contingency_cost), - "plan_type": eco_packages.get(p.id, (None, None, None))[2] + "plan_type": eco_packages.get(p.id, (None, None, None))[2], } @@ -97,7 +139,7 @@ def create_plan(session: Session, plan): :param plan: dictionary of data representing a plan to be created """ try: - new_plan = Plan(**plan) + new_plan = PlanModel(**plan) session.add(new_plan) session.flush() session.commit() @@ -120,9 +162,7 @@ def bulk_create_plans(session: Session, plans_to_create: list[dict]) -> dict[int ] stmt = ( - insert(Plan) - .values(payload) - .returning(Plan.id, Plan.property_id) + insert(PlanModel).values(payload).returning(PlanModel.id, PlanModel.property_id) ) result = session.execute(stmt).all() @@ -133,14 +173,14 @@ def bulk_create_plans(session: Session, plans_to_create: list[dict]) -> dict[int def create_scenario(session: Session, scenario: dict) -> int: existing_scenario = ( - session.query(Scenario) + session.query(ScenarioModel) .filter_by(portfolio_id=scenario["portfolio_id"]) .first() ) scenario["is_default"] = not bool(existing_scenario) - new_scenario = Scenario(**scenario) + new_scenario = ScenarioModel(**scenario) session.add(new_scenario) session.flush() # ensures ID is populated @@ -167,7 +207,9 @@ def create_recommendation(session: Session, recommendation): raise e -def create_recommendation_material(session: Session, recommendation_id, material_id, depth): +def create_recommendation_material( + session: Session, recommendation_id, material_id, depth +): """ This function will create a record for the recommendation_material in the database if it does not exist. 
:param session: The databse session @@ -177,9 +219,7 @@ def create_recommendation_material(session: Session, recommendation_id, material """ new_recommendation_material = RecommendationMaterials( - recommendation_id=recommendation_id, - material_id=material_id, - depth=depth + recommendation_id=recommendation_id, material_id=material_id, depth=depth ) session.add(new_recommendation_material) session.flush() @@ -196,13 +236,17 @@ def create_plan_recommendations(session: Session, plan_id, recommendation_ids): """ # Prepare a list of dictionaries for bulk insert - data = [{"plan_id": plan_id, "recommendation_id": rid} for rid in recommendation_ids] + data = [ + {"plan_id": plan_id, "recommendation_id": rid} for rid in recommendation_ids + ] # Bulk insert using SQLAlchemy's core API session.execute(insert(PlanRecommendations).values(data)) -def upload_recommendations(session: Session, recommendations_to_upload, property_id, new_plan_id): +def upload_recommendations( + session: Session, recommendations_to_upload, property_id, new_plan_id +): try: # Prepare data for bulk insert for Recommendation recommendations_data = [ @@ -213,8 +257,14 @@ def upload_recommendations(session: Session, recommendations_to_upload, property "description": rec["description"], "estimated_cost": float(rec["total"]), "default": rec["default"], - "starting_u_value": float(rec.get("starting_u_value")) if rec.get("starting_u_value") else None, - "new_u_value": float(rec.get("new_u_value")) if rec.get("new_u_value") else None, + "starting_u_value": ( + float(rec.get("starting_u_value")) + if rec.get("starting_u_value") + else None + ), + "new_u_value": ( + float(rec.get("new_u_value")) if rec.get("new_u_value") else None + ), "sap_points": float(rec["sap_points"]), "energy_savings": float(rec["heat_demand"]), "kwh_savings": float(rec["kwh_savings"]), @@ -223,13 +273,17 @@ def upload_recommendations(session: Session, recommendations_to_upload, property "energy_cost_savings": 
float(rec["energy_cost_savings"]), "labour_days": float(rec["labour_days"]), "already_installed": rec["already_installed"], - "heat_demand": float(rec["heat_demand"]) + "heat_demand": float(rec["heat_demand"]), } for rec in recommendations_to_upload ] # Insert the recommendations, get back the IDs - stmt = insert(Recommendation).returning(Recommendation.id).values(recommendations_data) + stmt = ( + insert(Recommendation) + .returning(Recommendation.id) + .values(recommendations_data) + ) result = session.execute(stmt) uploaded_recommendation_ids = [row[0] for row in result] @@ -243,11 +297,15 @@ def upload_recommendations(session: Session, recommendations_to_upload, property "quantity_unit": part.get("quantity_unit", None), "estimated_cost": float(part.get("total", part.get("total_cost"))), } - for rec, recommendation_id in zip(recommendations_to_upload, uploaded_recommendation_ids) + for rec, recommendation_id in zip( + recommendations_to_upload, uploaded_recommendation_ids + ) for part in rec["parts"] ] - session.bulk_insert_mappings(RecommendationMaterials, recommendation_materials_data) + session.bulk_insert_mappings( + RecommendationMaterials, recommendation_materials_data + ) # flush the changes to get the newly created IDs session.flush() @@ -283,25 +341,27 @@ def bulk_upload_recommendations_and_materials( plan_ids_by_index = [] for rec in recommendation_payload: - recommendation_rows.append({ - "property_id": rec["property_id"], - "type": rec["type"], - "measure_type": rec["measure_type"], - "description": rec["description"], - "estimated_cost": rec["estimated_cost"], - "default": rec["default"], - "starting_u_value": rec["starting_u_value"], - "new_u_value": rec["new_u_value"], - "sap_points": rec["sap_points"], - "heat_demand": rec["heat_demand"], - "kwh_savings": rec["kwh_savings"], - "co2_equivalent_savings": rec["co2_equivalent_savings"], - "energy_savings": rec["energy_savings"], - "energy_cost_savings": rec["energy_cost_savings"], - 
"total_work_hours": rec["total_work_hours"], - "labour_days": rec["labour_days"], - "already_installed": rec["already_installed"], - }) + recommendation_rows.append( + { + "property_id": rec["property_id"], + "type": rec["type"], + "measure_type": rec["measure_type"], + "description": rec["description"], + "estimated_cost": rec["estimated_cost"], + "default": rec["default"], + "starting_u_value": rec["starting_u_value"], + "new_u_value": rec["new_u_value"], + "sap_points": rec["sap_points"], + "heat_demand": rec["heat_demand"], + "kwh_savings": rec["kwh_savings"], + "co2_equivalent_savings": rec["co2_equivalent_savings"], + "energy_savings": rec["energy_savings"], + "energy_cost_savings": rec["energy_cost_savings"], + "total_work_hours": rec["total_work_hours"], + "labour_days": rec["labour_days"], + "already_installed": rec["already_installed"], + } + ) parts_by_index.append(rec["parts"]) plan_ids_by_index.append(rec["plan_id"]) @@ -310,9 +370,7 @@ def bulk_upload_recommendations_and_materials( # 2. 
Insert recommendations and get IDs # --------------------------------------------------------- result = session.execute( - insert(Recommendation) - .values(recommendation_rows) - .returning(Recommendation.id) + insert(Recommendation).values(recommendation_rows).returning(Recommendation.id) ) recommendation_ids = [row[0] for row in result] @@ -324,19 +382,19 @@ def bulk_upload_recommendations_and_materials( for recommendation_id, parts in zip(recommendation_ids, parts_by_index): for part in parts: - materials_rows.append({ - "recommendation_id": recommendation_id, - "material_id": part["material_id"], - "depth": part["depth"], - "quantity": part["quantity"], - "quantity_unit": part["quantity_unit"], - "estimated_cost": part["estimated_cost"], - }) + materials_rows.append( + { + "recommendation_id": recommendation_id, + "material_id": part["material_id"], + "depth": part["depth"], + "quantity": part["quantity"], + "quantity_unit": part["quantity_unit"], + "estimated_cost": part["estimated_cost"], + } + ) if materials_rows: - session.execute( - insert(RecommendationMaterials).values(materials_rows) - ) + session.execute(insert(RecommendationMaterials).values(materials_rows)) # --------------------------------------------------------- # 4. 
Insert plan ↔ recommendation links @@ -346,26 +404,22 @@ def bulk_upload_recommendations_and_materials( "plan_id": plan_id, "recommendation_id": recommendation_id, } - for plan_id, recommendation_id in zip( - plan_ids_by_index, recommendation_ids - ) + for plan_id, recommendation_id in zip(plan_ids_by_index, recommendation_ids) ] - session.execute( - insert(PlanRecommendations).values(plan_recommendation_rows) - ) + session.execute(insert(PlanRecommendations).values(plan_recommendation_rows)) def chunked(iterable, size=100): for i in range(0, len(iterable), size): - yield iterable[i:i + size] + yield iterable[i : i + size] def get_property_ids(portfolio_id: int) -> list[int]: with db_read_session() as session: return [ - pid for (pid,) in - session.query(PropertyModel.id) + pid + for (pid,) in session.query(PropertyModel.id) .filter(PropertyModel.portfolio_id == portfolio_id) .all() ] @@ -381,12 +435,14 @@ def delete_property_batch(session: Session, property_ids: list[int]): # recommendation_materials (via recommendation) # -------------------------------------------------- session.execute( - text(""" + text( + """ DELETE FROM recommendation_materials rm USING recommendation r WHERE rm.recommendation_id = r.id AND r.property_id = ANY(:property_ids) - """), + """ + ), params, ) @@ -394,12 +450,14 @@ def delete_property_batch(session: Session, property_ids: list[int]): # plan_recommendations (via plan) # -------------------------------------------------- session.execute( - text(""" + text( + """ DELETE FROM plan_recommendations pr USING plan p WHERE pr.plan_id = p.id AND p.property_id = ANY(:property_ids) - """), + """ + ), params, ) @@ -407,13 +465,15 @@ def delete_property_batch(session: Session, property_ids: list[int]): # funding_package_measures # -------------------------------------------------- session.execute( - text(""" + text( + """ DELETE FROM funding_package_measures fpm USING funding_package fp, plan p WHERE fpm.funding_package_id = fp.id AND fp.plan_id 
= p.id AND p.property_id = ANY(:property_ids) - """), + """ + ), params, ) @@ -421,10 +481,12 @@ def delete_property_batch(session: Session, property_ids: list[int]): # inspections (direct) # -------------------------------------------------- session.execute( - text(""" + text( + """ DELETE FROM inspections WHERE property_id = ANY(:property_ids) - """), + """ + ), params, ) @@ -432,12 +494,14 @@ def delete_property_batch(session: Session, property_ids: list[int]): # funding_package # -------------------------------------------------- session.execute( - text(""" + text( + """ DELETE FROM funding_package fp USING plan p WHERE fp.plan_id = p.id AND p.property_id = ANY(:property_ids) - """), + """ + ), params, ) @@ -445,10 +509,12 @@ def delete_property_batch(session: Session, property_ids: list[int]): # recommendation (direct — CRITICAL FIX) # -------------------------------------------------- session.execute( - text(""" + text( + """ DELETE FROM recommendation WHERE property_id = ANY(:property_ids) - """), + """ + ), params, ) @@ -456,10 +522,12 @@ def delete_property_batch(session: Session, property_ids: list[int]): # plan (direct) # -------------------------------------------------- session.execute( - text(""" + text( + """ DELETE FROM plan WHERE property_id = ANY(:property_ids) - """), + """ + ), params, ) @@ -467,18 +535,22 @@ def delete_property_batch(session: Session, property_ids: list[int]): # property-scoped tables # -------------------------------------------------- session.execute( - text(""" + text( + """ DELETE FROM property_details_epc WHERE property_id = ANY(:property_ids) - """), + """ + ), params, ) session.execute( - text(""" + text( + """ DELETE FROM property_targets WHERE property_id = ANY(:property_ids) - """), + """ + ), params, ) @@ -486,10 +558,12 @@ def delete_property_batch(session: Session, property_ids: list[int]): # properties LAST # -------------------------------------------------- session.execute( - text(""" + text( + """ DELETE FROM 
property WHERE id = ANY(:property_ids) - """), + """ + ), params, ) @@ -510,8 +584,7 @@ def delete_portfolio_scenarios_if_empty(portfolio_id: int): with db_session() as session: session.execute( - delete(Scenario) - .where(Scenario.portfolio_id == portfolio_id) + delete(ScenarioModel).where(ScenarioModel.portfolio_id == portfolio_id) ) print("Deleted scenarios for empty portfolio") @@ -530,6 +603,7 @@ def clear_portfolio_in_batches( total = (len(property_ids) + property_batch_size - 1) // property_batch_size import time + for i, batch in enumerate(chunked(property_ids, property_batch_size), start=1): print(f"Deleting batch {i}/{total} ({len(batch)} properties)") start_time = time.time() @@ -542,3 +616,61 @@ def clear_portfolio_in_batches( delete_portfolio_scenarios_if_empty(portfolio_id) print("Portfolio cleared in batches.") + + +def get_plans_by_portfolio_id(portfolio_id: int) -> List[PlanModel]: + stmt = select(PlanModel).where(PlanModel.portfolio_id == portfolio_id) + with db_read_session() as session: + session_any: Any = session # Typehint as Any to satisfy Pylance... + return session_any.exec(stmt).scalars().all() + + +def get_scenario(scenario_id: int) -> Optional[ScenarioModel]: + stmt = select(ScenarioModel).where(ScenarioModel.id == scenario_id) + with db_read_session() as session: + session_any: Any = session # Typehint as Any to satisfy Pylance... + return session_any.exec(stmt).scalar_one_or_none() + + +def bulk_update_plans( + plan_models: List[PlanModel], + scenario_models: List[ScenarioModel], +) -> int: + if not plan_models: + return 0 + + with db_read_session() as session: + + plan_mapper: Mapper[Any] = inspect(PlanModel) + scenario_mapper: Mapper[Any] = inspect(ScenarioModel) + + plan_mappings: List[Dict[str, Any]] = ( + [] + ) # Typehint as Any to satisfy Pylance... 
+ for plan in plan_models: + data: Dict[str, Any] = { + c.name: getattr(plan, c.name) + for c in plan.__table__.columns + if c.name != "id" + } + data["id"] = plan.id + plan_mappings.append(data) + + session.bulk_update_mappings(plan_mapper, plan_mappings) + + scenario_mappings: List[Dict[str, Any]] = ( + [] + ) # Typehint as Any to satisfy Pylance... + for scenario in scenario_models: + data: Dict[str, Any] = { + c.name: getattr(scenario, c.name) + for c in scenario.__table__.columns + if c.name not in {"id", "portfolio_id"} + } + data["id"] = scenario.id + scenario_mappings.append(data) + + session.bulk_update_mappings(scenario_mapper, scenario_mappings) + + session.commit() + return len(plan_models) diff --git a/backend/app/db/models/funding.py b/backend/app/db/models/funding.py index 6ea8364e..a7417e14 100644 --- a/backend/app/db/models/funding.py +++ b/backend/app/db/models/funding.py @@ -1,9 +1,18 @@ import enum -from sqlalchemy import Column, Integer, String, Float, Enum, TIMESTAMP, BigInteger, ForeignKey +from sqlalchemy import ( + Column, + Integer, + String, + Float, + Enum, + TIMESTAMP, + BigInteger, + ForeignKey, +) from sqlalchemy.orm import declarative_base from sqlalchemy.sql import func -from backend.app.db.models.recommendations import Plan +from backend.app.db.models.recommendations import PlanModel from backend.app.db.models.materials import MaterialType, Material Base = declarative_base() @@ -17,13 +26,17 @@ class SchemeEnum(enum.Enum): class FundingPackage(Base): - __tablename__ = 'funding_package' + __tablename__ = "funding_package" id = Column(Integer, primary_key=True, autoincrement=True) - plan_id = Column(BigInteger, ForeignKey(Plan.id), nullable=False) + plan_id = Column(BigInteger, ForeignKey(PlanModel.id), nullable=False) scheme = Column( - Enum(SchemeEnum, values_callable=lambda x: [e.value for e in x], create_constraint=False), - nullable=False + Enum( + SchemeEnum, + values_callable=lambda x: [e.value for e in x], + 
create_constraint=False, + ), + nullable=False, ) created_at = Column(TIMESTAMP, nullable=False, server_default=func.now()) project_funding = Column(Float) @@ -34,15 +47,23 @@ class FundingPackage(Base): class FundingPackageMeasures(Base): - __tablename__ = 'funding_package_measures' + __tablename__ = "funding_package_measures" id = Column(Integer, primary_key=True, autoincrement=True) - funding_package_id = Column(BigInteger, ForeignKey(FundingPackage.id), nullable=False) - measure = Column( - Enum(MaterialType, values_callable=lambda x: [e.value for e in x], create_constraint=False), - nullable=False + funding_package_id = Column( + BigInteger, ForeignKey(FundingPackage.id), nullable=False ) - material_id = Column(BigInteger, ForeignKey(Material.id), nullable=False) # Assuming material table exists + measure = Column( + Enum( + MaterialType, + values_callable=lambda x: [e.value for e in x], + create_constraint=False, + ), + nullable=False, + ) + material_id = Column( + BigInteger, ForeignKey(Material.id), nullable=False + ) # Assuming material table exists innovation_uplift = Column(Float) partial_project_score = Column(Float) uplift_project_score = Column(Float) diff --git a/backend/app/db/models/portfolio.py b/backend/app/db/models/portfolio.py index d151bdc4..f6a99a97 100644 --- a/backend/app/db/models/portfolio.py +++ b/backend/app/db/models/portfolio.py @@ -1,7 +1,17 @@ import enum import pytz import datetime -from sqlalchemy import Column, Integer, Text, Boolean, Float, DateTime, Enum, ForeignKey, CheckConstraint +from sqlalchemy import ( + Column, + Integer, + Text, + Boolean, + Float, + DateTime, + Enum, + ForeignKey, + CheckConstraint, +) from sqlalchemy.ext.declarative import declarative_base from backend.app.db.models.users import UserModel # noqa from backend.app.db.models.materials import MaterialType @@ -22,7 +32,7 @@ class PortfolioStatus(enum.Enum): NEEDS_REVIEW = "needs review" -class PortfolioGoal(enum.Enum): +class PortfolioGoal(enum.Enum): # 
TODO: Move to domain? VALUATION_IMPROVEMENT = "Valuation Improvement" INCREASING_EPC = "Increasing EPC" REDUCING_CO2_EMISSIONS = "Reducing CO2 emissions" @@ -31,23 +41,43 @@ class PortfolioGoal(enum.Enum): class Portfolio(Base): - __tablename__ = 'portfolio' + __tablename__ = "portfolio" id = Column(Integer, primary_key=True, autoincrement=True) name = Column(Text, nullable=False) budget = Column(Float) - status = Column(Enum(PortfolioStatus, values_callable=lambda x: [e.value for e in x]), nullable=False) - goal = Column(Enum(PortfolioGoal, values_callable=lambda x: [e.value for e in x]), nullable=False) + status = Column( + Enum(PortfolioStatus, values_callable=lambda x: [e.value for e in x]), + nullable=False, + ) + goal = Column( + Enum(PortfolioGoal, values_callable=lambda x: [e.value for e in x]), + nullable=False, + ) cost = Column(Float) number_of_properties = Column(Integer) - co2_equivalent_savings = Column(Float) # Unit is always tonnes so we don't need to store the unit - energy_savings = Column(Float) # Unit is always kWh so we don't need to store the unit - energy_cost_savings = Column(Float) # Unit is always £ so we don't need to store the unit for the moment - property_valuation_increase = Column(Float) # Unit is always £ so we don't need to store the unit for the moment - rental_yield_increase = Column(Float) # Unit is always £ so we don't need to store the unit for the moment + co2_equivalent_savings = Column( + Float + ) # Unit is always tonnes so we don't need to store the unit + energy_savings = Column( + Float + ) # Unit is always kWh so we don't need to store the unit + energy_cost_savings = Column( + Float + ) # Unit is always £ so we don't need to store the unit for the moment + property_valuation_increase = Column( + Float + ) # Unit is always £ so we don't need to store the unit for the moment + rental_yield_increase = Column( + Float + ) # Unit is always £ so we don't need to store the unit for the moment total_work_hours = Column(Float) 
labour_days = Column(Float) - created_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc)) - updated_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc)) + created_at = Column( + DateTime, nullable=False, default=datetime.datetime.now(pytz.utc) + ) + updated_at = Column( + DateTime, nullable=False, default=datetime.datetime.now(pytz.utc) + ) # Aggregations for summary epc_breakdown_pre_retrofit = Column(Text) epc_breakdown_post_retrofit = Column(Text) @@ -71,7 +101,7 @@ class PropertyCreationStatus(enum.Enum): ERROR = "ERROR" -class Epc(enum.Enum): +class Epc(enum.Enum): # TODO: Move to domain? A = "A" B = "B" C = "C" @@ -82,20 +112,27 @@ class Epc(enum.Enum): class PropertyModel(Base): - __tablename__ = 'property' + __tablename__ = "property" id = Column(Integer, primary_key=True, autoincrement=True) - portfolio_id = Column(Integer, ForeignKey('portfolio.id'), nullable=False) + portfolio_id = Column(Integer, ForeignKey("portfolio.id"), nullable=False) creation_status = Column(Enum(PropertyCreationStatus), nullable=False) uprn = Column(Integer) landlord_property_id = Column(Text) building_reference_number = Column(Integer) - status = Column(Enum(PortfolioStatus, values_callable=lambda x: [e.value for e in x]), nullable=False) + status = Column( + Enum(PortfolioStatus, values_callable=lambda x: [e.value for e in x]), + nullable=False, + ) address = Column(Text) postcode = Column(Text) has_pre_condition_report = Column(Boolean) has_recommendations = Column(Boolean) - created_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc)) - updated_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc)) + created_at = Column( + DateTime, nullable=False, default=datetime.datetime.now(pytz.utc) + ) + updated_at = Column( + DateTime, nullable=False, default=datetime.datetime.now(pytz.utc) + ) property_type = Column(Text) built_form = Column(Text) local_authority = Column(Text) @@ 
-127,7 +164,7 @@ rating_lookup = { "Average": FeatureRating.AVERAGE, "Poor": FeatureRating.POOR, "Very Poor": FeatureRating.VERY_POOR, - "N/A": FeatureRating.NA + "N/A": FeatureRating.NA, } @@ -136,32 +173,45 @@ def get_feature_rating_from_string(rating_str: str): class PropertyDetailsEpcModel(Base): - __tablename__ = 'property_details_epc' + __tablename__ = "property_details_epc" id = Column(Integer, primary_key=True, autoincrement=True) - property_id = Column(Integer, ForeignKey('property.id'), nullable=False) - portfolio_id = Column(Integer, ForeignKey('portfolio.id'), nullable=False) + property_id = Column(Integer, ForeignKey("property.id"), nullable=False) + portfolio_id = Column(Integer, ForeignKey("portfolio.id"), nullable=False) full_address = Column(Text) lodgement_date = Column(DateTime) is_expired = Column(Boolean) total_floor_area = Column(Float) walls = Column(Text) - walls_rating = Column(Integer, CheckConstraint('walls_rating>=1 AND walls_rating<=5')) + walls_rating = Column( + Integer, CheckConstraint("walls_rating>=1 AND walls_rating<=5") + ) roof = Column(Text) - roof_rating = Column(Integer, CheckConstraint('roof_rating>=1 AND roof_rating<=5')) + roof_rating = Column(Integer, CheckConstraint("roof_rating>=1 AND roof_rating<=5")) floor = Column(Text) - floor_rating = Column(Integer, CheckConstraint('floor_rating>=1 AND floor_rating<=5')) + floor_rating = Column( + Integer, CheckConstraint("floor_rating>=1 AND floor_rating<=5") + ) windows = Column(Text) - windows_rating = Column(Integer, CheckConstraint('windows_rating>=1 AND windows_rating<=5')) + windows_rating = Column( + Integer, CheckConstraint("windows_rating>=1 AND windows_rating<=5") + ) heating = Column(Text) - heating_rating = Column(Integer, CheckConstraint('heating_rating>=1 AND heating_rating<=5')) + heating_rating = Column( + Integer, CheckConstraint("heating_rating>=1 AND heating_rating<=5") + ) heating_controls = Column(Text) heating_controls_rating = Column( - Integer, 
CheckConstraint('heating_controls_rating>=1 AND heating_controls_rating<=5') + Integer, + CheckConstraint("heating_controls_rating>=1 AND heating_controls_rating<=5"), ) hot_water = Column(Text) - hot_water_rating = Column(Integer, CheckConstraint('hot_water_rating>=1 AND hot_water_rating<=5')) + hot_water_rating = Column( + Integer, CheckConstraint("hot_water_rating>=1 AND hot_water_rating<=5") + ) lighting = Column(Text) - lighting_rating = Column(Integer, CheckConstraint('lighting_rating>=1 AND lighting_rating<=5')) + lighting_rating = Column( + Integer, CheckConstraint("lighting_rating>=1 AND lighting_rating<=5") + ) mainfuel = Column(Text) ventilation = Column(Text) solar_pv = Column(Text) @@ -219,7 +269,7 @@ class PropertyDetailsSpatial(Base): class PropertyDetailsMeter(Base): - __tablename__ = 'property_details_meter' + __tablename__ = "property_details_meter" id = Column(Integer, primary_key=True, autoincrement=True) uprn = Column(Integer, nullable=False) energy_supplier = Column(Text) @@ -230,11 +280,13 @@ class PropertyDetailsMeter(Base): class PropertyTargetsModel(Base): - __tablename__ = 'property_targets' + __tablename__ = "property_targets" id = Column(Integer, primary_key=True, autoincrement=True) - property_id = Column(Integer, ForeignKey('property.id'), nullable=False) - portfolio_id = Column(Integer, ForeignKey('portfolio.id'), nullable=False) - created_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc)) + property_id = Column(Integer, ForeignKey("property.id"), nullable=False) + portfolio_id = Column(Integer, ForeignKey("portfolio.id"), nullable=False) + created_at = Column( + DateTime, nullable=False, default=datetime.datetime.now(pytz.utc) + ) epc = Column(Enum(Epc)) heat_demand = Column(Text) @@ -242,23 +294,36 @@ class PropertyTargetsModel(Base): class PortfolioUsers(Base): __tablename__ = "portfolioUsers" id = Column(Integer, primary_key=True, autoincrement=True) - user_id = Column(Integer, ForeignKey('user.id'), 
nullable=False) - portfolioId = Column(Integer, ForeignKey('portfolio.id'), nullable=False) + user_id = Column(Integer, ForeignKey("user.id"), nullable=False) + portfolioId = Column(Integer, ForeignKey("portfolio.id"), nullable=False) role = Column(Text, nullable=False) - created_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc)) - updated_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc)) + created_at = Column( + DateTime, nullable=False, default=datetime.datetime.now(pytz.utc) + ) + updated_at = Column( + DateTime, nullable=False, default=datetime.datetime.now(pytz.utc) + ) class PropertyInstalledMeasures(Base): """ This model keeps a record of the installed measures for each property, at the UPRN level """ - __tablename__ = 'property_installed_measures' + + __tablename__ = "property_installed_measures" id = Column(Integer, primary_key=True, autoincrement=True) uprn = Column(Integer, nullable=False) measure_type = Column( - Enum(MaterialType, values_callable=lambda x: [e.value for e in x], create_constraint=False), - nullable=False + Enum( + MaterialType, + values_callable=lambda x: [e.value for e in x], + create_constraint=False, + ), + nullable=False, + ) + created_at = Column( + DateTime, nullable=False, default=datetime.datetime.now(pytz.utc) + ) + installed_at = Column( + DateTime, nullable=False, default=datetime.datetime.now(pytz.utc) ) - created_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc)) - installed_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc)) diff --git a/backend/app/db/models/recommendations.py b/backend/app/db/models/recommendations.py index ed1fcefa..538b11e3 100644 --- a/backend/app/db/models/recommendations.py +++ b/backend/app/db/models/recommendations.py @@ -1,7 +1,19 @@ -from sqlalchemy import Column, BigInteger, String, Float, Boolean, TIMESTAMP, ForeignKey, Enum -from sqlalchemy.orm import declarative_base +from typing 
import Iterable, List, NamedTuple, Optional, Type +from sqlalchemy import ( + Column, + BigInteger, + String, + Float, + Boolean, + TIMESTAMP, + ForeignKey, + Enum, +) +from sqlalchemy.orm import declarative_base, Mapped, mapped_column from sqlalchemy.sql import func -from backend.app.db.models.portfolio import Portfolio, PropertyModel +from datetime import datetime + +from backend.app.db.models.portfolio import Portfolio, PortfolioGoal, PropertyModel from backend.app.db.models.materials import Material from backend.app.db.models.portfolio import Epc from datatypes.enums import QuantityUnits @@ -10,8 +22,12 @@ import enum Base = declarative_base() +def portfolio_goal_values(enum_cls: Type[PortfolioGoal]) -> List[str]: + return [e.value for e in enum_cls] + + class Recommendation(Base): - __tablename__ = 'recommendation' + __tablename__ = "recommendation" id = Column(BigInteger, primary_key=True, autoincrement=True) property_id = Column(BigInteger, ForeignKey(PropertyModel.id), nullable=False) @@ -37,19 +53,24 @@ class Recommendation(Base): class RecommendationMaterials(Base): - __tablename__ = 'recommendation_materials' + __tablename__ = "recommendation_materials" id = Column(BigInteger, primary_key=True, autoincrement=True) - recommendation_id = Column(BigInteger, ForeignKey('recommendation.id'), nullable=False) + recommendation_id = Column( + BigInteger, ForeignKey("recommendation.id"), nullable=False + ) material_id = Column(BigInteger, ForeignKey(Material.id), nullable=False) created_at = Column(TIMESTAMP, nullable=False, server_default=func.now()) depth = Column(Float, nullable=False) quantity = Column(Float, nullable=False) - quantity_unit = Column(Enum(QuantityUnits, values_callable=lambda x: [e.value for e in x]), nullable=False) + quantity_unit = Column( + Enum(QuantityUnits, values_callable=lambda x: [e.value for e in x]), + nullable=False, + ) estimated_cost = Column(Float, nullable=False) -class PlanTypeEnum(enum.Enum): +class PlanTypeEnum(enum.Enum): # 
TODO: move this to domain? SOLAR_ECO4 = "solar_eco4" SOLAR_HHRSH_ECO4 = "solar_hhrsh_eco4" EMPTY_CAVITY_ECO = "empty_cavity_eco" @@ -57,20 +78,36 @@ class PlanTypeEnum(enum.Enum): EXTRACTION_ECO = "extraction_eco" -class Plan(Base): - __tablename__ = 'plan' +class PlanModel(Base): + __tablename__ = "plan" - id = Column(BigInteger, primary_key=True, autoincrement=True) - name = Column(String, nullable=True, default="") - portfolio_id = Column(BigInteger, ForeignKey(Portfolio.id), nullable=False) - property_id = Column(BigInteger, ForeignKey(PropertyModel.id), nullable=False) - scenario_id = Column(BigInteger, ForeignKey('scenario.id')) # Doesn't have to be linked to a scenario - created_at = Column(TIMESTAMP, nullable=False, server_default=func.now()) - is_default = Column(Boolean, nullable=False) - valuation_increase_lower_bound = Column(Float) - valuation_increase_upper_bound = Column(Float) - valuation_increase_average = Column(Float) - plan_type = Column( + id: Mapped[int] = mapped_column(BigInteger, primary_key=True, autoincrement=True) + + name: Mapped[Optional[str]] = mapped_column(String, nullable=True, default="") + + portfolio_id: Mapped[int] = mapped_column( + BigInteger, ForeignKey(Portfolio.id), nullable=False + ) + + property_id: Mapped[int] = mapped_column( + BigInteger, ForeignKey(PropertyModel.id), nullable=False + ) + + scenario_id: Mapped[Optional[int]] = mapped_column( + BigInteger, ForeignKey("scenario.id") + ) + + created_at: Mapped[datetime] = mapped_column( # type: ignore + TIMESTAMP, nullable=False, server_default=func.now() + ) + + is_default: Mapped[bool] = mapped_column(Boolean, nullable=False) + + valuation_increase_lower_bound: Mapped[Optional[float]] = mapped_column(Float) + valuation_increase_upper_bound: Mapped[Optional[float]] = mapped_column(Float) + valuation_increase_average: Mapped[Optional[float]] = mapped_column(Float) + + plan_type: Mapped[Optional[PlanTypeEnum]] = mapped_column( Enum( PlanTypeEnum, name="plan_type", @@ 
-79,73 +116,90 @@ class Plan(Base): ), nullable=True, ) - post_sap_points = Column(Float) - post_epc_rating = Column(Enum(Epc)) - post_co2_emissions = Column(Float) - co2_savings = Column(Float) - post_energy_bill = Column(Float) - energy_bill_savings = Column(Float) - post_energy_consumption = Column(Float) # energy demand in kWh/year - energy_consumption_savings = Column(Float) - valuation_post_retrofit = Column(Float) - valuation_increase = Column(Float) + + post_sap_points: Mapped[Optional[float]] = mapped_column(Float) + post_epc_rating: Mapped[Optional[Epc]] = mapped_column(Enum(Epc)) + post_co2_emissions: Mapped[Optional[float]] = mapped_column(Float) + co2_savings: Mapped[Optional[float]] = mapped_column(Float) + post_energy_bill: Mapped[Optional[float]] = mapped_column(Float) + energy_bill_savings: Mapped[Optional[float]] = mapped_column(Float) + post_energy_consumption: Mapped[Optional[float]] = mapped_column(Float) + energy_consumption_savings: Mapped[Optional[float]] = mapped_column(Float) + valuation_post_retrofit: Mapped[Optional[float]] = mapped_column(Float) + valuation_increase: Mapped[Optional[float]] = mapped_column(Float) + # Financial metrics, excluding funding - cost_of_works = Column(Float) - contingency_cost = Column(Float) + cost_of_works: Mapped[Optional[float]] = mapped_column(Float) + contingency_cost: Mapped[Optional[float]] = mapped_column(Float) class PlanRecommendations(Base): - __tablename__ = 'plan_recommendations' + __tablename__ = "plan_recommendations" id = Column(BigInteger, primary_key=True, autoincrement=True) - plan_id = Column(BigInteger, ForeignKey('plan.id'), nullable=False) - recommendation_id = Column(BigInteger, ForeignKey('recommendation.id'), nullable=False) + plan_id = Column(BigInteger, ForeignKey("plan.id"), nullable=False) + recommendation_id = Column( + BigInteger, ForeignKey("recommendation.id"), nullable=False + ) -class Scenario(Base): - __tablename__ = 'scenario' +class ScenarioModel(Base): + __tablename__ = 
"scenario" - id = Column(BigInteger, primary_key=True, autoincrement=True) - name = Column(String, nullable=False) - created_at = Column(TIMESTAMP, nullable=False, server_default=func.now()) - budget = Column(Float) - portfolio_id = Column(BigInteger, ForeignKey(Portfolio.id), nullable=False) - housing_type = Column(String, nullable=False) - goal = Column(String, nullable=False) - goal_value = Column(String, nullable=False) - trigger_file_path = Column(String, nullable=False) - already_installed_file_path = Column(String) - patches_file_path = Column(String) - non_invasive_recommendations_file_path = Column(String) - exclusions = Column(String) - multi_plan = Column(Boolean, default=False) - is_default = Column(Boolean, default=False, nullable=False) + id: Mapped[int] = mapped_column(BigInteger, primary_key=True, autoincrement=True) + name: Mapped[str] = mapped_column(String, nullable=False) + created_at: Mapped[datetime] = mapped_column( + TIMESTAMP, nullable=False, server_default=func.now() + ) + budget: Mapped[Optional[float]] = mapped_column(Float) + portfolio_id: Mapped[int] = mapped_column( + BigInteger, ForeignKey(Portfolio.id), nullable=False + ) + housing_type: Mapped[str] = mapped_column(String, nullable=False) + goal: Mapped[PortfolioGoal] = mapped_column( + Enum(PortfolioGoal, values_callable=portfolio_goal_values, name="goal"), + nullable=False, + ) + goal_value: Mapped[str] = mapped_column(String, nullable=False) + trigger_file_path: Mapped[str] = mapped_column(String, nullable=False) + already_installed_file_path: Mapped[Optional[str]] = mapped_column(String) + patches_file_path: Mapped[Optional[str]] = mapped_column(String) + non_invasive_recommendations_file_path: Mapped[Optional[str]] = mapped_column( + String + ) + exclusions: Mapped[Optional[str]] = mapped_column(String) + multi_plan: Mapped[bool] = mapped_column(Boolean, default=False) + is_default: Mapped[bool] = mapped_column(Boolean, default=False, nullable=False) # Add in the fields we 
need, which were previously sitting at the portfolio level - cost = Column(Float) - contingency = Column(Float) - funding = Column(Float) - total_work_hours = Column(Float) - energy_savings = Column(Float) - co2_equivalent_savings = Column(Float) - energy_cost_savings = Column(Float) - epc_breakdown_pre_retrofit = Column(String) - epc_breakdown_post_retrofit = Column(String) - number_of_properties = Column(BigInteger) - n_units_to_retrofit = Column(BigInteger) - co2_per_unit_pre_retrofit = Column(String) - co2_per_unit_post_retrofit = Column(String) - energy_bill_per_unit_pre_retrofit = Column(String) - energy_bill_per_unit_post_retrofit = Column(String) - energy_consumption_per_unit_pre_retrofit = Column(String) - energy_consumption_per_unit_post_retrofit = Column(String) - valuation_improvement_per_unit = Column(String) - cost_per_unit = Column(String) - cost_per_co2_saved = Column(String) - cost_per_sap_point = Column(String) - valuation_return_on_investment = Column(String) - property_valuation_increase = Column(Float) - labour_days = Column(Float) + cost: Mapped[Optional[float]] = mapped_column(Float) + contingency: Mapped[Optional[float]] = mapped_column(Float) + funding: Mapped[Optional[float]] = mapped_column(Float) + total_work_hours: Mapped[Optional[float]] = mapped_column(Float) + energy_savings: Mapped[Optional[float]] = mapped_column(Float) + co2_equivalent_savings: Mapped[Optional[float]] = mapped_column(Float) + energy_cost_savings: Mapped[Optional[float]] = mapped_column(Float) + epc_breakdown_pre_retrofit: Mapped[Optional[str]] = mapped_column(String) + epc_breakdown_post_retrofit: Mapped[Optional[str]] = mapped_column(String) + number_of_properties: Mapped[Optional[int]] = mapped_column(BigInteger) + n_units_to_retrofit: Mapped[Optional[int]] = mapped_column(BigInteger) + co2_per_unit_pre_retrofit: Mapped[Optional[str]] = mapped_column(String) + co2_per_unit_post_retrofit: Mapped[Optional[str]] = mapped_column(String) + 
energy_bill_per_unit_pre_retrofit: Mapped[Optional[str]] = mapped_column(String) + energy_bill_per_unit_post_retrofit: Mapped[Optional[str]] = mapped_column(String) + energy_consumption_per_unit_pre_retrofit: Mapped[Optional[str]] = mapped_column( + String + ) + energy_consumption_per_unit_post_retrofit: Mapped[Optional[str]] = mapped_column( + String + ) + valuation_improvement_per_unit: Mapped[Optional[str]] = mapped_column(String) + cost_per_unit: Mapped[Optional[str]] = mapped_column(String) + cost_per_co2_saved: Mapped[Optional[str]] = mapped_column(String) + cost_per_sap_point: Mapped[Optional[str]] = mapped_column(String) + valuation_return_on_investment: Mapped[Optional[str]] = mapped_column(String) + property_valuation_increase: Mapped[Optional[float]] = mapped_column(Float) + labour_days: Mapped[Optional[float]] = mapped_column(Float) class MeasureType(enum.Enum): @@ -201,3 +255,12 @@ class InstalledMeasure(Base): heat_demand_savings = Column(Float) source = Column(String) is_active = Column(Boolean, nullable=False, default=True) + + +def enum_values(e: Iterable[PlanTypeEnum]) -> list[str]: + return [m.value for m in e] + + +class PlanPersistence(NamedTuple): + plan: PlanModel + scenario: ScenarioModel diff --git a/backend/app/domain/classes/plan.py b/backend/app/domain/classes/plan.py new file mode 100644 index 00000000..7970abcd --- /dev/null +++ b/backend/app/domain/classes/plan.py @@ -0,0 +1,150 @@ +from __future__ import annotations +from dataclasses import replace +from typing import Optional + +from backend.app.db.models.portfolio import PortfolioGoal +from backend.app.db.models.recommendations import ( + PlanModel, + PlanPersistence, + ScenarioModel, +) +from backend.app.domain.classes.scenario import Scenario +from backend.app.domain.records.plan_record import PlanRecord +from backend.app.utils import sap_to_epc + + +class Plan: + def __init__( + self, record: PlanRecord, scenario: Scenario, id: Optional[int] = None + ): + self.id: Optional[int] 
= id + self.record: PlanRecord = record + self.scenario: Scenario = scenario + + @classmethod + def from_sqlalchemy(cls, plan_model: PlanModel, scenario: Scenario) -> Plan: + if not scenario: + raise ValueError(f"No Scenario associated with Plan of ID {plan_model.id}") + + record = PlanRecord( + property_id=plan_model.property_id, + portfolio_id=plan_model.portfolio_id, + created_at=plan_model.created_at, + is_default=plan_model.is_default, + valuation_increase_lower_bound=plan_model.valuation_increase_lower_bound, + valuation_increase_upper_bound=plan_model.valuation_increase_upper_bound, + valuation_increase_average=plan_model.valuation_increase_average, + plan_type=plan_model.plan_type, + post_sap_points=plan_model.post_sap_points, + post_epc_rating=plan_model.post_epc_rating, + post_co2_emissions=plan_model.post_co2_emissions, + co2_savings=plan_model.co2_savings, + post_energy_bill=plan_model.post_energy_bill, + energy_bill_savings=plan_model.energy_bill_savings, + post_energy_consumption=plan_model.post_energy_consumption, + energy_consumption_savings=plan_model.energy_consumption_savings, + valuation_post_retrofit=plan_model.valuation_post_retrofit, + valuation_increase=plan_model.valuation_increase, + cost_of_works=plan_model.cost_of_works, + contingency_cost=plan_model.contingency_cost, + ) + return cls(record=record, scenario=scenario, id=plan_model.id) + + @property + def is_compliant(self) -> bool: + goal: PortfolioGoal = self.scenario.record.goal + + match goal: + case PortfolioGoal.INCREASING_EPC: + return self._is_compliant_epc() + case _: + raise NotImplementedError + + def to_sqlalchemy(self) -> PlanPersistence: + scenario_record = self.scenario.record + + scenario_model = ScenarioModel( + id=self.scenario.id, + name=scenario_record.name, + created_at=scenario_record.created_at, + housing_type=scenario_record.housing_type, + goal=scenario_record.goal, + goal_value=scenario_record.goal_value, + trigger_file_path=scenario_record.trigger_file_path, + 
multi_plan=scenario_record.multi_plan, + is_default=scenario_record.is_default, + budget=scenario_record.budget, + already_installed_file_path=scenario_record.already_installed_file_path, + patches_file_path=scenario_record.patches_file_path, + non_invasive_recommendations_file_path=scenario_record.non_invasive_recommendations_file_path, + exclusions=scenario_record.exclusions, + cost=scenario_record.cost, + contingency=scenario_record.contingency, + funding=scenario_record.funding, + total_work_hours=scenario_record.total_work_hours, + energy_savings=scenario_record.energy_savings, + co2_equivalent_savings=scenario_record.co2_equivalent_savings, + energy_cost_savings=scenario_record.energy_cost_savings, + epc_breakdown_pre_retrofit=scenario_record.epc_breakdown_pre_retrofit, + epc_breakdown_post_retrofit=scenario_record.epc_breakdown_post_retrofit, + number_of_properties=scenario_record.number_of_properties, + n_units_to_retrofit=scenario_record.n_units_to_retrofit, + co2_per_unit_pre_retrofit=scenario_record.co2_per_unit_pre_retrofit, + co2_per_unit_post_retrofit=scenario_record.co2_per_unit_post_retrofit, + energy_bill_per_unit_pre_retrofit=scenario_record.energy_bill_per_unit_pre_retrofit, + energy_bill_per_unit_post_retrofit=scenario_record.energy_bill_per_unit_post_retrofit, + energy_consumption_per_unit_pre_retrofit=scenario_record.energy_consumption_per_unit_pre_retrofit, + energy_consumption_per_unit_post_retrofit=scenario_record.energy_consumption_per_unit_post_retrofit, + valuation_improvement_per_unit=scenario_record.valuation_improvement_per_unit, + cost_per_unit=scenario_record.cost_per_unit, + cost_per_co2_saved=scenario_record.cost_per_co2_saved, + cost_per_sap_point=scenario_record.cost_per_sap_point, + valuation_return_on_investment=scenario_record.valuation_return_on_investment, + property_valuation_increase=scenario_record.property_valuation_increase, + labour_days=scenario_record.labour_days, + ) + + record = self.record + + plan_model = 
PlanModel( + id=self.id, + property_id=record.property_id, + portfolio_id=record.portfolio_id, + scenario_id=self.scenario.id, + created_at=record.created_at, + is_default=record.is_default, + valuation_increase_lower_bound=record.valuation_increase_lower_bound, + valuation_increase_upper_bound=record.valuation_increase_upper_bound, + valuation_increase_average=record.valuation_increase_average, + plan_type=record.plan_type, + post_sap_points=record.post_sap_points, + post_epc_rating=record.post_epc_rating, + post_co2_emissions=record.post_co2_emissions, + co2_savings=record.co2_savings, + post_energy_bill=record.post_energy_bill, + energy_bill_savings=record.energy_bill_savings, + post_energy_consumption=record.post_energy_consumption, + energy_consumption_savings=record.energy_consumption_savings, + valuation_post_retrofit=record.valuation_post_retrofit, + valuation_increase=record.valuation_increase, + cost_of_works=record.cost_of_works, + contingency_cost=record.contingency_cost, + ) + + return PlanPersistence(plan=plan_model, scenario=scenario_model) + + def set_default(self, value: bool) -> None: + self.record = replace(self.record, is_default=value) + self.scenario.record = replace(self.scenario.record, is_default=value) + + def _is_compliant_epc(self) -> bool: + goal_value: str = self.scenario.record.goal_value + + if self.record.post_epc_rating: + post_epc = self.record.post_epc_rating.value + elif self.record.post_sap_points: + post_epc = sap_to_epc(self.record.post_sap_points) + else: + return False + + return post_epc <= goal_value diff --git a/backend/app/domain/classes/scenario.py b/backend/app/domain/classes/scenario.py new file mode 100644 index 00000000..3c22657e --- /dev/null +++ b/backend/app/domain/classes/scenario.py @@ -0,0 +1,58 @@ +from __future__ import annotations +from dataclasses import replace +from typing import Optional + +from backend.app.db.models.recommendations import ScenarioModel +from backend.app.domain.records.scenario_record 
class Scenario:
    """Domain wrapper pairing a ``ScenarioRecord`` with an optional id."""

    def __init__(self, record: ScenarioRecord, id: Optional[int] = None):
        """Store the record and the (possibly absent) identifier."""
        self.record = record
        self.id = id

    @classmethod
    def from_sqlalchemy(cls, scenario_model: ScenarioModel) -> Scenario:
        """Build a ``Scenario`` by copying each field off a ``ScenarioModel``.

        Every ``ScenarioRecord`` field is read from the attribute of the same
        name on the model; the model's ``id`` becomes the scenario id.
        """
        src = scenario_model
        return cls(
            record=ScenarioRecord(
                name=src.name,
                created_at=src.created_at,
                housing_type=src.housing_type,
                goal=src.goal,
                goal_value=src.goal_value,
                trigger_file_path=src.trigger_file_path,
                multi_plan=src.multi_plan,
                is_default=src.is_default,
                budget=src.budget,
                already_installed_file_path=src.already_installed_file_path,
                patches_file_path=src.patches_file_path,
                non_invasive_recommendations_file_path=(
                    src.non_invasive_recommendations_file_path
                ),
                exclusions=src.exclusions,
                cost=src.cost,
                contingency=src.contingency,
                funding=src.funding,
                total_work_hours=src.total_work_hours,
                energy_savings=src.energy_savings,
                co2_equivalent_savings=src.co2_equivalent_savings,
                energy_cost_savings=src.energy_cost_savings,
                epc_breakdown_pre_retrofit=src.epc_breakdown_pre_retrofit,
                epc_breakdown_post_retrofit=src.epc_breakdown_post_retrofit,
                number_of_properties=src.number_of_properties,
                n_units_to_retrofit=src.n_units_to_retrofit,
                co2_per_unit_pre_retrofit=src.co2_per_unit_pre_retrofit,
                co2_per_unit_post_retrofit=src.co2_per_unit_post_retrofit,
                energy_bill_per_unit_pre_retrofit=(
                    src.energy_bill_per_unit_pre_retrofit
                ),
                energy_bill_per_unit_post_retrofit=(
                    src.energy_bill_per_unit_post_retrofit
                ),
                energy_consumption_per_unit_pre_retrofit=(
                    src.energy_consumption_per_unit_pre_retrofit
                ),
                energy_consumption_per_unit_post_retrofit=(
                    src.energy_consumption_per_unit_post_retrofit
                ),
                valuation_improvement_per_unit=src.valuation_improvement_per_unit,
                cost_per_unit=src.cost_per_unit,
                cost_per_co2_saved=src.cost_per_co2_saved,
                cost_per_sap_point=src.cost_per_sap_point,
                valuation_return_on_investment=src.valuation_return_on_investment,
                property_valuation_increase=src.property_valuation_increase,
                labour_days=src.labour_days,
            ),
            id=src.id,
        )

    def set_default(self, value: bool) -> None:
        """Swap the record for a copy whose ``is_default`` equals ``value``."""
        updated = replace(self.record, is_default=value)
        self.record = updated
@dataclass(frozen=True)
class ScenarioRecord:
    """Immutable (frozen) record of a scenario's attributes.

    Field names match the attributes that ``Scenario.from_sqlalchemy`` copies
    from ``ScenarioModel``. Because the dataclass is frozen, changes are made
    by building a new instance (e.g. with ``dataclasses.replace``).
    """

    # Required fields: no defaults, so they must be supplied on construction.
    name: str
    created_at: datetime
    housing_type: str
    goal: PortfolioGoal
    goal_value: str
    trigger_file_path: str
    multi_plan: bool
    is_default: bool
    # Optional fields: each defaults to None.
    budget: Optional[float] = None
    already_installed_file_path: Optional[str] = None
    patches_file_path: Optional[str] = None
    non_invasive_recommendations_file_path: Optional[str] = None
    exclusions: Optional[str] = None

    # Further optional fields, also defaulting to None.
    # NOTE(review): these look like computed results rather than inputs, and
    # several "per unit" values are typed as str -- confirm against the model.
    cost: Optional[float] = None
    contingency: Optional[float] = None
    funding: Optional[float] = None
    total_work_hours: Optional[float] = None
    energy_savings: Optional[float] = None
    co2_equivalent_savings: Optional[float] = None
    energy_cost_savings: Optional[float] = None
    epc_breakdown_pre_retrofit: Optional[str] = None
    epc_breakdown_post_retrofit: Optional[str] = None
    number_of_properties: Optional[int] = None
    n_units_to_retrofit: Optional[int] = None
    co2_per_unit_pre_retrofit: Optional[str] = None
    co2_per_unit_post_retrofit: Optional[str] = None
    energy_bill_per_unit_pre_retrofit: Optional[str] = None
    energy_bill_per_unit_post_retrofit: Optional[str] = None
    energy_consumption_per_unit_pre_retrofit: Optional[str] = None
    energy_consumption_per_unit_post_retrofit: Optional[str] = None
    valuation_improvement_per_unit: Optional[str] = None
    cost_per_unit: Optional[str] = None
    cost_per_co2_saved: Optional[str] = None
    cost_per_sap_point: Optional[str] = None
    valuation_return_on_investment: Optional[str] = None
    property_valuation_increase: Optional[float] = None
    labour_days: Optional[float] = None
class CategorisationTriggerRequest(BaseModel):
    """Pydantic model with a single required integer field, ``portfolio_id``.

    NOTE(review): presumably the payload that triggers categorisation for a
    portfolio; no caller is visible in this change -- confirm.
    """

    portfolio_id: int
def main() -> None:
    """Run ``process_portfolio`` locally for a single portfolio.

    The portfolio id is taken from the first command-line argument. When no
    argument is given, the previously hard-coded id 556 is used, so existing
    invocations behave as before.

    Raises:
        SystemExit: If the supplied argument is not an integer.
    """
    # Function-scope import keeps the module's import block untouched.
    import sys

    default_portfolio_id = 556

    cli_args = sys.argv[1:]
    if not cli_args:
        portfolio_id = default_portfolio_id
    else:
        raw_value = cli_args[0]
        try:
            portfolio_id = int(raw_value)
        except ValueError:
            raise SystemExit(
                f"portfolio_id must be an integer, got {raw_value!r}"
            ) from None

    process_portfolio(portfolio_id)
def process_portfolio(portfolio_id: int) -> None:
    """Pick and flag the default plan for every property in a portfolio.

    Loads the portfolio's plans, groups them by ``property_id``, chooses the
    cheapest relevant plan per property and updates the ``is_default`` flags
    accordingly.

    Args:
        portfolio_id: Id of the portfolio whose plans are processed.
    """
    logger.info(f"Processing portfolio {portfolio_id}")
    plans: List[Plan] = _load_plans_for_portfolio(portfolio_id)
    plans_by_property: Dict[int, List[Plan]] = _group_plans_by_property(plans)

    # Groups are keyed by property_id and are never empty: a key only exists
    # once a plan has been appended to it.
    for property_plans in plans_by_property.values():
        cheapest_plan = _choose_cheapest_relevant_plan(property_plans)
        _update_default_flags(property_plans, cheapest_plan)


def _load_plans_for_portfolio(portfolio_id: int) -> List[Plan]:
    """Load a portfolio's plans as domain objects, skipping scenario-less ones.

    Each distinct scenario is fetched once and reused for every plan that
    refers to it; every plan still receives its own ``Scenario`` instance.
    """
    plan_models = get_plans_by_portfolio_id(portfolio_id)
    logger.info(f"Got {len(plan_models)} plans from database")

    scenario_models_by_id: Dict[int, ScenarioModel] = {}
    plans: List[Plan] = []

    for model in plan_models:
        scenario_id = model.scenario_id
        if not scenario_id:
            logger.info(f"No Scenario associated with Plan of ID {model.id}")
            continue

        # Avoid one scenario lookup per plan: many plans share a scenario.
        if scenario_id not in scenario_models_by_id:
            scenario_models_by_id[scenario_id] = get_scenario(scenario_id)

        scenario = Scenario.from_sqlalchemy(scenario_models_by_id[scenario_id])
        plans.append(Plan.from_sqlalchemy(model, scenario))

    logger.info(f"Successfully mapped {len(plans)} plans to domain objects")
    return plans


def _group_plans_by_property(plans: List[Plan]) -> Dict[int, List[Plan]]:
    """Group plans by ``record.property_id``, preserving input order."""
    grouped: Dict[int, List[Plan]] = defaultdict(list)

    for plan in plans:
        grouped[plan.record.property_id].append(plan)

    return grouped


def _choose_cheapest_relevant_plan(plans: List[Plan]) -> Plan:
    """Return the cheapest plan, preferring compliant plans when any exist.

    Only plans whose ``is_compliant`` is truthy are considered; if there are
    none, all plans are considered. Plans without ``cost_of_works`` rank
    last. On equal cost the earliest plan in ``plans`` wins.

    Raises:
        ValueError: If ``plans`` is empty.
    """
    if not plans:
        raise ValueError("No plans to choose from")

    plans_to_consider: List[Plan] = [p for p in plans if p.is_compliant] or plans

    def plan_cost(plan: Plan) -> float:
        cost = plan.record.cost_of_works
        # A missing cost is treated as infinitely expensive.
        return cost if cost is not None else float("inf")

    return min(plans_to_consider, key=plan_cost)


def _update_default_flags(plans: List[Plan], cheapest_plan: Plan) -> None:
    """Make ``cheapest_plan`` the only default plan and persist the changes.

    Only plans whose ``is_default`` flag actually changes are converted back
    to models and passed to ``bulk_update_plans``.
    """
    plans_to_update: List[Plan] = []

    for plan in plans:
        should_be_default: bool = plan.id == cheapest_plan.id
        if plan.record.is_default != should_be_default:
            plan.set_default(should_be_default)
            plans_to_update.append(plan)

    if not plans_to_update:
        return

    plan_models: List[PlanModel] = []
    scenario_models: List[ScenarioModel] = []

    # NOTE(review): each changed plan also yields a scenario model; scenarios
    # shared across properties are therefore written once per property --
    # confirm this is intended.
    for plan in plans_to_update:
        plan_model, scenario_model = plan.to_sqlalchemy()
        plan_models.append(plan_model)
        scenario_models.append(scenario_model)

    bulk_update_plans(plan_models, scenario_models)
@pytest.mark.parametrize(
    "post_sap_points, post_epc_rating, expected_compliance",
    [
        (75, Epc.C, True),
        (100, Epc.A, True),
        (60, Epc.D, False),
    ],
)
def test_scenario_goal_is_epc_c(
    plan_factory: Callable[[int, "Epc"], "Plan"],
    post_sap_points: int,
    post_epc_rating: "Epc",
    expected_compliance: bool,
) -> None:
    """Check ``Plan.is_compliant`` against an EPC C goal for each case."""
    # arrange + act: build the plan and read its compliance in one step
    is_compliant: bool = plan_factory(post_sap_points, post_epc_rating).is_compliant

    # assert
    assert is_compliant == expected_compliance
backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel @@ -74,56 +81,79 @@ def get_data(portfolio_id, scenario_ids): session.begin() # Get properties and their details for a specific portfolio - properties_query = session.query( - PropertyModel, - PropertyDetailsEpcModel - ).join( - PropertyDetailsEpcModel, PropertyModel.id == PropertyDetailsEpcModel.property_id - ).filter( - PropertyModel.portfolio_id == portfolio_id # Filter by portfolio ID - ).all() + properties_query = ( + session.query(PropertyModel, PropertyDetailsEpcModel) + .join( + PropertyDetailsEpcModel, + PropertyModel.id == PropertyDetailsEpcModel.property_id, + ) + .filter(PropertyModel.portfolio_id == portfolio_id) # Filter by portfolio ID + .all() + ) # Transform properties data to include all fields dynamically properties_data = [ - {**{col.name: getattr(prop.PropertyModel, col.name) for col in PropertyModel.__table__.columns}, - **{col.name: getattr(prop.PropertyDetailsEpcModel, col.name) for col in - PropertyDetailsEpcModel.__table__.columns}} + { + **{ + col.name: getattr(prop.PropertyModel, col.name) + for col in PropertyModel.__table__.columns + }, + **{ + col.name: getattr(prop.PropertyDetailsEpcModel, col.name) + for col in PropertyDetailsEpcModel.__table__.columns + }, + } for prop in properties_query ] # Get property IDs from fetched properties # Get plans linked to the fetched properties - plans_query = session.query(Plan).filter(Plan.scenario_id.in_(scenario_ids)).all() + plans_query = ( + session.query(PlanModel).filter(PlanModel.scenario_id.in_(scenario_ids)).all() + ) # Transform plans data to include all fields dynamically plans_data = [ - {col.name: getattr(plan, col.name) for col in Plan.__table__.columns} + {col.name: getattr(plan, col.name) for col in PlanModel.__table__.columns} for plan in plans_query ] # Extract plan IDs for filtering recommendations through PlanRecommendations - plan_ids = [plan['id'] for plan in plans_data] + plan_ids = [plan["id"] for 
plan in plans_data] # Get recommendations through PlanRecommendations for those plans and that are default - recommendations_query = session.query( - Recommendation, - Plan.scenario_id - ).join( - PlanRecommendations, Recommendation.id == PlanRecommendations.recommendation_id - ).join( - Plan, Plan.id == PlanRecommendations.plan_id # Join with Plan to access scenario_id - ).filter( - PlanRecommendations.plan_id.in_(plan_ids), - Recommendation.default == True # Filtering for default recommendations - ).all() + recommendations_query = ( + session.query(Recommendation, PlanModel.scenario_id) + .join( + PlanRecommendations, + Recommendation.id == PlanRecommendations.recommendation_id, + ) + .join( + PlanModel, + PlanModel.id + == PlanRecommendations.plan_id, # Join with Plan to access scenario_id + ) + .filter( + PlanRecommendations.plan_id.in_(plan_ids), + Recommendation.default == True, # Filtering for default recommendations + ) + .all() + ) # Transform recommendations data to include all fields dynamically and include scenario_id recommendations_data = [ - {**{col.name: getattr(rec.Recommendation, col.name) if hasattr(rec, 'Recommendation') else getattr(rec, - col.name) for - col in Recommendation.__table__.columns}, - "Scenario ID": rec.scenario_id} + { + **{ + col.name: ( + getattr(rec.Recommendation, col.name) + if hasattr(rec, "Recommendation") + else getattr(rec, col.name) + ) + for col in Recommendation.__table__.columns + }, + "Scenario ID": rec.scenario_id, + } for rec in recommendations_query ] @@ -132,7 +162,9 @@ def get_data(portfolio_id, scenario_ids): return properties_data, plans_data, recommendations_data -properties_data, plans_data, recommendations_data = get_data(portfolio_id=124, scenario_ids=[205]) +properties_data, plans_data, recommendations_data = get_data( + portfolio_id=124, scenario_ids=[205] +) properties_df = pd.DataFrame(properties_data) plans_df = pd.DataFrame(plans_data) @@ -147,12 +179,12 @@ recommended_measures_df = 
recommended_measures_df.drop(columns=["default"]) post_install_sap = recommendations_df[["property_id", "default", "sap_points"]] post_install_sap = post_install_sap[post_install_sap["default"]] # Sum up the sap points by property id -post_install_sap = post_install_sap.groupby("property_id")[["sap_points"]].sum().reset_index() +post_install_sap = ( + post_install_sap.groupby("property_id")[["sap_points"]].sum().reset_index() +) recommendations_measures_pivot = recommended_measures_df.pivot( - index='property_id', - columns='measure_type', - values='estimated_cost' + index="property_id", columns="measure_type", values="estimated_cost" ) recommendations_measures_pivot = recommendations_measures_pivot.reset_index() @@ -163,7 +195,7 @@ recommendations_measures_pivot = recommendations_measures_pivot.rename( "double_glazing": "Cost: Double Glazing", "loft_insulation": "Cost: Loft Insulation", "mechanical_ventilation": "Cost: Ventilation", - "solar_pv": "Cost: Solar PV" + "solar_pv": "Cost: Solar PV", } ) recommendations_measures_pivot = recommendations_measures_pivot.fillna(0) @@ -186,16 +218,26 @@ recommendations_measures_pivot["Recommendation: Solar PV"] = ( recommendations_measures_pivot["Cost: Solar PV"] > 0 ) -df = properties_df[ - [ - "property_id", "uprn", "address", "postcode", "property_type", "walls", "roof", "heating", "windows", - "current_epc_rating", - "current_sap_points", "total_floor_area", "number_of_rooms", +df = ( + properties_df[ + [ + "property_id", + "uprn", + "address", + "postcode", + "property_type", + "walls", + "roof", + "heating", + "windows", + "current_epc_rating", + "current_sap_points", + "total_floor_area", + "number_of_rooms", + ] ] -].merge( - recommendations_measures_pivot, how="left", on="property_id" -).merge( - post_install_sap, how="left", on="property_id" + .merge(recommendations_measures_pivot, how="left", on="property_id") + .merge(post_install_sap, how="left", on="property_id") ) df = df.drop(columns=["property_id"]) @@ 
-222,25 +264,36 @@ df["Has Recommendations"] = ~pd.isnull(df["Cost: Air Source Heat Pump"]) # We fill missings: for col in [ - "Recommendation: Air Source Heat Pump", "Recommendation: Cavity Wall Insulation", - "Recommendation: Double Glazing", "Recommendation: Loft Insulation", "Recommendation: Ventilation", - "Recommendation: Solar PV" + "Recommendation: Air Source Heat Pump", + "Recommendation: Cavity Wall Insulation", + "Recommendation: Double Glazing", + "Recommendation: Loft Insulation", + "Recommendation: Ventilation", + "Recommendation: Solar PV", ]: df[col] = df[col].fillna(False) for col in [ - "Cost: Air Source Heat Pump", "Cost: Cavity Wall Insulation", - "Cost: Double Glazing", "Cost: Loft Insulation", "Cost: Ventilation", - "Cost: Solar PV" + "Cost: Air Source Heat Pump", + "Cost: Cavity Wall Insulation", + "Cost: Double Glazing", + "Cost: Loft Insulation", + "Cost: Ventilation", + "Cost: Solar PV", ]: df[col] = df[col].fillna(0) # Calculate post SAP df["Predicted Post Works SAP"] = df["Current SAP Points"] + df["sap_points"] df["Predicted Post Works SAP"] = df["Predicted Post Works SAP"].round() -df["Predicted Post Works EPC"] = df["Predicted Post Works SAP"].apply(lambda x: sap_to_epc(x)) +df["Predicted Post Works EPC"] = df["Predicted Post Works SAP"].apply( + lambda x: sap_to_epc(x) +) df["Recommendation: Air Source Heat Pump"].sum() df["Cost: Air Source Heat Pump"].sum() -df.to_csv("/Users/khalimconn-kowlessar/Documents/hestia/Customers/L&G/Basildon Data Export - 2.csv", index=False) +df.to_csv( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/L&G/Basildon Data Export - 2.csv", + index=False, +) diff --git a/etl/customers/mod/pilot/2. Create Excel Model.py b/etl/customers/mod/pilot/2. Create Excel Model.py index 9a9eda86..810ab661 100644 --- a/etl/customers/mod/pilot/2. Create Excel Model.py +++ b/etl/customers/mod/pilot/2. 
Create Excel Model.py @@ -4,7 +4,11 @@ import numpy as np from backend.app.utils import sap_to_epc from sqlalchemy.orm import sessionmaker from backend.app.db.connection import db_engine -from backend.app.db.models.recommendations import Recommendation, Plan, PlanRecommendations +from backend.app.db.models.recommendations import ( + Recommendation, + PlanModel, + PlanRecommendations, +) from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel @@ -13,56 +17,79 @@ def get_data(portfolio_id, scenario_ids): session.begin() # Get properties and their details for a specific portfolio - properties_query = session.query( - PropertyModel, - PropertyDetailsEpcModel - ).join( - PropertyDetailsEpcModel, PropertyModel.id == PropertyDetailsEpcModel.property_id - ).filter( - PropertyModel.portfolio_id == portfolio_id # Filter by portfolio ID - ).all() + properties_query = ( + session.query(PropertyModel, PropertyDetailsEpcModel) + .join( + PropertyDetailsEpcModel, + PropertyModel.id == PropertyDetailsEpcModel.property_id, + ) + .filter(PropertyModel.portfolio_id == portfolio_id) # Filter by portfolio ID + .all() + ) # Transform properties data to include all fields dynamically properties_data = [ - {**{col.name: getattr(prop.PropertyModel, col.name) for col in PropertyModel.__table__.columns}, - **{col.name: getattr(prop.PropertyDetailsEpcModel, col.name) for col in - PropertyDetailsEpcModel.__table__.columns}} + { + **{ + col.name: getattr(prop.PropertyModel, col.name) + for col in PropertyModel.__table__.columns + }, + **{ + col.name: getattr(prop.PropertyDetailsEpcModel, col.name) + for col in PropertyDetailsEpcModel.__table__.columns + }, + } for prop in properties_query ] # Get property IDs from fetched properties # Get plans linked to the fetched properties - plans_query = session.query(Plan).filter(Plan.scenario_id.in_(scenario_ids)).all() + plans_query = ( + session.query(PlanModel).filter(PlanModel.scenario_id.in_(scenario_ids)).all() + ) # 
Transform plans data to include all fields dynamically plans_data = [ - {col.name: getattr(plan, col.name) for col in Plan.__table__.columns} + {col.name: getattr(plan, col.name) for col in PlanModel.__table__.columns} for plan in plans_query ] # Extract plan IDs for filtering recommendations through PlanRecommendations - plan_ids = [plan['id'] for plan in plans_data] + plan_ids = [plan["id"] for plan in plans_data] # Get recommendations through PlanRecommendations for those plans and that are default - recommendations_query = session.query( - Recommendation, - Plan.scenario_id - ).join( - PlanRecommendations, Recommendation.id == PlanRecommendations.recommendation_id - ).join( - Plan, Plan.id == PlanRecommendations.plan_id # Join with Plan to access scenario_id - ).filter( - PlanRecommendations.plan_id.in_(plan_ids), - Recommendation.default == True # Filtering for default recommendations - ).all() + recommendations_query = ( + session.query(Recommendation, PlanModel.scenario_id) + .join( + PlanRecommendations, + Recommendation.id == PlanRecommendations.recommendation_id, + ) + .join( + PlanModel, + PlanModel.id + == PlanRecommendations.plan_id, # Join with Plan to access scenario_id + ) + .filter( + PlanRecommendations.plan_id.in_(plan_ids), + Recommendation.default == True, # Filtering for default recommendations + ) + .all() + ) # Transform recommendations data to include all fields dynamically and include scenario_id recommendations_data = [ - {**{col.name: getattr(rec.Recommendation, col.name) if hasattr(rec, 'Recommendation') - else getattr(rec, col.name) for - col in Recommendation.__table__.columns}, - "Scenario ID": rec.scenario_id} + { + **{ + col.name: ( + getattr(rec.Recommendation, col.name) + if hasattr(rec, "Recommendation") + else getattr(rec, col.name) + ) + for col in Recommendation.__table__.columns + }, + "Scenario ID": rec.scenario_id, + } for rec in recommendations_query ] @@ -94,16 +121,34 @@ def app(): ) property_asset_data = 
properties_df.merge( - mod_property_data.drop(columns=["address", "postcode", "tenure"]), how="left", on="uprn" + mod_property_data.drop(columns=["address", "postcode", "tenure"]), + how="left", + on="uprn", ) - property_asset_data["is_pitched"] = property_asset_data["roof"].str.contains("pitched", case=False) + property_asset_data["is_pitched"] = property_asset_data["roof"].str.contains( + "pitched", case=False + ) property_asset_data["pre_1970"] = property_asset_data["BUILD_YEAR"] < 1970 - property_asset_data["wall_type"] = property_asset_data["walls"].str.split(" ").str[0].str.strip() - property_asset_data["is_insulated"] = ( - property_asset_data["walls"].str.split(",").str[1].str.strip().isin( - ["filled cavity", "with external insulation", "filled cavity and external insulation"] - ) | property_asset_data["walls"].str.split(",").str[2].str.strip().isin(["insulated"]) + property_asset_data["wall_type"] = ( + property_asset_data["walls"].str.split(" ").str[0].str.strip() + ) + property_asset_data["is_insulated"] = property_asset_data["walls"].str.split( + "," + ).str[1].str.strip().isin( + [ + "filled cavity", + "with external insulation", + "filled cavity and external insulation", + ] + ) | property_asset_data[ + "walls" + ].str.split( + "," + ).str[ + 2 + ].str.strip().isin( + ["insulated"] ) property_asset_data["is_insulated"] = np.where( property_asset_data["is_insulated"], "Insulated", "Uninsulated" @@ -115,18 +160,26 @@ def app(): property_asset_data["pre_1970"], "Pre 1970", "Post 1970" ) - archetype_variables = ["property_type", "wall_type", "is_insulated", "is_pitched", "pre_1970"] + archetype_variables = [ + "property_type", + "wall_type", + "is_insulated", + "is_pitched", + "pre_1970", + ] assigned_archetypes = ( - property_asset_data.groupby( - archetype_variables - ).size().reset_index().rename(columns={0: "n_properties"}).sort_values("n_properties", ascending=False) + property_asset_data.groupby(archetype_variables) + .size() + .reset_index() + 
.rename(columns={0: "n_properties"}) + .sort_values("n_properties", ascending=False) ) # Make the archetype ID a concatenation of the variables - assigned_archetypes["archetype_id"] = assigned_archetypes[archetype_variables].apply( - lambda x: "_".join(x.astype(str)), axis=1 - ) + assigned_archetypes["archetype_id"] = assigned_archetypes[ + archetype_variables + ].apply(lambda x: "_".join(x.astype(str)), axis=1) # Most prominent archetypes prominent_archetypes = assigned_archetypes.head(6) @@ -136,7 +189,7 @@ def app(): property_asset_data = property_asset_data.merge( assigned_archetypes[archetype_variables + ["archetype_id"]], how="left", - on=archetype_variables + on=archetype_variables, ) # Create age bands: @@ -148,7 +201,7 @@ def app(): property_asset_data["age_band"] = pd.cut( property_asset_data["BUILD_YEAR"], bins=[1959, 1969, 1979, 1989, 1999, 2022], - labels=["1960-1969", "1970-1979", "1980-1989", "1990-1999", "2000+"] + labels=["1960-1969", "1970-1979", "1980-1989", "1990-1999", "2000+"], ) # Create floor area bands @@ -159,47 +212,59 @@ def app(): property_asset_data["floor_area_band"] = pd.cut( property_asset_data["total_floor_area"], bins=[0, 73, 97, 199, 10000], - labels=["0-73", "74-97", "98-199", "200+"] + labels=["0-73", "74-97", "98-199", "200+"], ) property_asset_data["archetype_group"] = property_asset_data["archetype_id"].copy() property_asset_data["archetype_group"] = np.where( - property_asset_data["archetype_id"].isin(other_archetypes["archetype_id"].values), + property_asset_data["archetype_id"].isin( + other_archetypes["archetype_id"].values + ), "other", - property_asset_data["archetype_group"] + property_asset_data["archetype_group"], ) # For colour wall_types = ( - property_asset_data[["wall_type"]].value_counts().to_frame().reset_index().rename( - columns={"wall_type": "Wall Type"} - ) + property_asset_data[["wall_type"]] + .value_counts() + .to_frame() + .reset_index() + .rename(columns={"wall_type": "Wall Type"}) ) # Group into age 
bands ages = ( - property_asset_data[["age_band"]].value_counts() + property_asset_data[["age_band"]] + .value_counts() .to_frame() - .reset_index().sort_values("age_band", ascending=True) + .reset_index() + .sort_values("age_band", ascending=True) .rename(columns={"age_band": "Age Band"}) ) floor_area_bands = ( - property_asset_data[["floor_area_band"]].value_counts() + property_asset_data[["floor_area_band"]] + .value_counts() .to_frame() - .reset_index().sort_values("floor_area_band", ascending=True) + .reset_index() + .sort_values("floor_area_band", ascending=True) .rename(columns={"floor_area_band": "Floor Area Band"}) ) archetype_counts = ( - property_asset_data[["archetype_group"]]. - value_counts(). - to_frame(). - reset_index() + property_asset_data[["archetype_group"]] + .value_counts() + .to_frame() + .reset_index() .rename(columns={"archetype_group": "Archetype"}) ) property_types = ( - (property_asset_data["property_type"] + ": " + property_asset_data["built_form"]). - value_counts(). - to_frame(). 
- reset_index() + ( + property_asset_data["property_type"] + + ": " + + property_asset_data["built_form"] + ) + .value_counts() + .to_frame() + .reset_index() .rename(columns={"index": "Property Type", 0: "Count"}) ) @@ -217,18 +282,24 @@ def app(): totals = property_asset_data[ [ "Total_household_members", - "co2_emissions", "current_energy_demand", "current_energy_demand_heating_hotwater", - "heating_cost_current", "hot_water_cost_current", "lighting_cost_current", - "appliances_cost_current", "gas_standing_charge", "electricity_standing_charge" + "co2_emissions", + "current_energy_demand", + "current_energy_demand_heating_hotwater", + "heating_cost_current", + "hot_water_cost_current", + "lighting_cost_current", + "appliances_cost_current", + "gas_standing_charge", + "electricity_standing_charge", ] ].copy() totals["total_cost"] = ( - totals["heating_cost_current"] + - totals["hot_water_cost_current"] + - totals["lighting_cost_current"] + - totals["appliances_cost_current"] + - totals["gas_standing_charge"] + - totals["electricity_standing_charge"] + totals["heating_cost_current"] + + totals["hot_water_cost_current"] + + totals["lighting_cost_current"] + + totals["appliances_cost_current"] + + totals["gas_standing_charge"] + + totals["electricity_standing_charge"] ) print( totals[ @@ -259,38 +330,59 @@ def app(): scenario_recommendations_df = recommendations_df[ recommendations_df["Scenario ID"] == scenario - ].copy() + ].copy() - scenario_recommendations_df["contingency"] = contingency * scenario_recommendations_df["estimated_cost"] + scenario_recommendations_df["contingency"] = ( + contingency * scenario_recommendations_df["estimated_cost"] + ) scenario_recommendations_df["total_cost"] = ( - scenario_recommendations_df["estimated_cost"] + scenario_recommendations_df["contingency"] + scenario_recommendations_df["estimated_cost"] + + scenario_recommendations_df["contingency"] ) recommended_measures_df = scenario_recommendations_df[ ["property_id", 
"measure_type", "estimated_cost", "default"] ] - recommended_measures_df = recommended_measures_df[recommended_measures_df["default"]] + recommended_measures_df = recommended_measures_df[ + recommended_measures_df["default"] + ] recommended_measures_df = recommended_measures_df.drop(columns=["default"]) # Metrics by property ID aggregated_metrics = scenario_recommendations_df[ [ - "property_id", "type", "default", "sap_points", - "energy_cost_savings", "kwh_savings", "co2_equivalent_savings", "estimated_cost", "contingency", - "total_cost" + "property_id", + "type", + "default", + "sap_points", + "energy_cost_savings", + "kwh_savings", + "co2_equivalent_savings", + "estimated_cost", + "contingency", + "total_cost", ] ] aggregated_metrics = aggregated_metrics[aggregated_metrics["default"]] - aggregated_metrics = aggregated_metrics.groupby("property_id")[ - ["sap_points", "co2_equivalent_savings", "energy_cost_savings", "kwh_savings", "estimated_cost", - "total_cost", "contingency"] - ].sum().reset_index() + aggregated_metrics = ( + aggregated_metrics.groupby("property_id")[ + [ + "sap_points", + "co2_equivalent_savings", + "energy_cost_savings", + "kwh_savings", + "estimated_cost", + "total_cost", + "contingency", + ] + ] + .sum() + .reset_index() + ) recommendations_measures_pivot = recommended_measures_df.pivot( - index='property_id', - columns='measure_type', - values='estimated_cost' + index="property_id", columns="measure_type", values="estimated_cost" ) recommendations_measures_pivot = recommendations_measures_pivot.reset_index() recommendations_measures_pivot = recommendations_measures_pivot.fillna(0) @@ -299,30 +391,58 @@ def app(): for c in recommendations_measures_pivot.columns: if c == "property_id": continue - recommendations_measures_pivot["Recommendation: " + c] = recommendations_measures_pivot[c] > 0 + recommendations_measures_pivot["Recommendation: " + c] = ( + recommendations_measures_pivot[c] > 0 + ) # We now create a final output - df = 
properties_df[ - [ - "property_id", "uprn", "address", "postcode", "property_type", "walls", "roof", "heating", "windows", - "current_epc_rating", "current_sap_points", "total_floor_area", "number_of_rooms", - "co2_emissions", "current_energy_demand", "current_energy_demand_heating_hotwater", - "heating_cost_current", "hot_water_cost_current", "lighting_cost_current", - "appliances_cost_current", "gas_standing_charge", "electricity_standing_charge" + df = ( + properties_df[ + [ + "property_id", + "uprn", + "address", + "postcode", + "property_type", + "walls", + "roof", + "heating", + "windows", + "current_epc_rating", + "current_sap_points", + "total_floor_area", + "number_of_rooms", + "co2_emissions", + "current_energy_demand", + "current_energy_demand_heating_hotwater", + "heating_cost_current", + "hot_water_cost_current", + "lighting_cost_current", + "appliances_cost_current", + "gas_standing_charge", + "electricity_standing_charge", + ] ] - ].merge( - recommendations_measures_pivot, how="left", on="property_id" - ).merge( - aggregated_metrics, how="left", on="property_id" + .merge(recommendations_measures_pivot, how="left", on="property_id") + .merge(aggregated_metrics, how="left", on="property_id") ) df["bills_total_cost"] = ( - df["heating_cost_current"] + df["hot_water_cost_current"] + df["lighting_cost_current"] + - df["appliances_cost_current"] + df["gas_standing_charge"] + df["electricity_standing_charge"] + df["heating_cost_current"] + + df["hot_water_cost_current"] + + df["lighting_cost_current"] + + df["appliances_cost_current"] + + df["gas_standing_charge"] + + df["electricity_standing_charge"] ) df = df.drop(columns=["property_id"]) - for c in ["sap_points", "co2_equivalent_savings", "energy_cost_savings", "kwh_savings"]: + for c in [ + "sap_points", + "co2_equivalent_savings", + "energy_cost_savings", + "kwh_savings", + ]: df[c] = df[c].fillna(0) df = df.rename( @@ -345,16 +465,23 @@ def app(): # Calculate post SAP df["Predicted Post Works SAP"] = 
df["Current SAP Points"] + df["sap_points"] df["Predicted Post Works SAP"] = df["Predicted Post Works SAP"].round() - df["Predicted Post Works EPC"] = df["Predicted Post Works SAP"].apply(lambda x: sap_to_epc(x)) + df["Predicted Post Works EPC"] = df["Predicted Post Works SAP"].apply( + lambda x: sap_to_epc(x) + ) # Calculate the relative savings on carbon, kwh, and bills - df["relative_carbon_savings"] = df["co2_equivalent_savings"] / df["co2_emissions"] + df["relative_carbon_savings"] = ( + df["co2_equivalent_savings"] / df["co2_emissions"] + ) df["relative_kwh_savings"] = df["kwh_savings"] / df["current_energy_demand"] df["relative_bill_savings"] = df["energy_cost_savings"] / df["bills_total_cost"] # Add on the archetype df = df.merge( - property_asset_data[["uprn", "archetype_group"]], how="left", left_on="UPRN", right_on="uprn" + property_asset_data[["uprn", "archetype_group"]], + how="left", + left_on="UPRN", + right_on="uprn", ) # For properties that don't make it to EPC B, check why. E.g. 
for a property that has an oil boiler, it @@ -387,7 +514,9 @@ def app(): printing_scenario_id = scenario_ids[0] # EPC breakdown - print(scenario_data[printing_scenario_id]['Predicted Post Works EPC'].value_counts()) + print( + scenario_data[printing_scenario_id]["Predicted Post Works EPC"].value_counts() + ) # Cost # Total cost print(scenario_data[printing_scenario_id]["total_cost"].sum()) @@ -408,16 +537,24 @@ def app(): measure_details = {} for scenario in scenario_ids: measure_details[scenario] = {} - recommendation_cols = [c for c in scenario_data[scenario].columns if "Recommendation:" in c] - measure_details[scenario]["count"] = scenario_data[scenario][recommendation_cols].sum().to_dict() + recommendation_cols = [ + c for c in scenario_data[scenario].columns if "Recommendation:" in c + ] + measure_details[scenario]["count"] = ( + scenario_data[scenario][recommendation_cols].sum().to_dict() + ) # Get average cost per measure measure_columns = [ - c.split("Recommendation: ")[1] for c in scenario_data[scenario].columns if "Recommendation:" in c + c.split("Recommendation: ")[1] + for c in scenario_data[scenario].columns + if "Recommendation:" in c ] # Take the mean, drop zero columns measure_costs = {} for m in measure_columns: - measure_costs[m] = float(scenario_data[scenario][scenario_data[scenario][m] > 0][m].mean()) + measure_costs[m] = float( + scenario_data[scenario][scenario_data[scenario][m] > 0][m].mean() + ) measure_details[scenario]["cost_per_measure"] = measure_costs pprint(measure_details[scenario_ids[0]]["count"]) @@ -452,12 +589,27 @@ def app(): for scenario in scenario_ids: df = scenario_data[scenario].copy() - avg_savings = df[ - ["sap_points", "co2_equivalent_savings", "energy_cost_savings", "kwh_savings", "estimated_cost", - "total_cost", "contingency"] - ].mean().to_dict() - avg_savings["cost_per_sap_point"] = avg_savings["total_cost"] / avg_savings["sap_points"] - avg_savings["cost_per_carbon"] = avg_savings["total_cost"] / 
avg_savings["co2_equivalent_savings"] + avg_savings = ( + df[ + [ + "sap_points", + "co2_equivalent_savings", + "energy_cost_savings", + "kwh_savings", + "estimated_cost", + "total_cost", + "contingency", + ] + ] + .mean() + .to_dict() + ) + avg_savings["cost_per_sap_point"] = ( + avg_savings["total_cost"] / avg_savings["sap_points"] + ) + avg_savings["cost_per_carbon"] = ( + avg_savings["total_cost"] / avg_savings["co2_equivalent_savings"] + ) scenario_metrics[scenario] = avg_savings pprint(scenario_metrics[scenario_ids[0]]) @@ -465,11 +617,11 @@ def app(): scenario_data[scenario_ids[0]]["loft_insulation"][ scenario_data[scenario_ids[0]]["loft_insulation"] > 0 - ].mean() + ].mean() scenario_data[scenario_ids[0]]["cavity_wall_insulation"][ scenario_data[scenario_ids[0]]["cavity_wall_insulation"] > 0 - ].mean() + ].mean() # Testing checking floor risk @@ -477,11 +629,7 @@ def app(): def get_flood_risk(lat, lon, radius_km=1): url = "https://environment.data.gov.uk/flood-monitoring/id/floods" - params = { - 'lat': lat, - 'long': lon, - 'dist': radius_km # search radius in km - } + params = {"lat": lat, "long": lon, "dist": radius_km} # search radius in km response = requests.get(url, params=params) response.raise_for_status() @@ -495,20 +643,19 @@ def app(): print(f"{len(flood_warnings)} warning(s) found near the location:") for warning in flood_warnings: print(f"- Area: {warning.get('description')}") - print(f" Severity: {warning.get('severity')} (Level {warning.get('severityLevel')})") + print( + f" Severity: {warning.get('severity')} (Level {warning.get('severityLevel')})" + ) print(f" Message changed at: {warning.get('timeMessageChanged')}") print() return flood_warnings from shapely.geometry import shape, Point + def get_flood_areas_near_point(lat, lon, radius_km=2): url = "https://environment.data.gov.uk/flood-monitoring/id/floodAreas" - params = { - 'lat': lat, - 'long': lon, - 'dist': radius_km - } + params = {"lat": lat, "long": lon, "dist": radius_km} 
response = requests.get(url, params=params) response.raise_for_status() @@ -531,7 +678,7 @@ def app(): if not features: continue - flood_polygon = shape(features[0]['geometry']) + flood_polygon = shape(features[0]["geometry"]) try: is_inside = flood_polygon.contains(point) @@ -539,12 +686,17 @@ def app(): is_inside = False if is_inside: - print(f"📍 Point is inside flood area: {area['label']} ({area['notation']})") + print( + f"📍 Point is inside flood area: {area['label']} ({area['notation']})" + ) return area from tqdm import tqdm + floor_warnings_data = [] - for _, property in tqdm(property_asset_data.iterrows(), total=len(property_asset_data)): + for _, property in tqdm( + property_asset_data.iterrows(), total=len(property_asset_data) + ): # warnings = floor_warnings_data.extend( # get_flood_risk(lat=property["LATITUDE"], lon=property["LONGITUDE"], radius_km=1) # ) @@ -556,7 +708,7 @@ def app(): "uprn": property["uprn"], "address": property["address"], "postcode": property["postcode"], - "area": resp + "area": resp, } ) continue @@ -570,7 +722,7 @@ def app(): "House_Cavity_Uninsulated_Pitched roof_Post 1970", "other", "House_System_Uninsulated_Pitched roof_Pre 1970", - "House_Solid_Uninsulated_Not Pitched Roof_Pre 1970" + "House_Solid_Uninsulated_Not Pitched Roof_Pre 1970", ] values = [62, 36, 21, 16, 16, 4, 2] @@ -582,36 +734,39 @@ def app(): "Cavity wall insulation, ventilation", "Bespoke retrofit measures", "External wall insulation, roof insulation", - "Flat roof insulation, internal wall insulation" + "Flat roof insulation, internal wall insulation", ] - fig = go.Figure(go.Treemap( - labels=labels, - parents=[""] * len(labels), # No root - values=values, - hovertext=hovertext, - hoverinfo="text", - textinfo="none", - marker=dict( - line=dict(color="white", width=4), - colors=values, - colorscale="Blues" + fig = go.Figure( + go.Treemap( + labels=labels, + parents=[""] * len(labels), # No root + values=values, + hovertext=hovertext, + hoverinfo="text", + 
textinfo="none", + marker=dict( + line=dict(color="white", width=4), colors=values, colorscale="Blues" + ), ) - )) + ) fig.update_layout( - margin=dict(t=10, l=10, r=10, b=10), - plot_bgcolor="white", - paper_bgcolor="white" + margin=dict(t=10, l=10, r=10, b=10), plot_bgcolor="white", paper_bgcolor="white" ) fig.show() # Get the recommended measures by scenario id - recommendation_cols = [c for c in scenario_data[scenario_ids[1]].columns if "Recommendation:" in c] - measure_counts_by_scenario = scenario_data[scenario_ids[1]].groupby("archetype_group")[ - recommendation_cols - ].sum().reset_index() + recommendation_cols = [ + c for c in scenario_data[scenario_ids[1]].columns if "Recommendation:" in c + ] + measure_counts_by_scenario = ( + scenario_data[scenario_ids[1]] + .groupby("archetype_group")[recommendation_cols] + .sum() + .reset_index() + ) measure_counts_by_scenario.to_csv( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MOD/Pilot Programme/measure_counts_by_scenario.csv" @@ -630,15 +785,13 @@ def app(): to_append = {"uprn": uprn} for _id in scenario_ids: - scenario = scenario_data[_id][ - scenario_data[_id]["uprn"] == uprn - ].squeeze() + scenario = scenario_data[_id][scenario_data[_id]["uprn"] == uprn].squeeze() val = PropertyValuation.estimate_valuation_improvement( current_value=x["valuation"], current_epc=scenario["Current EPC Rating"].value, target_epc=scenario["Predicted Post Works EPC"], - total_cost=None + total_cost=None, ) to_append[_id] = val["average_increase"] diff --git a/etl/customers/newhaven/slides.py b/etl/customers/newhaven/slides.py index 45108fec..efedb844 100644 --- a/etl/customers/newhaven/slides.py +++ b/etl/customers/newhaven/slides.py @@ -3,7 +3,12 @@ import pandas as pd import numpy as np from sqlalchemy.orm import sessionmaker from backend.app.db.connection import db_engine -from backend.app.db.models.recommendations import Recommendation, Plan, PlanRecommendations, Scenario +from backend.app.db.models.recommendations 
import ( + Recommendation, + PlanModel, + PlanRecommendations, + ScenarioModel, +) from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel from utils.s3 import read_csv_from_s3 @@ -13,56 +18,79 @@ def get_data(portfolio_id, scenario_ids): session.begin() # Get properties and their details for a specific portfolio - properties_query = session.query( - PropertyModel, - PropertyDetailsEpcModel - ).join( - PropertyDetailsEpcModel, PropertyModel.id == PropertyDetailsEpcModel.property_id - ).filter( - PropertyModel.portfolio_id == portfolio_id # Filter by portfolio ID - ).all() + properties_query = ( + session.query(PropertyModel, PropertyDetailsEpcModel) + .join( + PropertyDetailsEpcModel, + PropertyModel.id == PropertyDetailsEpcModel.property_id, + ) + .filter(PropertyModel.portfolio_id == portfolio_id) # Filter by portfolio ID + .all() + ) # Transform properties data to include all fields dynamically properties_data = [ - {**{col.name: getattr(prop.PropertyModel, col.name) for col in PropertyModel.__table__.columns}, - **{col.name: getattr(prop.PropertyDetailsEpcModel, col.name) for col in - PropertyDetailsEpcModel.__table__.columns}} + { + **{ + col.name: getattr(prop.PropertyModel, col.name) + for col in PropertyModel.__table__.columns + }, + **{ + col.name: getattr(prop.PropertyDetailsEpcModel, col.name) + for col in PropertyDetailsEpcModel.__table__.columns + }, + } for prop in properties_query ] # Get property IDs from fetched properties # Get plans linked to the fetched properties - plans_query = session.query(Plan).filter(Plan.scenario_id.in_(scenario_ids)).all() + plans_query = ( + session.query(PlanModel).filter(PlanModel.scenario_id.in_(scenario_ids)).all() + ) # Transform plans data to include all fields dynamically plans_data = [ - {col.name: getattr(plan, col.name) for col in Plan.__table__.columns} + {col.name: getattr(plan, col.name) for col in PlanModel.__table__.columns} for plan in plans_query ] # Extract plan IDs for 
filtering recommendations through PlanRecommendations - plan_ids = [plan['id'] for plan in plans_data] + plan_ids = [plan["id"] for plan in plans_data] # Get recommendations through PlanRecommendations for those plans and that are default - recommendations_query = session.query( - Recommendation, - Plan.scenario_id - ).join( - PlanRecommendations, Recommendation.id == PlanRecommendations.recommendation_id - ).join( - Plan, Plan.id == PlanRecommendations.plan_id # Join with Plan to access scenario_id - ).filter( - PlanRecommendations.plan_id.in_(plan_ids), - Recommendation.default == True # Filtering for default recommendations - ).all() + recommendations_query = ( + session.query(Recommendation, PlanModel.scenario_id) + .join( + PlanRecommendations, + Recommendation.id == PlanRecommendations.recommendation_id, + ) + .join( + PlanModel, + PlanModel.id + == PlanRecommendations.plan_id, # Join with Plan to access scenario_id + ) + .filter( + PlanRecommendations.plan_id.in_(plan_ids), + Recommendation.default == True, # Filtering for default recommendations + ) + .all() + ) # Transform recommendations data to include all fields dynamically and include scenario_id recommendations_data = [ - {**{col.name: getattr(rec.Recommendation, col.name) if hasattr(rec, 'Recommendation') else getattr(rec, - col.name) for - col in Recommendation.__table__.columns}, - "Scenario ID": rec.scenario_id} + { + **{ + col.name: ( + getattr(rec.Recommendation, col.name) + if hasattr(rec, "Recommendation") + else getattr(rec, col.name) + ) + for col in Recommendation.__table__.columns + }, + "Scenario ID": rec.scenario_id, + } for rec in recommendations_query ] @@ -71,7 +99,9 @@ def get_data(portfolio_id, scenario_ids): return properties_data, plans_data, recommendations_data -def estimate_post_retrofit_heating_hotwater_kwh(properties_df, recommendations_df, scenario_ids): +def estimate_post_retrofit_heating_hotwater_kwh( + properties_df, recommendations_df, scenario_ids +): # 
properties_starting_with_electric_heating = properties_df[ # properties_df["mainfuel"].isin( # ["Electricity not community", "Electricity electricity unspecified tariff"] @@ -85,20 +115,29 @@ def estimate_post_retrofit_heating_hotwater_kwh(properties_df, recommendations_d for scenario_id in scenario_ids: # Get the recommendations for the scenario, default scenario_recommendations = recommendations_df[ - (recommendations_df["Scenario ID"] == scenario_id) & - (recommendations_df["default"] == True) - ].copy() + (recommendations_df["Scenario ID"] == scenario_id) + & (recommendations_df["default"] == True) + ].copy() - scenario_recommendations['ligting_kwh'] = scenario_recommendations.apply( - lambda x: x['kwh_savings'] if x['type'] == 'low_energy_lighting' else 0, - axis=1) - scenario_recommendations['solar_kwh'] = scenario_recommendations.apply( - lambda x: x['kwh_savings'] if x['type'] == 'solar_pv' else 0, axis=1) + scenario_recommendations["ligting_kwh"] = scenario_recommendations.apply( + lambda x: x["kwh_savings"] if x["type"] == "low_energy_lighting" else 0, + axis=1, + ) + scenario_recommendations["solar_kwh"] = scenario_recommendations.apply( + lambda x: x["kwh_savings"] if x["type"] == "solar_pv" else 0, axis=1 + ) # Set 'Estimated Kwh Savings' to zero where specific kwh columns are used - scenario_recommendations['Estimated Kwh Savings'] = scenario_recommendations.apply( - lambda x: 0 if x['type'] in ['low_energy_lighting', 'solar_pv'] else x[ - 'kwh_savings'], axis=1) + scenario_recommendations["Estimated Kwh Savings"] = ( + scenario_recommendations.apply( + lambda x: ( + 0 + if x["type"] in ["low_energy_lighting", "solar_pv"] + else x["kwh_savings"] + ), + axis=1, + ) + ) # We need to determine if any of the properties start with electric heating or end with it # property_electric_heating = [] @@ -112,51 +151,76 @@ def estimate_post_retrofit_heating_hotwater_kwh(properties_df, recommendations_d # property_electric_heating.append(pid) # continue - 
grouped_data = scenario_recommendations.groupby(['property_id']).agg({ - 'Estimated Kwh Savings': 'sum', - 'ligting_kwh': 'sum', - 'solar_kwh': 'sum', - "estimated_cost": "sum" - }).reset_index() + grouped_data = ( + scenario_recommendations.groupby(["property_id"]) + .agg( + { + "Estimated Kwh Savings": "sum", + "ligting_kwh": "sum", + "solar_kwh": "sum", + "estimated_cost": "sum", + } + ) + .reset_index() + ) comparison = properties_df.drop_duplicates().merge( grouped_data, on=["property_id"], how="left" ) comparison["Post Retrofit Heating & Hotwater kwh"] = ( - comparison["current_energy_demand_heating_hotwater"] - \ - comparison["Estimated Kwh Savings"] + comparison["current_energy_demand_heating_hotwater"] + - comparison["Estimated Kwh Savings"] ) - avgs = comparison[['current_energy_demand_heating_hotwater', 'Post Retrofit Heating & Hotwater kwh']].mean() + avgs = comparison[ + [ + "current_energy_demand_heating_hotwater", + "Post Retrofit Heating & Hotwater kwh", + ] + ].mean() # We now, for properties that have a plan, do a before and after with_savings = comparison[~pd.isnull(comparison["Estimated Kwh Savings"])] avgs2 = with_savings[ - ['current_energy_demand_heating_hotwater', 'Post Retrofit Heating & Hotwater kwh']].mean() - avgs2["difference"] = avgs2["current_energy_demand_heating_hotwater"] - avgs2[ - "Post Retrofit Heating & Hotwater kwh"] - avgs2["percentage_reduction"] = 100 * avgs2["difference"] / avgs2["current_energy_demand_heating_hotwater"] + [ + "current_energy_demand_heating_hotwater", + "Post Retrofit Heating & Hotwater kwh", + ] + ].mean() + avgs2["difference"] = ( + avgs2["current_energy_demand_heating_hotwater"] + - avgs2["Post Retrofit Heating & Hotwater kwh"] + ) + avgs2["percentage_reduction"] = ( + 100 * avgs2["difference"] / avgs2["current_energy_demand_heating_hotwater"] + ) # We also calculate the cost per kwh saves total_kwh_saved = ( - with_savings["Estimated Kwh Savings"].sum() + - with_savings["ligting_kwh"].sum() + - 
with_savings["solar_kwh"].sum() + with_savings["Estimated Kwh Savings"].sum() + + with_savings["ligting_kwh"].sum() + + with_savings["solar_kwh"].sum() ) total_cost = with_savings["estimated_cost"].sum() cost_per_kwh_saved = total_cost / total_kwh_saved scenario_comparison_df.append({"scenario_id": scenario_id, **avgs}) scenario_comparison_df_2.append({"scenario_id": scenario_id, **avgs2}) - cost_per_kwh_saved_table.append({"scenario_id": scenario_id, "cost_per_kwh_saved": cost_per_kwh_saved}) + cost_per_kwh_saved_table.append( + {"scenario_id": scenario_id, "cost_per_kwh_saved": cost_per_kwh_saved} + ) scenario_comparison_population = pd.DataFrame(scenario_comparison_df) scenario_comparison_retrofitted_units = pd.DataFrame(scenario_comparison_df_2) cost_per_kwh_saved_table = pd.DataFrame(cost_per_kwh_saved_table) - return scenario_comparison_population, scenario_comparison_retrofitted_units, cost_per_kwh_saved_table + return ( + scenario_comparison_population, + scenario_comparison_retrofitted_units, + cost_per_kwh_saved_table, + ) def slides(): @@ -167,7 +231,9 @@ def slides(): # Look at one scenario at a time, otherwise this is agony scenario_ids = [47, 48, 49, 50, 51] - properties_data, plans_data, recommendations_data = get_data(portfolio_id, scenario_ids) + properties_data, plans_data, recommendations_data = get_data( + portfolio_id, scenario_ids + ) properties_df = pd.DataFrame(properties_data) plans_df = pd.DataFrame(plans_data) @@ -177,16 +243,19 @@ def slides(): raise ValueError("The number of unique properties is not 2553") # Q1: What is the baseline heating and energy demand for the properties in the portfolio - baseline? 
- heating_hotwater_kwh = ( - properties_df[['current_energy_demand', 'current_energy_demand_heating_hotwater']] - .mean() - ) + heating_hotwater_kwh = properties_df[ + ["current_energy_demand", "current_energy_demand_heating_hotwater"] + ].mean() # Q2: For each scenario, what is for what is the heating and hot water kwh after retrofit, on the entire # popoulation (incl those without retrofit) and for just those being retrofit # We also calculat the cost per kwh saved - scenario_comparison_population, scenario_comparison_retrofitted_units, cost_per_kwh_saved_table = ( - estimate_post_retrofit_heating_hotwater_kwh(properties_df, recommendations_df, scenario_ids) + ( + scenario_comparison_population, + scenario_comparison_retrofitted_units, + cost_per_kwh_saved_table, + ) = estimate_post_retrofit_heating_hotwater_kwh( + properties_df, recommendations_df, scenario_ids ) # Q3: For each scenario, we want to answer what the heating and hot water kwh looks like after retrofit @@ -194,42 +263,55 @@ def slides(): # By property - recommendations_df["type_mapped"] = recommendations_df["type"].copy().replace( - { - "loft_insulation": "roof_insulation", - "room_roof_insulation": "roof_insulation", - "flat_roof_insulation": "roof_insulation", - "hot_water_tank_insulation": "other", - "cylinder_thermostat": "other", - "sealing_open_fireplace": "other", - "suspended_floor_insulation": "floor_insulation", - "solid_floor_insulation": "floor_insulation", - } + recommendations_df["type_mapped"] = ( + recommendations_df["type"] + .copy() + .replace( + { + "loft_insulation": "roof_insulation", + "room_roof_insulation": "roof_insulation", + "flat_roof_insulation": "roof_insulation", + "hot_water_tank_insulation": "other", + "cylinder_thermostat": "other", + "sealing_open_fireplace": "other", + "suspended_floor_insulation": "floor_insulation", + "solid_floor_insulation": "floor_insulation", + } + ) ) recommendations_df["type_mapped"] = np.where( 
recommendations_df["description"].str.contains("air source heat pump"), "air_source_heat_pump", - recommendations_df["type_mapped"] + recommendations_df["type_mapped"], ) # Group by 'Plan Name' and 'Recommendation Type' and count unique 'Property ID' - recommendation_summary = recommendations_df[recommendations_df["default"] == True].groupby( - ['Scenario ID', 'type_mapped'] - ).agg({ - 'property_id': 'nunique' - }).reset_index() + recommendation_summary = ( + recommendations_df[recommendations_df["default"] == True] + .groupby(["Scenario ID", "type_mapped"]) + .agg({"property_id": "nunique"}) + .reset_index() + ) - recommendation_summary.columns = ['Scenario ID', 'Type Mapped', 'Number of Properties'] + recommendation_summary.columns = [ + "Scenario ID", + "Type Mapped", + "Number of Properties", + ] recommendation_summary["Percentage of Properties"] = 100 * ( recommendation_summary["Number of Properties"] / properties_df["id"].nunique() ) - recommendation_summary_final_scenario = recommendation_summary[recommendation_summary["Scenario ID"].isin([51])] + recommendation_summary_final_scenario = recommendation_summary[ + recommendation_summary["Scenario ID"].isin([51]) + ] # MVP implementation of funding estimation for the most basic scenario, using GBIS - project_scores_matrix = pd.read_csv("/Users/khalimconn-kowlessar/Downloads/ECO4 Full Project Scores Matrix.csv") + project_scores_matrix = pd.read_csv( + "/Users/khalimconn-kowlessar/Downloads/ECO4 Full Project Scores Matrix.csv" + ) def find_abs(sap_movement, starting_sap, floor_area): starting_band = find_band(starting_sap) @@ -238,7 +320,7 @@ def slides(): return 0 if floor_area <= 72: - floor_area_segment = '0-72' + floor_area_segment = "0-72" elif (floor_area > 72) and (floor_area <= 97): floor_area_segment = "73-97" elif (floor_area > 97) and (floor_area <= 199): @@ -247,26 +329,26 @@ def slides(): floor_area_segment = "200+" return project_scores_matrix[ - (project_scores_matrix["Floor Area Segment"] == 
floor_area_segment) & - (project_scores_matrix["Starting Band"] == starting_band) & - (project_scores_matrix["Finishing Band"] == finishing_band) - ].squeeze()["Cost Savings"] + (project_scores_matrix["Floor Area Segment"] == floor_area_segment) + & (project_scores_matrix["Starting Band"] == starting_band) + & (project_scores_matrix["Finishing Band"] == finishing_band) + ].squeeze()["Cost Savings"] eco4_scores_sap_table = [ - {'Band': 'High_A', 'From': 96.0, 'Up to': 100.0, 'Mid-point': 98.0}, - {'Band': 'Low_A', 'From': 92.0, 'Up to': 96.0, 'Mid-point': 94.0}, - {'Band': 'High_B', 'From': 86.0, 'Up to': 91.0, 'Mid-point': 88.5}, - {'Band': 'Low_B', 'From': 81.0, 'Up to': 86.0, 'Mid-point': 83.5}, - {'Band': 'High_C', 'From': 74.5, 'Up to': 80.0, 'Mid-point': 77.25}, - {'Band': 'Low_C', 'From': 69.0, 'Up to': 74.5, 'Mid-point': 71.75}, - {'Band': 'High_D', 'From': 61.5, 'Up to': 68.0, 'Mid-point': 64.75}, - {'Band': 'Low_D', 'From': 55.0, 'Up to': 61.5, 'Mid-point': 58.25}, - {'Band': 'High_E', 'From': 46.5, 'Up to': 54.0, 'Mid-point': 50.25}, - {'Band': 'Low_E', 'From': 39.0, 'Up to': 46.5, 'Mid-point': 42.75}, - {'Band': 'High_F', 'From': 29.5, 'Up to': 38.0, 'Mid-point': 33.75}, - {'Band': 'Low_F', 'From': 21.0, 'Up to': 29.5, 'Mid-point': 25.25}, - {'Band': 'High_G', 'From': 10.5, 'Up to': 20.0, 'Mid-point': 15.25}, - {'Band': 'Low_G', 'From': 1.0, 'Up to': 10.5, 'Mid-point': 5.75} + {"Band": "High_A", "From": 96.0, "Up to": 100.0, "Mid-point": 98.0}, + {"Band": "Low_A", "From": 92.0, "Up to": 96.0, "Mid-point": 94.0}, + {"Band": "High_B", "From": 86.0, "Up to": 91.0, "Mid-point": 88.5}, + {"Band": "Low_B", "From": 81.0, "Up to": 86.0, "Mid-point": 83.5}, + {"Band": "High_C", "From": 74.5, "Up to": 80.0, "Mid-point": 77.25}, + {"Band": "Low_C", "From": 69.0, "Up to": 74.5, "Mid-point": 71.75}, + {"Band": "High_D", "From": 61.5, "Up to": 68.0, "Mid-point": 64.75}, + {"Band": "Low_D", "From": 55.0, "Up to": 61.5, "Mid-point": 58.25}, + {"Band": "High_E", "From": 
46.5, "Up to": 54.0, "Mid-point": 50.25}, + {"Band": "Low_E", "From": 39.0, "Up to": 46.5, "Mid-point": 42.75}, + {"Band": "High_F", "From": 29.5, "Up to": 38.0, "Mid-point": 33.75}, + {"Band": "Low_F", "From": 21.0, "Up to": 29.5, "Mid-point": 25.25}, + {"Band": "High_G", "From": 10.5, "Up to": 20.0, "Mid-point": 15.25}, + {"Band": "Low_G", "From": 1.0, "Up to": 10.5, "Mid-point": 5.75}, ] eco4_scores_sap_table = pd.DataFrame(eco4_scores_sap_table) @@ -274,8 +356,9 @@ def slides(): # Iterate through each row in the DataFrame to find the correct band value_floored = np.floor(value) return eco4_scores_sap_table[ - (eco4_scores_sap_table["From"] <= value_floored) & (eco4_scores_sap_table["Up to"] >= value_floored) - ].squeeze()["Band"] + (eco4_scores_sap_table["From"] <= value_floored) + & (eco4_scores_sap_table["Up to"] >= value_floored) + ].squeeze()["Band"] def identify_funding_measure(p, p_recs, is_social): measures = ["cavity_wall_insulation", "loft_insulation"] @@ -287,15 +370,17 @@ def slides(): project_abs = find_abs( sap_movement=funding_measure["sap_points"], starting_sap=p["current_sap_points"], - floor_area=p["total_floor_area"] + floor_area=p["total_floor_area"], + ) + property_abs.append( + { + "property_id": p["property_id"], + "measure": funding_measure["type"], + "cost": funding_measure["estimated_cost"], + "abs": project_abs, + "is_social": is_social, + } ) - property_abs.append({ - "property_id": p["property_id"], - "measure": funding_measure["type"], - "cost": funding_measure["estimated_cost"], - "abs": project_abs, - "is_social": is_social - }) if not property_abs: return None @@ -351,7 +436,9 @@ def slides(): band_b_proportion = 0.195 band_c_proportion = 0.219 band_d_proportion = 0.156 - a_to_d_proportion = band_a_proportion + band_b_proportion + band_c_proportion + band_d_proportion + a_to_d_proportion = ( + band_a_proportion + band_b_proportion + band_c_proportion + band_d_proportion + ) benefits_proportion = 0.51 @@ -360,20 +447,26 @@ def 
slides(): # We scale the private funding based on these two factors private_funding_scaled = private_funding * benefits_proportion * a_to_d_proportion - n_private_projects = np.round((~funding["is_social"]).sum() * benefits_proportion * a_to_d_proportion) + n_private_projects = np.round( + (~funding["is_social"]).sum() * benefits_proportion * a_to_d_proportion + ) # Look at the impact of EWI for scenario ewi_jobs = recommendations_df[ - (recommendations_df["Scenario ID"] == 49) & (recommendations_df["type"] == "external_wall_insulation") - ] + (recommendations_df["Scenario ID"] == 49) + & (recommendations_df["type"] == "external_wall_insulation") + ] ewi_jobs["estimated_cost"].sum() has_cavity = recommendations_df[ - (recommendations_df["type"] == "cavity_wall_insulation") & (recommendations_df["Scenario ID"] == 47) - ] + (recommendations_df["type"] == "cavity_wall_insulation") + & (recommendations_df["Scenario ID"] == 47) + ] # Take the some properties in this - cavity_units = properties_df[properties_df["property_id"].isin(has_cavity["property_id"].values)] + cavity_units = properties_df[ + properties_df["property_id"].isin(has_cavity["property_id"].values) + ] cavity_units[cavity_units.index == 3][["uprn", "property_id"]] @@ -381,41 +474,52 @@ def slides(): # Recommenation type by kwh savings per unit recommendations_final_scenario = recommendations_df[ - recommendations_df["Scenario ID"].isin([51]) & - (recommendations_df["default"] == True) - ].copy() + recommendations_df["Scenario ID"].isin([51]) + & (recommendations_df["default"] == True) + ].copy() # Merge on floor area recommendations_final_scenario = recommendations_final_scenario.merge( properties_df[["property_id", "total_floor_area"]], on="property_id", how="left" ) recommendations_final_scenario = recommendations_final_scenario[ - ~pd.isnull(recommendations_final_scenario["total_floor_area"])] - recommendations_final_scenario["kwh_savings_per_unit"] = recommendations_final_scenario["kwh_savings"] / \ 
- recommendations_final_scenario["total_floor_area"] - - recommendations_final_scenario["type_mapped2"] = recommendations_df["type"].copy().replace( - { - "room_roof_insulation": "roof_insulation", - "flat_roof_insulation": "roof_insulation", - "hot_water_tank_insulation": "other", - "cylinder_thermostat": "other", - "sealing_open_fireplace": "other", - "suspended_floor_insulation": "floor_insulation", - "solid_floor_insulation": "floor_insulation", - } + ~pd.isnull(recommendations_final_scenario["total_floor_area"]) + ] + recommendations_final_scenario["kwh_savings_per_unit"] = ( + recommendations_final_scenario["kwh_savings"] + / recommendations_final_scenario["total_floor_area"] ) - aggs = recommendations_final_scenario.groupby("type_mapped")[ - ["kwh_savings_per_unit", "estimated_cost"]].mean().reset_index().sort_values( - "kwh_savings_per_unit", ascending=False + recommendations_final_scenario["type_mapped2"] = ( + recommendations_df["type"] + .copy() + .replace( + { + "room_roof_insulation": "roof_insulation", + "flat_roof_insulation": "roof_insulation", + "hot_water_tank_insulation": "other", + "cylinder_thermostat": "other", + "sealing_open_fireplace": "other", + "suspended_floor_insulation": "floor_insulation", + "solid_floor_insulation": "floor_insulation", + } + ) + ) + + aggs = ( + recommendations_final_scenario.groupby("type_mapped")[ + ["kwh_savings_per_unit", "estimated_cost"] + ] + .mean() + .reset_index() + .sort_values("kwh_savings_per_unit", ascending=False) ) aggs["cost_per_kwh_saved"] = aggs["estimated_cost"] / aggs["kwh_savings_per_unit"] # Show more columns with pandas - pd.set_option('display.max_columns', None) + pd.set_option("display.max_columns", None) # Show more rows with pandas - pd.set_option('display.max_rows', None) + pd.set_option("display.max_rows", None) # Show more characters in a column - pd.set_option('display.max_colwidth', None) + pd.set_option("display.max_colwidth", None) def lewes_outputs(): @@ -427,12 +531,14 @@ def 
lewes_outputs(): """ # get the asset list - asset_list = read_csv_from_s3(bucket_name="retrofit-plan-inputs-dev", filepath="8/90/pilot.csv") + asset_list = read_csv_from_s3( + bucket_name="retrofit-plan-inputs-dev", filepath="8/90/pilot.csv" + ) asset_list = pd.DataFrame(asset_list) # Get non-invasive recommendations non_intrusive_recommendations = read_csv_from_s3( bucket_name="retrofit-plan-inputs-dev", - filepath="8/90/non_invasive_recommendations.csv" + filepath="8/90/non_invasive_recommendations.csv", ) non_intrusive_recommendations = pd.DataFrame(non_intrusive_recommendations) @@ -440,20 +546,21 @@ def lewes_outputs(): portfolio_id = 90 # Look at one scenario at a time, otherwise this is agony scenario_ids = [47, 48, 49, 50, 51] - properties_data, plans_data, recommendations_data = get_data(portfolio_id, scenario_ids) + properties_data, plans_data, recommendations_data = get_data( + portfolio_id, scenario_ids + ) properties_df = pd.DataFrame(properties_data) recommendations_df = pd.DataFrame(recommendations_data) # Unnest this import ast + survey_recs = [] for _, row in non_intrusive_recommendations.iterrows(): recs = ast.literal_eval(row["recommendations"]) ashp_rec = next((r for r in recs if r["type"] == "air_source_heat_pump"), None) solar_rec = next((r for r in recs if r["type"] == "solar_pv"), None) - to_append = { - "uprn": row["uprn"] - } + to_append = {"uprn": row["uprn"]} if ashp_rec["suitable"]: to_append = { **to_append, @@ -479,44 +586,57 @@ def lewes_outputs(): domna_kwh = 10850 scaling_factor = vital_kwh / domna_kwh - next_gen_dataset = properties_df[[ - "uprn", "address", "postcode", - "property_type", "built_form", "current_energy_demand_heating_hotwater", - "mainfuel", "total_floor_area", "floor_height" - ]].rename( - columns={ - "mainfuel": "primary_fuel_type", - "total_floor_area": "gross_floor_area", - "current_energy_demand_heating_hotwater": "estimated_heating_hotwater_kwh" - } - ).merge( - asset_list[["uprn", "number_of_floors"]], - 
how="left", - on="uprn" - ).merge( - survey_recs, - how="left", - on="uprn" + next_gen_dataset = ( + properties_df[ + [ + "uprn", + "address", + "postcode", + "property_type", + "built_form", + "current_energy_demand_heating_hotwater", + "mainfuel", + "total_floor_area", + "floor_height", + ] + ] + .rename( + columns={ + "mainfuel": "primary_fuel_type", + "total_floor_area": "gross_floor_area", + "current_energy_demand_heating_hotwater": "estimated_heating_hotwater_kwh", + } + ) + .merge(asset_list[["uprn", "number_of_floors"]], how="left", on="uprn") + .merge(survey_recs, how="left", on="uprn") ) next_gen_dataset["estimated_heating_hotwater_kwh_scaled"] = ( next_gen_dataset["estimated_heating_hotwater_kwh"] * scaling_factor ) next_gen_dataset["ashp_suitable"] = next_gen_dataset["ashp_suitable"].fillna(False) - next_gen_dataset["solar_suitable"] = next_gen_dataset["solar_suitable"].fillna(False) + next_gen_dataset["solar_suitable"] = next_gen_dataset["solar_suitable"].fillna( + False + ) # We prepare the scenario outputs by property type grouped_data = next_gen_dataset.copy() grouped_data["property_sub_type"] = grouped_data["built_form"].copy() # If a property is a flat, re-map sub_type just to flat - grouped_data.loc[grouped_data["property_type"] == "Flat", "property_sub_type"] = "Flat" + grouped_data.loc[grouped_data["property_type"] == "Flat", "property_sub_type"] = ( + "Flat" + ) # Same for maisonettes - grouped_data.loc[grouped_data["property_type"] == "Maisonette", "property_sub_type"] = "Maisonette" + grouped_data.loc[ + grouped_data["property_type"] == "Maisonette", "property_sub_type" + ] = "Maisonette" # We now pull out the recommendations impact by property type and sub type # Exclude sealing open fireplaces - recommendations_df = recommendations_df[recommendations_df["type"] != "sealing_open_fireplace"] + recommendations_df = recommendations_df[ + recommendations_df["type"] != "sealing_open_fireplace" + ] # We update the type column so that if type == 
heating, and the description contains "air source heat pump", # the type is "air_source_heat_pump", else if the description contains "high heat retention storage heaters", else @@ -532,108 +652,130 @@ def lewes_outputs(): np.where( recommendations_df["description"].str.contains("condensing boiler"), "Boiler Upgrade", - recommendations_df["type"] - ) - ) + recommendations_df["type"], + ), + ), ), - recommendations_df["type"] + recommendations_df["type"], ) recommendation_types = recommendations_df["type"].unique().tolist() rename_dict = { - 'hot_water_tank_insulation': 'Hot Water Tank Insulation', - 'windows_glazing': 'Windows Glazing', - 'secondary_heating': 'Secondary Heating', - 'cavity_wall_insulation': 'Cavity Wall Insulation', - 'flat_roof_insulation': 'Flat Roof Insulation', - 'mechanical_ventilation': 'Mechanical Ventilation', - 'loft_insulation': 'Loft Insulation', - 'cylinder_thermostat': 'Cylinder Thermostat', - 'room_roof_insulation': 'Room Roof Insulation', - 'low_energy_lighting': 'Low Energy Lighting', - 'external_wall_insulation': 'External Wall Insulation', - 'solar_pv': 'Solar PV', - 'heating_control': 'Heating Control', - 'solid_floor_insulation': 'Solid Floor Insulation', - 'suspended_floor_insulation': 'Suspended Floor Insulation', - 'internal_wall_insulation': 'Internal Wall Insulation' + "hot_water_tank_insulation": "Hot Water Tank Insulation", + "windows_glazing": "Windows Glazing", + "secondary_heating": "Secondary Heating", + "cavity_wall_insulation": "Cavity Wall Insulation", + "flat_roof_insulation": "Flat Roof Insulation", + "mechanical_ventilation": "Mechanical Ventilation", + "loft_insulation": "Loft Insulation", + "cylinder_thermostat": "Cylinder Thermostat", + "room_roof_insulation": "Room Roof Insulation", + "low_energy_lighting": "Low Energy Lighting", + "external_wall_insulation": "External Wall Insulation", + "solar_pv": "Solar PV", + "heating_control": "Heating Control", + "solid_floor_insulation": "Solid Floor Insulation", + 
"suspended_floor_insulation": "Suspended Floor Insulation", + "internal_wall_insulation": "Internal Wall Insulation", } property_scenario_impact = [] for scenario_id in tqdm(scenario_ids): # Get the recommendations for the scenario, default scenario_recommendations = recommendations_df[ - (recommendations_df["Scenario ID"] == scenario_id) & - (recommendations_df["default"] == True) - ].copy() + (recommendations_df["Scenario ID"] == scenario_id) + & (recommendations_df["default"] == True) + ].copy() - scenario_recommendations['Estimated Lighting kWh Savings'] = scenario_recommendations.apply( - lambda x: x['kwh_savings'] if x['type'] == 'low_energy_lighting' else 0, - axis=1) - scenario_recommendations['Estimated Solar kWh Savings'] = scenario_recommendations.apply( - lambda x: x['kwh_savings'] if x['type'] == 'solar_pv' else 0, axis=1) + scenario_recommendations["Estimated Lighting kWh Savings"] = ( + scenario_recommendations.apply( + lambda x: x["kwh_savings"] if x["type"] == "low_energy_lighting" else 0, + axis=1, + ) + ) + scenario_recommendations["Estimated Solar kWh Savings"] = ( + scenario_recommendations.apply( + lambda x: x["kwh_savings"] if x["type"] == "solar_pv" else 0, axis=1 + ) + ) # Set 'Estimated Kwh Savings' to zero where specific kwh columns are used - scenario_recommendations['Estimated Heating Demand kWh Savings'] = scenario_recommendations.apply( - lambda x: 0 if x['type'] in ['low_energy_lighting', 'solar_pv'] else x[ - 'kwh_savings'], axis=1) + scenario_recommendations["Estimated Heating Demand kWh Savings"] = ( + scenario_recommendations.apply( + lambda x: ( + 0 + if x["type"] in ["low_energy_lighting", "solar_pv"] + else x["kwh_savings"] + ), + axis=1, + ) + ) - scenario_grouped_data = scenario_recommendations.groupby(['property_id']).agg({ - 'Estimated Heating Demand kWh Savings': 'sum', - 'Estimated Lighting kWh Savings': 'sum', - 'Estimated Solar kWh Savings': 'sum', - "estimated_cost": "sum" - }).reset_index() + scenario_grouped_data = 
( + scenario_recommendations.groupby(["property_id"]) + .agg( + { + "Estimated Heating Demand kWh Savings": "sum", + "Estimated Lighting kWh Savings": "sum", + "Estimated Solar kWh Savings": "sum", + "estimated_cost": "sum", + } + ) + .reset_index() + ) comparison = properties_df.drop_duplicates()[ ["uprn", "property_id", "current_energy_demand_heating_hotwater"] - ].merge( - scenario_grouped_data, on=["property_id"], how="left" - ) - comparison["Estimated Heating Demand kWh Savings"] = ( - comparison["Estimated Heating Demand kWh Savings"].fillna(0) - ) - comparison["Estimated Lighting kWh Savings"] = ( - comparison["Estimated Lighting kWh Savings"].fillna(0) - ) - comparison["Estimated Solar kWh Savings"] = ( - comparison["Estimated Solar kWh Savings"].fillna(0) - ) + ].merge(scenario_grouped_data, on=["property_id"], how="left") + comparison["Estimated Heating Demand kWh Savings"] = comparison[ + "Estimated Heating Demand kWh Savings" + ].fillna(0) + comparison["Estimated Lighting kWh Savings"] = comparison[ + "Estimated Lighting kWh Savings" + ].fillna(0) + comparison["Estimated Solar kWh Savings"] = comparison[ + "Estimated Solar kWh Savings" + ].fillna(0) comparison["estimated_cost"] = comparison["estimated_cost"].fillna(0) comparison["post_scenario_heating_hotwater_kwh"] = ( - comparison["current_energy_demand_heating_hotwater"] - comparison["Estimated Heating Demand kWh Savings"] + comparison["current_energy_demand_heating_hotwater"] + - comparison["Estimated Heating Demand kWh Savings"] ) # For each scenario, we create a measure matrix measure_matrix = scenario_recommendations.pivot_table( - index='property_id', - columns='type', - values='id', # Using 'id' just as a placeholder for the pivot + index="property_id", + columns="type", + values="id", # Using 'id' just as a placeholder for the pivot aggfunc=lambda x: True, # If an ID exists for a given type, mark as True - fill_value=False # Fill other entries as False + fill_value=False, # Fill other entries 
as False ).reset_index() non_zero_heat_demand_impact = comparison[ - (comparison["Estimated Heating Demand kWh Savings"] > 0) | - (comparison["Estimated Lighting kWh Savings"] > 0) | - (comparison["Estimated Solar kWh Savings"] > 0) - ] + (comparison["Estimated Heating Demand kWh Savings"] > 0) + | (comparison["Estimated Lighting kWh Savings"] > 0) + | (comparison["Estimated Solar kWh Savings"] > 0) + ] measure_matrix = measure_matrix[ - measure_matrix["property_id"].isin(non_zero_heat_demand_impact["property_id"].values) + measure_matrix["property_id"].isin( + non_zero_heat_demand_impact["property_id"].values + ) ] measure_matrix = measure_matrix.rename(columns=rename_dict) - comparison = comparison.merge( - measure_matrix, on="property_id", how="left" - ) + comparison = comparison.merge(measure_matrix, on="property_id", how="left") comparison["scenario_id"] = scenario_id property_scenario_impact.append(comparison) property_scenario_impact = pd.concat(property_scenario_impact) # property_scenario_impact = property_scenario_impact.drop(columns=["property_id", "Estimated Kwh Savings"]) - for v in list(rename_dict.values()) + ["Air Source Heat Pump", "High Heat Retention Storage", "Boiler Upgrade"]: + for v in list(rename_dict.values()) + [ + "Air Source Heat Pump", + "High Heat Retention Storage", + "Boiler Upgrade", + ]: # Fill NaNs with False property_scenario_impact[v] = property_scenario_impact[v].fillna(False) @@ -642,18 +784,22 @@ def lewes_outputs(): property_scenario_impact["post_scenario_heating_hotwater_kwh"] * scaling_factor ) - grouped_data = grouped_data.merge( - property_scenario_impact, how="left", on="uprn" - ) + grouped_data = grouped_data.merge(property_scenario_impact, how="left", on="uprn") # Agg the data - grouped_data = grouped_data.groupby(["property_type", "property_sub_type", "scenario_id"]).agg({ - "estimated_heating_hotwater_kwh": "mean", - "estimated_heating_hotwater_kwh_scaled": "mean", - "estimated_cost": "mean", - 
"post_scenario_heating_hotwater_kwh": "mean", - "post_scenario_heating_hotwater_kwh_scaled": "mean" - }).reset_index() + grouped_data = ( + grouped_data.groupby(["property_type", "property_sub_type", "scenario_id"]) + .agg( + { + "estimated_heating_hotwater_kwh": "mean", + "estimated_heating_hotwater_kwh_scaled": "mean", + "estimated_cost": "mean", + "post_scenario_heating_hotwater_kwh": "mean", + "post_scenario_heating_hotwater_kwh_scaled": "mean", + } + ) + .reset_index() + ) scenario_names = pd.DataFrame( [ @@ -665,45 +811,40 @@ def lewes_outputs(): "scenario_id": 48, "scenario": "Demand reduction – no solid wall, floors or heating/renewables", }, - { - "scenario_id": 49, - "scenario": "Demand reduction – no decant" - }, + {"scenario_id": 49, "scenario": "Demand reduction – no decant"}, { "scenario_id": 50, "scenario": "Demand reduction – no decant + heating & solar", }, - { - "scenario_id": 51, - "scenario": "Whole house retrofit" - } + {"scenario_id": 51, "scenario": "Whole house retrofit"}, ] - ) - grouped_data = grouped_data.merge( - scenario_names, how="left", on="scenario_id" - ) + grouped_data = grouped_data.merge(scenario_names, how="left", on="scenario_id") if not grouped_data[ - grouped_data["estimated_heating_hotwater_kwh"] < grouped_data["post_scenario_heating_hotwater_kwh"]].empty: + grouped_data["estimated_heating_hotwater_kwh"] + < grouped_data["post_scenario_heating_hotwater_kwh"] + ].empty: raise Exception("someting went wrong") - if not grouped_data[grouped_data["estimated_heating_hotwater_kwh_scaled"] < grouped_data[ - "post_scenario_heating_hotwater_kwh_scaled"]].empty: + if not grouped_data[ + grouped_data["estimated_heating_hotwater_kwh_scaled"] + < grouped_data["post_scenario_heating_hotwater_kwh_scaled"] + ].empty: raise Exception("someting went wrong") # Reorder the columns grouped_data = grouped_data[ [ - 'property_type', - 'property_sub_type', - 'scenario', - 'estimated_heating_hotwater_kwh', - 'post_scenario_heating_hotwater_kwh', - 
'estimated_heating_hotwater_kwh_scaled', - 'post_scenario_heating_hotwater_kwh_scaled', - 'estimated_cost', + "property_type", + "property_sub_type", + "scenario", + "estimated_heating_hotwater_kwh", + "post_scenario_heating_hotwater_kwh", + "estimated_heating_hotwater_kwh_scaled", + "post_scenario_heating_hotwater_kwh_scaled", + "estimated_cost", ] ] @@ -730,9 +871,7 @@ def lewes_outputs(): scenario_names, how="left", on="scenario_id" ) - lewes_data = next_gen_dataset.merge( - property_scenario_impact, how="left", on="uprn" - ) + lewes_data = next_gen_dataset.merge(property_scenario_impact, how="left", on="uprn") lewes_data = lewes_data.sort_values( ["postcode", "uprn", "scenario_id"], ascending=True @@ -742,31 +881,52 @@ def lewes_outputs(): # TODO - remap the heating type lewes_data = lewes_data[ [ - 'uprn', 'address', 'postcode', 'property_type', 'built_form', + "uprn", + "address", + "postcode", + "property_type", + "built_form", # 'estimated_heating_hotwater_kwh', - 'primary_fuel_type', 'gross_floor_area', 'floor_height', 'number_of_floors', 'ashp_suitable', - 'ashp_size_kw', - 'ashp_cost', 'solar_suitable', 'solar_size_kwp', 'solar_cost', - 'scenario', - 'estimated_heating_hotwater_kwh_scaled', - 'post_scenario_heating_hotwater_kwh_scaled', + "primary_fuel_type", + "gross_floor_area", + "floor_height", + "number_of_floors", + "ashp_suitable", + "ashp_size_kw", + "ashp_cost", + "solar_suitable", + "solar_size_kwp", + "solar_cost", + "scenario", + "estimated_heating_hotwater_kwh_scaled", + "post_scenario_heating_hotwater_kwh_scaled", # 'property_id', - dropped # 'current_energy_demand_heating_hotwater', - 'Estimated Heating Demand kWh Savings', - 'Estimated Lighting kWh Savings', - 'Estimated Solar kWh Savings', - 'estimated_cost', - 'post_scenario_heating_hotwater_kwh', 'Cavity Wall Insulation', 'Cylinder Thermostat', - 'Flat Roof Insulation', - 'Hot Water Tank Insulation', 'Loft Insulation', 'Mechanical Ventilation', 'Room Roof Insulation', + "Estimated 
Heating Demand kWh Savings", + "Estimated Lighting kWh Savings", + "Estimated Solar kWh Savings", + "estimated_cost", + "post_scenario_heating_hotwater_kwh", + "Cavity Wall Insulation", + "Cylinder Thermostat", + "Flat Roof Insulation", + "Hot Water Tank Insulation", + "Loft Insulation", + "Mechanical Ventilation", + "Room Roof Insulation", # 'scenario_id', - dropped - 'Low Energy Lighting', 'Secondary Heating', 'Windows Glazing', 'External Wall Insulation', - 'Heating Control', - 'Solar PV', - 'Air Source Heat Pump', 'Boiler Upgrade', 'High Heat Retention Storage', - 'Internal Wall Insulation', - 'Solid Floor Insulation', - 'Suspended Floor Insulation', + "Low Energy Lighting", + "Secondary Heating", + "Windows Glazing", + "External Wall Insulation", + "Heating Control", + "Solar PV", + "Air Source Heat Pump", + "Boiler Upgrade", + "High Heat Retention Storage", + "Internal Wall Insulation", + "Solid Floor Insulation", + "Suspended Floor Insulation", ] ].rename( columns={ @@ -783,29 +943,34 @@ def lewes_outputs(): # "estimated_heating_hotwater_kwh": "Estimated Heating & Hot Water kwh", "estimated_heating_hotwater_kwh_scaled": "Estimated Heating & Hot Water kwh", "post_scenario_heating_hotwater_kwh_scaled": "Post Scenario Heating & Hot Water kwh", - "estimated_cost": "Estimated Cost of Scenario" + "estimated_cost": "Estimated Cost of Scenario", } ) # We save this dataset, which will be shared with Lewes Council lewes_data.to_csv( - "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Newhaven/outputs/Lewes property data.csv", index=False + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Newhaven/outputs/Lewes property data.csv", + index=False, ) - df_pivot = property_scenario_impact.pivot_table(index='uprn', columns='scenario', - values=['post_scenario_heating_hotwater_kwh', - 'post_scenario_heating_hotwater_kwh_scaled']) + df_pivot = property_scenario_impact.pivot_table( + index="uprn", + columns="scenario", + values=[ + 
"post_scenario_heating_hotwater_kwh", + "post_scenario_heating_hotwater_kwh_scaled", + ], + ) # Flattening multi-index columns - df_pivot.columns = [f'{col[0]}_{col[1]}' for col in df_pivot.columns] + df_pivot.columns = [f"{col[0]}_{col[1]}" for col in df_pivot.columns] # Reset the index to have a clean dataframe df_pivot.reset_index(inplace=True) - next_gen_dataset = next_gen_dataset.merge( - df_pivot, how="left", on="uprn" - ) + next_gen_dataset = next_gen_dataset.merge(df_pivot, how="left", on="uprn") next_gen_dataset.to_csv( - "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Newhaven/outputs/next_gen_dataset.csv", index=False + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Newhaven/outputs/next_gen_dataset.csv", + index=False, ) diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/d_restart_failed_subtasks.py b/etl/customers/peabody/Nov 2025 Consulting Project/d_restart_failed_subtasks.py index 68978b08..d86be050 100644 --- a/etl/customers/peabody/Nov 2025 Consulting Project/d_restart_failed_subtasks.py +++ b/etl/customers/peabody/Nov 2025 Consulting Project/d_restart_failed_subtasks.py @@ -10,6 +10,7 @@ Additionally, we wil find the problematic records and remove them Given we ran an EPC C scenario, we should check how many properties, below EPC C we have, that have no plan or recommendations in case something went wrong """ + import pandas as pd from sqlalchemy.orm import Session from backend.app.db.models.portfolio import PropertyModel @@ -19,8 +20,7 @@ from backend.app.db.connection import db_session def get_uprns_for_portfolio(session: Session, portfolio_id: int) -> list[int]: return [ uprn - for (uprn,) in - session.query(PropertyModel.uprn) + for (uprn,) in session.query(PropertyModel.uprn) .filter(PropertyModel.portfolio_id == portfolio_id) .all() if uprn is not None @@ -34,7 +34,7 @@ with db_session() as session: sal = pd.read_excel( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting 
Project/20251213 Model " "data.xlsx", - sheet_name="Standardised Asset List" + sheet_name="Standardised Asset List", ) missed_properties = sal[~sal["epc_os_uprn"].isin(completed_uprns)] @@ -44,7 +44,7 @@ missed_properties.to_excel( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/" "d_failed_properties_to_restart_20260102.xlsx", sheet_name="Standardised Asset List", - index=False + index=False, ) # Fixing an error - triggered jobs without removing EWI/IWI so need to delete all plans associated to these scenarios: @@ -52,14 +52,14 @@ scenario_id = None from sqlalchemy import select, func from sqlalchemy.orm import Session -from backend.app.db.models.recommendations import Plan +from backend.app.db.models.recommendations import PlanModel def count_plans_for_scenario(session: Session, scenario_id: int) -> int: return session.execute( select(func.count()) - .select_from(Plan) - .where(Plan.scenario_id == scenario_id) + .select_from(PlanModel) + .where(PlanModel.scenario_id == scenario_id) ).scalar_one() @@ -69,8 +69,7 @@ with db_session() as session: def get_plan_ids_for_scenario(session: Session, scenario_id: int) -> list[int]: result = session.execute( - select(Plan.id) - .where(Plan.scenario_id == scenario_id) + select(PlanModel.id).where(PlanModel.scenario_id == scenario_id) ) return [row.id for row in result] @@ -84,7 +83,7 @@ from sqlalchemy.orm import Session def chunked(iterable, size): for i in range(0, len(iterable), size): - yield iterable[i:i + size] + yield iterable[i : i + size] from sqlalchemy import text @@ -103,12 +102,14 @@ def delete_plan_batch(session: Session, plan_ids: list[int]): # recommendation_materials # ---------------------------- session.execute( - text(""" + text( + """ DELETE FROM recommendation_materials rm USING plan_recommendations pr WHERE rm.recommendation_id = pr.recommendation_id AND pr.plan_id = ANY(:plan_ids) - """), + """ + ), params, ) @@ -116,10 +117,12 @@ def 
delete_plan_batch(session: Session, plan_ids: list[int]): # plan_recommendations # ---------------------------- session.execute( - text(""" + text( + """ DELETE FROM plan_recommendations WHERE plan_id = ANY(:plan_ids) - """), + """ + ), params, ) @@ -127,14 +130,16 @@ def delete_plan_batch(session: Session, plan_ids: list[int]): # recommendations (only those used by these plans) # ---------------------------- session.execute( - text(""" + text( + """ DELETE FROM recommendation r WHERE r.id IN ( SELECT DISTINCT recommendation_id FROM plan_recommendations WHERE plan_id = ANY(:plan_ids) ) - """), + """ + ), params, ) @@ -142,10 +147,12 @@ def delete_plan_batch(session: Session, plan_ids: list[int]): # plans LAST # ---------------------------- session.execute( - text(""" + text( + """ DELETE FROM plan WHERE id = ANY(:plan_ids) - """), + """ + ), params, ) diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/f_diagnostics.py b/etl/customers/peabody/Nov 2025 Consulting Project/f_diagnostics.py index 4b946c60..509c8179 100644 --- a/etl/customers/peabody/Nov 2025 Consulting Project/f_diagnostics.py +++ b/etl/customers/peabody/Nov 2025 Consulting Project/f_diagnostics.py @@ -5,6 +5,7 @@ This includes: # EPC C, there should be a plan 2) If the plan is fabric first, make sure they are actually fabric first """ + import pandas as pd scenario_names = { @@ -33,7 +34,9 @@ for scenario_id, scenario_name in scenario_names.items(): ) # find properties that are below the scenario sap target, but have no recommended measures - df["below_scenario_target"] = df["current_sap_points"] < scenario_sap_targets[scenario_id] + df["below_scenario_target"] = ( + df["current_sap_points"] < scenario_sap_targets[scenario_id] + ) df["no_recommended_measures"] = df["sap_points"] == 0 df["zero_cost"] = df["total_retrofit_cost"] == 0 df["sap_points_above_zero"] = df["sap_points"] > 0 @@ -45,7 +48,9 @@ for scenario_id, scenario_name in scenario_names.items(): ].copy() if 
scenario_sap_targets[scenario_id] == 81: - problematic_properties = problematic_properties[problematic_properties["property_type"] != "Flat"] + problematic_properties = problematic_properties[ + problematic_properties["property_type"] != "Flat" + ] zero_cost_above_zero_sap = df[ (df["sap_points_above_zero"] & df["zero_cost"]) @@ -61,8 +66,12 @@ for scenario_id, scenario_name in scenario_names.items(): # pd.set_option('display.width', 1000) # problematic_properties.head(len(problematic_properties)) - print(f"We have {len(problematic_properties)} problematic properties for scenario {scenario_name} ({scenario_id})") - print(f"We have {len(zero_cost_above_zero_sap)} zero cost properties for scenario {scenario_name} ({scenario_id})") + print( + f"We have {len(problematic_properties)} problematic properties for scenario {scenario_name} ({scenario_id})" + ) + print( + f"We have {len(zero_cost_above_zero_sap)} zero cost properties for scenario {scenario_name} ({scenario_id})" + ) problems.append(problematic_properties) problems.append(zero_cost_above_zero_sap) @@ -97,12 +106,12 @@ all_problems = all_problems.drop_duplicates(subset=["uprn"]) sal = pd.read_excel( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/20251213 Model " "data.xlsx", - sheet_name="Standardised Asset List" + sheet_name="Standardised Asset List", ) sal2 = pd.read_excel( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/20260105 - additional " "UPRNS.xlsx", - sheet_name="Standardised Asset List" + sheet_name="Standardised Asset List", ) sal = pd.concat([sal, sal2]) @@ -114,7 +123,7 @@ retry.to_excel( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/" "d_problematic_properties_to_review_20260106.xlsx", sheet_name="Standardised Asset List", - index=False + index=False, ) # Delete associated plans @@ -126,19 +135,20 @@ uprns = retry["epc_os_uprn"].tolist() from sqlalchemy.orm 
import Session from backend.app.db.models.portfolio import PropertyModel from backend.app.db.connection import db_session -from backend.app.db.models.recommendations import Plan +from backend.app.db.models.recommendations import PlanModel from sqlalchemy import select, delete from sqlalchemy.exc import NoResultFound from sqlalchemy.orm import sessionmaker -def get_property_ids_for_uprns(session: Session, portfolio_id: int, uprns: list[int]) -> list[int]: +def get_property_ids_for_uprns( + session: Session, portfolio_id: int, uprns: list[int] +) -> list[int]: return [ property.id for property in session.query(PropertyModel) .filter( - PropertyModel.portfolio_id == portfolio_id, - PropertyModel.uprn.in_(uprns) + PropertyModel.portfolio_id == portfolio_id, PropertyModel.uprn.in_(uprns) ) .all() ] @@ -149,15 +159,21 @@ with db_session() as session: # Get all and delete plans for these property IDs -def get_all_plans_for_property_ids(session: Session, property_ids: list[int]) -> list[Plan]: - return session.query(Plan).filter(Plan.property_id.in_(property_ids)).all() +def get_all_plans_for_property_ids( + session: Session, property_ids: list[int] +) -> list[PlanModel]: + return ( + session.query(PlanModel).filter(PlanModel.property_id.in_(property_ids)).all() + ) -def get_ids_of_plans_for_deletion(session: Session, property_ids: list[int]) -> list[int]: +def get_ids_of_plans_for_deletion( + session: Session, property_ids: list[int] +) -> list[int]: return [ plan.id - for plan in session.query(Plan) - .filter(Plan.property_id.in_(property_ids)) + for plan in session.query(PlanModel) + .filter(PlanModel.property_id.in_(property_ids)) .all() ] @@ -168,7 +184,7 @@ with db_session() as session: def chunked(iterable, size): for i in range(0, len(iterable), size): - yield iterable[i:i + size] + yield iterable[i : i + size] from sqlalchemy import text @@ -187,12 +203,14 @@ def delete_plan_batch(session: Session, plan_ids: list[int]): # recommendation_materials # 
---------------------------- session.execute( - text(""" + text( + """ DELETE FROM recommendation_materials rm USING plan_recommendations pr WHERE rm.recommendation_id = pr.recommendation_id AND pr.plan_id = ANY(:plan_ids) - """), + """ + ), params, ) @@ -200,10 +218,12 @@ def delete_plan_batch(session: Session, plan_ids: list[int]): # plan_recommendations # ---------------------------- session.execute( - text(""" + text( + """ DELETE FROM plan_recommendations WHERE plan_id = ANY(:plan_ids) - """), + """ + ), params, ) @@ -211,14 +231,16 @@ def delete_plan_batch(session: Session, plan_ids: list[int]): # recommendations (only those used by these plans) # ---------------------------- session.execute( - text(""" + text( + """ DELETE FROM recommendation r WHERE r.id IN ( SELECT DISTINCT recommendation_id FROM plan_recommendations WHERE plan_id = ANY(:plan_ids) ) - """), + """ + ), params, ) @@ -226,10 +248,12 @@ def delete_plan_batch(session: Session, plan_ids: list[int]): # plans LAST # ---------------------------- session.execute( - text(""" + text( + """ DELETE FROM plan WHERE id = ANY(:plan_ids) - """), + """ + ), params, ) diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/g_rebaselining_installed_measrues.py b/etl/customers/peabody/Nov 2025 Consulting Project/g_rebaselining_installed_measrues.py index 4405d113..c451938d 100644 --- a/etl/customers/peabody/Nov 2025 Consulting Project/g_rebaselining_installed_measrues.py +++ b/etl/customers/peabody/Nov 2025 Consulting Project/g_rebaselining_installed_measrues.py @@ -2,17 +2,22 @@ import pandas as pd from tqdm import tqdm from sqlalchemy.orm import sessionmaker from backend.app.db.connection import db_engine, db_read_session, db_session -from backend.app.db.models.recommendations import Recommendation, Plan, PlanRecommendations, RecommendationMaterials, \ - InstalledMeasure +from backend.app.db.models.recommendations import ( + Recommendation, + PlanModel, + PlanRecommendations, + RecommendationMaterials, 
+ InstalledMeasure, +) from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel from backend.app.utils import sap_to_epc from typing import Dict, List, Set from recommendations.Costs import Costs from backend.app.db.models.portfolio import Epc -pd.set_option('display.max_rows', 500) -pd.set_option('display.max_columns', 500) -pd.set_option('display.width', 1000) +pd.set_option("display.max_rows", 500) +pd.set_option("display.max_columns", 500) +pd.set_option("display.width", 1000) def get_all_data(portfolio_id, scenario_ids): @@ -22,22 +27,26 @@ def get_all_data(portfolio_id, scenario_ids): # -------------------- # Properties # -------------------- - properties_query = session.query( - PropertyModel, - PropertyDetailsEpcModel - ).join( - PropertyDetailsEpcModel, - PropertyModel.id == PropertyDetailsEpcModel.property_id - ).filter( - PropertyModel.portfolio_id == portfolio_id - ).all() + properties_query = ( + session.query(PropertyModel, PropertyDetailsEpcModel) + .join( + PropertyDetailsEpcModel, + PropertyModel.id == PropertyDetailsEpcModel.property_id, + ) + .filter(PropertyModel.portfolio_id == portfolio_id) + .all() + ) properties_data = [ { - **{col.name: getattr(p.PropertyModel, col.name) - for col in PropertyModel.__table__.columns}, - **{col.name: getattr(p.PropertyDetailsEpcModel, col.name) - for col in PropertyDetailsEpcModel.__table__.columns}, + **{ + col.name: getattr(p.PropertyModel, col.name) + for col in PropertyModel.__table__.columns + }, + **{ + col.name: getattr(p.PropertyDetailsEpcModel, col.name) + for col in PropertyDetailsEpcModel.__table__.columns + }, } for p in properties_query ] @@ -45,12 +54,12 @@ def get_all_data(portfolio_id, scenario_ids): # -------------------- # Plans # -------------------- - plans_query = session.query(Plan).filter( - Plan.scenario_id.in_(scenario_ids) - ).all() + plans_query = ( + session.query(PlanModel).filter(PlanModel.scenario_id.in_(scenario_ids)).all() + ) plans_data = [ - 
{col.name: getattr(plan, col.name) for col in Plan.__table__.columns} + {col.name: getattr(plan, col.name) for col in PlanModel.__table__.columns} for plan in plans_query ] @@ -59,25 +68,27 @@ def get_all_data(portfolio_id, scenario_ids): # -------------------- # Recommendations (NO materials yet) # -------------------- - recommendations_query = session.query( - Recommendation, - Plan.scenario_id - ).join( - PlanRecommendations, - Recommendation.id == PlanRecommendations.recommendation_id - ).join( - Plan, - Plan.id == PlanRecommendations.plan_id - ).filter( - PlanRecommendations.plan_id.in_(plan_ids), - ).all() + recommendations_query = ( + session.query(Recommendation, PlanModel.scenario_id) + .join( + PlanRecommendations, + Recommendation.id == PlanRecommendations.recommendation_id, + ) + .join(PlanModel, PlanModel.id == PlanRecommendations.plan_id) + .filter( + PlanRecommendations.plan_id.in_(plan_ids), + ) + .all() + ) recommendations_data = [ { - **{col.name: getattr(r.Recommendation, col.name) - for col in Recommendation.__table__.columns}, + **{ + col.name: getattr(r.Recommendation, col.name) + for col in Recommendation.__table__.columns + }, "scenario_id": r.scenario_id, - "materials": [] # placeholder + "materials": [], # placeholder } for r in recommendations_query ] @@ -131,7 +142,7 @@ recommendations_df = pd.read_csv( sustainability_data = pd.read_excel( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody " "- Data Extracts for Domna.xlsx", - sheet_name="Sustainability" + sheet_name="Sustainability", ) sustainability_data_with_sap = pd.read_excel( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/Parity Data " @@ -140,10 +151,16 @@ sustainability_data_with_sap = pd.read_excel( properties_df["uprn"] = properties_df["uprn"].astype(str) property_data_comparison = properties_df.merge( - sustainability_data, how="inner", left_on="uprn", 
right_on="UPRN", suffixes=("_prop", "_sust") + sustainability_data, + how="inner", + left_on="uprn", + right_on="UPRN", + suffixes=("_prop", "_sust"), ) -property_data_comparison["wall_type"] = property_data_comparison["walls"].str.split(",").str[0].str.strip() +property_data_comparison["wall_type"] = ( + property_data_comparison["walls"].str.split(",").str[0].str.strip() +) column_pairs = { "built_form": "Attachment", @@ -154,25 +171,28 @@ column_pairs = { combination_tables = {} for v1, v2 in column_pairs.items(): - df = property_data_comparison.groupby([v1, v2]).size().reset_index(name='count') + df = property_data_comparison.groupby([v1, v2]).size().reset_index(name="count") combination_tables[v1] = df # We just need all of the measure types, per property recommendation_measure_types = recommendations_df[ - ["property_id", "measure_type" - , "sap_points", "heat_demand", "kwh_savings", "co2_equivalent_savings", - "energy_cost_savings" - ] + [ + "property_id", + "measure_type", + "sap_points", + "heat_demand", + "kwh_savings", + "co2_equivalent_savings", + "energy_cost_savings", + ] ].drop_duplicates() recommendation_measure_types["flag"] = True # We pivot -recommendations_measures_pivot = recommendation_measure_types[ - ["property_id", "measure_type", "flag"] -].drop_duplicates().pivot( - index='property_id', - columns='measure_type', - values='flag' +recommendations_measures_pivot = ( + recommendation_measure_types[["property_id", "measure_type", "flag"]] + .drop_duplicates() + .pivot(index="property_id", columns="measure_type", values="flag") ) recommendations_measures_pivot = recommendations_measures_pivot.reset_index() @@ -180,137 +200,157 @@ properties_to_recs = properties_df.rename(columns={"solar_pv": "solar_data"}).me recommendations_measures_pivot, how="left", on="property_id" ) -sustainability_data["cavity_wall_insulation"] = sustainability_data["Wall Insulation"].isin( - ["FilledCavity", "FilledCavityPlusInternal", "FilledCavityPlusExternal"] -) 
-sustainability_data["internal_wall_insulation"] = sustainability_data["Wall Insulation"].isin( - ["Internal", "FilledCavityPlusInternal"] -) -sustainability_data["external_wall_insulation"] = sustainability_data["Wall Insulation"].isin( - ["External", "FilledCavityPlusExternal"] -) +sustainability_data["cavity_wall_insulation"] = sustainability_data[ + "Wall Insulation" +].isin(["FilledCavity", "FilledCavityPlusInternal", "FilledCavityPlusExternal"]) +sustainability_data["internal_wall_insulation"] = sustainability_data[ + "Wall Insulation" +].isin(["Internal", "FilledCavityPlusInternal"]) +sustainability_data["external_wall_insulation"] = sustainability_data[ + "Wall Insulation" +].isin(["External", "FilledCavityPlusExternal"]) sustainability_data["loft_insulation"] = sustainability_data["Roof Insulation"].isin( ["mm300", "mm250", "mm350", "mm400", "mm270"] ) sustainability_data["double_glazing"] = sustainability_data["Glazing"].isin( - ["Double 2002 or later", "Double but age unknown", "Triple", "DoubleKnownData", "Secondary", "TripleKnownData"] + [ + "Double 2002 or later", + "Double but age unknown", + "Triple", + "DoubleKnownData", + "Secondary", + "TripleKnownData", + ] ) sustainability_data["secondary_glazing"] = sustainability_data["Glazing"].isin( ["Secondary"] ) -sustainability_data["suspended_floor_insulation"] = sustainability_data["Floor Insulation"].isin( - ["RetroFitted"] +sustainability_data["suspended_floor_insulation"] = sustainability_data[ + "Floor Insulation" +].isin(["RetroFitted"]) + +sustainability_data["boiler_upgrade"] = sustainability_data["Heating"].isin( + ["Boilers"] +) & sustainability_data["Boiler Efficiency"].isin(["A"]) +sustainability_data["air_source_heat_pump"] = sustainability_data["Heating"].isin( + ["Heat pumps (wet)"] ) -sustainability_data["boiler_upgrade"] = ( - sustainability_data["Heating"].isin(["Boilers"]) & sustainability_data["Boiler Efficiency"].isin(["A"]) -) -sustainability_data["air_source_heat_pump"] = 
(sustainability_data["Heating"].isin(["Heat pumps (wet)"])) +sustainability_data["time_temperature_zone_control"] = sustainability_data[ + "Controls Adequacy" +].isin(["Top Spec"]) -sustainability_data["time_temperature_zone_control"] = ( - sustainability_data["Controls Adequacy"].isin(["Top Spec"]) -) - -sustainability_data["roomstat_programmer_trvs"] = ( - sustainability_data["Controls Adequacy"].isin(["Optimal"]) -) +sustainability_data["roomstat_programmer_trvs"] = sustainability_data[ + "Controls Adequacy" +].isin(["Optimal"]) sustainability_data["flat_roof_insulation"] = ( - (sustainability_data["Roof Construction"] == "Flat") & - (sustainability_data["Roof Insulation"].isin(["mm50", "mm150", "mm100"])) -) + sustainability_data["Roof Construction"] == "Flat" +) & (sustainability_data["Roof Insulation"].isin(["mm50", "mm150", "mm100"])) properties_to_recs["uprn"] = properties_to_recs["uprn"].astype(str) comparison = sustainability_data.merge( properties_to_recs[ - ["uprn", "cavity_wall_insulation", "external_wall_insulation", "internal_wall_insulation", "loft_insulation", - "double_glazing", "secondary_glazing", "suspended_floor_insulation", "boiler_upgrade", "air_source_heat_pump", - "time_temperature_zone_control", "roomstat_programmer_trvs", "flat_roof_insulation", "room_roof_insulation" - ] + [ + "uprn", + "cavity_wall_insulation", + "external_wall_insulation", + "internal_wall_insulation", + "loft_insulation", + "double_glazing", + "secondary_glazing", + "suspended_floor_insulation", + "boiler_upgrade", + "air_source_heat_pump", + "time_temperature_zone_control", + "roomstat_programmer_trvs", + "flat_roof_insulation", + "room_roof_insulation", + ] ], left_on="UPRN", right_on="uprn", how="left", - suffixes=("", "_from_recs") + suffixes=("", "_from_recs"), ) # Flag entries where we've been told that walls are already insulated, but we have recommendations for wall insulation # ------------ Walls ------------ cwi_conflicting = comparison[ - 
(comparison["cavity_wall_insulation"]) & - (pd.isnull(comparison["cavity_wall_insulation_from_recs"]) == False) - ].copy() + (comparison["cavity_wall_insulation"]) + & (pd.isnull(comparison["cavity_wall_insulation_from_recs"]) == False) +].copy() cwi_conflicting["conflict_cavity_wall_insulation"] = True iwi_conflicting = comparison[ - (comparison["internal_wall_insulation"]) & - (pd.isnull(comparison["internal_wall_insulation_from_recs"]) == False) - ].copy() + (comparison["internal_wall_insulation"]) + & (pd.isnull(comparison["internal_wall_insulation_from_recs"]) == False) +].copy() iwi_conflicting["conflict_iwi_wall_insulation"] = True ewi_conflicting = comparison[ - (comparison["external_wall_insulation"]) & - (pd.isnull(comparison["external_wall_insulation_from_recs"]) == False) - ].copy() + (comparison["external_wall_insulation"]) + & (pd.isnull(comparison["external_wall_insulation_from_recs"]) == False) +].copy() ewi_conflicting["conflict_ewi_wall_insulation"] = True # ------------ Roof ------------ loft_conflicting = comparison[ - (comparison["loft_insulation"]) & - (pd.isnull(comparison["loft_insulation_from_recs"]) == False) - ].copy() + (comparison["loft_insulation"]) + & (pd.isnull(comparison["loft_insulation_from_recs"]) == False) +].copy() loft_conflicting["conflict_loft_insulation"] = True # ------------ Windows ------------ double_glazing_conflicting = comparison[ - (comparison["double_glazing"] | comparison["secondary_glazing"]) & - (pd.isnull(comparison["double_glazing_from_recs"]) == False) & - (pd.isnull(comparison["secondary_glazing_from_recs"]) == True) - ].copy() + (comparison["double_glazing"] | comparison["secondary_glazing"]) + & (pd.isnull(comparison["double_glazing_from_recs"]) == False) + & (pd.isnull(comparison["secondary_glazing_from_recs"]) == True) +].copy() double_glazing_conflicting["conflict_double_glazing"] = True secondary_glazing_conflicting = comparison[ - (comparison["secondary_glazing"]) & - 
(pd.isnull(comparison["secondary_glazing_from_recs"]) == False) - ].copy() + (comparison["secondary_glazing"]) + & (pd.isnull(comparison["secondary_glazing_from_recs"]) == False) +].copy() secondary_glazing_conflicting["conflict_secondary_glazing"] = True # ------------ Floors ------------ floors_conflicting = comparison[ - (comparison["suspended_floor_insulation"]) & - (pd.isnull(comparison["suspended_floor_insulation_from_recs"]) == False) - ].copy() + (comparison["suspended_floor_insulation"]) + & (pd.isnull(comparison["suspended_floor_insulation_from_recs"]) == False) +].copy() floors_conflicting["conflict_suspended_floor_insulation"] = True # ------------ Boiler Upgrade ------------ boiler_conflicting = comparison[ - (comparison["boiler_upgrade"]) & - (pd.isnull(comparison["boiler_upgrade_from_recs"]) == False) - ].copy() + (comparison["boiler_upgrade"]) + & (pd.isnull(comparison["boiler_upgrade_from_recs"]) == False) +].copy() boiler_conflicting["conflict_boiler_upgrade"] = True # ------------ ASHP ------------ ashp_conflicting = comparison[ - (comparison["air_source_heat_pump"]) & - (pd.isnull(comparison["air_source_heat_pump_from_recs"]) == False) - ].copy() + (comparison["air_source_heat_pump"]) + & (pd.isnull(comparison["air_source_heat_pump_from_recs"]) == False) +].copy() ashp_conflicting["conflict_air_source_heat_pump"] = True # ------------ heat controls ------------ ttzc_conflicting = comparison[ - (comparison["time_temperature_zone_control"]) & - (pd.isnull(comparison["time_temperature_zone_control_from_recs"]) == False) - ].copy() + (comparison["time_temperature_zone_control"]) + & (pd.isnull(comparison["time_temperature_zone_control_from_recs"]) == False) +].copy() ttzc_conflicting["conflict_time_temperature_zone_control"] = True rst_conflicting = comparison[ - (comparison["roomstat_programmer_trvs"]) & - (pd.isnull(comparison["roomstat_programmer_trvs_from_recs"]) == False) - ].copy() + (comparison["roomstat_programmer_trvs"]) + & 
(pd.isnull(comparison["roomstat_programmer_trvs_from_recs"]) == False) +].copy() rst_conflicting["conflict_roomstat_programmer_trvs"] = True # ------------ Flat Roof Insulation ----------- flat_roof_conflicting = comparison[ - (comparison["flat_roof_insulation"]) & - (pd.isnull(comparison["flat_roof_insulation_from_recs"]) == False) - ].copy() + (comparison["flat_roof_insulation"]) + & (pd.isnull(comparison["flat_roof_insulation_from_recs"]) == False) +].copy() flat_roof_conflicting["conflict_flat_roof_insulation"] = True # All properties with conflicts @@ -327,22 +367,26 @@ all_conflicts = pd.concat( ashp_conflicting, ttzc_conflicting, rst_conflicting, - flat_roof_conflicting + flat_roof_conflicting, ] ) all_conflicts = all_conflicts[ [ "uprn", - 'conflict_cavity_wall_insulation', - 'conflict_iwi_wall_insulation', - 'conflict_ewi_wall_insulation', - 'conflict_loft_insulation', - 'conflict_double_glazing', - 'conflict_secondary_glazing', - 'conflict_suspended_floor_insulation', 'conflict_boiler_upgrade', - 'conflict_air_source_heat_pump', - 'conflict_time_temperature_zone_control', 'conflict_roomstat_programmer_trvs', 'conflict_flat_roof_insulation'] + "conflict_cavity_wall_insulation", + "conflict_iwi_wall_insulation", + "conflict_ewi_wall_insulation", + "conflict_loft_insulation", + "conflict_double_glazing", + "conflict_secondary_glazing", + "conflict_suspended_floor_insulation", + "conflict_boiler_upgrade", + "conflict_air_source_heat_pump", + "conflict_time_temperature_zone_control", + "conflict_roomstat_programmer_trvs", + "conflict_flat_roof_insulation", + ] ] all_conflicts = all_conflicts.rename( @@ -358,31 +402,29 @@ all_conflicts = all_conflicts.rename( "conflict_air_source_heat_pump": "air_source_heat_pump", "conflict_time_temperature_zone_control": "time_temperature_zone_control", "conflict_roomstat_programmer_trvs": "roomstat_programmer_trvs", - "conflict_flat_roof_insulation": "flat_roof_insulation" - + "conflict_flat_roof_insulation": 
"flat_roof_insulation", } ) # Reshape by UPRN by melting all_conflicts = all_conflicts.melt( - id_vars=["uprn"], - var_name="measure_type", - value_name="already_installed" + id_vars=["uprn"], var_name="measure_type", value_name="already_installed" ) -recommendations_df["property_id"] = recommendations_df["property_id"].astype(int).astype(str) +recommendations_df["property_id"] = ( + recommendations_df["property_id"].astype(int).astype(str) +) properties_df["property_id"] = properties_df["property_id"].astype(int).astype(str) recs_with_uprn = recommendations_df.merge( properties_df[["property_id", "uprn"]], on="property_id", how="left", - suffixes=("", "_prop") + suffixes=("", "_prop"), ) recs_with_uprn = ( - recs_with_uprn - .sort_values("sap_points", ascending=False) + recs_with_uprn.sort_values("sap_points", ascending=False) .groupby(["uprn", "measure_type"], as_index=False) .first() ) @@ -390,13 +432,24 @@ recs_with_uprn = ( recs_with_uprn["uprn"] = recs_with_uprn["uprn"].astype(str) installed_measures_df = all_conflicts.merge( - recs_with_uprn[["uprn", "measure_type", "sap_points", "heat_demand", "kwh_savings", "co2_equivalent_savings", - "energy_cost_savings"]], + recs_with_uprn[ + [ + "uprn", + "measure_type", + "sap_points", + "heat_demand", + "kwh_savings", + "co2_equivalent_savings", + "energy_cost_savings", + ] + ], how="left", - on=["uprn", "measure_type"] + on=["uprn", "measure_type"], ) -installed_measures_df = installed_measures_df[installed_measures_df["already_installed"] == True] +installed_measures_df = installed_measures_df[ + installed_measures_df["already_installed"] == True +] ## --- Sense checking ---- @@ -423,27 +476,26 @@ def add_mechanical_ventilation_for_fabric(installed_measures_df, recs_with_uprn) recs_with_uprn[ (recs_with_uprn["measure_type"] == "mechanical_ventilation") & (recs_with_uprn["uprn"].isin(fabric_uprns)) - ] + ] .sort_values("sap_points", ascending=False) .drop_duplicates(subset=["uprn"]) ) - mv_installed = mv_recs[[ - 
"uprn", - "measure_type", - "sap_points", - "heat_demand", - "kwh_savings", - "co2_equivalent_savings", - "energy_cost_savings", - ]].copy() + mv_installed = mv_recs[ + [ + "uprn", + "measure_type", + "sap_points", + "heat_demand", + "kwh_savings", + "co2_equivalent_savings", + "energy_cost_savings", + ] + ].copy() mv_installed["already_installed"] = True - return pd.concat( - [installed_measures_df, mv_installed], - ignore_index=True - ) + return pd.concat([installed_measures_df, mv_installed], ignore_index=True) # installed_measures_df = add_mechanical_ventilation_for_fabric( @@ -453,24 +505,39 @@ def add_mechanical_ventilation_for_fabric(installed_measures_df, recs_with_uprn) assert installed_measures_df[["uprn", "measure_type"]].duplicated().sum() == 0 -for col in ["sap_points", "heat_demand", "kwh_savings", "co2_equivalent_savings", "energy_cost_savings"]: - print(f"n missings for {col}: {pd.isnull(installed_measures_df[col]).sum()}", ) +for col in [ + "sap_points", + "heat_demand", + "kwh_savings", + "co2_equivalent_savings", + "energy_cost_savings", +]: + print( + f"n missings for {col}: {pd.isnull(installed_measures_df[col]).sum()}", + ) # Do some calcs on SAP impact sap_impact = installed_measures_df.groupby(["uprn"])["sap_points"].sum().reset_index() -properties_sap = properties_df[["uprn", "current_sap_points", "current_epc_rating"]].copy() +properties_sap = properties_df[ + ["uprn", "current_sap_points", "current_epc_rating"] +].copy() properties_sap["uprn"] = properties_sap["uprn"].astype(str) -old_sap_vs_new = properties_sap.merge( - sap_impact, how="inner", on="uprn" +old_sap_vs_new = properties_sap.merge(sap_impact, how="inner", on="uprn") +old_sap_vs_new["new_sap_points"] = ( + old_sap_vs_new["current_sap_points"] + old_sap_vs_new["sap_points"] +) +old_sap_vs_new["new_epc_rating"] = old_sap_vs_new["new_sap_points"].apply( + lambda x: sap_to_epc(x) ) -old_sap_vs_new["new_sap_points"] = old_sap_vs_new["current_sap_points"] + 
old_sap_vs_new["sap_points"] -old_sap_vs_new["new_epc_rating"] = old_sap_vs_new["new_sap_points"].apply(lambda x: sap_to_epc(x)) # How many properties go from below C to above -old_sap_vs_new[old_sap_vs_new["current_sap_points"] < 69]["new_epc_rating"].value_counts() +old_sap_vs_new[old_sap_vs_new["current_sap_points"] < 69][ + "new_epc_rating" +].value_counts() changed = old_sap_vs_new[ - (old_sap_vs_new["current_sap_points"] < 69) & (old_sap_vs_new["new_sap_points"] >= 69) - ] + (old_sap_vs_new["current_sap_points"] < 69) + & (old_sap_vs_new["new_sap_points"] >= 69) +] # What do I need to do: # TODO: - need to get a view of "all" measures for the property, not just recommended. We can do this but just looking @@ -499,22 +566,38 @@ def bulk_insert_installed_measures(installed_measures_df): now = datetime.utcnow() for _, row in installed_measures_df.iterrows(): - records.append({ - "uprn": int(row["uprn"]), - "measure_type": row["measure_type"], - "installed_at": now, - "sap_points": float(row["sap_points"]) if pd.notna(row["sap_points"]) else None, - "carbon_savings": float(row["co2_equivalent_savings"]) if pd.notna(row["co2_equivalent_savings"]) else None, - "kwh_savings": float(row["kwh_savings"]) if pd.notna(row["kwh_savings"]) else None, - "bill_savings": float(row["energy_cost_savings"]) if pd.notna(row["energy_cost_savings"]) else None, - "heat_demand_savings": float(row["heat_demand"]) if pd.notna(row["heat_demand"]) else None, - "source": SOURCE, - "is_active": True, - }) + records.append( + { + "uprn": int(row["uprn"]), + "measure_type": row["measure_type"], + "installed_at": now, + "sap_points": ( + float(row["sap_points"]) if pd.notna(row["sap_points"]) else None + ), + "carbon_savings": ( + float(row["co2_equivalent_savings"]) + if pd.notna(row["co2_equivalent_savings"]) + else None + ), + "kwh_savings": ( + float(row["kwh_savings"]) if pd.notna(row["kwh_savings"]) else None + ), + "bill_savings": ( + float(row["energy_cost_savings"]) + if 
pd.notna(row["energy_cost_savings"]) + else None + ), + "heat_demand_savings": ( + float(row["heat_demand"]) if pd.notna(row["heat_demand"]) else None + ), + "source": SOURCE, + "is_active": True, + } + ) try: for i in range(0, len(records), BATCH_SIZE): - batch = records[i:i + BATCH_SIZE] + batch = records[i : i + BATCH_SIZE] session.bulk_insert_mappings(InstalledMeasure, batch) session.commit() print(f"✅ Inserted {i + len(batch)} / {len(records)}") @@ -580,9 +663,7 @@ def get_installed_measure_adjustments_by_uprn_for_portfolio( def exclude_ventilation(column): return case( ( - InstalledMeasure.measure_type.notin_( - REBASING_EXCLUDED_MEASURES - ), + InstalledMeasure.measure_type.notin_(REBASING_EXCLUDED_MEASURES), column, ), else_=0.0, @@ -594,33 +675,24 @@ def get_installed_measure_adjustments_by_uprn_for_portfolio( rows = ( session.query( InstalledMeasure.uprn.label("uprn"), - func.coalesce( func.sum(exclude_ventilation(InstalledMeasure.sap_points)), 0.0, ).label("sap_points"), - func.coalesce( func.sum(exclude_ventilation(InstalledMeasure.carbon_savings)), 0.0, ).label("co2"), - func.coalesce( func.sum(exclude_ventilation(InstalledMeasure.kwh_savings)), 0.0, ).label("energy_kwh"), - func.coalesce( func.sum(exclude_ventilation(InstalledMeasure.bill_savings)), 0.0, ).label("energy_bill"), - func.coalesce( - func.sum( - exclude_ventilation( - InstalledMeasure.heat_demand_savings - ) - ), + func.sum(exclude_ventilation(InstalledMeasure.heat_demand_savings)), 0.0, ).label("heat_demand"), ) @@ -657,16 +729,14 @@ def get_installed_measure_types_by_uprn( ) # Convert enums → strings - return { - r[0].value if hasattr(r[0], "value") else r[0] - for r in rows - } + return {r[0].value if hasattr(r[0], "value") else r[0] for r in rows} # ------------------------------------------------------------ # PROPERTY REBASING (READ-ONLY) # ------------------------------------------------------------ + def compute_property_sap_updates( properties: List[PropertyModel], 
sap_adjustments: Dict[int, float], # keyed by uprn @@ -692,14 +762,16 @@ def compute_property_sap_updates( sap_delta = sap_adjustments[prop.uprn] new_sap = prop.original_sap_points + sap_delta - updates.append({ - "property_id": prop.id, - "uprn": prop.uprn, - "original_sap_points": prop.original_sap_points, - "installed_sap_delta": sap_delta, - "new_sap_points": new_sap, - "is_adjusted": True, - }) + updates.append( + { + "property_id": prop.id, + "uprn": prop.uprn, + "original_sap_points": prop.original_sap_points, + "installed_sap_delta": sap_delta, + "new_sap_points": new_sap, + "is_adjusted": True, + } + ) return updates @@ -708,6 +780,7 @@ def compute_property_sap_updates( # PLAN RECOMPUTATION HELPERS # ------------------------------------------------------------ + def get_effective_plan_recommendations( session, plan_id: int, excluded_measure_types: Set[str] ) -> List[Recommendation]: @@ -715,11 +788,10 @@ def get_effective_plan_recommendations( session.query(Recommendation) .join(PlanRecommendations) .filter(PlanRecommendations.plan_id == plan_id) - .filter(Recommendation.default.is_(True))) + .filter(Recommendation.default.is_(True)) + ) if excluded_measure_types: - q = q.filter( - ~Recommendation.measure_type.in_(excluded_measure_types) - ) + q = q.filter(~Recommendation.measure_type.in_(excluded_measure_types)) return q.all() @@ -791,7 +863,11 @@ def get_installed_measure_types_by_property_id_for_portfolio( installed_by_property[property_id].add(mt) # drag-along rules - if mt in {"cavity_wall_insulation", "internal_wall_insulation", "external_wall_insulation"}: + if mt in { + "cavity_wall_insulation", + "internal_wall_insulation", + "external_wall_insulation", + }: installed_by_property[property_id].add("mechanical_ventilation") return installed_by_property @@ -810,7 +886,9 @@ def get_all_default_plan_recommendations( PlanRecommendations.plan_id, Recommendation, ) - .join(Recommendation, Recommendation.id == PlanRecommendations.recommendation_id) + 
.join( + Recommendation, Recommendation.id == PlanRecommendations.recommendation_id + ) .filter(PlanRecommendations.plan_id.in_(plan_ids)) .filter(Recommendation.default.is_(True)) .all() @@ -835,9 +913,14 @@ def filter_remaining_recommendations( return recommendations return [ - r for r in recommendations + r + for r in recommendations if ( - (r.measure_type.value if hasattr(r.measure_type, "value") else r.measure_type) + ( + r.measure_type.value + if hasattr(r.measure_type, "value") + else r.measure_type + ) not in installed_types ) ] @@ -845,11 +928,11 @@ def filter_remaining_recommendations( def compute_plan_updates( session, - plans: List[Plan], + plans: List[PlanModel], properties_by_id: Dict[int, PropertyModel], epcs_by_property_id: Dict[int, PropertyDetailsEpcModel], installed_types_by_property_id, - all_ventilation_measures + all_ventilation_measures, ) -> List[dict]: """ Computes plan metrics after marking some recommendations as already installed. @@ -921,39 +1004,34 @@ def compute_plan_updates( # ): # continue - updates.append({ - "plan_id": plan.id, - "property_id": plan.property_id, - - # SAP / EPC - "post_sap_points": post_sap, - "post_epc_rating": sap_to_epc(post_sap), - - # Carbon - "co2_savings": remaining["co2_savings"], - "post_co2_emissions": post_co2, - - # Energy bills - "energy_bill_savings": remaining["energy_bill_savings"], - "post_energy_bill": post_bill, - - # Energy consumption - "energy_consumption_savings": remaining["energy_consumption_savings"], - "post_energy_consumption": post_kwh, - - # Valuation (safe) - "valuation_increase": remaining["valuation_increase"], - "valuation_post_retrofit": ( - prop.current_valuation - + remaining["valuation_increase"] - if prop.current_valuation is not None - else None - ), - - # Costs - "cost_of_works": remaining["cost_of_works"], - "contingency_cost": remaining["contingency_cost"], - }) + updates.append( + { + "plan_id": plan.id, + "property_id": plan.property_id, + # SAP / EPC + 
"post_sap_points": post_sap, + "post_epc_rating": sap_to_epc(post_sap), + # Carbon + "co2_savings": remaining["co2_savings"], + "post_co2_emissions": post_co2, + # Energy bills + "energy_bill_savings": remaining["energy_bill_savings"], + "post_energy_bill": post_bill, + # Energy consumption + "energy_consumption_savings": remaining["energy_consumption_savings"], + "post_energy_consumption": post_kwh, + # Valuation (safe) + "valuation_increase": remaining["valuation_increase"], + "valuation_post_retrofit": ( + prop.current_valuation + remaining["valuation_increase"] + if prop.current_valuation is not None + else None + ), + # Costs + "cost_of_works": remaining["cost_of_works"], + "contingency_cost": remaining["contingency_cost"], + } + ) property_to_installed_types[prop.id] = installed_types @@ -1065,7 +1143,6 @@ def compute_epc_rebasing_updates( updates[property_id] = { "property_id": property_id, - # Originals (only set once) "original_co2_emissions": ( epc.original_co2_emissions @@ -1087,7 +1164,6 @@ def compute_epc_rebasing_updates( if epc.original_current_energy_demand_heating_hotwater is not None else epc.current_energy_demand_heating_hotwater ), - # Adjustments (always re-applied from originals) "installed_measures_co2_adjustment": adj["co2"], "installed_measures_energy_demand_adjustment": adj["energy_kwh"], @@ -1106,8 +1182,8 @@ def persist_plan_updates(plan_updates: list[dict]): with db_session() as session: plans = ( - session.query(Plan) - .filter(Plan.id.in_([u["plan_id"] for u in plan_updates])) + session.query(PlanModel) + .filter(PlanModel.id.in_([u["plan_id"] for u in plan_updates])) .all() ) @@ -1168,20 +1244,17 @@ def persist_epc_rebasing_updates( # Store originals once epc.original_co2_emissions = u["original_co2_emissions"] - epc.original_primary_energy_consumption = ( - u["original_primary_energy_consumption"] - ) - epc.original_current_energy_demand = ( - u["original_current_energy_demand"] - ) - 
epc.original_current_energy_demand_heating_hotwater = ( - u["original_current_energy_demand_heating_hotwater"] - ) + epc.original_primary_energy_consumption = u[ + "original_primary_energy_consumption" + ] + epc.original_current_energy_demand = u["original_current_energy_demand"] + epc.original_current_energy_demand_heating_hotwater = u[ + "original_current_energy_demand_heating_hotwater" + ] # Apply rebased values epc.co2_emissions = ( - u["original_co2_emissions"] - - u["installed_measures_co2_adjustment"] + u["original_co2_emissions"] - u["installed_measures_co2_adjustment"] ) epc.primary_energy_consumption = ( @@ -1195,18 +1268,18 @@ def persist_epc_rebasing_updates( ) # Flags + audit fields - epc.installed_measures_co2_adjustment = ( - u["installed_measures_co2_adjustment"] - ) - epc.installed_measures_energy_demand_adjustment = ( - u["installed_measures_energy_demand_adjustment"] - ) - epc.installed_measures_total_energy_bill_adjustment = ( - u["installed_measures_total_energy_bill_adjustment"] - ) - epc.installed_measures_heat_demand_adjustment = ( - u["installed_measures_heat_demand_adjustment"] - ) + epc.installed_measures_co2_adjustment = u[ + "installed_measures_co2_adjustment" + ] + epc.installed_measures_energy_demand_adjustment = u[ + "installed_measures_energy_demand_adjustment" + ] + epc.installed_measures_total_energy_bill_adjustment = u[ + "installed_measures_total_energy_bill_adjustment" + ] + epc.installed_measures_heat_demand_adjustment = u[ + "installed_measures_heat_demand_adjustment" + ] epc.is_epc_adjusted_for_installed_measures = True print(f"✅ Updated {len(epcs)} EPC records") @@ -1254,9 +1327,7 @@ def initialise_original_property_and_epc_values(portfolio_id: int): updated = True if epc.original_primary_energy_consumption is None: - epc.original_primary_energy_consumption = ( - epc.primary_energy_consumption - ) + epc.original_primary_energy_consumption = epc.primary_energy_consumption updated = True if epc.original_current_energy_demand 
is None: @@ -1314,21 +1385,19 @@ def get_installed_ventilation_adjustments_by_uprn_for_portfolio( rows = ( session.query( InstalledMeasure.uprn.label("uprn"), - - func.coalesce(func.sum(InstalledMeasure.sap_points), 0.0) - .label("sap_points"), - - func.coalesce(func.sum(InstalledMeasure.carbon_savings), 0.0) - .label("co2"), - - func.coalesce(func.sum(InstalledMeasure.kwh_savings), 0.0) - .label("energy_kwh"), - - func.coalesce(func.sum(InstalledMeasure.bill_savings), 0.0) - .label("energy_bill"), - - func.coalesce(func.sum(InstalledMeasure.heat_demand_savings), 0.0) - .label("heat_demand"), + func.coalesce(func.sum(InstalledMeasure.sap_points), 0.0).label( + "sap_points" + ), + func.coalesce(func.sum(InstalledMeasure.carbon_savings), 0.0).label("co2"), + func.coalesce(func.sum(InstalledMeasure.kwh_savings), 0.0).label( + "energy_kwh" + ), + func.coalesce(func.sum(InstalledMeasure.bill_savings), 0.0).label( + "energy_bill" + ), + func.coalesce(func.sum(InstalledMeasure.heat_demand_savings), 0.0).label( + "heat_demand" + ), ) .filter(InstalledMeasure.is_active.is_(True)) .filter(InstalledMeasure.measure_type == "mechanical_ventilation") @@ -1370,8 +1439,9 @@ def mark_recommendations_as_installed( stmt = ( update(Recommendation) .where( - tuple_(Recommendation.property_id, Recommendation.measure_type) - .in_(property_measure_pairs) + tuple_(Recommendation.property_id, Recommendation.measure_type).in_( + property_measure_pairs + ) ) .values(already_installed=True) ) @@ -1400,13 +1470,17 @@ with db_read_session() as session: .all() ) - all_ventilation_measures = get_installed_ventilation_adjustments_by_uprn_for_portfolio(session, PORTFOLIO_ID) - installed_types_by_property_id = get_installed_measure_types_by_property_id_for_portfolio(session, PORTFOLIO_ID) + all_ventilation_measures = ( + get_installed_ventilation_adjustments_by_uprn_for_portfolio( + session, PORTFOLIO_ID + ) + ) + installed_types_by_property_id = ( + 
get_installed_measure_types_by_property_id_for_portfolio(session, PORTFOLIO_ID) + ) plans = ( - session.query(Plan) - .filter(Plan.portfolio_id == PORTFOLIO_ID) - .all() + session.query(PlanModel).filter(PlanModel.portfolio_id == PORTFOLIO_ID).all() ) epcs = { @@ -1419,23 +1493,17 @@ with db_read_session() as session: ) } - installed_adjustments = ( - get_installed_measure_adjustments_by_uprn_for_portfolio( - session, - PORTFOLIO_ID, - ) + installed_adjustments = get_installed_measure_adjustments_by_uprn_for_portfolio( + session, + PORTFOLIO_ID, ) property_updates = compute_property_sap_updates( - properties, - {uprn: v["sap_points"] for uprn, v in installed_adjustments.items()} + properties, {uprn: v["sap_points"] for uprn, v in installed_adjustments.items()} ) properties_by_id = {p.id: p for p in properties} - property_updates_by_id = { - u["property_id"]: u - for u in property_updates - } + property_updates_by_id = {u["property_id"]: u for u in property_updates} epc_updates = compute_epc_rebasing_updates( epcs, @@ -1453,9 +1521,7 @@ with db_read_session() as session: ) # Used to mark recommendations - pairs = build_installed_recommendation_pairs( - installed_types_by_property_id - ) + pairs = build_installed_recommendation_pairs(installed_types_by_property_id) from copy import deepcopy @@ -1466,36 +1532,33 @@ for u in plan_updates_comparison: if not before: continue - u.update({ - # SAP - "before_sap_points": before.post_sap_points, - "after_sap_points": u["post_sap_points"], - - # Carbon - "before_post_co2_emissions": before.post_co2_emissions, - "after_post_co2_emissions": u["post_co2_emissions"], - - # Costs - "before_cost_of_works": before.cost_of_works, - "after_cost_of_works": u["cost_of_works"], - - "before_contingency_cost": before.contingency_cost, - "after_contingency_cost": u["contingency_cost"], - }) + u.update( + { + # SAP + "before_sap_points": before.post_sap_points, + "after_sap_points": u["post_sap_points"], + # Carbon + 
"before_post_co2_emissions": before.post_co2_emissions, + "after_post_co2_emissions": u["post_co2_emissions"], + # Costs + "before_cost_of_works": before.cost_of_works, + "after_cost_of_works": u["cost_of_works"], + "before_contingency_cost": before.contingency_cost, + "after_contingency_cost": u["contingency_cost"], + } + ) plan_updates_df = pd.DataFrame(plan_updates_comparison) plan_updates_df["delta_sap_points"] = ( - plan_updates_df["after_sap_points"] - - plan_updates_df["before_sap_points"] + plan_updates_df["after_sap_points"] - plan_updates_df["before_sap_points"] ) plan_updates_df["delta_carbon"] = ( plan_updates_df["after_post_co2_emissions"] - plan_updates_df["before_post_co2_emissions"] ) plan_updates_df["delta_cost_of_works"] = ( - plan_updates_df["after_cost_of_works"] - - plan_updates_df["before_cost_of_works"] + plan_updates_df["after_cost_of_works"] - plan_updates_df["before_cost_of_works"] ) plan_updates_df["delta_contingency_cost"] = ( plan_updates_df["after_contingency_cost"] @@ -1503,12 +1566,14 @@ plan_updates_df["delta_contingency_cost"] = ( ) # High-level sanity checks -summary = plan_updates_df[[ - "delta_sap_points", - "delta_carbon", - "delta_cost_of_works", - "delta_contingency_cost", -]].sum() +summary = plan_updates_df[ + [ + "delta_sap_points", + "delta_carbon", + "delta_cost_of_works", + "delta_contingency_cost", + ] +].sum() print(summary) @@ -1619,17 +1684,15 @@ def apply_appliance_carbon_to_plans( .all() ) - epc_by_property_id = { - e.property_id: e for e in epcs - } + epc_by_property_id = {e.property_id: e for e in epcs} # -------------------------------------------- # Load plans with post carbon # -------------------------------------------- plans = ( - session.query(Plan) - .filter(Plan.portfolio_id == portfolio_id) - .filter(Plan.post_co2_emissions.isnot(None)) + session.query(PlanModel) + .filter(PlanModel.portfolio_id == portfolio_id) + .filter(PlanModel.post_co2_emissions.isnot(None)) .all() ) @@ -1682,13 +1745,7 @@ def 
apply_appliance_carbon_to_plans( # Get all uprns for entries in already installed, from the database with db_read_session() as session: - db_uprns = { - str(r[0]) - for r in ( - session.query(InstalledMeasure.uprn) - .all() - ) - } + db_uprns = {str(r[0]) for r in (session.query(InstalledMeasure.uprn).all())} # What is the overlap of these properties and the properties in portfolo 430 sal_data = pd.read_excel( diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/h_reset_estimated_epcs.py b/etl/customers/peabody/Nov 2025 Consulting Project/h_reset_estimated_epcs.py index 67ff2c85..e3008f65 100644 --- a/etl/customers/peabody/Nov 2025 Consulting Project/h_reset_estimated_epcs.py +++ b/etl/customers/peabody/Nov 2025 Consulting Project/h_reset_estimated_epcs.py @@ -3,31 +3,41 @@ from sqlalchemy.orm import Session from sqlalchemy import text, select from backend.app.db.connection import db_read_session from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel -from backend.app.db.models.recommendations import Plan +from backend.app.db.models.recommendations import PlanModel PORTFOLIO_ID = 435 with db_read_session() as session: # Get all properties from PropertyDetailsEpcModel, where estimated is True, for portfolio 419 - estimated_epcs = session.query(PropertyDetailsEpcModel).filter( - # PropertyDetailsEpcModel.estimated == True, - PropertyDetailsEpcModel.property_id.in_( - session.query(PropertyModel.id).filter(PropertyModel.portfolio_id == PORTFOLIO_ID) + estimated_epcs = ( + session.query(PropertyDetailsEpcModel) + .filter( + # PropertyDetailsEpcModel.estimated == True, + PropertyDetailsEpcModel.property_id.in_( + session.query(PropertyModel.id).filter( + PropertyModel.portfolio_id == PORTFOLIO_ID + ) + ) ) - ).all() + .all() + ) # Get the ids estimated_epc_ids = [epc.property_id for epc in estimated_epcs] # I want to get the UPRNS for these properties, from the property model with db_read_session() as session: - estimated_uprns = 
session.query(PropertyModel.uprn).filter( - PropertyModel.id.in_( - session.query(PropertyDetailsEpcModel.property_id).filter( - PropertyDetailsEpcModel.id.in_(estimated_epc_ids) + estimated_uprns = ( + session.query(PropertyModel.uprn) + .filter( + PropertyModel.id.in_( + session.query(PropertyDetailsEpcModel.property_id).filter( + PropertyDetailsEpcModel.id.in_(estimated_epc_ids) + ) ) ) - ).all() + .all() + ) estimated_uprns_list = [uprn for (uprn,) in estimated_uprns] @@ -35,16 +45,16 @@ with db_read_session() as session: sal_1 = pd.read_excel( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/20251213 Model " "data.xlsx", - sheet_name="Standardised Asset List" + sheet_name="Standardised Asset List", ) sal_2 = pd.read_excel( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/20260105 - additional " "UPRNS.xlsx", - sheet_name="Standardised Asset List" + sheet_name="Standardised Asset List", ) sal = pd.concat([sal_1, sal_2]) -sal = sal.drop_duplicates(subset=['epc_os_uprn']) +sal = sal.drop_duplicates(subset=["epc_os_uprn"]) estimated_to_refresh = sal[sal["epc_os_uprn"].isin(estimated_uprns_list)].copy() @@ -55,20 +65,24 @@ SCENARIOS = [ # 861, # EPC C, No EWI/IWI, No Solid Floor, ASHP 3.0 COP # 859, # EPC C - no solid floor, ashp 3.0 # 885, # EPC B - fabric first, no solid floor, ashp 3.0 - 908, 909, 910 + 908, + 909, + 910, ] # Get all plans, associated to these properties - the property IDs are in estimated_epc_ids with db_read_session() as session: result = session.execute( - select(Plan.id, Plan.property_id) - .where(Plan.property_id.in_(estimated_epc_ids)) + select(PlanModel.id, PlanModel.property_id).where( + PlanModel.property_id.in_(estimated_epc_ids) + ) ) plans = [ { "plan_id": row.id, "property_id": row.property_id, - } for row in result + } + for row in result ] df = pd.DataFrame(plans) @@ -96,12 +110,14 @@ def delete_plan_batch(session: Session, plan_ids: list[int]): 
# recommendation_materials # ---------------------------- session.execute( - text(""" + text( + """ DELETE FROM recommendation_materials rm USING plan_recommendations pr WHERE rm.recommendation_id = pr.recommendation_id AND pr.plan_id = ANY(:plan_ids) - """), + """ + ), params, ) @@ -109,10 +125,12 @@ def delete_plan_batch(session: Session, plan_ids: list[int]): # plan_recommendations # ---------------------------- session.execute( - text(""" + text( + """ DELETE FROM plan_recommendations WHERE plan_id = ANY(:plan_ids) - """), + """ + ), params, ) @@ -120,14 +138,16 @@ def delete_plan_batch(session: Session, plan_ids: list[int]): # recommendations (only those used by these plans) # ---------------------------- session.execute( - text(""" + text( + """ DELETE FROM recommendation r WHERE r.id IN ( SELECT DISTINCT recommendation_id FROM plan_recommendations WHERE plan_id = ANY(:plan_ids) ) - """), + """ + ), params, ) @@ -135,17 +155,21 @@ def delete_plan_batch(session: Session, plan_ids: list[int]): # plans LAST # ---------------------------- session.execute( - text(""" + text( + """ DELETE FROM plan WHERE id = ANY(:plan_ids) - """), + """ + ), params, ) # Store the SAL -filename = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/20260101 " - "sal.xlsx") +filename = ( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/20260101 " + "sal.xlsx" +) with pd.ExcelWriter(filename) as writer: sal.to_excel(writer, sheet_name="Standardised Asset List", index=False) @@ -164,34 +188,36 @@ with pd.ExcelWriter(filename) as writer: b1 = pd.read_excel( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260101 " "sal.xlsx", - sheet_name="batch 1" + sheet_name="batch 1", ) b2 = pd.read_excel( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260101 " "sal.xlsx", - sheet_name="batch 2" + 
sheet_name="batch 2", ) b3 = pd.read_excel( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260101 " "sal.xlsx", - sheet_name="batch 3" + sheet_name="batch 3", ) b4 = pd.read_excel( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260101 " "sal.xlsx", - sheet_name="batch 4" + sheet_name="batch 4", ) b5 = pd.read_excel( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260101 " "sal.xlsx", - sheet_name="batch 5" + sheet_name="batch 5", ) # Batch 6 should be the remaining total = pd.concat([b1, b2, b3, b4, b5]) remaining = sal[~sal["epc_os_uprn"].isin(total["epc_os_uprn"].values)] # Create new output -filename = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/" - "20260107 corrected batch 6 sal.xlsx") +filename = ( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/" + "20260107 corrected batch 6 sal.xlsx" +) with pd.ExcelWriter(filename) as writer: sal.to_excel(writer, sheet_name="Standardised Asset List", index=False) @@ -206,6 +232,4 @@ with pd.ExcelWriter(filename) as writer: b5.to_excel(writer, sheet_name="batch 5", index=False) remaining.to_excel(writer, sheet_name="batch 6", index=False) -all_together = pd.concat( - [b1, b2, b3, b4, b5, remaining] -) +all_together = pd.concat([b1, b2, b3, b4, b5, remaining]) diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/k_deck_stats.py b/etl/customers/peabody/Nov 2025 Consulting Project/k_deck_stats.py index 68655e80..0ec34e7c 100644 --- a/etl/customers/peabody/Nov 2025 Consulting Project/k_deck_stats.py +++ b/etl/customers/peabody/Nov 2025 Consulting Project/k_deck_stats.py @@ -110,14 +110,17 @@ import pandas as pd # Solar PV savings - we need the amount of solar PV bill savings from sqlalchemy.orm import sessionmaker from 
backend.app.db.connection import db_engine -from backend.app.db.models.recommendations import Recommendation, Plan, PlanRecommendations, RecommendationMaterials +from backend.app.db.models.recommendations import ( + Recommendation, + PlanModel, + PlanRecommendations, + RecommendationMaterials, +) from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel from collections import defaultdict PORTFOLIO_ID = 485 # Peabody -SCENARIOS = [ - 970 -] +SCENARIOS = [970] scenario_names = { 970: "EPC C - no solid floor, ashp 3.0", } @@ -130,22 +133,26 @@ def get_data(portfolio_id, scenario_ids): # -------------------- # Properties # -------------------- - properties_query = session.query( - PropertyModel, - PropertyDetailsEpcModel - ).join( - PropertyDetailsEpcModel, - PropertyModel.id == PropertyDetailsEpcModel.property_id - ).filter( - PropertyModel.portfolio_id == portfolio_id - ).all() + properties_query = ( + session.query(PropertyModel, PropertyDetailsEpcModel) + .join( + PropertyDetailsEpcModel, + PropertyModel.id == PropertyDetailsEpcModel.property_id, + ) + .filter(PropertyModel.portfolio_id == portfolio_id) + .all() + ) properties_data = [ { - **{col.name: getattr(p.PropertyModel, col.name) - for col in PropertyModel.__table__.columns}, - **{col.name: getattr(p.PropertyDetailsEpcModel, col.name) - for col in PropertyDetailsEpcModel.__table__.columns}, + **{ + col.name: getattr(p.PropertyModel, col.name) + for col in PropertyModel.__table__.columns + }, + **{ + col.name: getattr(p.PropertyDetailsEpcModel, col.name) + for col in PropertyDetailsEpcModel.__table__.columns + }, } for p in properties_query ] @@ -153,12 +160,12 @@ def get_data(portfolio_id, scenario_ids): # -------------------- # Plans # -------------------- - plans_query = session.query(Plan).filter( - Plan.scenario_id.in_(scenario_ids) - ).all() + plans_query = ( + session.query(PlanModel).filter(PlanModel.scenario_id.in_(scenario_ids)).all() + ) plans_data = [ - {col.name: 
getattr(plan, col.name) for col in Plan.__table__.columns} + {col.name: getattr(plan, col.name) for col in PlanModel.__table__.columns} for plan in plans_query ] @@ -167,27 +174,29 @@ def get_data(portfolio_id, scenario_ids): # -------------------- # Recommendations (NO materials yet) # -------------------- - recommendations_query = session.query( - Recommendation, - Plan.scenario_id - ).join( - PlanRecommendations, - Recommendation.id == PlanRecommendations.recommendation_id - ).join( - Plan, - Plan.id == PlanRecommendations.plan_id - ).filter( - PlanRecommendations.plan_id.in_(plan_ids), - Recommendation.default.is_(True), - Recommendation.already_installed.is_(False) - ).all() + recommendations_query = ( + session.query(Recommendation, PlanModel.scenario_id) + .join( + PlanRecommendations, + Recommendation.id == PlanRecommendations.recommendation_id, + ) + .join(PlanModel, PlanModel.id == PlanRecommendations.plan_id) + .filter( + PlanRecommendations.plan_id.in_(plan_ids), + Recommendation.default.is_(True), + Recommendation.already_installed.is_(False), + ) + .all() + ) recommendations_data = [ { - **{col.name: getattr(r.Recommendation, col.name) - for col in Recommendation.__table__.columns}, + **{ + col.name: getattr(r.Recommendation, col.name) + for col in Recommendation.__table__.columns + }, "scenario_id": r.scenario_id, - "materials": [] # placeholder + "materials": [], # placeholder } for r in recommendations_query ] @@ -197,23 +206,25 @@ def get_data(portfolio_id, scenario_ids): # -------------------- # Recommendation materials (SEPARATE QUERY) # -------------------- - materials_query = session.query( - RecommendationMaterials - ).filter( - RecommendationMaterials.recommendation_id.in_(recommendation_ids) - ).all() + materials_query = ( + session.query(RecommendationMaterials) + .filter(RecommendationMaterials.recommendation_id.in_(recommendation_ids)) + .all() + ) # Group materials by recommendation_id materials_by_recommendation = defaultdict(list) for 
m in materials_query: - materials_by_recommendation[m.recommendation_id].append({ - "material_id": m.material_id, - "depth": m.depth, - "quantity": m.quantity, - "quantity_unit": m.quantity_unit, - "estimated_cost": m.estimated_cost, - }) + materials_by_recommendation[m.recommendation_id].append( + { + "material_id": m.material_id, + "depth": m.depth, + "quantity": m.quantity, + "quantity_unit": m.quantity_unit, + "estimated_cost": m.estimated_cost, + } + ) # Attach materials safely (no filtering side effects) for r in recommendations_data: @@ -236,12 +247,11 @@ with pd.ExcelWriter("hackney.xlsx", engine="openpyxl") as writer: recommendations_df.to_excel(writer, sheet_name="recommendations", index=False) properties_df.to_excel(writer, sheet_name="properties", index=False) - + # solar_pv_recommendations = recommendations_df[recommendations_df["measure_type"] == "solar_pv"] # average_savings = solar_pv_recommendations.groupby("scenario_id")["energy_cost_savings"].mean().reset_index() - # # Check tenures # initial_asset_data = pd.read_excel( # "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody " diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/m_reduced_sample_revised.py b/etl/customers/peabody/Nov 2025 Consulting Project/m_reduced_sample_revised.py index a18dc315..b7010cf7 100644 --- a/etl/customers/peabody/Nov 2025 Consulting Project/m_reduced_sample_revised.py +++ b/etl/customers/peabody/Nov 2025 Consulting Project/m_reduced_sample_revised.py @@ -4,7 +4,7 @@ import pandas as pd full_sal = pd.read_excel( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final " "SAL/Depracated/20260107 corrected batch 6 sal.xlsx", - sheet_name="Standardised Asset List" + sheet_name="Standardised Asset List", ) # ------Pull in the reduced sample ------ @@ -12,7 +12,7 @@ full_sal = pd.read_excel( reduced_sal = pd.read_excel( 
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260112 - " "ownership filtered sal.xlsx", - sheet_name="Standardised Asset List" + sheet_name="Standardised Asset List", ) # ------ Pull in the confirmed ownership column from Peabody ------ @@ -20,18 +20,20 @@ new_asset_data = pd.read_excel( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/2025_11_11 " "- Peabody " "- Data Extracts for Domna v2.xlsx", - sheet_name="Properties" + sheet_name="Properties", ) correct_sample = new_asset_data[ ~new_asset_data["AH Tenure"].isin( - ["Commercial", - "Freeholder", - "HOMEBUY / EQUITY LOAN", - "Leaseholder", - "Outright Sale", - "SHARED EQUITY", - "Shared Ownership"] + [ + "Commercial", + "Freeholder", + "HOMEBUY / EQUITY LOAN", + "Leaseholder", + "Outright Sale", + "SHARED EQUITY", + "Shared Ownership", + ] ) ].copy() @@ -41,9 +43,7 @@ stuff_to_add = correct_sample[ ~correct_sample["UPRN"].isin(reduced_sal["landlord_property_id"].values) ]["UPRN"].values -sal_to_add = full_sal[ - full_sal["domna_property_id"].isin(stuff_to_add) -].copy() +sal_to_add = full_sal[full_sal["domna_property_id"].isin(stuff_to_add)].copy() # ------- Stuff to remove ------- stuff_to_remove = reduced_sal[ @@ -88,7 +88,7 @@ from backend.app.db.models.portfolio import PropertyModel from backend.app.db.connection import db_session, db_read_session from sqlalchemy import select, func from sqlalchemy.orm import Session -from backend.app.db.models.recommendations import Plan +from backend.app.db.models.recommendations import PlanModel uprns_to_be_deleted = to_delete["epc_os_uprn"].values.tolist() diff --git a/etl/customers/slide_utils.py b/etl/customers/slide_utils.py index 9170ab17..5e027a56 100644 --- a/etl/customers/slide_utils.py +++ b/etl/customers/slide_utils.py @@ -7,7 +7,7 @@ from sqlalchemy.sql import true from backend.app.db.utils import row2dict from backend.app.db.models.portfolio import 
PropertyModel, PropertyDetailsEpcModel from backend.app.db.models.recommendations import Recommendation -from backend.app.db.models.recommendations import Plan +from backend.app.db.models.recommendations import PlanModel from backend.app.utils import sap_to_epc EPC_COLOURS = { @@ -17,7 +17,7 @@ EPC_COLOURS = { "D": "#fdd401", "E": "#fdab67", "F": "#ee8023", - "G": "#e71437" + "G": "#e71437", } @@ -33,22 +33,27 @@ def get_properties_with_default_recommendations(session: Session, portfolio_id: its associated default recommendations if any. """ # Adjust the join to correctly filter recommendations while including all properties - query = session.query(PropertyModel, Recommendation).outerjoin(Recommendation, - (Recommendation.property_id == PropertyModel.id) & ( - Recommendation.default == true())) \ - .filter(PropertyModel.portfolio_id == portfolio_id) \ + query = ( + session.query(PropertyModel, Recommendation) + .outerjoin( + Recommendation, + (Recommendation.property_id == PropertyModel.id) + & (Recommendation.default == true()), + ) + .filter(PropertyModel.portfolio_id == portfolio_id) .all() + ) properties = {} for property, recommendation in query: # Ensure the property is added once with an empty list of recommendations initially if property.id not in properties: properties[property.id] = row2dict(property) - properties[property.id]['recommendations'] = [] + properties[property.id]["recommendations"] = [] # Append recommendations if they exist and meet the criteria (already filtered by the query) if recommendation and recommendation.default: - properties[property.id]['recommendations'].append(row2dict(recommendation)) + properties[property.id]["recommendations"].append(row2dict(recommendation)) return list(properties.values()) @@ -62,11 +67,16 @@ def get_property_details_by_portfolio_id(session: Session, portfolio_id: int): :return: A list of dictionaries, where each dictionary represents a property's details. 
Returns an empty list if no property details are found. """ - property_details = session.query(PropertyDetailsEpcModel).filter( - PropertyDetailsEpcModel.portfolio_id == portfolio_id).all() + property_details = ( + session.query(PropertyDetailsEpcModel) + .filter(PropertyDetailsEpcModel.portfolio_id == portfolio_id) + .all() + ) # Convert the SQLAlchemy objects to dictionaries - property_details_dict = [row2dict(pd) for pd in property_details] if property_details else [] + property_details_dict = ( + [row2dict(pd) for pd in property_details] if property_details else [] + ) return property_details_dict @@ -80,7 +90,9 @@ def get_plan_by_portfolio_id(session: Session, portfolio_id: int): :return: A list of dictionaries, where each dictionary represents a plan. Returns an empty list if no plans are found. """ - plans = session.query(Plan).filter(Plan.portfolio_id == portfolio_id).all() + plans = ( + session.query(PlanModel).filter(PlanModel.portfolio_id == portfolio_id).all() + ) # Convert the SQLAlchemy objects to dictionaries plans_dict = [row2dict(plan) for plan in plans] if plans else [] @@ -88,7 +100,14 @@ def get_plan_by_portfolio_id(session: Session, portfolio_id: int): return plans_dict -def plot_epc_distribution(df, customer_key, title='Your Units', background_color='white', bar_height=0.4, font_size=15): +def plot_epc_distribution( + df, + customer_key, + title="Your Units", + background_color="white", + bar_height=0.4, + font_size=15, +): """ Plots a horizontal bar chart of EPC rating distribution with adjustable bar thickness and text sizes. Allows setting the plot background color and dynamically adjusts text size and bar spacing. 
@@ -100,75 +119,113 @@ def plot_epc_distribution(df, customer_key, title='Your Units', background_color :param font_size: Base font size for text annotations (default 15) """ # Calculate dynamic figure size or adjust based on preferences - square_size = max(6, len(df) * 0.6) # Ensure minimum size and adjust based on number of entries + square_size = max( + 6, len(df) * 0.6 + ) # Ensure minimum size and adjust based on number of entries fig, ax = plt.subplots(figsize=(square_size, square_size)) fig.patch.set_facecolor(background_color) # Set figure background color ax.set_facecolor(background_color) # Set axes background color - df['percentage'] = df['percentage'].round(1) # Round the percentage values to 1 decimal place - df_sorted = df.sort_values('percentage', ascending=True) + df["percentage"] = df["percentage"].round( + 1 + ) # Round the percentage values to 1 decimal place + df_sorted = df.sort_values("percentage", ascending=True) # Plot bars with specified height for adjustable thickness - bars = ax.barh(df_sorted['current_epc_rating'], df_sorted['percentage'], - color=df_sorted['current_epc_rating'].map(EPC_COLOURS), edgecolor='none', height=bar_height) + bars = ax.barh( + df_sorted["current_epc_rating"], + df_sorted["percentage"], + color=df_sorted["current_epc_rating"].map(EPC_COLOURS), + edgecolor="none", + height=bar_height, + ) - epc_rating_font_size = font_size * 2 # EPC rating font size larger than base font size - count_percentage_font_size = font_size # Count (percentage) font size as base font size + epc_rating_font_size = ( + font_size * 2 + ) # EPC rating font size larger than base font size + count_percentage_font_size = ( + font_size # Count (percentage) font size as base font size + ) # Annotate bars with EPC ratings inside and count with percentage values outside for index, bar in enumerate(bars): width = bar.get_width() - epc_rating = df_sorted.iloc[index]['current_epc_rating'] - count = df_sorted.iloc[index]['count'] - percentage = 
df_sorted.iloc[index]['percentage'] + epc_rating = df_sorted.iloc[index]["current_epc_rating"] + count = df_sorted.iloc[index]["count"] + percentage = df_sorted.iloc[index]["percentage"] # EPC rating inside the bar with increased font size - ax.text(width - (width * 0.05), bar.get_y() + bar.get_height() / 2, - f"{epc_rating}", va='center', ha='right', color='white', fontsize=epc_rating_font_size) + ax.text( + width - (width * 0.05), + bar.get_y() + bar.get_height() / 2, + f"{epc_rating}", + va="center", + ha="right", + color="white", + fontsize=epc_rating_font_size, + ) # Count and percentage outside the bar, original font size - ax.text(width + 1, bar.get_y() + bar.get_height() / 2, - f"{count} ({percentage}%)", va='center', color='black', fontsize=count_percentage_font_size) + ax.text( + width + 1, + bar.get_y() + bar.get_height() / 2, + f"{count} ({percentage}%)", + va="center", + color="black", + fontsize=count_percentage_font_size, + ) - ax.set_title(title, fontsize=font_size * 1.2) # Adjust title font size proportionally - ax.tick_params(axis='x', which='both', bottom=False, top=False, - labelbottom=False) # Remove x-axis tick marks and values - ax.tick_params(axis='y', which='both', left=False, right=False, - labelleft=False) # Remove y-axis tick marks and labels - ax.spines['top'].set_visible(False) # Remove top spine - ax.spines['right'].set_visible(False) # Remove right spine - ax.spines['left'].set_visible(False) # Remove left spine - ax.spines['bottom'].set_visible(False) # Remove bottom spine + ax.set_title( + title, fontsize=font_size * 1.2 + ) # Adjust title font size proportionally + ax.tick_params( + axis="x", which="both", bottom=False, top=False, labelbottom=False + ) # Remove x-axis tick marks and values + ax.tick_params( + axis="y", which="both", left=False, right=False, labelleft=False + ) # Remove y-axis tick marks and labels + ax.spines["top"].set_visible(False) # Remove top spine + ax.spines["right"].set_visible(False) # Remove right spine 
+ ax.spines["left"].set_visible(False) # Remove left spine + ax.spines["bottom"].set_visible(False) # Remove bottom spine plt.tight_layout() # Adjust layout plt.show() # Save the figure as an image - figure_path = f'etl/customers/{customer_key}/epc_distribution_plot.png' - fig.savefig(figure_path, bbox_inches='tight') + figure_path = f"etl/customers/{customer_key}/epc_distribution_plot.png" + fig.savefig(figure_path, bbox_inches="tight") plt.close(fig) # Close the figure to free memory return fig, figure_path -def save_plot_to_image(figure, path='plot.png'): +def save_plot_to_image(figure, path="plot.png"): """ Saves a matplotlib figure to an image file for insertion into PowerPoint. """ - figure.savefig(path, bbox_inches='tight') + figure.savefig(path, bbox_inches="tight") plt.close(figure) -def save_figure_as_image(figure, filename='temp_plot.png'): +def save_figure_as_image(figure, filename="temp_plot.png"): """ Saves a matplotlib figure to an image file. """ figure.savefig(filename, dpi=300) - plt.close(figure) # Close the figure to prevent it from displaying in notebooks or Python environments + plt.close( + figure + ) # Close the figure to prevent it from displaying in notebooks or Python environments -def add_commentary_with_bullets(slide, commentary, top_inches, left_inches=Inches(1), width_inches=Inches(8), - height_inches=Inches(2)): +def add_commentary_with_bullets( + slide, + commentary, + top_inches, + left_inches=Inches(1), + width_inches=Inches(8), + height_inches=Inches(2), +): """ Adds commentary with bullet points to a slide. @@ -179,7 +236,9 @@ def add_commentary_with_bullets(slide, commentary, top_inches, left_inches=Inche :param width_inches: The width of the commentary text box. :param height_inches: The height of the commentary text box. 
""" - txBox = slide.shapes.add_textbox(left_inches, top_inches, width_inches, height_inches) + txBox = slide.shapes.add_textbox( + left_inches, top_inches, width_inches, height_inches + ) tf = txBox.text_frame # Configure text frame @@ -192,7 +251,9 @@ def add_commentary_with_bullets(slide, commentary, top_inches, left_inches=Inche for i, section in enumerate(sections): if i > 0: - p = tf.add_paragraph() # Add a new paragraph for each section after the first + p = ( + tf.add_paragraph() + ) # Add a new paragraph for each section after the first else: p = tf.paragraphs[0] # Use the first paragraph for the first section p.text = section @@ -215,7 +276,9 @@ def add_slide_with_image(prs, title, img_path=None, commentary=None): # Determine the position of the commentary text box based on whether an image is included if img_path: # Add the image - slide.shapes.add_picture(img_path, Inches(1), Inches(1.5), Inches(8), Inches(4.5)) + slide.shapes.add_picture( + img_path, Inches(1), Inches(1.5), Inches(8), Inches(4.5) + ) # Position for commentary when image is present commentary_top = Inches(6) else: @@ -237,16 +300,18 @@ def create_powerpoint(data, save_location): prs = Presentation() for slide, slide_data in data.items(): - slide_figure_path = data[slide].get('image_path') - text = data[slide].get('text') - title = data[slide].get('title', "") + slide_figure_path = data[slide].get("image_path") + text = data[slide].get("text") + title = data[slide].get("title", "") add_slide_with_image(prs, title, slide_figure_path, text) # Save the presentation prs.save(save_location) -def create_recommendations_summary(recommendations_df, properties_df, property_details_df, sap_target): +def create_recommendations_summary( + recommendations_df, properties_df, property_details_df, sap_target +): # Aggregate the impact of the recommendations # We want: # Total number of sap points @@ -254,40 +319,52 @@ def create_recommendations_summary(recommendations_df, properties_df, property_d # 
total bill savings # total cost # Total Co2 impact - recommendations_summary = recommendations_df.groupby(["property_id"]).agg( - total_sap_points=("sap_points", "sum"), - total_valuation_impact=("property_valuation_increase", "sum"), - total_bill_savings=("energy_cost_savings", "sum"), - total_cost=("estimated_cost", "sum"), - total_carbon=("co2_equivalent_savings", "sum"), - adjusted_heat_demand=("adjusted_heat_demand", "sum") - ).reset_index() + recommendations_summary = ( + recommendations_df.groupby(["property_id"]) + .agg( + total_sap_points=("sap_points", "sum"), + total_valuation_impact=("property_valuation_increase", "sum"), + total_bill_savings=("energy_cost_savings", "sum"), + total_cost=("estimated_cost", "sum"), + total_carbon=("co2_equivalent_savings", "sum"), + adjusted_heat_demand=("adjusted_heat_demand", "sum"), + ) + .reset_index() + ) # Merge on current sap points, current CO2, current adjusted_heat_demand, current annual bill recommendations_summary = recommendations_summary.merge( - properties_df[["id", "uprn", "current_sap_points"]].rename(columns={"id": "property_id"}), on="property_id", - how="left" + properties_df[["id", "uprn", "current_sap_points"]].rename( + columns={"id": "property_id"} + ), + on="property_id", + how="left", ) recommendations_summary["expected_sap_points"] = ( - recommendations_summary["current_sap_points"] + recommendations_summary["total_sap_points"] + recommendations_summary["current_sap_points"] + + recommendations_summary["total_sap_points"] ) - recommendations_summary["expected_epc_rating"] = recommendations_summary["expected_sap_points"].apply( - lambda x: sap_to_epc(x) + recommendations_summary["expected_epc_rating"] = recommendations_summary[ + "expected_sap_points" + ].apply(lambda x: sap_to_epc(x)) + recommendations_summary["sap_difference"] = ( + sap_target - recommendations_summary["expected_sap_points"] ) - recommendations_summary["sap_difference"] = sap_target - 
recommendations_summary["expected_sap_points"] if property_details_df is not None: recommendations_summary = recommendations_summary.merge( - property_details_df[["uprn", "co2_emissions", "adjusted_energy_consumption", "energy_bill"]].rename( + property_details_df[ + ["uprn", "co2_emissions", "adjusted_energy_consumption", "energy_bill"] + ].rename( columns={ "id": "property_id", "co2_emissions": "current_co2", "adjusted_energy_consumption": "current_energy", - "energy_bill": "current_energy_bill" + "energy_bill": "current_energy_bill", } ), on="uprn", - how="left" + how="left", ) return recommendations_summary diff --git a/pytest.ini b/pytest.ini index ee203d46..9c9f8234 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,4 +1,4 @@ [pytest] pythonpath = . addopts = --cov-report term-missing --cov=etl/epc --cov=recommendations --cov=backend --cov=etl/epc_clean --cov=etl/spatial -testpaths = recommendations/tests backend/tests etl/epc/tests etl/epc_clean/tests etl/spatial/tests backend/condition/tests backend/address2UPRN/tests backend/onboarders/tests +testpaths = recommendations/tests backend/tests etl/epc/tests etl/epc_clean/tests etl/spatial/tests backend/condition/tests backend/address2UPRN/tests backend/onboarders/tests backend/categorisation/tests diff --git a/sfr/principal_pitch/2_export_data.py b/sfr/principal_pitch/2_export_data.py index b62e51d7..42d70a3a 100644 --- a/sfr/principal_pitch/2_export_data.py +++ b/sfr/principal_pitch/2_export_data.py @@ -13,7 +13,7 @@ from sqlalchemy.orm import sessionmaker from backend.app.db.connection import db_engine, db_read_session from backend.app.db.models.recommendations import ( Recommendation, - Plan, + PlanModel, PlanRecommendations, RecommendationMaterials, ) @@ -73,12 +73,12 @@ def get_data(portfolio_id, scenario_ids): # -------------------- latest_plans_subq = ( session.query( - Plan.scenario_id, - Plan.property_id, - func.max(Plan.created_at).label("latest_created_at"), + PlanModel.scenario_id, + 
PlanModel.property_id, + func.max(PlanModel.created_at).label("latest_created_at"), ) - .filter(Plan.scenario_id.in_(scenario_ids)) - .group_by(Plan.scenario_id, Plan.property_id) + .filter(PlanModel.scenario_id.in_(scenario_ids)) + .group_by(PlanModel.scenario_id, PlanModel.property_id) .subquery() ) @@ -87,12 +87,12 @@ def get_data(portfolio_id, scenario_ids): # ).all() plans_query = ( - session.query(Plan) + session.query(PlanModel) .join( latest_plans_subq, - (Plan.scenario_id == latest_plans_subq.c.scenario_id) - & (Plan.property_id == latest_plans_subq.c.property_id) - & (Plan.created_at == latest_plans_subq.c.latest_created_at), + (PlanModel.scenario_id == latest_plans_subq.c.scenario_id) + & (PlanModel.property_id == latest_plans_subq.c.property_id) + & (PlanModel.created_at == latest_plans_subq.c.latest_created_at), ) .all() ) @@ -108,7 +108,7 @@ def get_data(portfolio_id, scenario_ids): # ) plans_data = [ - {col.name: getattr(plan, col.name) for col in Plan.__table__.columns} + {col.name: getattr(plan, col.name) for col in PlanModel.__table__.columns} for plan in plans_query ] @@ -118,12 +118,14 @@ def get_data(portfolio_id, scenario_ids): # Recommendations (NO materials yet) # -------------------- recommendations_query = ( - session.query(Recommendation, Plan.scenario_id, PlanRecommendations.plan_id) + session.query( + Recommendation, PlanModel.scenario_id, PlanRecommendations.plan_id + ) .join( PlanRecommendations, Recommendation.id == PlanRecommendations.recommendation_id, ) - .join(Plan, Plan.id == PlanRecommendations.plan_id) + .join(PlanModel, PlanModel.id == PlanRecommendations.plan_id) .filter( PlanRecommendations.plan_id.in_(plan_ids), Recommendation.default.is_(True), diff --git a/utils/logger.py b/utils/logger.py index d643f36a..45370d3d 100644 --- a/utils/logger.py +++ b/utils/logger.py @@ -1,7 +1,13 @@ import logging +from os import PathLike +from typing import Optional, Union -def setup_logger(log_file=None, level=logging.INFO, 
overwrite_handler=False): +def setup_logger( + log_file: Optional[Union[str, PathLike[str]]] = None, + level: int = logging.INFO, + overwrite_handler: bool = False, +) -> logging.Logger: # Create a logger and set the logging level logger = logging.getLogger() logger.setLevel(level) diff --git a/utils/s3.py b/utils/s3.py index 2e67d4f0..b243b2ab 100644 --- a/utils/s3.py +++ b/utils/s3.py @@ -17,11 +17,11 @@ def read_from_s3(bucket_name, s3_file_name): :param s3_file_name: The file name to use for the saved data in S3 """ # Initialize a session using Amazon S3 - s3 = boto3.resource('s3') + s3 = boto3.resource("s3") # Get the MessagePack data from S3 obj = s3.Object(bucket_name, s3_file_name) - data = obj.get()['Body'].read() + data = obj.get()["Body"].read() return data @@ -36,7 +36,7 @@ def save_data_to_s3(data, bucket_name, s3_file_name): """ # Ensure you have AWS credentials set up - either via environment variables, AWS CLI, or IAM roles try: - s3 = boto3.client('s3') + s3 = boto3.client("s3") except NoCredentialsError: print("Credentials not available.") return @@ -46,12 +46,12 @@ def save_data_to_s3(data, bucket_name, s3_file_name): try: s3.put_object(Bucket=bucket_name, Key=s3_file_name, Body=data) - print(f'Successfully uploaded data to {bucket_name}/{s3_file_name}') + print(f"Successfully uploaded data to {bucket_name}/{s3_file_name}") except Exception as e: - print(f'Failed to upload data to {bucket_name}/{s3_file_name}: {str(e)}') + print(f"Failed to upload data to {bucket_name}/{s3_file_name}: {str(e)}") -def read_io_from_s3(bucket_name, file_key): +def read_io_from_s3(bucket_name: str, file_key: str) -> BytesIO: """ Read a file from S3 into a BytesIO object. 
This can be used by other methods to parse the response @@ -61,13 +61,13 @@ def read_io_from_s3(bucket_name, file_key): :param file_key: The file name of the shapefile in S3 :return: Io file to be parsed by another method """ - client = boto3.client('s3') + client = boto3.client("s3") # Get the Parquet file from S3 response = client.get_object(Bucket=bucket_name, Key=file_key) # Read the file into an io object - buffer = BytesIO(response['Body'].read()) + buffer = BytesIO(response["Body"].read()) return buffer @@ -86,7 +86,7 @@ def save_dataframe_to_s3_parquet(df, bucket_name, file_key): df.to_parquet(parquet_buffer) # Create the boto3 client - client = boto3.client('s3') + client = boto3.client("s3") # Upload the Parquet file to S3 client.put_object(Bucket=bucket_name, Key=file_key, Body=parquet_buffer.getvalue()) @@ -102,15 +102,14 @@ def read_dataframe_from_s3_parquet(bucket_name, file_key): """ if bucket_name is None: - raise ValueError("Bucket name is None when trying to read dataframe from parquet") + raise ValueError( + "Bucket name is None when trying to read dataframe from parquet" + ) if not file_key.endswith(".parquet"): raise ValueError("This file doesn't look like a parquet file") - parquet_buffer = read_io_from_s3( - bucket_name=bucket_name, - file_key=file_key - ) + parquet_buffer = read_io_from_s3(bucket_name=bucket_name, file_key=file_key) df = pd.read_parquet(parquet_buffer) @@ -130,7 +129,7 @@ def save_csv_to_s3(dataframe, bucket_name, file_name): bool: True if the file was successfully saved, False otherwise. 
""" # Initialize S3 client - s3 = boto3.client('s3') + s3 = boto3.client("s3") # Create an in-memory text stream csv_buffer = StringIO() @@ -159,7 +158,7 @@ def save_pickle_to_s3(data, bucket_name, s3_file_name): try: serialized_data = pickle.dumps(data) except Exception as e: - print(f'Failed to serialize data: {str(e)}') + print(f"Failed to serialize data: {str(e)}") return # Use save_data_to_s3 function to upload the serialized data to S3 @@ -175,9 +174,9 @@ def read_pickle_from_s3(bucket_name, s3_file_name): :return: The data read from the pickle file """ try: - s3 = boto3.client('s3') + s3 = boto3.client("s3") s3_response = s3.get_object(Bucket=bucket_name, Key=s3_file_name) - serialized_data = s3_response['Body'].read() + serialized_data = s3_response["Body"].read() except NoCredentialsError: logger.errpr("Credentials not available.") return None @@ -185,20 +184,24 @@ def read_pickle_from_s3(bucket_name, s3_file_name): logger.errpr("Incomplete credentials provided.") return None except Exception as e: - logger.error(f'Failed to download data from {bucket_name}/{s3_file_name}: {str(e)}') + logger.error( + f"Failed to download data from {bucket_name}/{s3_file_name}: {str(e)}" + ) return None # Deserialize data from pickle format try: data = pickle.loads(serialized_data) except Exception as e: - logger.error(f'Failed to deserialize data: {str(e)}') + logger.error(f"Failed to deserialize data: {str(e)}") return None return data -def read_excel_from_s3(bucket_name, file_key, header_row, drop_all_na=True, sheet_name=None): +def read_excel_from_s3( + bucket_name, file_key, header_row, drop_all_na=True, sheet_name=None +): """ Read an Excel file from an S3 bucket and return it as a pandas DataFrame. 
@@ -222,7 +225,7 @@ def read_excel_from_s3(bucket_name, file_key, header_row, drop_all_na=True, shee # Drop columns where all values are NaN if drop_all_na: - df.dropna(axis=1, how='all', inplace=True) + df.dropna(axis=1, how="all", inplace=True) # Reset index if the first column is just an index or entirely NaN df.reset_index(drop=True, inplace=True) @@ -254,7 +257,7 @@ def save_excel_to_s3(df, bucket_name, file_key): # Initialize a session using boto3 session = boto3.session.Session() - s3 = session.resource('s3') + s3 = session.resource("s3") # Upload the Excel file from the buffer to S3 bucket = s3.Bucket(bucket_name) @@ -264,17 +267,19 @@ def save_excel_to_s3(df, bucket_name, file_key): def read_csv_from_s3(bucket_name, filepath): - logger.info(f"Reading CSV file from S3 bucket '{bucket_name}' with key '{filepath}'") - s3 = boto3.client('s3') + logger.info( + f"Reading CSV file from S3 bucket '{bucket_name}' with key '{filepath}'" + ) + s3 = boto3.client("s3") # Get the object from s3 s3_object = s3.get_object(Bucket=bucket_name, Key=filepath) # Read the CSV body from the s3 object - body = s3_object['Body'].read() + body = s3_object["Body"].read() # Use StringIO to create a file-like object from the string - csv_data = StringIO(body.decode('utf-8')) + csv_data = StringIO(body.decode("utf-8")) # Use csv library to read it into a list of dictionaries reader = csv.DictReader(csv_data) @@ -292,14 +297,16 @@ def list_files_in_s3_folder(bucket_name, folder_name): :return: A list of file keys in the specified S3 folder. """ try: - s3 = boto3.client('s3') + s3 = boto3.client("s3") response = s3.list_objects_v2(Bucket=bucket_name, Prefix=folder_name) - if 'Contents' not in response: - logger.info(f"No files found in folder {folder_name} in bucket {bucket_name}.") + if "Contents" not in response: + logger.info( + f"No files found in folder {folder_name} in bucket {bucket_name}." 
+ ) return [] - file_keys = [content['Key'] for content in response['Contents']] + file_keys = [content["Key"] for content in response["Contents"]] return file_keys except NoCredentialsError: @@ -309,7 +316,9 @@ def list_files_in_s3_folder(bucket_name, folder_name): logger.error("Incomplete credentials provided.") return [] except Exception as e: - logger.error(f'Failed to list files in folder {folder_name} in bucket {bucket_name}: {str(e)}') + logger.error( + f"Failed to list files in folder {folder_name} in bucket {bucket_name}: {str(e)}" + ) return [] @@ -335,22 +344,30 @@ def list_files_and_subfolders_in_s3_folder(bucket_name, folder_name): """ # For this function, folder_name should end with a forward slash - if not folder_name.endswith('/'): - folder_name += '/' + if not folder_name.endswith("/"): + folder_name += "/" try: - s3 = boto3.client('s3') - response = s3.list_objects_v2(Bucket=bucket_name, Prefix=folder_name, Delimiter='/') + s3 = boto3.client("s3") + response = s3.list_objects_v2( + Bucket=bucket_name, Prefix=folder_name, Delimiter="/" + ) items = [] # Add files to the list - if 'Contents' in response: - items.extend([content['Key'] for content in response['Contents'] if content['Key'] != folder_name]) + if "Contents" in response: + items.extend( + [ + content["Key"] + for content in response["Contents"] + if content["Key"] != folder_name + ] + ) # Add immediate subfolders to the list - if 'CommonPrefixes' in response: - items.extend([prefix['Prefix'] for prefix in response['CommonPrefixes']]) + if "CommonPrefixes" in response: + items.extend([prefix["Prefix"] for prefix in response["CommonPrefixes"]]) return items @@ -361,7 +378,9 @@ def list_files_and_subfolders_in_s3_folder(bucket_name, folder_name): logger.error("Incomplete credentials provided.") return [] except Exception as e: - logger.error(f'Failed to list files and subfolders in folder {folder_name} in bucket {bucket_name}: {str(e)}') + logger.error( + f"Failed to list files and 
subfolders in folder {folder_name} in bucket {bucket_name}: {str(e)}" + ) return [] @@ -374,15 +393,21 @@ def list_xmls_in_s3_folder(bucket_name, folder_name): :return: A list of XML file keys in the specified S3 folder. """ try: - s3 = boto3.client('s3') + s3 = boto3.client("s3") response = s3.list_objects_v2(Bucket=bucket_name, Prefix=folder_name) - if 'Contents' not in response: - logger.info(f"No files found in folder {folder_name} in bucket {bucket_name}.") + if "Contents" not in response: + logger.info( + f"No files found in folder {folder_name} in bucket {bucket_name}." + ) return [] # Filter XML files - xml_files = [content['Key'] for content in response['Contents'] if content['Key'].endswith('.xml')] + xml_files = [ + content["Key"] + for content in response["Contents"] + if content["Key"].endswith(".xml") + ] return xml_files except NoCredentialsError: @@ -392,5 +417,7 @@ def list_xmls_in_s3_folder(bucket_name, folder_name): logger.error("Incomplete credentials provided.") return [] except Exception as e: - logger.error(f'Failed to list XML files in folder {folder_name} in bucket {bucket_name}: {str(e)}') + logger.error( + f"Failed to list XML files in folder {folder_name} in bucket {bucket_name}: {str(e)}" + ) return []