From b3fa7c3051b22e76f8c7a6d3a375d72ebe6ad0df Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Thu, 12 Feb 2026 12:01:39 +0000 Subject: [PATCH] rename Plan and Scenario to PlanModel and ScenarioModel --- backend/Outputs.py | 241 +++--- .../app/db/functions/portfolio_functions.py | 30 +- .../db/functions/recommendations_functions.py | 24 +- backend/app/db/models/funding.py | 45 +- backend/app/db/models/recommendations.py | 4 +- .../categorisation/categorisation_logic.py | 6 +- backend/categorisation/processor.py | 8 +- etl/customers/l_and_g/ic_slides.py | 161 ++-- .../mod/pilot/2. Create Excel Model.py | 469 +++++++---- etl/customers/newhaven/slides.py | 773 +++++++++++------- .../d_restart_failed_subtasks.py | 43 +- .../f_diagnostics.py | 74 +- .../g_rebaselining_installed_measrues.py | 761 +++++++++-------- .../h_reset_estimated_epcs.py | 100 ++- .../k_deck_stats.py | 114 +-- .../m_reduced_sample_revised.py | 28 +- etl/customers/slide_utils.py | 213 +++-- sfr/principal_pitch/2_export_data.py | 28 +- 18 files changed, 1892 insertions(+), 1230 deletions(-) diff --git a/backend/Outputs.py b/backend/Outputs.py index f9538709..7111e4d3 100644 --- a/backend/Outputs.py +++ b/backend/Outputs.py @@ -8,7 +8,11 @@ from utils.s3 import read_from_s3, save_excel_to_s3 from backend.app.utils import sap_to_epc from backend.app.db.connection import db_engine from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel -from backend.app.db.models.recommendations import Recommendation, Plan, PlanRecommendations +from backend.app.db.models.recommendations import ( + Recommendation, + PlanModel, + PlanRecommendations, +) class Outputs: @@ -42,7 +46,7 @@ class Outputs: "flat_roof_insulation": "Flat roof (Out of scope - prov sum only)", "room_in_roof_insulation": "RIR (POA - Prov sum only)", "ev_charging": "EV Charging", - "battery": "Battery" + "battery": "Battery", } def __init__(self, format, portfolio_id): @@ -67,28 +71,38 @@ class Outputs: # Download cleaned data self.cleaned_epc_lookup = read_from_s3( s3_file_name="cleaned_epc_data/cleaned.bson", - bucket_name="retrofit-data-dev" + bucket_name="retrofit-data-dev", ) self.cleaned_epc_lookup = msgpack.unpackb(self.cleaned_epc_lookup, raw=False) def get_properties_from_db(self): # Get properties and their details for a specific portfolio - properties_query = self.session.query( - PropertyModel, - PropertyDetailsEpcModel - ).join( - PropertyDetailsEpcModel, - PropertyModel.id == PropertyDetailsEpcModel.property_id - ).filter( - PropertyModel.portfolio_id == self.portfolio_id # Filter by portfolio ID - ).all() + properties_query = ( + self.session.query(PropertyModel, PropertyDetailsEpcModel) + .join( + PropertyDetailsEpcModel, + PropertyModel.id == PropertyDetailsEpcModel.property_id, + ) + .filter( + PropertyModel.portfolio_id + == self.portfolio_id # Filter by portfolio ID + ) + .all() + ) # Transform properties data to include all fields dynamically properties_data = [ - {**{col.name: getattr(prop.PropertyModel, col.name) for col in PropertyModel.__table__.columns}, - **{col.name: getattr(prop.PropertyDetailsEpcModel, col.name) for col in - PropertyDetailsEpcModel.__table__.columns}} + { + **{ + col.name: getattr(prop.PropertyModel, col.name) + for col in PropertyModel.__table__.columns + }, + **{ + col.name: getattr(prop.PropertyDetailsEpcModel, col.name) + for col in PropertyDetailsEpcModel.__table__.columns + }, + } for prop in properties_query ] @@ -96,10 +110,14 @@ class Outputs: def get_plans_from_db(self): - plans_query = self.session.query(Plan).filter(Plan.portfolio_id == self.portfolio_id).all() + plans_query = ( + self.session.query(PlanModel) + .filter(PlanModel.portfolio_id == self.portfolio_id) + .all() + ) # Transform plans data to include all fields dynamically plans_data = [ - {col.name: getattr(plan, col.name) for col in Plan.__table__.columns} + {col.name: getattr(plan, col.name) for col in PlanModel.__table__.columns} for plan in plans_query ] @@ -107,28 +125,38 @@ class Outputs: def get_recommendations_from_db(self, plan_ids): # Get recommendations through PlanRecommendations for those plans and that are default - recommendations_query = self.session.query( - Recommendation, - Plan.scenario_id - ).join( - PlanRecommendations, Recommendation.id == PlanRecommendations.recommendation_id - ).join( - Plan, Plan.id == PlanRecommendations.plan_id # Join with Plan to access scenario_id - ).filter( - PlanRecommendations.plan_id.in_(plan_ids), - Recommendation.default == True # Filtering for default recommendations - ).all() + recommendations_query = ( + self.session.query(Recommendation, PlanModel.scenario_id) + .join( + PlanRecommendations, + Recommendation.id == PlanRecommendations.recommendation_id, + ) + .join( + PlanModel, + PlanModel.id + == PlanRecommendations.plan_id, # Join with Plan to access scenario_id + ) + .filter( + PlanRecommendations.plan_id.in_(plan_ids), + Recommendation.default == True, # Filtering for default recommendations + ) + .all() + ) # Transform recommendations data to include all fields dynamically and include scenario_id recommendations_data = [ { **{ - col.name: getattr(rec.Recommendation, col.name) if - hasattr(rec, 'Recommendation') else getattr(rec, col.name) + col.name: ( + getattr(rec.Recommendation, col.name) + if hasattr(rec, "Recommendation") + else getattr(rec, col.name) + ) for col in Recommendation.__table__.columns }, - "Scenario ID": rec.scenario_id - } for rec in recommendations_query + "Scenario ID": rec.scenario_id, + } + for rec in recommendations_query ] return recommendations_data @@ -148,7 +176,9 @@ class Outputs: measure_label = self.MDS_MEASURE_MAPPING.get(measure_type, None) # If the property_id already exists in the collected rows, update it - existing_row = next((item for item in rows if item["property_id"] == property_id), None) + existing_row = next( + (item for item in rows if item["property_id"] == property_id), None + ) if existing_row is None: # Create a new row if the property_id doesn't exist new_row = {measure: None for measure in all_measures} @@ -196,7 +226,7 @@ class Outputs: properties_data = self.get_properties_from_db() plans_data = self.get_plans_from_db() - plan_ids = [plan['id'] for plan in plans_data] + plan_ids = [plan["id"] for plan in plans_data] recommendations_data = self.get_recommendations_from_db(plan_ids) self.session.close() @@ -209,50 +239,54 @@ class Outputs: scenario_ids = plans_df["scenario_id"].unique() # We start to create the MDS sheet - mds = properties_df[ - [ - "property_id", - "address", - "postcode", - "uprn", - "current_epc_rating", - "current_sap_points", - "primary_energy_consumption", - "property_type", - "built_form", - "total_floor_area", - "walls", - "tenure", - "mainfuel", - # The bills columns are split out - we include them and aggregate, without appliances - "heating_cost_current", - "hot_water_cost_current", - "lighting_cost_current", - "gas_standing_charge", - "electricity_standing_charge" + mds = ( + properties_df[ + [ + "property_id", + "address", + "postcode", + "uprn", + "current_epc_rating", + "current_sap_points", + "primary_energy_consumption", + "property_type", + "built_form", + "total_floor_area", + "walls", + "tenure", + "mainfuel", + # The bills columns are split out - we include them and aggregate, without appliances + "heating_cost_current", + "hot_water_cost_current", + "lighting_cost_current", + "gas_standing_charge", + "electricity_standing_charge", + ] ] - ].copy().rename( - columns={ - "address": "Address", - "postcode": "Postcode", - "uprn": "UPRN", - "current_epc_rating": "Pre EPC", - "current_sap_points": "EPC Source", - "primary_energy_consumption": "Existing Heating Demand Kwh/m2/y", - "property_type": "Property Type", - "built_form": "Built Form", - "total_floor_area": "Floor area m2 (If known)", - "walls": "Wall Type (Mandatory field)", - "tenure": "Tenure", - } + .copy() + .rename( + columns={ + "address": "Address", + "postcode": "Postcode", + "uprn": "UPRN", + "current_epc_rating": "Pre EPC", + "current_sap_points": "EPC Source", + "primary_energy_consumption": "Existing Heating Demand Kwh/m2/y", + "property_type": "Property Type", + "built_form": "Built Form", + "total_floor_area": "Floor area m2 (If known)", + "walls": "Wall Type (Mandatory field)", + "tenure": "Tenure", + } + ) ) mds["Estimated bill (£ per year)"] = ( - mds["heating_cost_current"] + - mds["hot_water_cost_current"] + - mds["lighting_cost_current"] + - mds["gas_standing_charge"] + - mds["electricity_standing_charge"] + mds["heating_cost_current"] + + mds["hot_water_cost_current"] + + mds["lighting_cost_current"] + + mds["gas_standing_charge"] + + mds["electricity_standing_charge"] ) mds = mds.drop( @@ -261,65 +295,84 @@ class Outputs: "hot_water_cost_current", "lighting_cost_current", "gas_standing_charge", - "electricity_standing_charge" + "electricity_standing_charge", ] ) # Formatting - Pre EPC is an enum mds["Pre EPC"] = [x.value for x in mds["Pre EPC"].values] - mds["Wall Type (Mandatory field)"] = mds["Wall Type (Mandatory field)"].str.split(",").str[0] + mds["Wall Type (Mandatory field)"] = ( + mds["Wall Type (Mandatory field)"].str.split(",").str[0] + ) # Remove average thermal transmittance field mds["Wall Type (Mandatory field)"] = np.where( - mds["Wall Type (Mandatory field)"].str.contains("Average thermal transmittance"), + mds["Wall Type (Mandatory field)"].str.contains( + "Average thermal transmittance" + ), "", - mds["Wall Type (Mandatory field)"] + mds["Wall Type (Mandatory field)"], ) mds = mds.merge( - pd.DataFrame(self.cleaned_epc_lookup["main-fuel"])[["clean_description", "fuel_type"]], + pd.DataFrame(self.cleaned_epc_lookup["main-fuel"])[ + ["clean_description", "fuel_type"] + ], left_on="mainfuel", right_on="clean_description", - how="left" + how="left", + ) + mds = mds.rename(columns={"fuel_type": "Existing Fuel Type"}).drop( + columns=["clean_description", "mainfuel"] ) - mds = mds.rename(columns={"fuel_type": "Existing Fuel Type"}).drop(columns=["clean_description", "mainfuel"]) mds["Existing Fuel Type"].value_counts() mds_output_by_scenario = {} for scenario_id in scenario_ids: - scenario_recommendations = recommendations_df[recommendations_df["Scenario ID"] == scenario_id] + scenario_recommendations = recommendations_df[ + recommendations_df["Scenario ID"] == scenario_id + ] # For each measure, we create the measure matrix - scenario_measure_matrix = self.make_mds_measure_matrix(scenario_recommendations) + scenario_measure_matrix = self.make_mds_measure_matrix( + scenario_recommendations + ) # Calculate the predicted impact on: SAP, heat demand, bills, kwh - recommendation_impacts = scenario_recommendations.groupby("property_id")[ - ["sap_points", "heat_demand", "kwh_savings", "energy_cost_savings"] - ].sum().reset_index() + recommendation_impacts = ( + scenario_recommendations.groupby("property_id")[ + ["sap_points", "heat_demand", "kwh_savings", "energy_cost_savings"] + ] + .sum() + .reset_index() + ) scenario_mds = mds.merge( scenario_measure_matrix, how="left", on="property_id" - ).merge( - recommendation_impacts, how="left", on="property_id" - ) + ).merge(recommendation_impacts, how="left", on="property_id") # If we have no recommendations, sap_points, kwh_savings, head_demand will be NaN to_clean = [c for c in recommendation_impacts.columns if c != "property_id"] for col in to_clean: scenario_mds[col].fillna(0, inplace=True) scenario_mds.fillna(0, inplace=True) - scenario_mds["Post SAP"] = scenario_mds["EPC Source"] + scenario_mds["sap_points"] + scenario_mds["Post SAP"] = ( + scenario_mds["EPC Source"] + scenario_mds["sap_points"] + ) # Round Post SAP down to the nearest integer scenario_mds["Post SAP"] = scenario_mds["Post SAP"].apply(lambda x: int(x)) - scenario_mds["Post EPC"] = scenario_mds["Post SAP"].apply(lambda x: sap_to_epc(x)) + scenario_mds["Post EPC"] = scenario_mds["Post SAP"].apply( + lambda x: sap_to_epc(x) + ) scenario_mds["Heating Demand Kwh/m2/y"] = ( - scenario_mds["Existing Heating Demand Kwh/m2/y"] - scenario_mds["heat_demand"] + scenario_mds["Existing Heating Demand Kwh/m2/y"] + - scenario_mds["heat_demand"] ) scenario_mds = scenario_mds.rename( columns={ "sap_points": "Predicted SAP Points", "kwh_savings": "Energy Saving (Kwh)", - "energy_cost_savings": "Bill Reduction (£ per yr)" + "energy_cost_savings": "Bill Reduction (£ per yr)", } ) @@ -330,7 +383,7 @@ class Outputs: save_excel_to_s3( df=scenario_mds, file_key=f"engine_outputs/{self.format}/{self.today}_scenario_id={scenario_id}.xlsx", - bucket_name="retrofit-data-dev" + bucket_name="retrofit-data-dev", ) def export(self): diff --git a/backend/app/db/functions/portfolio_functions.py b/backend/app/db/functions/portfolio_functions.py index fa97c206..ae48afed 100644 --- a/backend/app/db/functions/portfolio_functions.py +++ b/backend/app/db/functions/portfolio_functions.py @@ -1,5 +1,10 @@ from sqlalchemy import func -from backend.app.db.models.recommendations import Plan, PlanRecommendations, Recommendation, Scenario +from backend.app.db.models.recommendations import ( + PlanModel, + PlanRecommendations, + Recommendation, + ScenarioModel, +) def aggregate_portfolio_recommendations( @@ -8,7 +13,7 @@ def aggregate_portfolio_recommendations( scenario_id: int, total_valuation_increase: float, labour_days: float, - aggregated_data: dict + aggregated_data: dict, ): # Aggregate multiple fields aggregates = ( @@ -16,15 +21,20 @@ def aggregate_portfolio_recommendations( func.sum(Recommendation.estimated_cost).label("cost"), func.sum(Recommendation.total_work_hours).label("total_work_hours"), func.sum(Recommendation.kwh_savings).label("energy_savings"), - func.sum(Recommendation.co2_equivalent_savings).label("co2_equivalent_savings"), + func.sum(Recommendation.co2_equivalent_savings).label( + "co2_equivalent_savings" + ), func.sum(Recommendation.energy_cost_savings).label("energy_cost_savings"), ) - .join(PlanRecommendations, PlanRecommendations.recommendation_id == Recommendation.id) - .join(Plan, Plan.id == PlanRecommendations.plan_id) + .join( + PlanRecommendations, + PlanRecommendations.recommendation_id == Recommendation.id, + ) + .join(PlanModel, PlanModel.id == PlanRecommendations.plan_id) .filter( - Plan.portfolio_id == portfolio_id, - Plan.scenario_id == scenario_id, - Recommendation.default == True + PlanModel.portfolio_id == portfolio_id, + PlanModel.scenario_id == scenario_id, + Recommendation.default == True, ) .one() ) @@ -36,11 +46,11 @@ def aggregate_portfolio_recommendations( "energy_savings": aggregates.energy_savings or 0, "co2_equivalent_savings": aggregates.co2_equivalent_savings or 0, "energy_cost_savings": aggregates.energy_cost_savings or 0, - **aggregated_data + **aggregated_data, } # Get the scenario and update the fields. This data needs to be stored against the scenario, not the portfolio - portfolio_scenario = session.query(Scenario).filter_by(id=scenario_id).one() + portfolio_scenario = session.query(ScenarioModel).filter_by(id=scenario_id).one() # Update the data for key, value in aggregates_dict.items(): diff --git a/backend/app/db/functions/recommendations_functions.py b/backend/app/db/functions/recommendations_functions.py index 54754ee0..5ff91909 100644 --- a/backend/app/db/functions/recommendations_functions.py +++ b/backend/app/db/functions/recommendations_functions.py @@ -4,11 +4,11 @@ from sqlalchemy import insert, delete from sqlalchemy.orm import Session from sqlalchemy.exc import SQLAlchemyError from backend.app.db.models.recommendations import ( - Plan, + PlanModel, Recommendation, RecommendationMaterials, PlanRecommendations, - Scenario, + ScenarioModel, ) from backend.app.db.models.portfolio import PropertyModel from backend.app.db.connection import db_session, db_read_session @@ -138,7 +138,7 @@ def create_plan(session: Session, plan): :param plan: dictionary of data representing a plan to be created """ try: - new_plan = Plan(**plan) + new_plan = PlanModel(**plan) session.add(new_plan) session.flush() session.commit() @@ -160,7 +160,9 @@ def bulk_create_plans(session: Session, plans_to_create: list[dict]) -> dict[int for p in plans_to_create ] - stmt = insert(Plan).values(payload).returning(Plan.id, Plan.property_id) + stmt = ( + insert(PlanModel).values(payload).returning(PlanModel.id, PlanModel.property_id) + ) result = session.execute(stmt).all() @@ -170,12 +172,14 @@ def bulk_create_plans(session: Session, plans_to_create: list[dict]) -> dict[int def create_scenario(session: Session, scenario: dict) -> int: existing_scenario = ( - session.query(Scenario).filter_by(portfolio_id=scenario["portfolio_id"]).first() + session.query(ScenarioModel) + .filter_by(portfolio_id=scenario["portfolio_id"]) + .first() ) scenario["is_default"] = not bool(existing_scenario) - new_scenario = Scenario(**scenario) + new_scenario = ScenarioModel(**scenario) session.add(new_scenario) session.flush() # ensures ID is populated @@ -578,7 +582,9 @@ def delete_portfolio_scenarios_if_empty(portfolio_id: int): return with db_session() as session: - session.execute(delete(Scenario).where(Scenario.portfolio_id == portfolio_id)) + session.execute( + delete(ScenarioModel).where(ScenarioModel.portfolio_id == portfolio_id) + ) print("Deleted scenarios for empty portfolio") @@ -611,11 +617,11 @@ def clear_portfolio_in_batches( print("Portfolio cleared in batches.") -def get_plans_by_portfolio_id(portfolio_id: int) -> List[Plan]: +def get_plans_by_portfolio_id(portfolio_id: int) -> List[PlanModel]: raise NotImplementedError -def get_scenario(scenario_id: int) -> List[Scenario]: +def get_scenario(scenario_id: int) -> List[ScenarioModel]: raise NotImplementedError diff --git a/backend/app/db/models/funding.py b/backend/app/db/models/funding.py index 6ea8364e..a7417e14 100644 --- a/backend/app/db/models/funding.py +++ b/backend/app/db/models/funding.py @@ -1,9 +1,18 @@ import enum -from sqlalchemy import Column, Integer, String, Float, Enum, TIMESTAMP, BigInteger, ForeignKey +from sqlalchemy import ( + Column, + Integer, + String, + Float, + Enum, + TIMESTAMP, + BigInteger, + ForeignKey, +) from sqlalchemy.orm import declarative_base from sqlalchemy.sql import func -from backend.app.db.models.recommendations import Plan +from backend.app.db.models.recommendations import PlanModel from backend.app.db.models.materials import MaterialType, Material Base = declarative_base() @@ -17,13 +26,17 @@ class SchemeEnum(enum.Enum): class FundingPackage(Base): - __tablename__ = 'funding_package' + __tablename__ = "funding_package" id = Column(Integer, primary_key=True, autoincrement=True) - plan_id = Column(BigInteger, ForeignKey(Plan.id), nullable=False) + plan_id = Column(BigInteger, ForeignKey(PlanModel.id), nullable=False) scheme = Column( - Enum(SchemeEnum, values_callable=lambda x: [e.value for e in x], create_constraint=False), - nullable=False + Enum( + SchemeEnum, + values_callable=lambda x: [e.value for e in x], + create_constraint=False, + ), + nullable=False, ) created_at = Column(TIMESTAMP, nullable=False, server_default=func.now()) project_funding = Column(Float) @@ -34,15 +47,23 @@ class FundingPackage(Base): class FundingPackageMeasures(Base): - __tablename__ = 'funding_package_measures' + __tablename__ = "funding_package_measures" id = Column(Integer, primary_key=True, autoincrement=True) - funding_package_id = Column(BigInteger, ForeignKey(FundingPackage.id), nullable=False) - measure = Column( - Enum(MaterialType, values_callable=lambda x: [e.value for e in x], create_constraint=False), - nullable=False + funding_package_id = Column( + BigInteger, ForeignKey(FundingPackage.id), nullable=False ) - material_id = Column(BigInteger, ForeignKey(Material.id), nullable=False) # Assuming material table exists + measure = Column( + Enum( + MaterialType, + values_callable=lambda x: [e.value for e in x], + create_constraint=False, + ), + nullable=False, + ) + material_id = Column( + BigInteger, ForeignKey(Material.id), nullable=False + ) # Assuming material table exists innovation_uplift = Column(Float) partial_project_score = Column(Float) uplift_project_score = Column(Float) diff --git a/backend/app/db/models/recommendations.py b/backend/app/db/models/recommendations.py index 36872394..759c088e 100644 --- a/backend/app/db/models/recommendations.py +++ b/backend/app/db/models/recommendations.py @@ -74,7 +74,7 @@ class PlanTypeEnum(enum.Enum): EXTRACTION_ECO = "extraction_eco" -class Plan(Base): +class PlanModel(Base): __tablename__ = "plan" id: Mapped[int] = mapped_column(BigInteger, primary_key=True, autoincrement=True) @@ -139,7 +139,7 @@ class PlanRecommendations(Base): ) -class Scenario(Base): +class ScenarioModel(Base): __tablename__ = "scenario" id: Mapped[int] = mapped_column(BigInteger, primary_key=True, autoincrement=True) diff --git a/backend/categorisation/categorisation_logic.py b/backend/categorisation/categorisation_logic.py index 503b3e54..f9503e50 100644 --- a/backend/categorisation/categorisation_logic.py +++ b/backend/categorisation/categorisation_logic.py @@ -1,12 +1,12 @@ from typing import List -from backend.app.db.models.recommendations import Plan +from backend.app.db.models.recommendations import PlanModel class CategorisationLogic: @staticmethod - def get_compliant_plans(plans: List[Plan]) -> List[Plan]: + def get_compliant_plans(plans: List[PlanModel]) -> List[PlanModel]: raise NotImplementedError @staticmethod - def get_cheapest_plan(plans: List[Plan]) -> Plan: + def get_cheapest_plan(plans: List[PlanModel]) -> PlanModel: raise NotImplementedError diff --git a/backend/categorisation/processor.py b/backend/categorisation/processor.py index 0c867267..53d7846c 100644 --- a/backend/categorisation/processor.py +++ b/backend/categorisation/processor.py @@ -5,24 +5,24 @@ from backend.app.db.functions.recommendations_functions import ( get_property_ids, set_plan_default, ) -from backend.app.db.models.recommendations import Plan +from backend.app.db.models.recommendations import PlanModel from backend.categorisation.categorisation_logic import CategorisationLogic def process_portfolio(portfolio_id: int) -> None: # Get all plans (including scenarios) for all properties in the portfolio - plans: List[Plan] = get_plans_by_portfolio_id(portfolio_id) + plans: List[PlanModel] = get_plans_by_portfolio_id(portfolio_id) # For each property, get all compliant plans property_ids: List[int] = get_property_ids(portfolio_id) # For each property, find the cheapest compliant plan for id in property_ids: - plans_for_property: List[Plan] = [ + plans_for_property: List[PlanModel] = [ plan for plan in plans if plan.property_id == id ] - compliant_plans_for_property: List[Plan] = ( + compliant_plans_for_property: List[PlanModel] = ( CategorisationLogic.get_compliant_plans(plans_for_property) ) diff --git a/etl/customers/l_and_g/ic_slides.py b/etl/customers/l_and_g/ic_slides.py index a5cb3511..de6edd49 100644 --- a/etl/customers/l_and_g/ic_slides.py +++ b/etl/customers/l_and_g/ic_slides.py @@ -41,7 +41,10 @@ epc_data = pd.read_csv( # Classify floor area in <73m2, 73-98, 99-200, 200+ epc_data["floor_area_bracket"] = epc_data["total_floor_area"].apply( - lambda x: "<73" if x < 73 else "73-98" if x < 99 else "99-200" if x < 200 else "200+") + lambda x: ( + "<73" if x < 73 else "73-98" if x < 99 else "99-200" if x < 200 else "200+" + ) +) # 73-98 185 # <73 156 @@ -65,7 +68,11 @@ import pandas as pd import numpy as np from sqlalchemy.orm import sessionmaker from backend.app.db.connection import db_engine -from backend.app.db.models.recommendations import Recommendation, Plan, PlanRecommendations +from backend.app.db.models.recommendations import ( + Recommendation, + PlanModel, + PlanRecommendations, +) from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel @@ -74,56 +81,79 @@ def get_data(portfolio_id, scenario_ids): session.begin() # Get properties and their details for a specific portfolio - properties_query = session.query( - PropertyModel, - PropertyDetailsEpcModel - ).join( - PropertyDetailsEpcModel, PropertyModel.id == PropertyDetailsEpcModel.property_id - ).filter( - PropertyModel.portfolio_id == portfolio_id # Filter by portfolio ID - ).all() + properties_query = ( + session.query(PropertyModel, PropertyDetailsEpcModel) + .join( + PropertyDetailsEpcModel, + PropertyModel.id == PropertyDetailsEpcModel.property_id, + ) + .filter(PropertyModel.portfolio_id == portfolio_id) # Filter by portfolio ID + .all() + ) # Transform properties data to include all fields dynamically properties_data = [ - {**{col.name: getattr(prop.PropertyModel, col.name) for col in PropertyModel.__table__.columns}, - **{col.name: getattr(prop.PropertyDetailsEpcModel, col.name) for col in - PropertyDetailsEpcModel.__table__.columns}} + { + **{ + col.name: getattr(prop.PropertyModel, col.name) + for col in PropertyModel.__table__.columns + }, + **{ + col.name: getattr(prop.PropertyDetailsEpcModel, col.name) + for col in PropertyDetailsEpcModel.__table__.columns + }, + } for prop in properties_query ] # Get property IDs from fetched properties # Get plans linked to the fetched properties - plans_query = session.query(Plan).filter(Plan.scenario_id.in_(scenario_ids)).all() + plans_query = ( + session.query(PlanModel).filter(PlanModel.scenario_id.in_(scenario_ids)).all() + ) # Transform plans data to include all fields dynamically plans_data = [ - {col.name: getattr(plan, col.name) for col in Plan.__table__.columns} + {col.name: getattr(plan, col.name) for col in PlanModel.__table__.columns} for plan in plans_query ] # Extract plan IDs for filtering recommendations through PlanRecommendations - plan_ids = [plan['id'] for plan in plans_data] + plan_ids = [plan["id"] for plan in plans_data] # Get recommendations through PlanRecommendations for those plans and that are default - recommendations_query = session.query( - Recommendation, - Plan.scenario_id - ).join( - PlanRecommendations, Recommendation.id == PlanRecommendations.recommendation_id - ).join( - Plan, Plan.id == PlanRecommendations.plan_id # Join with Plan to access scenario_id - ).filter( - PlanRecommendations.plan_id.in_(plan_ids), - Recommendation.default == True # Filtering for default recommendations - ).all() + recommendations_query = ( + session.query(Recommendation, PlanModel.scenario_id) + .join( + PlanRecommendations, + Recommendation.id == PlanRecommendations.recommendation_id, + ) + .join( + PlanModel, + PlanModel.id + == PlanRecommendations.plan_id, # Join with Plan to access scenario_id + ) + .filter( + PlanRecommendations.plan_id.in_(plan_ids), + Recommendation.default == True, # Filtering for default recommendations + ) + .all() + ) # Transform recommendations data to include all fields dynamically and include scenario_id recommendations_data = [ - {**{col.name: getattr(rec.Recommendation, col.name) if hasattr(rec, 'Recommendation') else getattr(rec, - col.name) for - col in Recommendation.__table__.columns}, - "Scenario ID": rec.scenario_id} + { + **{ + col.name: ( + getattr(rec.Recommendation, col.name) + if hasattr(rec, "Recommendation") + else getattr(rec, col.name) + ) + for col in Recommendation.__table__.columns + }, + "Scenario ID": rec.scenario_id, + } for rec in recommendations_query ] @@ -132,7 +162,9 @@ def get_data(portfolio_id, scenario_ids): return properties_data, plans_data, recommendations_data -properties_data, plans_data, recommendations_data = get_data(portfolio_id=124, scenario_ids=[205]) +properties_data, plans_data, recommendations_data = get_data( + portfolio_id=124, scenario_ids=[205] +) properties_df = pd.DataFrame(properties_data) plans_df = pd.DataFrame(plans_data) @@ -147,12 +179,12 @@ recommended_measures_df = recommended_measures_df.drop(columns=["default"]) post_install_sap = recommendations_df[["property_id", "default", "sap_points"]] post_install_sap = post_install_sap[post_install_sap["default"]] # Sum up the sap points by property id -post_install_sap = post_install_sap.groupby("property_id")[["sap_points"]].sum().reset_index() +post_install_sap = ( + post_install_sap.groupby("property_id")[["sap_points"]].sum().reset_index() +) recommendations_measures_pivot = recommended_measures_df.pivot( - index='property_id', - columns='measure_type', - values='estimated_cost' + index="property_id", columns="measure_type", values="estimated_cost" ) recommendations_measures_pivot = recommendations_measures_pivot.reset_index() @@ -163,7 +195,7 @@ recommendations_measures_pivot = recommendations_measures_pivot.rename( "double_glazing": "Cost: Double Glazing", "loft_insulation": "Cost: Loft Insulation", "mechanical_ventilation": "Cost: Ventilation", - "solar_pv": "Cost: Solar PV" + "solar_pv": "Cost: Solar PV", } ) recommendations_measures_pivot = recommendations_measures_pivot.fillna(0) @@ -186,16 +218,26 @@ recommendations_measures_pivot["Recommendation: Solar PV"] = ( recommendations_measures_pivot["Cost: Solar PV"] > 0 ) -df = properties_df[ - [ - "property_id", "uprn", "address", "postcode", "property_type", "walls", "roof", "heating", "windows", - "current_epc_rating", - "current_sap_points", "total_floor_area", "number_of_rooms", +df = ( + properties_df[ + [ + "property_id", + "uprn", + "address", + "postcode", + "property_type", + "walls", + "roof", + "heating", + "windows", + "current_epc_rating", + "current_sap_points", + "total_floor_area", + "number_of_rooms", + ] ] -].merge( - recommendations_measures_pivot, how="left", on="property_id" -).merge( - post_install_sap, how="left", on="property_id" + .merge(recommendations_measures_pivot, how="left", on="property_id") + .merge(post_install_sap, how="left", on="property_id") ) df = df.drop(columns=["property_id"]) @@ -222,25 +264,36 @@ df["Has Recommendations"] = ~pd.isnull(df["Cost: Air Source Heat Pump"]) # We fill missings: for col in [ - "Recommendation: Air Source Heat Pump", "Recommendation: Cavity Wall Insulation", - "Recommendation: Double Glazing", "Recommendation: Loft Insulation", "Recommendation: Ventilation", - "Recommendation: Solar PV" + "Recommendation: Air Source Heat Pump", + "Recommendation: Cavity Wall Insulation", + "Recommendation: Double Glazing", + "Recommendation: Loft Insulation", + "Recommendation: Ventilation", + "Recommendation: Solar PV", ]: df[col] = df[col].fillna(False) for col in [ - "Cost: Air Source Heat Pump", "Cost: Cavity Wall Insulation", - "Cost: Double Glazing", "Cost: Loft Insulation", "Cost: Ventilation", - "Cost: Solar PV" + "Cost: Air Source Heat Pump", + "Cost: Cavity Wall Insulation", + "Cost: Double Glazing", + "Cost: Loft Insulation", + "Cost: Ventilation", + "Cost: Solar PV", ]: df[col] = df[col].fillna(0) # Calculate post SAP df["Predicted Post Works SAP"] = df["Current SAP Points"] + df["sap_points"] df["Predicted Post Works SAP"] = df["Predicted Post Works SAP"].round() -df["Predicted Post Works EPC"] = df["Predicted Post Works SAP"].apply(lambda x: sap_to_epc(x)) +df["Predicted Post Works EPC"] = df["Predicted Post Works SAP"].apply( + lambda x: sap_to_epc(x) +) df["Recommendation: Air Source Heat Pump"].sum() df["Cost: Air Source Heat Pump"].sum() -df.to_csv("/Users/khalimconn-kowlessar/Documents/hestia/Customers/L&G/Basildon Data Export - 2.csv", index=False) +df.to_csv( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/L&G/Basildon Data Export - 2.csv", + index=False, +) diff --git a/etl/customers/mod/pilot/2. Create Excel Model.py b/etl/customers/mod/pilot/2. Create Excel Model.py index 9a9eda86..810ab661 100644 --- a/etl/customers/mod/pilot/2. Create Excel Model.py +++ b/etl/customers/mod/pilot/2. Create Excel Model.py @@ -4,7 +4,11 @@ import numpy as np from backend.app.utils import sap_to_epc from sqlalchemy.orm import sessionmaker from backend.app.db.connection import db_engine -from backend.app.db.models.recommendations import Recommendation, Plan, PlanRecommendations +from backend.app.db.models.recommendations import ( + Recommendation, + PlanModel, + PlanRecommendations, +) from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel @@ -13,56 +17,79 @@ def get_data(portfolio_id, scenario_ids): session.begin() # Get properties and their details for a specific portfolio - properties_query = session.query( - PropertyModel, - PropertyDetailsEpcModel - ).join( - PropertyDetailsEpcModel, PropertyModel.id == PropertyDetailsEpcModel.property_id - ).filter( - PropertyModel.portfolio_id == portfolio_id # Filter by portfolio ID - ).all() + properties_query = ( + session.query(PropertyModel, PropertyDetailsEpcModel) + .join( + PropertyDetailsEpcModel, + PropertyModel.id == PropertyDetailsEpcModel.property_id, + ) + .filter(PropertyModel.portfolio_id == portfolio_id) # Filter by portfolio ID + .all() + ) # Transform properties data to include all fields dynamically properties_data = [ - {**{col.name: getattr(prop.PropertyModel, col.name) for col in PropertyModel.__table__.columns}, - **{col.name: getattr(prop.PropertyDetailsEpcModel, col.name) for col in - PropertyDetailsEpcModel.__table__.columns}} + { + **{ + col.name: getattr(prop.PropertyModel, col.name) + for col in PropertyModel.__table__.columns + }, + **{ + col.name: getattr(prop.PropertyDetailsEpcModel, col.name) + for col in PropertyDetailsEpcModel.__table__.columns + }, + } for prop in properties_query ] # Get property IDs from fetched properties # Get plans linked to the fetched properties - plans_query = session.query(Plan).filter(Plan.scenario_id.in_(scenario_ids)).all() + plans_query = ( + session.query(PlanModel).filter(PlanModel.scenario_id.in_(scenario_ids)).all() + ) # Transform plans data to include all fields dynamically plans_data = [ - {col.name: getattr(plan, col.name) for col in Plan.__table__.columns} + {col.name: getattr(plan, col.name) for col in PlanModel.__table__.columns} for plan in plans_query ] # Extract plan IDs for filtering recommendations through PlanRecommendations - plan_ids = [plan['id'] for plan in plans_data] + plan_ids = [plan["id"] for plan in plans_data] # Get recommendations through PlanRecommendations for those plans and that are default - recommendations_query = session.query( - Recommendation, - Plan.scenario_id - ).join( - PlanRecommendations, Recommendation.id == PlanRecommendations.recommendation_id - ).join( - Plan, Plan.id == PlanRecommendations.plan_id # Join with Plan to access scenario_id - ).filter( - PlanRecommendations.plan_id.in_(plan_ids), - Recommendation.default == True # Filtering for default recommendations - ).all() + recommendations_query = ( + session.query(Recommendation, PlanModel.scenario_id) + .join( + PlanRecommendations, + Recommendation.id == PlanRecommendations.recommendation_id, + ) + .join( + PlanModel, + PlanModel.id + == PlanRecommendations.plan_id, # Join with Plan to access scenario_id + ) + .filter( + PlanRecommendations.plan_id.in_(plan_ids), + Recommendation.default == True, # Filtering for default recommendations + ) + .all() + ) # Transform recommendations data to include all fields dynamically and include scenario_id recommendations_data = [ - {**{col.name: getattr(rec.Recommendation, col.name) if hasattr(rec, 'Recommendation') - else getattr(rec, col.name) for - col in Recommendation.__table__.columns}, - "Scenario ID": rec.scenario_id} + { + **{ + col.name: ( + getattr(rec.Recommendation, col.name) + if hasattr(rec, "Recommendation") + else getattr(rec, col.name) + ) + for col in Recommendation.__table__.columns + }, + "Scenario ID": rec.scenario_id, + } for rec in recommendations_query ] @@ -94,16 +121,34 @@ def app(): ) property_asset_data = properties_df.merge( - mod_property_data.drop(columns=["address", "postcode", "tenure"]), how="left", on="uprn" + mod_property_data.drop(columns=["address", "postcode", "tenure"]), + how="left", + on="uprn", ) - property_asset_data["is_pitched"] = property_asset_data["roof"].str.contains("pitched", case=False) + property_asset_data["is_pitched"] = property_asset_data["roof"].str.contains( + "pitched", case=False + ) property_asset_data["pre_1970"] = property_asset_data["BUILD_YEAR"] < 1970 - property_asset_data["wall_type"] = property_asset_data["walls"].str.split(" ").str[0].str.strip() - property_asset_data["is_insulated"] = ( - property_asset_data["walls"].str.split(",").str[1].str.strip().isin( - ["filled cavity", "with external insulation", "filled cavity and external insulation"] - ) | property_asset_data["walls"].str.split(",").str[2].str.strip().isin(["insulated"]) + property_asset_data["wall_type"] = ( + property_asset_data["walls"].str.split(" ").str[0].str.strip() + ) + property_asset_data["is_insulated"] = property_asset_data["walls"].str.split( + "," + ).str[1].str.strip().isin( + [ + "filled cavity", + "with external insulation", + "filled cavity and external insulation", + ] + ) | property_asset_data[ + "walls" + ].str.split( + "," + ).str[ + 2 + ].str.strip().isin( + ["insulated"] ) property_asset_data["is_insulated"] = np.where( property_asset_data["is_insulated"], "Insulated", "Uninsulated" @@ -115,18 +160,26 @@ def app(): property_asset_data["pre_1970"], "Pre 1970", "Post 1970" ) - archetype_variables = ["property_type", "wall_type", "is_insulated", "is_pitched", "pre_1970"] + archetype_variables = [ + "property_type", + "wall_type", + "is_insulated", + "is_pitched", + "pre_1970", + ] assigned_archetypes = ( - property_asset_data.groupby( - archetype_variables - ).size().reset_index().rename(columns={0: "n_properties"}).sort_values("n_properties", ascending=False) + property_asset_data.groupby(archetype_variables) + .size() + .reset_index() + .rename(columns={0: "n_properties"}) + .sort_values("n_properties", ascending=False) ) # Make the archetype ID a concatenation of the variables - assigned_archetypes["archetype_id"] = assigned_archetypes[archetype_variables].apply( - lambda x: "_".join(x.astype(str)), axis=1 - ) + assigned_archetypes["archetype_id"] = assigned_archetypes[ + archetype_variables + ].apply(lambda x: "_".join(x.astype(str)), axis=1) # Most prominent archetypes prominent_archetypes = assigned_archetypes.head(6) @@ -136,7 +189,7 @@ def app(): property_asset_data = property_asset_data.merge( assigned_archetypes[archetype_variables + ["archetype_id"]], how="left", - on=archetype_variables + on=archetype_variables, ) # Create age bands: @@ -148,7 +201,7 @@ def app(): property_asset_data["age_band"] = pd.cut( property_asset_data["BUILD_YEAR"], bins=[1959, 1969, 1979, 1989, 1999, 2022], - labels=["1960-1969", "1970-1979", "1980-1989", "1990-1999", "2000+"] + labels=["1960-1969", "1970-1979", "1980-1989", "1990-1999", "2000+"], ) # Create floor area bands @@ -159,47 +212,59 @@ def app(): property_asset_data["floor_area_band"] = pd.cut( property_asset_data["total_floor_area"], bins=[0, 73, 97, 199, 10000], - labels=["0-73", "74-97", "98-199", "200+"] + labels=["0-73", "74-97", "98-199", "200+"], ) property_asset_data["archetype_group"] = property_asset_data["archetype_id"].copy() property_asset_data["archetype_group"] = np.where( - property_asset_data["archetype_id"].isin(other_archetypes["archetype_id"].values), + property_asset_data["archetype_id"].isin( + other_archetypes["archetype_id"].values + ), "other", - property_asset_data["archetype_group"] + property_asset_data["archetype_group"], ) # For colour wall_types = ( - property_asset_data[["wall_type"]].value_counts().to_frame().reset_index().rename( - columns={"wall_type": "Wall Type"} - ) + property_asset_data[["wall_type"]] + .value_counts() + .to_frame() + .reset_index() + .rename(columns={"wall_type": "Wall Type"}) ) # Group into age bands ages = ( - property_asset_data[["age_band"]].value_counts() + property_asset_data[["age_band"]] + .value_counts() .to_frame() - .reset_index().sort_values("age_band", ascending=True) + .reset_index() + .sort_values("age_band", ascending=True) .rename(columns={"age_band": "Age Band"}) ) floor_area_bands = ( - property_asset_data[["floor_area_band"]].value_counts() + property_asset_data[["floor_area_band"]] + .value_counts() .to_frame() - .reset_index().sort_values("floor_area_band", ascending=True) + .reset_index() + .sort_values("floor_area_band", ascending=True) .rename(columns={"floor_area_band": "Floor Area Band"}) ) archetype_counts = ( - property_asset_data[["archetype_group"]]. - value_counts(). - to_frame(). - reset_index() + property_asset_data[["archetype_group"]] + .value_counts() + .to_frame() + .reset_index() .rename(columns={"archetype_group": "Archetype"}) ) property_types = ( - (property_asset_data["property_type"] + ": " + property_asset_data["built_form"]). - value_counts(). - to_frame(). - reset_index() + ( + property_asset_data["property_type"] + + ": " + + property_asset_data["built_form"] + ) + .value_counts() + .to_frame() + .reset_index() .rename(columns={"index": "Property Type", 0: "Count"}) ) @@ -217,18 +282,24 @@ def app(): totals = property_asset_data[ [ "Total_household_members", - "co2_emissions", "current_energy_demand", "current_energy_demand_heating_hotwater", - "heating_cost_current", "hot_water_cost_current", "lighting_cost_current", - "appliances_cost_current", "gas_standing_charge", "electricity_standing_charge" + "co2_emissions", + "current_energy_demand", + "current_energy_demand_heating_hotwater", + "heating_cost_current", + "hot_water_cost_current", + "lighting_cost_current", + "appliances_cost_current", + "gas_standing_charge", + "electricity_standing_charge", ] ].copy() totals["total_cost"] = ( - totals["heating_cost_current"] + - totals["hot_water_cost_current"] + - totals["lighting_cost_current"] + - totals["appliances_cost_current"] + - totals["gas_standing_charge"] + - totals["electricity_standing_charge"] + totals["heating_cost_current"] + + totals["hot_water_cost_current"] + + totals["lighting_cost_current"] + + totals["appliances_cost_current"] + + totals["gas_standing_charge"] + + totals["electricity_standing_charge"] ) print( totals[ @@ -259,38 +330,59 @@ def app(): scenario_recommendations_df = recommendations_df[ recommendations_df["Scenario ID"] == scenario - ].copy() + ].copy() - scenario_recommendations_df["contingency"] = contingency * scenario_recommendations_df["estimated_cost"] + scenario_recommendations_df["contingency"] = ( + contingency * scenario_recommendations_df["estimated_cost"] + ) scenario_recommendations_df["total_cost"] = ( - scenario_recommendations_df["estimated_cost"] + scenario_recommendations_df["contingency"] + scenario_recommendations_df["estimated_cost"] + + scenario_recommendations_df["contingency"] ) recommended_measures_df = scenario_recommendations_df[ ["property_id", "measure_type", "estimated_cost", "default"] ] - recommended_measures_df = recommended_measures_df[recommended_measures_df["default"]] + recommended_measures_df = recommended_measures_df[ + recommended_measures_df["default"] + ] recommended_measures_df = recommended_measures_df.drop(columns=["default"]) # Metrics by property ID aggregated_metrics = scenario_recommendations_df[ [ - "property_id", "type", "default", "sap_points", - "energy_cost_savings", "kwh_savings", "co2_equivalent_savings", "estimated_cost", "contingency", - "total_cost" + "property_id", + "type", + "default", + "sap_points", + "energy_cost_savings", + "kwh_savings", + "co2_equivalent_savings", + "estimated_cost", + "contingency", + "total_cost", ] ] aggregated_metrics = aggregated_metrics[aggregated_metrics["default"]] - aggregated_metrics = aggregated_metrics.groupby("property_id")[ - ["sap_points", "co2_equivalent_savings", "energy_cost_savings", "kwh_savings", "estimated_cost", - "total_cost", "contingency"] - ].sum().reset_index() + aggregated_metrics = ( + aggregated_metrics.groupby("property_id")[ + [ + "sap_points", + "co2_equivalent_savings", + "energy_cost_savings", + "kwh_savings", + "estimated_cost", + "total_cost", + "contingency", + ] + ] + .sum() + .reset_index() + ) recommendations_measures_pivot = recommended_measures_df.pivot( - index='property_id', - columns='measure_type', - values='estimated_cost' + index="property_id", columns="measure_type", values="estimated_cost" ) recommendations_measures_pivot = recommendations_measures_pivot.reset_index() recommendations_measures_pivot = recommendations_measures_pivot.fillna(0) @@ -299,30 +391,58 @@ def app(): for c in recommendations_measures_pivot.columns: if c == "property_id": continue - recommendations_measures_pivot["Recommendation: " + c] = recommendations_measures_pivot[c] > 0 + recommendations_measures_pivot["Recommendation: " + c] = ( + recommendations_measures_pivot[c] > 0 + ) # We now create a final output - df = properties_df[ - [ - "property_id", "uprn", "address", "postcode", "property_type", "walls", "roof", "heating", "windows", - "current_epc_rating", "current_sap_points", "total_floor_area", "number_of_rooms", - "co2_emissions", "current_energy_demand", "current_energy_demand_heating_hotwater", - "heating_cost_current", "hot_water_cost_current", "lighting_cost_current", - "appliances_cost_current", "gas_standing_charge", "electricity_standing_charge" + df = ( + properties_df[ + [ + "property_id", + "uprn", + "address", + "postcode", + "property_type", + "walls", + "roof", + "heating", + "windows", + "current_epc_rating", + "current_sap_points", + "total_floor_area", + "number_of_rooms", + "co2_emissions", + "current_energy_demand", + "current_energy_demand_heating_hotwater", + "heating_cost_current", + "hot_water_cost_current", + "lighting_cost_current", + "appliances_cost_current", + "gas_standing_charge", + "electricity_standing_charge", + ] ] - ].merge( - recommendations_measures_pivot, how="left", on="property_id" - ).merge( - aggregated_metrics, how="left", on="property_id" + .merge(recommendations_measures_pivot, how="left", on="property_id") + .merge(aggregated_metrics, how="left", on="property_id") ) df["bills_total_cost"] = ( - df["heating_cost_current"] + df["hot_water_cost_current"] + df["lighting_cost_current"] + - df["appliances_cost_current"] + df["gas_standing_charge"] + df["electricity_standing_charge"] + df["heating_cost_current"] + + df["hot_water_cost_current"] + + df["lighting_cost_current"] + + df["appliances_cost_current"] + + df["gas_standing_charge"] + + df["electricity_standing_charge"] ) df = df.drop(columns=["property_id"]) - for c in ["sap_points", "co2_equivalent_savings", "energy_cost_savings", "kwh_savings"]: + for c in [ + "sap_points", + "co2_equivalent_savings", + "energy_cost_savings", + "kwh_savings", + ]: df[c] = df[c].fillna(0) df = df.rename( @@ -345,16 +465,23 @@ def app(): # Calculate post SAP df["Predicted Post Works SAP"] = df["Current SAP Points"] + df["sap_points"] df["Predicted Post Works SAP"] = df["Predicted Post Works SAP"].round() - df["Predicted Post Works EPC"] = df["Predicted Post Works SAP"].apply(lambda x: sap_to_epc(x)) + df["Predicted Post Works EPC"] = df["Predicted Post Works SAP"].apply( + lambda x: sap_to_epc(x) + ) # Calculate the relative savings on carbon, kwh, and bills - df["relative_carbon_savings"] = df["co2_equivalent_savings"] / df["co2_emissions"] + df["relative_carbon_savings"] = ( + df["co2_equivalent_savings"] / df["co2_emissions"] + ) df["relative_kwh_savings"] = df["kwh_savings"] / df["current_energy_demand"] df["relative_bill_savings"] = df["energy_cost_savings"] / df["bills_total_cost"] # Add on the archetype df = df.merge( - property_asset_data[["uprn", "archetype_group"]], how="left", left_on="UPRN", right_on="uprn" + property_asset_data[["uprn", "archetype_group"]], + how="left", + left_on="UPRN", + right_on="uprn", ) # For properties that don't make it to EPC B, check why. E.g. for a property that has an oil boiler, it @@ -387,7 +514,9 @@ def app(): printing_scenario_id = scenario_ids[0] # EPC breakdown - print(scenario_data[printing_scenario_id]['Predicted Post Works EPC'].value_counts()) + print( + scenario_data[printing_scenario_id]["Predicted Post Works EPC"].value_counts() + ) # Cost # Total cost print(scenario_data[printing_scenario_id]["total_cost"].sum()) @@ -408,16 +537,24 @@ def app(): measure_details = {} for scenario in scenario_ids: measure_details[scenario] = {} - recommendation_cols = [c for c in scenario_data[scenario].columns if "Recommendation:" in c] - measure_details[scenario]["count"] = scenario_data[scenario][recommendation_cols].sum().to_dict() + recommendation_cols = [ + c for c in scenario_data[scenario].columns if "Recommendation:" in c + ] + measure_details[scenario]["count"] = ( + scenario_data[scenario][recommendation_cols].sum().to_dict() + ) # Get average cost per measure measure_columns = [ - c.split("Recommendation: ")[1] for c in scenario_data[scenario].columns if "Recommendation:" in c + c.split("Recommendation: ")[1] + for c in scenario_data[scenario].columns + if "Recommendation:" in c ] # Take the mean, drop zero columns measure_costs = {} for m in measure_columns: - measure_costs[m] = float(scenario_data[scenario][scenario_data[scenario][m] > 0][m].mean()) + measure_costs[m] = float( + scenario_data[scenario][scenario_data[scenario][m] > 0][m].mean() + ) measure_details[scenario]["cost_per_measure"] = measure_costs pprint(measure_details[scenario_ids[0]]["count"]) @@ -452,12 +589,27 @@ def app(): for scenario in scenario_ids: df = scenario_data[scenario].copy() - avg_savings = df[ - ["sap_points", "co2_equivalent_savings", "energy_cost_savings", "kwh_savings", "estimated_cost", - "total_cost", "contingency"] - ].mean().to_dict() - avg_savings["cost_per_sap_point"] = avg_savings["total_cost"] / avg_savings["sap_points"] - avg_savings["cost_per_carbon"] = avg_savings["total_cost"] / avg_savings["co2_equivalent_savings"] + avg_savings = ( + df[ + [ + "sap_points", + "co2_equivalent_savings", + "energy_cost_savings", + "kwh_savings", + "estimated_cost", + "total_cost", + "contingency", + ] + ] + .mean() + .to_dict() + ) + avg_savings["cost_per_sap_point"] = ( + avg_savings["total_cost"] / avg_savings["sap_points"] + ) + avg_savings["cost_per_carbon"] = ( + avg_savings["total_cost"] / avg_savings["co2_equivalent_savings"] + ) scenario_metrics[scenario] = avg_savings pprint(scenario_metrics[scenario_ids[0]]) @@ -465,11 +617,11 @@ def app(): scenario_data[scenario_ids[0]]["loft_insulation"][ scenario_data[scenario_ids[0]]["loft_insulation"] > 0 - ].mean() + ].mean() scenario_data[scenario_ids[0]]["cavity_wall_insulation"][ scenario_data[scenario_ids[0]]["cavity_wall_insulation"] > 0 - ].mean() + ].mean() # Testing checking floor risk @@ -477,11 +629,7 @@ def app(): def get_flood_risk(lat, lon, radius_km=1): url = "https://environment.data.gov.uk/flood-monitoring/id/floods" - params = { - 'lat': lat, - 'long': lon, - 'dist': radius_km # search radius in km - } + params = {"lat": lat, "long": lon, "dist": radius_km} # search radius in km response = requests.get(url, params=params) response.raise_for_status() @@ -495,20 +643,19 @@ def app(): print(f"{len(flood_warnings)} warning(s) found near the location:") for warning in flood_warnings: print(f"- Area: {warning.get('description')}") - print(f" Severity: {warning.get('severity')} (Level {warning.get('severityLevel')})") + print( + f" Severity: {warning.get('severity')} (Level {warning.get('severityLevel')})" + ) print(f" Message changed at: {warning.get('timeMessageChanged')}") print() return flood_warnings from shapely.geometry import shape, Point + def get_flood_areas_near_point(lat, lon, radius_km=2): url = "https://environment.data.gov.uk/flood-monitoring/id/floodAreas" - params = { - 'lat': lat, - 'long': lon, - 'dist': radius_km - } + params = {"lat": lat, "long": lon, "dist": radius_km} response = requests.get(url, params=params) response.raise_for_status() @@ -531,7 +678,7 @@ def app(): if not features: continue - flood_polygon = shape(features[0]['geometry']) + flood_polygon = shape(features[0]["geometry"]) try: is_inside = flood_polygon.contains(point) @@ -539,12 +686,17 @@ def app(): is_inside = False if is_inside: - print(f"📍 Point is inside flood area: {area['label']} ({area['notation']})") + print( + f"📍 Point is inside flood area: {area['label']} ({area['notation']})" + ) return area from tqdm import tqdm + floor_warnings_data = [] - for _, property in tqdm(property_asset_data.iterrows(), total=len(property_asset_data)): + for _, property in tqdm( + property_asset_data.iterrows(), total=len(property_asset_data) + ): # warnings = floor_warnings_data.extend( # get_flood_risk(lat=property["LATITUDE"], lon=property["LONGITUDE"], radius_km=1) # ) @@ -556,7 +708,7 @@ def app(): "uprn": property["uprn"], "address": property["address"], "postcode": property["postcode"], - "area": resp + "area": resp, } ) continue @@ -570,7 +722,7 @@ def app(): "House_Cavity_Uninsulated_Pitched roof_Post 1970", "other", "House_System_Uninsulated_Pitched roof_Pre 1970", - "House_Solid_Uninsulated_Not Pitched Roof_Pre 1970" + "House_Solid_Uninsulated_Not Pitched Roof_Pre 1970", ] values = [62, 36, 21, 16, 16, 4, 2] @@ -582,36 +734,39 @@ def app(): "Cavity wall insulation, ventilation", "Bespoke retrofit measures", "External wall insulation, roof insulation", - "Flat roof insulation, internal wall insulation" + "Flat roof insulation, internal wall insulation", ] - fig = go.Figure(go.Treemap( - labels=labels, - parents=[""] * len(labels), # No root - values=values, - hovertext=hovertext, - hoverinfo="text", - textinfo="none", - marker=dict( - line=dict(color="white", width=4), - colors=values, - colorscale="Blues" + fig = go.Figure( + go.Treemap( + labels=labels, + parents=[""] * len(labels), # No root + values=values, + hovertext=hovertext, + hoverinfo="text", + textinfo="none", + marker=dict( + line=dict(color="white", width=4), colors=values, colorscale="Blues" + ), ) - )) + ) fig.update_layout( - margin=dict(t=10, l=10, r=10, b=10), - plot_bgcolor="white", - paper_bgcolor="white" + margin=dict(t=10, l=10, r=10, b=10), plot_bgcolor="white", paper_bgcolor="white" ) fig.show() # Get the recommended measures by scenario id - recommendation_cols = [c for c in scenario_data[scenario_ids[1]].columns if "Recommendation:" in c] - measure_counts_by_scenario = scenario_data[scenario_ids[1]].groupby("archetype_group")[ - recommendation_cols - ].sum().reset_index() + recommendation_cols = [ + c for c in scenario_data[scenario_ids[1]].columns if "Recommendation:" in c + ] + measure_counts_by_scenario = ( + scenario_data[scenario_ids[1]] + .groupby("archetype_group")[recommendation_cols] + .sum() + .reset_index() + ) measure_counts_by_scenario.to_csv( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MOD/Pilot Programme/measure_counts_by_scenario.csv" @@ -630,15 +785,13 @@ def app(): to_append = {"uprn": uprn} for _id in scenario_ids: - scenario = scenario_data[_id][ - scenario_data[_id]["uprn"] == uprn - ].squeeze() + scenario = scenario_data[_id][scenario_data[_id]["uprn"] == uprn].squeeze() val = PropertyValuation.estimate_valuation_improvement( current_value=x["valuation"], current_epc=scenario["Current EPC Rating"].value, target_epc=scenario["Predicted Post Works EPC"], - total_cost=None + total_cost=None, ) to_append[_id] = val["average_increase"] diff --git a/etl/customers/newhaven/slides.py b/etl/customers/newhaven/slides.py index 45108fec..efedb844 100644 --- a/etl/customers/newhaven/slides.py +++ b/etl/customers/newhaven/slides.py @@ -3,7 +3,12 @@ import pandas as pd import numpy as np from sqlalchemy.orm import sessionmaker from backend.app.db.connection import db_engine -from backend.app.db.models.recommendations import Recommendation, Plan, PlanRecommendations, Scenario +from backend.app.db.models.recommendations import ( + Recommendation, + PlanModel, + PlanRecommendations, + ScenarioModel, +) from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel from utils.s3 import read_csv_from_s3 @@ -13,56 +18,79 @@ def get_data(portfolio_id, scenario_ids): session.begin() # Get properties and their details for a specific portfolio - properties_query = session.query( - PropertyModel, - PropertyDetailsEpcModel - ).join( - PropertyDetailsEpcModel, PropertyModel.id == PropertyDetailsEpcModel.property_id - ).filter( - PropertyModel.portfolio_id == portfolio_id # Filter by portfolio ID - ).all() + properties_query = ( + session.query(PropertyModel, PropertyDetailsEpcModel) + .join( + PropertyDetailsEpcModel, + PropertyModel.id == PropertyDetailsEpcModel.property_id, + ) + .filter(PropertyModel.portfolio_id == portfolio_id) # Filter by portfolio ID + .all() + ) # Transform properties data to include all fields dynamically properties_data = [ - {**{col.name: getattr(prop.PropertyModel, col.name) for col in PropertyModel.__table__.columns}, - **{col.name: getattr(prop.PropertyDetailsEpcModel, col.name) for col in - PropertyDetailsEpcModel.__table__.columns}} + { + **{ + col.name: getattr(prop.PropertyModel, col.name) + for col in PropertyModel.__table__.columns + }, + **{ + col.name: getattr(prop.PropertyDetailsEpcModel, col.name) + for col in PropertyDetailsEpcModel.__table__.columns + }, + } for prop in properties_query ] # Get property IDs from fetched properties # Get plans linked to the fetched properties - plans_query = session.query(Plan).filter(Plan.scenario_id.in_(scenario_ids)).all() + plans_query = ( + session.query(PlanModel).filter(PlanModel.scenario_id.in_(scenario_ids)).all() + ) # Transform plans data to include all fields dynamically plans_data = [ - {col.name: getattr(plan, col.name) for col in Plan.__table__.columns} + {col.name: getattr(plan, col.name) for col in PlanModel.__table__.columns} for plan in plans_query ] # Extract plan IDs for filtering recommendations through PlanRecommendations - plan_ids = [plan['id'] for plan in plans_data] + plan_ids = [plan["id"] for plan in plans_data] # Get recommendations through PlanRecommendations for those plans and that are default - recommendations_query = session.query( - Recommendation, - Plan.scenario_id - ).join( - PlanRecommendations, Recommendation.id == PlanRecommendations.recommendation_id - ).join( - Plan, Plan.id == PlanRecommendations.plan_id # Join with Plan to access scenario_id - ).filter( - PlanRecommendations.plan_id.in_(plan_ids), - Recommendation.default == True # Filtering for default recommendations - ).all() + recommendations_query = ( + session.query(Recommendation, PlanModel.scenario_id) + .join( + PlanRecommendations, + Recommendation.id == PlanRecommendations.recommendation_id, + ) + .join( + PlanModel, + PlanModel.id + == PlanRecommendations.plan_id, # Join with Plan to access scenario_id + ) + .filter( + PlanRecommendations.plan_id.in_(plan_ids), + Recommendation.default == True, # Filtering for default recommendations + ) + .all() + ) # Transform recommendations data to include all fields dynamically and include scenario_id recommendations_data = [ - {**{col.name: getattr(rec.Recommendation, col.name) if hasattr(rec, 'Recommendation') else getattr(rec, - col.name) for - col in Recommendation.__table__.columns}, - "Scenario ID": rec.scenario_id} + { + **{ + col.name: ( + getattr(rec.Recommendation, col.name) + if hasattr(rec, "Recommendation") + else getattr(rec, col.name) + ) + for col in Recommendation.__table__.columns + }, + "Scenario ID": rec.scenario_id, + } for rec in recommendations_query ] @@ -71,7 +99,9 @@ def get_data(portfolio_id, scenario_ids): return properties_data, plans_data, recommendations_data -def estimate_post_retrofit_heating_hotwater_kwh(properties_df, recommendations_df, scenario_ids): +def estimate_post_retrofit_heating_hotwater_kwh( + properties_df, recommendations_df, scenario_ids +): # properties_starting_with_electric_heating = properties_df[ # properties_df["mainfuel"].isin( # ["Electricity not community", "Electricity electricity unspecified tariff"] @@ -85,20 +115,29 @@ def estimate_post_retrofit_heating_hotwater_kwh(properties_df, recommendations_d for scenario_id in scenario_ids: # Get the recommendations for the scenario, default scenario_recommendations = recommendations_df[ - (recommendations_df["Scenario ID"] == scenario_id) & - (recommendations_df["default"] == True) - ].copy() + (recommendations_df["Scenario ID"] == scenario_id) + & (recommendations_df["default"] == True) + ].copy() - scenario_recommendations['ligting_kwh'] = scenario_recommendations.apply( - lambda x: x['kwh_savings'] if x['type'] == 'low_energy_lighting' else 0, - axis=1) - scenario_recommendations['solar_kwh'] = scenario_recommendations.apply( - lambda x: x['kwh_savings'] if x['type'] == 'solar_pv' else 0, axis=1) + scenario_recommendations["ligting_kwh"] = scenario_recommendations.apply( + lambda x: x["kwh_savings"] if x["type"] == "low_energy_lighting" else 0, + axis=1, + ) + scenario_recommendations["solar_kwh"] = scenario_recommendations.apply( + lambda x: x["kwh_savings"] if x["type"] == "solar_pv" else 0, axis=1 + ) # Set 'Estimated Kwh Savings' to zero where specific kwh columns are used - scenario_recommendations['Estimated Kwh Savings'] = scenario_recommendations.apply( - lambda x: 0 if x['type'] in ['low_energy_lighting', 'solar_pv'] else x[ - 'kwh_savings'], axis=1) + scenario_recommendations["Estimated Kwh Savings"] = ( + scenario_recommendations.apply( + lambda x: ( + 0 + if x["type"] in ["low_energy_lighting", "solar_pv"] + else x["kwh_savings"] + ), + axis=1, + ) + ) # We need to determine if any of the properties start with electric heating or end with it # property_electric_heating = [] @@ -112,51 +151,76 @@ def estimate_post_retrofit_heating_hotwater_kwh(properties_df, recommendations_d # property_electric_heating.append(pid) # continue - grouped_data = scenario_recommendations.groupby(['property_id']).agg({ - 'Estimated Kwh Savings': 'sum', - 'ligting_kwh': 'sum', - 'solar_kwh': 'sum', - "estimated_cost": "sum" - }).reset_index() + grouped_data = ( + scenario_recommendations.groupby(["property_id"]) + .agg( + { + "Estimated Kwh Savings": "sum", + "ligting_kwh": "sum", + "solar_kwh": "sum", + "estimated_cost": "sum", + } + ) + .reset_index() + ) comparison = properties_df.drop_duplicates().merge( grouped_data, on=["property_id"], how="left" ) comparison["Post Retrofit Heating & Hotwater kwh"] = ( - comparison["current_energy_demand_heating_hotwater"] - \ - comparison["Estimated Kwh Savings"] + comparison["current_energy_demand_heating_hotwater"] + - comparison["Estimated Kwh Savings"] ) - avgs = comparison[['current_energy_demand_heating_hotwater', 'Post Retrofit Heating & Hotwater kwh']].mean() + avgs = comparison[ + [ + "current_energy_demand_heating_hotwater", + "Post Retrofit Heating & Hotwater kwh", + ] + ].mean() # We now, for properties that have a plan, do a before and after with_savings = comparison[~pd.isnull(comparison["Estimated Kwh Savings"])] avgs2 = with_savings[ - ['current_energy_demand_heating_hotwater', 'Post Retrofit Heating & Hotwater kwh']].mean() - avgs2["difference"] = avgs2["current_energy_demand_heating_hotwater"] - avgs2[ - "Post Retrofit Heating & Hotwater kwh"] - avgs2["percentage_reduction"] = 100 * avgs2["difference"] / avgs2["current_energy_demand_heating_hotwater"] + [ + "current_energy_demand_heating_hotwater", + "Post Retrofit Heating & Hotwater kwh", + ] + ].mean() + avgs2["difference"] = ( + avgs2["current_energy_demand_heating_hotwater"] + - avgs2["Post Retrofit Heating & Hotwater kwh"] + ) + avgs2["percentage_reduction"] = ( + 100 * avgs2["difference"] / avgs2["current_energy_demand_heating_hotwater"] + ) # We also calculate the cost per kwh saves total_kwh_saved = ( - with_savings["Estimated Kwh Savings"].sum() + - with_savings["ligting_kwh"].sum() + - with_savings["solar_kwh"].sum() + with_savings["Estimated Kwh Savings"].sum() + + with_savings["ligting_kwh"].sum() + + with_savings["solar_kwh"].sum() ) total_cost = with_savings["estimated_cost"].sum() cost_per_kwh_saved = total_cost / total_kwh_saved scenario_comparison_df.append({"scenario_id": scenario_id, **avgs}) scenario_comparison_df_2.append({"scenario_id": scenario_id, **avgs2}) - cost_per_kwh_saved_table.append({"scenario_id": scenario_id, "cost_per_kwh_saved": cost_per_kwh_saved}) + cost_per_kwh_saved_table.append( + {"scenario_id": scenario_id, "cost_per_kwh_saved": cost_per_kwh_saved} + ) scenario_comparison_population = pd.DataFrame(scenario_comparison_df) scenario_comparison_retrofitted_units = pd.DataFrame(scenario_comparison_df_2) cost_per_kwh_saved_table = pd.DataFrame(cost_per_kwh_saved_table) - return scenario_comparison_population, scenario_comparison_retrofitted_units, cost_per_kwh_saved_table + return ( + scenario_comparison_population, + scenario_comparison_retrofitted_units, + cost_per_kwh_saved_table, + ) def slides(): @@ -167,7 +231,9 @@ def slides(): # Look at one scenario at a time, otherwise this is agony scenario_ids = [47, 48, 49, 50, 51] - properties_data, plans_data, recommendations_data = get_data(portfolio_id, scenario_ids) + properties_data, plans_data, recommendations_data = get_data( + portfolio_id, scenario_ids + ) properties_df = pd.DataFrame(properties_data) plans_df = pd.DataFrame(plans_data) @@ -177,16 +243,19 @@ def slides(): raise ValueError("The number of unique properties is not 2553") # Q1: What is the baseline heating and energy demand for the properties in the portfolio - baseline? - heating_hotwater_kwh = ( - properties_df[['current_energy_demand', 'current_energy_demand_heating_hotwater']] - .mean() - ) + heating_hotwater_kwh = properties_df[ + ["current_energy_demand", "current_energy_demand_heating_hotwater"] + ].mean() # Q2: For each scenario, what is for what is the heating and hot water kwh after retrofit, on the entire # popoulation (incl those without retrofit) and for just those being retrofit # We also calculat the cost per kwh saved - scenario_comparison_population, scenario_comparison_retrofitted_units, cost_per_kwh_saved_table = ( - estimate_post_retrofit_heating_hotwater_kwh(properties_df, recommendations_df, scenario_ids) + ( + scenario_comparison_population, + scenario_comparison_retrofitted_units, + cost_per_kwh_saved_table, + ) = estimate_post_retrofit_heating_hotwater_kwh( + properties_df, recommendations_df, scenario_ids ) # Q3: For each scenario, we want to answer what the heating and hot water kwh looks like after retrofit @@ -194,42 +263,55 @@ def slides(): # By property - recommendations_df["type_mapped"] = recommendations_df["type"].copy().replace( - { - "loft_insulation": "roof_insulation", - "room_roof_insulation": "roof_insulation", - "flat_roof_insulation": "roof_insulation", - "hot_water_tank_insulation": "other", - "cylinder_thermostat": "other", - "sealing_open_fireplace": "other", - "suspended_floor_insulation": "floor_insulation", - "solid_floor_insulation": "floor_insulation", - } + recommendations_df["type_mapped"] = ( + recommendations_df["type"] + .copy() + .replace( + { + "loft_insulation": "roof_insulation", + "room_roof_insulation": "roof_insulation", + "flat_roof_insulation": "roof_insulation", + "hot_water_tank_insulation": "other", + "cylinder_thermostat": "other", + "sealing_open_fireplace": "other", + "suspended_floor_insulation": "floor_insulation", + "solid_floor_insulation": "floor_insulation", + } + ) ) recommendations_df["type_mapped"] = np.where( recommendations_df["description"].str.contains("air source heat pump"), "air_source_heat_pump", - recommendations_df["type_mapped"] + recommendations_df["type_mapped"], ) # Group by 'Plan Name' and 'Recommendation Type' and count unique 'Property ID' - recommendation_summary = recommendations_df[recommendations_df["default"] == True].groupby( - ['Scenario ID', 'type_mapped'] - ).agg({ - 'property_id': 'nunique' - }).reset_index() + recommendation_summary = ( + recommendations_df[recommendations_df["default"] == True] + .groupby(["Scenario ID", "type_mapped"]) + .agg({"property_id": "nunique"}) + .reset_index() + ) - recommendation_summary.columns = ['Scenario ID', 'Type Mapped', 'Number of Properties'] + recommendation_summary.columns = [ + "Scenario ID", + "Type Mapped", + "Number of Properties", + ] recommendation_summary["Percentage of Properties"] = 100 * ( recommendation_summary["Number of Properties"] / properties_df["id"].nunique() ) - recommendation_summary_final_scenario = recommendation_summary[recommendation_summary["Scenario ID"].isin([51])] + recommendation_summary_final_scenario = recommendation_summary[ + recommendation_summary["Scenario ID"].isin([51]) + ] # MVP implementation of funding estimation for the most basic scenario, using GBIS - project_scores_matrix = pd.read_csv("/Users/khalimconn-kowlessar/Downloads/ECO4 Full Project Scores Matrix.csv") + project_scores_matrix = pd.read_csv( + "/Users/khalimconn-kowlessar/Downloads/ECO4 Full Project Scores Matrix.csv" + ) def find_abs(sap_movement, starting_sap, floor_area): starting_band = find_band(starting_sap) @@ -238,7 +320,7 @@ def slides(): return 0 if floor_area <= 72: - floor_area_segment = '0-72' + floor_area_segment = "0-72" elif (floor_area > 72) and (floor_area <= 97): floor_area_segment = "73-97" elif (floor_area > 97) and (floor_area <= 199): @@ -247,26 +329,26 @@ def slides(): floor_area_segment = "200+" return project_scores_matrix[ - (project_scores_matrix["Floor Area Segment"] == floor_area_segment) & - (project_scores_matrix["Starting Band"] == starting_band) & - (project_scores_matrix["Finishing Band"] == finishing_band) - ].squeeze()["Cost Savings"] + (project_scores_matrix["Floor Area Segment"] == floor_area_segment) + & (project_scores_matrix["Starting Band"] == starting_band) + & (project_scores_matrix["Finishing Band"] == finishing_band) + ].squeeze()["Cost Savings"] eco4_scores_sap_table = [ - {'Band': 'High_A', 'From': 96.0, 'Up to': 100.0, 'Mid-point': 98.0}, - {'Band': 'Low_A', 'From': 92.0, 'Up to': 96.0, 'Mid-point': 94.0}, - {'Band': 'High_B', 'From': 86.0, 'Up to': 91.0, 'Mid-point': 88.5}, - {'Band': 'Low_B', 'From': 81.0, 'Up to': 86.0, 'Mid-point': 83.5}, - {'Band': 'High_C', 'From': 74.5, 'Up to': 80.0, 'Mid-point': 77.25}, - {'Band': 'Low_C', 'From': 69.0, 'Up to': 74.5, 'Mid-point': 71.75}, - {'Band': 'High_D', 'From': 61.5, 'Up to': 68.0, 'Mid-point': 64.75}, - {'Band': 'Low_D', 'From': 55.0, 'Up to': 61.5, 'Mid-point': 58.25}, - {'Band': 'High_E', 'From': 46.5, 'Up to': 54.0, 'Mid-point': 50.25}, - {'Band': 'Low_E', 'From': 39.0, 'Up to': 46.5, 'Mid-point': 42.75}, - {'Band': 'High_F', 'From': 29.5, 'Up to': 38.0, 'Mid-point': 33.75}, - {'Band': 'Low_F', 'From': 21.0, 'Up to': 29.5, 'Mid-point': 25.25}, - {'Band': 'High_G', 'From': 10.5, 'Up to': 20.0, 'Mid-point': 15.25}, - {'Band': 'Low_G', 'From': 1.0, 'Up to': 10.5, 'Mid-point': 5.75} + {"Band": "High_A", "From": 96.0, "Up to": 100.0, "Mid-point": 98.0}, + {"Band": "Low_A", "From": 92.0, "Up to": 96.0, "Mid-point": 94.0}, + {"Band": "High_B", "From": 86.0, "Up to": 91.0, "Mid-point": 88.5}, + {"Band": "Low_B", "From": 81.0, "Up to": 86.0, "Mid-point": 83.5}, + {"Band": "High_C", "From": 74.5, "Up to": 80.0, "Mid-point": 77.25}, + {"Band": "Low_C", "From": 69.0, "Up to": 74.5, "Mid-point": 71.75}, + {"Band": "High_D", "From": 61.5, "Up to": 68.0, "Mid-point": 64.75}, + {"Band": "Low_D", "From": 55.0, "Up to": 61.5, "Mid-point": 58.25}, + {"Band": "High_E", "From": 46.5, "Up to": 54.0, "Mid-point": 50.25}, + {"Band": "Low_E", "From": 39.0, "Up to": 46.5, "Mid-point": 42.75}, + {"Band": "High_F", "From": 29.5, "Up to": 38.0, "Mid-point": 33.75}, + {"Band": "Low_F", "From": 21.0, "Up to": 29.5, "Mid-point": 25.25}, + {"Band": "High_G", "From": 10.5, "Up to": 20.0, "Mid-point": 15.25}, + {"Band": "Low_G", "From": 1.0, "Up to": 10.5, "Mid-point": 5.75}, ] eco4_scores_sap_table = pd.DataFrame(eco4_scores_sap_table) @@ -274,8 +356,9 @@ def slides(): # Iterate through each row in the DataFrame to find the correct band value_floored = np.floor(value) return eco4_scores_sap_table[ - (eco4_scores_sap_table["From"] <= value_floored) & (eco4_scores_sap_table["Up to"] >= value_floored) - ].squeeze()["Band"] + (eco4_scores_sap_table["From"] <= value_floored) + & (eco4_scores_sap_table["Up to"] >= value_floored) + ].squeeze()["Band"] def identify_funding_measure(p, p_recs, is_social): measures = ["cavity_wall_insulation", "loft_insulation"] @@ -287,15 +370,17 @@ def slides(): project_abs = find_abs( sap_movement=funding_measure["sap_points"], starting_sap=p["current_sap_points"], - floor_area=p["total_floor_area"] + floor_area=p["total_floor_area"], + ) + property_abs.append( + { + "property_id": p["property_id"], + "measure": funding_measure["type"], + "cost": funding_measure["estimated_cost"], + "abs": project_abs, + "is_social": is_social, + } ) - property_abs.append({ - "property_id": p["property_id"], - "measure": funding_measure["type"], - "cost": funding_measure["estimated_cost"], - "abs": project_abs, - "is_social": is_social - }) if not property_abs: return None @@ -351,7 +436,9 @@ def slides(): band_b_proportion = 0.195 band_c_proportion = 0.219 band_d_proportion = 0.156 - a_to_d_proportion = band_a_proportion + band_b_proportion + band_c_proportion + band_d_proportion + a_to_d_proportion = ( + band_a_proportion + band_b_proportion + band_c_proportion + band_d_proportion + ) benefits_proportion = 0.51 @@ -360,20 +447,26 @@ def slides(): # We scale the private funding based on these two factors private_funding_scaled = private_funding * benefits_proportion * a_to_d_proportion - n_private_projects = np.round((~funding["is_social"]).sum() * benefits_proportion * a_to_d_proportion) + n_private_projects = np.round( + (~funding["is_social"]).sum() * benefits_proportion * a_to_d_proportion + ) # Look at the impact of EWI for scenario ewi_jobs = recommendations_df[ - (recommendations_df["Scenario ID"] == 49) & (recommendations_df["type"] == "external_wall_insulation") - ] + (recommendations_df["Scenario ID"] == 49) + & (recommendations_df["type"] == "external_wall_insulation") + ] ewi_jobs["estimated_cost"].sum() has_cavity = recommendations_df[ - (recommendations_df["type"] == "cavity_wall_insulation") & (recommendations_df["Scenario ID"] == 47) - ] + (recommendations_df["type"] == "cavity_wall_insulation") + & (recommendations_df["Scenario ID"] == 47) + ] # Take the some properties in this - cavity_units = properties_df[properties_df["property_id"].isin(has_cavity["property_id"].values)] + cavity_units = properties_df[ + properties_df["property_id"].isin(has_cavity["property_id"].values) + ] cavity_units[cavity_units.index == 3][["uprn", "property_id"]] @@ -381,41 +474,52 @@ def slides(): # Recommenation type by kwh savings per unit recommendations_final_scenario = recommendations_df[ - recommendations_df["Scenario ID"].isin([51]) & - (recommendations_df["default"] == True) - ].copy() + recommendations_df["Scenario ID"].isin([51]) + & (recommendations_df["default"] == True) + ].copy() # Merge on floor area recommendations_final_scenario = recommendations_final_scenario.merge( properties_df[["property_id", "total_floor_area"]], on="property_id", how="left" ) recommendations_final_scenario = recommendations_final_scenario[ - ~pd.isnull(recommendations_final_scenario["total_floor_area"])] - recommendations_final_scenario["kwh_savings_per_unit"] = recommendations_final_scenario["kwh_savings"] / \ - recommendations_final_scenario["total_floor_area"] - - recommendations_final_scenario["type_mapped2"] = recommendations_df["type"].copy().replace( - { - "room_roof_insulation": "roof_insulation", - "flat_roof_insulation": "roof_insulation", - "hot_water_tank_insulation": "other", - "cylinder_thermostat": "other", - "sealing_open_fireplace": "other", - "suspended_floor_insulation": "floor_insulation", - "solid_floor_insulation": "floor_insulation", - } + ~pd.isnull(recommendations_final_scenario["total_floor_area"]) + ] + recommendations_final_scenario["kwh_savings_per_unit"] = ( + recommendations_final_scenario["kwh_savings"] + / recommendations_final_scenario["total_floor_area"] ) - aggs = recommendations_final_scenario.groupby("type_mapped")[ - ["kwh_savings_per_unit", "estimated_cost"]].mean().reset_index().sort_values( - "kwh_savings_per_unit", ascending=False + recommendations_final_scenario["type_mapped2"] = ( + recommendations_df["type"] + .copy() + .replace( + { + "room_roof_insulation": "roof_insulation", + "flat_roof_insulation": "roof_insulation", + "hot_water_tank_insulation": "other", + "cylinder_thermostat": "other", + "sealing_open_fireplace": "other", + "suspended_floor_insulation": "floor_insulation", + "solid_floor_insulation": "floor_insulation", + } + ) + ) + + aggs = ( + recommendations_final_scenario.groupby("type_mapped")[ + ["kwh_savings_per_unit", "estimated_cost"] + ] + .mean() + .reset_index() + .sort_values("kwh_savings_per_unit", ascending=False) ) aggs["cost_per_kwh_saved"] = aggs["estimated_cost"] / aggs["kwh_savings_per_unit"] # Show more columns with pandas - pd.set_option('display.max_columns', None) + pd.set_option("display.max_columns", None) # Show more rows with pandas - pd.set_option('display.max_rows', None) + pd.set_option("display.max_rows", None) # Show more characters in a column - pd.set_option('display.max_colwidth', None) + pd.set_option("display.max_colwidth", None) def lewes_outputs(): @@ -427,12 +531,14 @@ def lewes_outputs(): """ # get the asset list - asset_list = read_csv_from_s3(bucket_name="retrofit-plan-inputs-dev", filepath="8/90/pilot.csv") + asset_list = read_csv_from_s3( + bucket_name="retrofit-plan-inputs-dev", filepath="8/90/pilot.csv" + ) asset_list = pd.DataFrame(asset_list) # Get non-invasive recommendations non_intrusive_recommendations = read_csv_from_s3( bucket_name="retrofit-plan-inputs-dev", - filepath="8/90/non_invasive_recommendations.csv" + filepath="8/90/non_invasive_recommendations.csv", ) non_intrusive_recommendations = pd.DataFrame(non_intrusive_recommendations) @@ -440,20 +546,21 @@ def lewes_outputs(): portfolio_id = 90 # Look at one scenario at a time, otherwise this is agony scenario_ids = [47, 48, 49, 50, 51] - properties_data, plans_data, recommendations_data = get_data(portfolio_id, scenario_ids) + properties_data, plans_data, recommendations_data = get_data( + portfolio_id, scenario_ids + ) properties_df = pd.DataFrame(properties_data) recommendations_df = pd.DataFrame(recommendations_data) # Unnest this import ast + survey_recs = [] for _, row in non_intrusive_recommendations.iterrows(): recs = ast.literal_eval(row["recommendations"]) ashp_rec = next((r for r in recs if r["type"] == "air_source_heat_pump"), None) solar_rec = next((r for r in recs if r["type"] == "solar_pv"), None) - to_append = { - "uprn": row["uprn"] - } + to_append = {"uprn": row["uprn"]} if ashp_rec["suitable"]: to_append = { **to_append, @@ -479,44 +586,57 @@ def lewes_outputs(): domna_kwh = 10850 scaling_factor = vital_kwh / domna_kwh - next_gen_dataset = properties_df[[ - "uprn", "address", "postcode", - "property_type", "built_form", "current_energy_demand_heating_hotwater", - "mainfuel", "total_floor_area", "floor_height" - ]].rename( - columns={ - "mainfuel": "primary_fuel_type", - "total_floor_area": "gross_floor_area", - "current_energy_demand_heating_hotwater": "estimated_heating_hotwater_kwh" - } - ).merge( - asset_list[["uprn", "number_of_floors"]], - how="left", - on="uprn" - ).merge( - survey_recs, - how="left", - on="uprn" + next_gen_dataset = ( + properties_df[ + [ + "uprn", + "address", + "postcode", + "property_type", + "built_form", + "current_energy_demand_heating_hotwater", + "mainfuel", + "total_floor_area", + "floor_height", + ] + ] + .rename( + columns={ + "mainfuel": "primary_fuel_type", + "total_floor_area": "gross_floor_area", + "current_energy_demand_heating_hotwater": "estimated_heating_hotwater_kwh", + } + ) + .merge(asset_list[["uprn", "number_of_floors"]], how="left", on="uprn") + .merge(survey_recs, how="left", on="uprn") ) next_gen_dataset["estimated_heating_hotwater_kwh_scaled"] = ( next_gen_dataset["estimated_heating_hotwater_kwh"] * scaling_factor ) next_gen_dataset["ashp_suitable"] = next_gen_dataset["ashp_suitable"].fillna(False) - next_gen_dataset["solar_suitable"] = next_gen_dataset["solar_suitable"].fillna(False) + next_gen_dataset["solar_suitable"] = next_gen_dataset["solar_suitable"].fillna( + False + ) # We prepare the scenario outputs by property type grouped_data = next_gen_dataset.copy() grouped_data["property_sub_type"] = grouped_data["built_form"].copy() # If a property is a flat, re-map sub_type just to flat - grouped_data.loc[grouped_data["property_type"] == "Flat", "property_sub_type"] = "Flat" + grouped_data.loc[grouped_data["property_type"] == "Flat", "property_sub_type"] = ( + "Flat" + ) # Same for maisonettes - grouped_data.loc[grouped_data["property_type"] == "Maisonette", "property_sub_type"] = "Maisonette" + grouped_data.loc[ + grouped_data["property_type"] == "Maisonette", "property_sub_type" + ] = "Maisonette" # We now pull out the recommendations impact by property type and sub type # Exclude sealing open fireplaces - recommendations_df = recommendations_df[recommendations_df["type"] != "sealing_open_fireplace"] + recommendations_df = recommendations_df[ + recommendations_df["type"] != "sealing_open_fireplace" + ] # We update the type column so that if type == heating, and the description contains "air source heat pump", # the type is "air_source_heat_pump", else if the description contains "high heat retention storage heaters", else @@ -532,108 +652,130 @@ def lewes_outputs(): np.where( recommendations_df["description"].str.contains("condensing boiler"), "Boiler Upgrade", - recommendations_df["type"] - ) - ) + recommendations_df["type"], + ), + ), ), - recommendations_df["type"] + recommendations_df["type"], ) recommendation_types = recommendations_df["type"].unique().tolist() rename_dict = { - 'hot_water_tank_insulation': 'Hot Water Tank Insulation', - 'windows_glazing': 'Windows Glazing', - 'secondary_heating': 'Secondary Heating', - 'cavity_wall_insulation': 'Cavity Wall Insulation', - 'flat_roof_insulation': 'Flat Roof Insulation', - 'mechanical_ventilation': 'Mechanical Ventilation', - 'loft_insulation': 'Loft Insulation', - 'cylinder_thermostat': 'Cylinder Thermostat', - 'room_roof_insulation': 'Room Roof Insulation', - 'low_energy_lighting': 'Low Energy Lighting', - 'external_wall_insulation': 'External Wall Insulation', - 'solar_pv': 'Solar PV', - 'heating_control': 'Heating Control', - 'solid_floor_insulation': 'Solid Floor Insulation', - 'suspended_floor_insulation': 'Suspended Floor Insulation', - 'internal_wall_insulation': 'Internal Wall Insulation' + "hot_water_tank_insulation": "Hot Water Tank Insulation", + "windows_glazing": "Windows Glazing", + "secondary_heating": "Secondary Heating", + "cavity_wall_insulation": "Cavity Wall Insulation", + "flat_roof_insulation": "Flat Roof Insulation", + "mechanical_ventilation": "Mechanical Ventilation", + "loft_insulation": "Loft Insulation", + "cylinder_thermostat": "Cylinder Thermostat", + "room_roof_insulation": "Room Roof Insulation", + "low_energy_lighting": "Low Energy Lighting", + "external_wall_insulation": "External Wall Insulation", + "solar_pv": "Solar PV", + "heating_control": "Heating Control", + "solid_floor_insulation": "Solid Floor Insulation", + "suspended_floor_insulation": "Suspended Floor Insulation", + "internal_wall_insulation": "Internal Wall Insulation", } property_scenario_impact = [] for scenario_id in tqdm(scenario_ids): # Get the recommendations for the scenario, default scenario_recommendations = recommendations_df[ - (recommendations_df["Scenario ID"] == scenario_id) & - (recommendations_df["default"] == True) - ].copy() + (recommendations_df["Scenario ID"] == scenario_id) + & (recommendations_df["default"] == True) + ].copy() - scenario_recommendations['Estimated Lighting kWh Savings'] = scenario_recommendations.apply( - lambda x: x['kwh_savings'] if x['type'] == 'low_energy_lighting' else 0, - axis=1) - scenario_recommendations['Estimated Solar kWh Savings'] = scenario_recommendations.apply( - lambda x: x['kwh_savings'] if x['type'] == 'solar_pv' else 0, axis=1) + scenario_recommendations["Estimated Lighting kWh Savings"] = ( + scenario_recommendations.apply( + lambda x: x["kwh_savings"] if x["type"] == "low_energy_lighting" else 0, + axis=1, + ) + ) + scenario_recommendations["Estimated Solar kWh Savings"] = ( + scenario_recommendations.apply( + lambda x: x["kwh_savings"] if x["type"] == "solar_pv" else 0, axis=1 + ) + ) # Set 'Estimated Kwh Savings' to zero where specific kwh columns are used - scenario_recommendations['Estimated Heating Demand kWh Savings'] = scenario_recommendations.apply( - lambda x: 0 if x['type'] in ['low_energy_lighting', 'solar_pv'] else x[ - 'kwh_savings'], axis=1) + scenario_recommendations["Estimated Heating Demand kWh Savings"] = ( + scenario_recommendations.apply( + lambda x: ( + 0 + if x["type"] in ["low_energy_lighting", "solar_pv"] + else x["kwh_savings"] + ), + axis=1, + ) + ) - scenario_grouped_data = scenario_recommendations.groupby(['property_id']).agg({ - 'Estimated Heating Demand kWh Savings': 'sum', - 'Estimated Lighting kWh Savings': 'sum', - 'Estimated Solar kWh Savings': 'sum', - "estimated_cost": "sum" - }).reset_index() + scenario_grouped_data = ( + scenario_recommendations.groupby(["property_id"]) + .agg( + { + "Estimated Heating Demand kWh Savings": "sum", + "Estimated Lighting kWh Savings": "sum", + "Estimated Solar kWh Savings": "sum", + "estimated_cost": "sum", + } + ) + .reset_index() + ) comparison = properties_df.drop_duplicates()[ ["uprn", "property_id", "current_energy_demand_heating_hotwater"] - ].merge( - scenario_grouped_data, on=["property_id"], how="left" - ) - comparison["Estimated Heating Demand kWh Savings"] = ( - comparison["Estimated Heating Demand kWh Savings"].fillna(0) - ) - comparison["Estimated Lighting kWh Savings"] = ( - comparison["Estimated Lighting kWh Savings"].fillna(0) - ) - comparison["Estimated Solar kWh Savings"] = ( - comparison["Estimated Solar kWh Savings"].fillna(0) - ) + ].merge(scenario_grouped_data, on=["property_id"], how="left") + comparison["Estimated Heating Demand kWh Savings"] = comparison[ + "Estimated Heating Demand kWh Savings" + ].fillna(0) + comparison["Estimated Lighting kWh Savings"] = comparison[ + "Estimated Lighting kWh Savings" + ].fillna(0) + comparison["Estimated Solar kWh Savings"] = comparison[ + "Estimated Solar kWh Savings" + ].fillna(0) comparison["estimated_cost"] = comparison["estimated_cost"].fillna(0) comparison["post_scenario_heating_hotwater_kwh"] = ( - comparison["current_energy_demand_heating_hotwater"] - comparison["Estimated Heating Demand kWh Savings"] + comparison["current_energy_demand_heating_hotwater"] + - comparison["Estimated Heating Demand kWh Savings"] ) # For each scenario, we create a measure matrix measure_matrix = scenario_recommendations.pivot_table( - index='property_id', - columns='type', - values='id', # Using 'id' just as a placeholder for the pivot + index="property_id", + columns="type", + values="id", # Using 'id' just as a placeholder for the pivot aggfunc=lambda x: True, # If an ID exists for a given type, mark as True - fill_value=False # Fill other entries as False + fill_value=False, # Fill other entries as False ).reset_index() non_zero_heat_demand_impact = comparison[ - (comparison["Estimated Heating Demand kWh Savings"] > 0) | - (comparison["Estimated Lighting kWh Savings"] > 0) | - (comparison["Estimated Solar kWh Savings"] > 0) - ] + (comparison["Estimated Heating Demand kWh Savings"] > 0) + | (comparison["Estimated Lighting kWh Savings"] > 0) + | (comparison["Estimated Solar kWh Savings"] > 0) + ] measure_matrix = measure_matrix[ - measure_matrix["property_id"].isin(non_zero_heat_demand_impact["property_id"].values) + measure_matrix["property_id"].isin( + non_zero_heat_demand_impact["property_id"].values + ) ] measure_matrix = measure_matrix.rename(columns=rename_dict) - comparison = comparison.merge( - measure_matrix, on="property_id", how="left" - ) + comparison = comparison.merge(measure_matrix, on="property_id", how="left") comparison["scenario_id"] = scenario_id property_scenario_impact.append(comparison) property_scenario_impact = pd.concat(property_scenario_impact) # property_scenario_impact = property_scenario_impact.drop(columns=["property_id", "Estimated Kwh Savings"]) - for v in list(rename_dict.values()) + ["Air Source Heat Pump", "High Heat Retention Storage", "Boiler Upgrade"]: + for v in list(rename_dict.values()) + [ + "Air Source Heat Pump", + "High Heat Retention Storage", + "Boiler Upgrade", + ]: # Fill NaNs with False property_scenario_impact[v] = property_scenario_impact[v].fillna(False) @@ -642,18 +784,22 @@ def lewes_outputs(): property_scenario_impact["post_scenario_heating_hotwater_kwh"] * scaling_factor ) - grouped_data = grouped_data.merge( - property_scenario_impact, how="left", on="uprn" - ) + grouped_data = grouped_data.merge(property_scenario_impact, how="left", on="uprn") # Agg the data - grouped_data = grouped_data.groupby(["property_type", "property_sub_type", "scenario_id"]).agg({ - "estimated_heating_hotwater_kwh": "mean", - "estimated_heating_hotwater_kwh_scaled": "mean", - "estimated_cost": "mean", - "post_scenario_heating_hotwater_kwh": "mean", - "post_scenario_heating_hotwater_kwh_scaled": "mean" - }).reset_index() + grouped_data = ( + grouped_data.groupby(["property_type", "property_sub_type", "scenario_id"]) + .agg( + { + "estimated_heating_hotwater_kwh": "mean", + "estimated_heating_hotwater_kwh_scaled": "mean", + "estimated_cost": "mean", + "post_scenario_heating_hotwater_kwh": "mean", + "post_scenario_heating_hotwater_kwh_scaled": "mean", + } + ) + .reset_index() + ) scenario_names = pd.DataFrame( [ @@ -665,45 +811,40 @@ def lewes_outputs(): "scenario_id": 48, "scenario": "Demand reduction – no solid wall, floors or heating/renewables", }, - { - "scenario_id": 49, - "scenario": "Demand reduction – no decant" - }, + {"scenario_id": 49, "scenario": "Demand reduction – no decant"}, { "scenario_id": 50, "scenario": "Demand reduction – no decant + heating & solar", }, - { - "scenario_id": 51, - "scenario": "Whole house retrofit" - } + {"scenario_id": 51, "scenario": "Whole house retrofit"}, ] - ) - grouped_data = grouped_data.merge( - scenario_names, how="left", on="scenario_id" - ) + grouped_data = grouped_data.merge(scenario_names, how="left", on="scenario_id") if not grouped_data[ - grouped_data["estimated_heating_hotwater_kwh"] < grouped_data["post_scenario_heating_hotwater_kwh"]].empty: + grouped_data["estimated_heating_hotwater_kwh"] + < grouped_data["post_scenario_heating_hotwater_kwh"] + ].empty: raise Exception("someting went wrong") - if not grouped_data[grouped_data["estimated_heating_hotwater_kwh_scaled"] < grouped_data[ - "post_scenario_heating_hotwater_kwh_scaled"]].empty: + if not grouped_data[ + grouped_data["estimated_heating_hotwater_kwh_scaled"] + < grouped_data["post_scenario_heating_hotwater_kwh_scaled"] + ].empty: raise Exception("someting went wrong") # Reorder the columns grouped_data = grouped_data[ [ - 'property_type', - 'property_sub_type', - 'scenario', - 'estimated_heating_hotwater_kwh', - 'post_scenario_heating_hotwater_kwh', - 'estimated_heating_hotwater_kwh_scaled', - 'post_scenario_heating_hotwater_kwh_scaled', - 'estimated_cost', + "property_type", + "property_sub_type", + "scenario", + "estimated_heating_hotwater_kwh", + "post_scenario_heating_hotwater_kwh", + "estimated_heating_hotwater_kwh_scaled", + "post_scenario_heating_hotwater_kwh_scaled", + "estimated_cost", ] ] @@ -730,9 +871,7 @@ def lewes_outputs(): scenario_names, how="left", on="scenario_id" ) - lewes_data = next_gen_dataset.merge( - property_scenario_impact, how="left", on="uprn" - ) + lewes_data = next_gen_dataset.merge(property_scenario_impact, how="left", on="uprn") lewes_data = lewes_data.sort_values( ["postcode", "uprn", "scenario_id"], ascending=True @@ -742,31 +881,52 @@ def lewes_outputs(): # TODO - remap the heating type lewes_data = lewes_data[ [ - 'uprn', 'address', 'postcode', 'property_type', 'built_form', + "uprn", + "address", + "postcode", + "property_type", + "built_form", # 'estimated_heating_hotwater_kwh', - 'primary_fuel_type', 'gross_floor_area', 'floor_height', 'number_of_floors', 'ashp_suitable', - 'ashp_size_kw', - 'ashp_cost', 'solar_suitable', 'solar_size_kwp', 'solar_cost', - 'scenario', - 'estimated_heating_hotwater_kwh_scaled', - 'post_scenario_heating_hotwater_kwh_scaled', + "primary_fuel_type", + "gross_floor_area", + "floor_height", + "number_of_floors", + "ashp_suitable", + "ashp_size_kw", + "ashp_cost", + "solar_suitable", + "solar_size_kwp", + "solar_cost", + "scenario", + "estimated_heating_hotwater_kwh_scaled", + "post_scenario_heating_hotwater_kwh_scaled", # 'property_id', - dropped # 'current_energy_demand_heating_hotwater', - 'Estimated Heating Demand kWh Savings', - 'Estimated Lighting kWh Savings', - 'Estimated Solar kWh Savings', - 'estimated_cost', - 'post_scenario_heating_hotwater_kwh', 'Cavity Wall Insulation', 'Cylinder Thermostat', - 'Flat Roof Insulation', - 'Hot Water Tank Insulation', 'Loft Insulation', 'Mechanical Ventilation', 'Room Roof Insulation', + "Estimated Heating Demand kWh Savings", + "Estimated Lighting kWh Savings", + "Estimated Solar kWh Savings", + "estimated_cost", + "post_scenario_heating_hotwater_kwh", + "Cavity Wall Insulation", + "Cylinder Thermostat", + "Flat Roof Insulation", + "Hot Water Tank Insulation", + "Loft Insulation", + "Mechanical Ventilation", + "Room Roof Insulation", # 'scenario_id', - dropped - 'Low Energy Lighting', 'Secondary Heating', 'Windows Glazing', 'External Wall Insulation', - 'Heating Control', - 'Solar PV', - 'Air Source Heat Pump', 'Boiler Upgrade', 'High Heat Retention Storage', - 'Internal Wall Insulation', - 'Solid Floor Insulation', - 'Suspended Floor Insulation', + "Low Energy Lighting", + "Secondary Heating", + "Windows Glazing", + "External Wall Insulation", + "Heating Control", + "Solar PV", + "Air Source Heat Pump", + "Boiler Upgrade", + "High Heat Retention Storage", + "Internal Wall Insulation", + "Solid Floor Insulation", + "Suspended Floor Insulation", ] ].rename( columns={ @@ -783,29 +943,34 @@ def lewes_outputs(): # "estimated_heating_hotwater_kwh": "Estimated Heating & Hot Water kwh", "estimated_heating_hotwater_kwh_scaled": "Estimated Heating & Hot Water kwh", "post_scenario_heating_hotwater_kwh_scaled": "Post Scenario Heating & Hot Water kwh", - "estimated_cost": "Estimated Cost of Scenario" + "estimated_cost": "Estimated Cost of Scenario", } ) # We save this dataset, which will be shared with Lewes Council lewes_data.to_csv( - "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Newhaven/outputs/Lewes property data.csv", index=False + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Newhaven/outputs/Lewes property data.csv", + index=False, ) - df_pivot = property_scenario_impact.pivot_table(index='uprn', columns='scenario', - values=['post_scenario_heating_hotwater_kwh', - 'post_scenario_heating_hotwater_kwh_scaled']) + df_pivot = property_scenario_impact.pivot_table( + index="uprn", + columns="scenario", + values=[ + "post_scenario_heating_hotwater_kwh", + "post_scenario_heating_hotwater_kwh_scaled", + ], + ) # Flattening multi-index columns - df_pivot.columns = [f'{col[0]}_{col[1]}' for col in df_pivot.columns] + df_pivot.columns = [f"{col[0]}_{col[1]}" for col in df_pivot.columns] # Reset the index to have a clean dataframe df_pivot.reset_index(inplace=True) - next_gen_dataset = next_gen_dataset.merge( - df_pivot, how="left", on="uprn" - ) + next_gen_dataset = next_gen_dataset.merge(df_pivot, how="left", on="uprn") next_gen_dataset.to_csv( - "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Newhaven/outputs/next_gen_dataset.csv", index=False + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Newhaven/outputs/next_gen_dataset.csv", + index=False, ) diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/d_restart_failed_subtasks.py b/etl/customers/peabody/Nov 2025 Consulting Project/d_restart_failed_subtasks.py index 68978b08..d86be050 100644 --- a/etl/customers/peabody/Nov 2025 Consulting Project/d_restart_failed_subtasks.py +++ b/etl/customers/peabody/Nov 2025 Consulting Project/d_restart_failed_subtasks.py @@ -10,6 +10,7 @@ Additionally, we wil find the problematic records and remove them Given we ran an EPC C scenario, we should check how many properties, below EPC C we have, that have no plan or recommendations in case something went wrong """ + import pandas as pd from sqlalchemy.orm import Session from backend.app.db.models.portfolio import PropertyModel @@ -19,8 +20,7 @@ from backend.app.db.connection import db_session def get_uprns_for_portfolio(session: Session, portfolio_id: int) -> list[int]: return [ uprn - for (uprn,) in - session.query(PropertyModel.uprn) + for (uprn,) in session.query(PropertyModel.uprn) .filter(PropertyModel.portfolio_id == portfolio_id) .all() if uprn is not None @@ -34,7 +34,7 @@ with db_session() as session: sal = pd.read_excel( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/20251213 Model " "data.xlsx", - sheet_name="Standardised Asset List" + sheet_name="Standardised Asset List", ) missed_properties = sal[~sal["epc_os_uprn"].isin(completed_uprns)] @@ -44,7 +44,7 @@ missed_properties.to_excel( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/" "d_failed_properties_to_restart_20260102.xlsx", sheet_name="Standardised Asset List", - index=False + index=False, ) # Fixing an error - triggered jobs without removing EWI/IWI so need to delete all plans associated to these scenarios: @@ -52,14 +52,14 @@ scenario_id = None from sqlalchemy import select, func from sqlalchemy.orm import Session -from backend.app.db.models.recommendations import Plan +from backend.app.db.models.recommendations import PlanModel def count_plans_for_scenario(session: Session, scenario_id: int) -> int: return session.execute( select(func.count()) - .select_from(Plan) - .where(Plan.scenario_id == scenario_id) + .select_from(PlanModel) + .where(PlanModel.scenario_id == scenario_id) ).scalar_one() @@ -69,8 +69,7 @@ with db_session() as session: def get_plan_ids_for_scenario(session: Session, scenario_id: int) -> list[int]: result = session.execute( - select(Plan.id) - .where(Plan.scenario_id == scenario_id) + select(PlanModel.id).where(PlanModel.scenario_id == scenario_id) ) return [row.id for row in result] @@ -84,7 +83,7 @@ from sqlalchemy.orm import Session def chunked(iterable, size): for i in range(0, len(iterable), size): - yield iterable[i:i + size] + yield iterable[i : i + size] from sqlalchemy import text @@ -103,12 +102,14 @@ def delete_plan_batch(session: Session, plan_ids: list[int]): # recommendation_materials # ---------------------------- session.execute( - text(""" + text( + """ DELETE FROM recommendation_materials rm USING plan_recommendations pr WHERE rm.recommendation_id = pr.recommendation_id AND pr.plan_id = ANY(:plan_ids) - """), + """ + ), params, ) @@ -116,10 +117,12 @@ def delete_plan_batch(session: Session, plan_ids: list[int]): # plan_recommendations # ---------------------------- session.execute( - text(""" + text( + """ DELETE FROM plan_recommendations WHERE plan_id = ANY(:plan_ids) - """), + """ + ), params, ) @@ -127,14 +130,16 @@ def delete_plan_batch(session: Session, plan_ids: list[int]): # recommendations (only those used by these plans) # ---------------------------- session.execute( - text(""" + text( + """ DELETE FROM recommendation r WHERE r.id IN ( SELECT DISTINCT recommendation_id FROM plan_recommendations WHERE plan_id = ANY(:plan_ids) ) - """), + """ + ), params, ) @@ -142,10 +147,12 @@ def delete_plan_batch(session: Session, plan_ids: list[int]): # plans LAST # ---------------------------- session.execute( - text(""" + text( + """ DELETE FROM plan WHERE id = ANY(:plan_ids) - """), + """ + ), params, ) diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/f_diagnostics.py b/etl/customers/peabody/Nov 2025 Consulting Project/f_diagnostics.py index 4b946c60..509c8179 100644 --- a/etl/customers/peabody/Nov 2025 Consulting Project/f_diagnostics.py +++ b/etl/customers/peabody/Nov 2025 Consulting Project/f_diagnostics.py @@ -5,6 +5,7 @@ This includes: # EPC C, there should be a plan 2) If the plan is fabric first, make sure they are actually fabric first """ + import pandas as pd scenario_names = { @@ -33,7 +34,9 @@ for scenario_id, scenario_name in scenario_names.items(): ) # find properties that are below the scenario sap target, but have no recommended measures - df["below_scenario_target"] = df["current_sap_points"] < scenario_sap_targets[scenario_id] + df["below_scenario_target"] = ( + df["current_sap_points"] < scenario_sap_targets[scenario_id] + ) df["no_recommended_measures"] = df["sap_points"] == 0 df["zero_cost"] = df["total_retrofit_cost"] == 0 df["sap_points_above_zero"] = df["sap_points"] > 0 @@ -45,7 +48,9 @@ for scenario_id, scenario_name in scenario_names.items(): ].copy() if scenario_sap_targets[scenario_id] == 81: - problematic_properties = problematic_properties[problematic_properties["property_type"] != "Flat"] + problematic_properties = problematic_properties[ + problematic_properties["property_type"] != "Flat" + ] zero_cost_above_zero_sap = df[ (df["sap_points_above_zero"] & df["zero_cost"]) @@ -61,8 +66,12 @@ for scenario_id, scenario_name in scenario_names.items(): # pd.set_option('display.width', 1000) # problematic_properties.head(len(problematic_properties)) - print(f"We have {len(problematic_properties)} problematic properties for scenario {scenario_name} ({scenario_id})") - print(f"We have {len(zero_cost_above_zero_sap)} zero cost properties for scenario {scenario_name} ({scenario_id})") + print( + f"We have {len(problematic_properties)} problematic properties for scenario {scenario_name} ({scenario_id})" + ) + print( + f"We have {len(zero_cost_above_zero_sap)} zero cost properties for scenario {scenario_name} ({scenario_id})" + ) problems.append(problematic_properties) problems.append(zero_cost_above_zero_sap) @@ -97,12 +106,12 @@ all_problems = all_problems.drop_duplicates(subset=["uprn"]) sal = pd.read_excel( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/20251213 Model " "data.xlsx", - sheet_name="Standardised Asset List" + sheet_name="Standardised Asset List", ) sal2 = pd.read_excel( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/20260105 - additional " "UPRNS.xlsx", - sheet_name="Standardised Asset List" + sheet_name="Standardised Asset List", ) sal = pd.concat([sal, sal2]) @@ -114,7 +123,7 @@ retry.to_excel( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/" "d_problematic_properties_to_review_20260106.xlsx", sheet_name="Standardised Asset List", - index=False + index=False, ) # Delete associated plans @@ -126,19 +135,20 @@ uprns = retry["epc_os_uprn"].tolist() from sqlalchemy.orm import Session from backend.app.db.models.portfolio import PropertyModel from backend.app.db.connection import db_session -from backend.app.db.models.recommendations import Plan +from backend.app.db.models.recommendations import PlanModel from sqlalchemy import select, delete from sqlalchemy.exc import NoResultFound from sqlalchemy.orm import sessionmaker -def get_property_ids_for_uprns(session: Session, portfolio_id: int, uprns: list[int]) -> list[int]: +def get_property_ids_for_uprns( + session: Session, portfolio_id: int, uprns: list[int] +) -> list[int]: return [ property.id for property in session.query(PropertyModel) .filter( - PropertyModel.portfolio_id == portfolio_id, - PropertyModel.uprn.in_(uprns) + PropertyModel.portfolio_id == portfolio_id, PropertyModel.uprn.in_(uprns) ) .all() ] @@ -149,15 +159,21 @@ with db_session() as session: # Get all and delete plans for these property IDs -def get_all_plans_for_property_ids(session: Session, property_ids: list[int]) -> list[Plan]: - return session.query(Plan).filter(Plan.property_id.in_(property_ids)).all() +def get_all_plans_for_property_ids( + session: Session, property_ids: list[int] +) -> list[PlanModel]: + return ( + session.query(PlanModel).filter(PlanModel.property_id.in_(property_ids)).all() + ) -def get_ids_of_plans_for_deletion(session: Session, property_ids: list[int]) -> list[int]: +def get_ids_of_plans_for_deletion( + session: Session, property_ids: list[int] +) -> list[int]: return [ plan.id - for plan in session.query(Plan) - .filter(Plan.property_id.in_(property_ids)) + for plan in session.query(PlanModel) + .filter(PlanModel.property_id.in_(property_ids)) .all() ] @@ -168,7 +184,7 @@ with db_session() as session: def chunked(iterable, size): for i in range(0, len(iterable), size): - yield iterable[i:i + size] + yield iterable[i : i + size] from sqlalchemy import text @@ -187,12 +203,14 @@ def delete_plan_batch(session: Session, plan_ids: list[int]): # recommendation_materials # ---------------------------- session.execute( - text(""" + text( + """ DELETE FROM recommendation_materials rm USING plan_recommendations pr WHERE rm.recommendation_id = pr.recommendation_id AND pr.plan_id = ANY(:plan_ids) - """), + """ + ), params, ) @@ -200,10 +218,12 @@ def delete_plan_batch(session: Session, plan_ids: list[int]): # plan_recommendations # ---------------------------- session.execute( - text(""" + text( + """ DELETE FROM plan_recommendations WHERE plan_id = ANY(:plan_ids) - """), + """ + ), params, ) @@ -211,14 +231,16 @@ def delete_plan_batch(session: Session, plan_ids: list[int]): # recommendations (only those used by these plans) # ---------------------------- session.execute( - text(""" + text( + """ DELETE FROM recommendation r WHERE r.id IN ( SELECT DISTINCT recommendation_id FROM plan_recommendations WHERE plan_id = ANY(:plan_ids) ) - """), + """ + ), params, ) @@ -226,10 +248,12 @@ def delete_plan_batch(session: Session, plan_ids: list[int]): # plans LAST # ---------------------------- session.execute( - text(""" + text( + """ DELETE FROM plan WHERE id = ANY(:plan_ids) - """), + """ + ), params, ) diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/g_rebaselining_installed_measrues.py b/etl/customers/peabody/Nov 2025 Consulting Project/g_rebaselining_installed_measrues.py index 4405d113..c451938d 100644 --- a/etl/customers/peabody/Nov 2025 Consulting Project/g_rebaselining_installed_measrues.py +++ b/etl/customers/peabody/Nov 2025 Consulting Project/g_rebaselining_installed_measrues.py @@ -2,17 +2,22 @@ import pandas as pd from tqdm import tqdm from sqlalchemy.orm import sessionmaker from backend.app.db.connection import db_engine, db_read_session, db_session -from backend.app.db.models.recommendations import Recommendation, Plan, PlanRecommendations, RecommendationMaterials, \ - InstalledMeasure +from backend.app.db.models.recommendations import ( + Recommendation, + PlanModel, + PlanRecommendations, + RecommendationMaterials, + InstalledMeasure, +) from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel from backend.app.utils import sap_to_epc from typing import Dict, List, Set from recommendations.Costs import Costs from backend.app.db.models.portfolio import Epc -pd.set_option('display.max_rows', 500) -pd.set_option('display.max_columns', 500) -pd.set_option('display.width', 1000) +pd.set_option("display.max_rows", 500) +pd.set_option("display.max_columns", 500) +pd.set_option("display.width", 1000) def get_all_data(portfolio_id, scenario_ids): @@ -22,22 +27,26 @@ def get_all_data(portfolio_id, scenario_ids): # -------------------- # Properties # -------------------- - properties_query = session.query( - PropertyModel, - PropertyDetailsEpcModel - ).join( - PropertyDetailsEpcModel, - PropertyModel.id == PropertyDetailsEpcModel.property_id - ).filter( - PropertyModel.portfolio_id == portfolio_id - ).all() + properties_query = ( + session.query(PropertyModel, PropertyDetailsEpcModel) + .join( + PropertyDetailsEpcModel, + PropertyModel.id == PropertyDetailsEpcModel.property_id, + ) + .filter(PropertyModel.portfolio_id == portfolio_id) + .all() + ) properties_data = [ { - **{col.name: getattr(p.PropertyModel, col.name) - for col in PropertyModel.__table__.columns}, - **{col.name: getattr(p.PropertyDetailsEpcModel, col.name) - for col in PropertyDetailsEpcModel.__table__.columns}, + **{ + col.name: getattr(p.PropertyModel, col.name) + for col in PropertyModel.__table__.columns + }, + **{ + col.name: getattr(p.PropertyDetailsEpcModel, col.name) + for col in PropertyDetailsEpcModel.__table__.columns + }, } for p in properties_query ] @@ -45,12 +54,12 @@ def get_all_data(portfolio_id, scenario_ids): # -------------------- # Plans # -------------------- - plans_query = session.query(Plan).filter( - Plan.scenario_id.in_(scenario_ids) - ).all() + plans_query = ( + session.query(PlanModel).filter(PlanModel.scenario_id.in_(scenario_ids)).all() + ) plans_data = [ - {col.name: getattr(plan, col.name) for col in Plan.__table__.columns} + {col.name: getattr(plan, col.name) for col in PlanModel.__table__.columns} for plan in plans_query ] @@ -59,25 +68,27 @@ def get_all_data(portfolio_id, scenario_ids): # -------------------- # Recommendations (NO materials yet) # -------------------- - recommendations_query = session.query( - Recommendation, - Plan.scenario_id - ).join( - PlanRecommendations, - Recommendation.id == PlanRecommendations.recommendation_id - ).join( - Plan, - Plan.id == PlanRecommendations.plan_id - ).filter( - PlanRecommendations.plan_id.in_(plan_ids), - ).all() + recommendations_query = ( + session.query(Recommendation, PlanModel.scenario_id) + .join( + PlanRecommendations, + Recommendation.id == PlanRecommendations.recommendation_id, + ) + .join(PlanModel, PlanModel.id == PlanRecommendations.plan_id) + .filter( + PlanRecommendations.plan_id.in_(plan_ids), + ) + .all() + ) recommendations_data = [ { - **{col.name: getattr(r.Recommendation, col.name) - for col in Recommendation.__table__.columns}, + **{ + col.name: getattr(r.Recommendation, col.name) + for col in Recommendation.__table__.columns + }, "scenario_id": r.scenario_id, - "materials": [] # placeholder + "materials": [], # placeholder } for r in recommendations_query ] @@ -131,7 +142,7 @@ recommendations_df = pd.read_csv( sustainability_data = pd.read_excel( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody " "- Data Extracts for Domna.xlsx", - sheet_name="Sustainability" + sheet_name="Sustainability", ) sustainability_data_with_sap = pd.read_excel( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/Parity Data " @@ -140,10 +151,16 @@ sustainability_data_with_sap = pd.read_excel( properties_df["uprn"] = properties_df["uprn"].astype(str) property_data_comparison = properties_df.merge( - sustainability_data, how="inner", left_on="uprn", right_on="UPRN", suffixes=("_prop", "_sust") + sustainability_data, + how="inner", + left_on="uprn", + right_on="UPRN", + suffixes=("_prop", "_sust"), ) -property_data_comparison["wall_type"] = property_data_comparison["walls"].str.split(",").str[0].str.strip() +property_data_comparison["wall_type"] = ( + property_data_comparison["walls"].str.split(",").str[0].str.strip() +) column_pairs = { "built_form": "Attachment", @@ -154,25 +171,28 @@ column_pairs = { combination_tables = {} for v1, v2 in column_pairs.items(): - df = property_data_comparison.groupby([v1, v2]).size().reset_index(name='count') + df = property_data_comparison.groupby([v1, v2]).size().reset_index(name="count") combination_tables[v1] = df # We just need all of the measure types, per property recommendation_measure_types = recommendations_df[ - ["property_id", "measure_type" - , "sap_points", "heat_demand", "kwh_savings", "co2_equivalent_savings", - "energy_cost_savings" - ] + [ + "property_id", + "measure_type", + "sap_points", + "heat_demand", + "kwh_savings", + "co2_equivalent_savings", + "energy_cost_savings", + ] ].drop_duplicates() recommendation_measure_types["flag"] = True # We pivot -recommendations_measures_pivot = recommendation_measure_types[ - ["property_id", "measure_type", "flag"] -].drop_duplicates().pivot( - index='property_id', - columns='measure_type', - values='flag' +recommendations_measures_pivot = ( + recommendation_measure_types[["property_id", "measure_type", "flag"]] + .drop_duplicates() + .pivot(index="property_id", columns="measure_type", values="flag") ) recommendations_measures_pivot = recommendations_measures_pivot.reset_index() @@ -180,137 +200,157 @@ properties_to_recs = properties_df.rename(columns={"solar_pv": "solar_data"}).me recommendations_measures_pivot, how="left", on="property_id" ) -sustainability_data["cavity_wall_insulation"] = sustainability_data["Wall Insulation"].isin( - ["FilledCavity", "FilledCavityPlusInternal", "FilledCavityPlusExternal"] -) -sustainability_data["internal_wall_insulation"] = sustainability_data["Wall Insulation"].isin( - ["Internal", "FilledCavityPlusInternal"] -) -sustainability_data["external_wall_insulation"] = sustainability_data["Wall Insulation"].isin( - ["External", "FilledCavityPlusExternal"] -) +sustainability_data["cavity_wall_insulation"] = sustainability_data[ + "Wall Insulation" +].isin(["FilledCavity", "FilledCavityPlusInternal", "FilledCavityPlusExternal"]) +sustainability_data["internal_wall_insulation"] = sustainability_data[ + "Wall Insulation" +].isin(["Internal", "FilledCavityPlusInternal"]) +sustainability_data["external_wall_insulation"] = sustainability_data[ + "Wall Insulation" +].isin(["External", "FilledCavityPlusExternal"]) sustainability_data["loft_insulation"] = sustainability_data["Roof Insulation"].isin( ["mm300", "mm250", "mm350", "mm400", "mm270"] ) sustainability_data["double_glazing"] = sustainability_data["Glazing"].isin( - ["Double 2002 or later", "Double but age unknown", "Triple", "DoubleKnownData", "Secondary", "TripleKnownData"] + [ + "Double 2002 or later", + "Double but age unknown", + "Triple", + "DoubleKnownData", + "Secondary", + "TripleKnownData", + ] ) sustainability_data["secondary_glazing"] = sustainability_data["Glazing"].isin( ["Secondary"] ) -sustainability_data["suspended_floor_insulation"] = sustainability_data["Floor Insulation"].isin( - ["RetroFitted"] +sustainability_data["suspended_floor_insulation"] = sustainability_data[ + "Floor Insulation" +].isin(["RetroFitted"]) + +sustainability_data["boiler_upgrade"] = sustainability_data["Heating"].isin( + ["Boilers"] +) & sustainability_data["Boiler Efficiency"].isin(["A"]) +sustainability_data["air_source_heat_pump"] = sustainability_data["Heating"].isin( + ["Heat pumps (wet)"] ) -sustainability_data["boiler_upgrade"] = ( - sustainability_data["Heating"].isin(["Boilers"]) & sustainability_data["Boiler Efficiency"].isin(["A"]) -) -sustainability_data["air_source_heat_pump"] = (sustainability_data["Heating"].isin(["Heat pumps (wet)"])) +sustainability_data["time_temperature_zone_control"] = sustainability_data[ + "Controls Adequacy" +].isin(["Top Spec"]) -sustainability_data["time_temperature_zone_control"] = ( - sustainability_data["Controls Adequacy"].isin(["Top Spec"]) -) - -sustainability_data["roomstat_programmer_trvs"] = ( - sustainability_data["Controls Adequacy"].isin(["Optimal"]) -) +sustainability_data["roomstat_programmer_trvs"] = sustainability_data[ + "Controls Adequacy" +].isin(["Optimal"]) sustainability_data["flat_roof_insulation"] = ( - (sustainability_data["Roof Construction"] == "Flat") & - (sustainability_data["Roof Insulation"].isin(["mm50", "mm150", "mm100"])) -) + sustainability_data["Roof Construction"] == "Flat" +) & (sustainability_data["Roof Insulation"].isin(["mm50", "mm150", "mm100"])) properties_to_recs["uprn"] = properties_to_recs["uprn"].astype(str) comparison = sustainability_data.merge( properties_to_recs[ - ["uprn", "cavity_wall_insulation", "external_wall_insulation", "internal_wall_insulation", "loft_insulation", - "double_glazing", "secondary_glazing", "suspended_floor_insulation", "boiler_upgrade", "air_source_heat_pump", - "time_temperature_zone_control", "roomstat_programmer_trvs", "flat_roof_insulation", "room_roof_insulation" - ] + [ + "uprn", + "cavity_wall_insulation", + "external_wall_insulation", + "internal_wall_insulation", + "loft_insulation", + "double_glazing", + "secondary_glazing", + "suspended_floor_insulation", + "boiler_upgrade", + "air_source_heat_pump", + "time_temperature_zone_control", + "roomstat_programmer_trvs", + "flat_roof_insulation", + "room_roof_insulation", + ] ], left_on="UPRN", right_on="uprn", how="left", - suffixes=("", "_from_recs") + suffixes=("", "_from_recs"), ) # Flag entries where we've been told that walls are already insulated, but we have recommendations for wall insulation # ------------ Walls ------------ cwi_conflicting = comparison[ - (comparison["cavity_wall_insulation"]) & - (pd.isnull(comparison["cavity_wall_insulation_from_recs"]) == False) - ].copy() + (comparison["cavity_wall_insulation"]) + & (pd.isnull(comparison["cavity_wall_insulation_from_recs"]) == False) +].copy() cwi_conflicting["conflict_cavity_wall_insulation"] = True iwi_conflicting = comparison[ - (comparison["internal_wall_insulation"]) & - (pd.isnull(comparison["internal_wall_insulation_from_recs"]) == False) - ].copy() + (comparison["internal_wall_insulation"]) + & (pd.isnull(comparison["internal_wall_insulation_from_recs"]) == False) +].copy() iwi_conflicting["conflict_iwi_wall_insulation"] = True ewi_conflicting = comparison[ - (comparison["external_wall_insulation"]) & - (pd.isnull(comparison["external_wall_insulation_from_recs"]) == False) - ].copy() + (comparison["external_wall_insulation"]) + & (pd.isnull(comparison["external_wall_insulation_from_recs"]) == False) +].copy() ewi_conflicting["conflict_ewi_wall_insulation"] = True # ------------ Roof ------------ loft_conflicting = comparison[ - (comparison["loft_insulation"]) & - (pd.isnull(comparison["loft_insulation_from_recs"]) == False) - ].copy() + (comparison["loft_insulation"]) + & (pd.isnull(comparison["loft_insulation_from_recs"]) == False) +].copy() loft_conflicting["conflict_loft_insulation"] = True # ------------ Windows ------------ double_glazing_conflicting = comparison[ - (comparison["double_glazing"] | comparison["secondary_glazing"]) & - (pd.isnull(comparison["double_glazing_from_recs"]) == False) & - (pd.isnull(comparison["secondary_glazing_from_recs"]) == True) - ].copy() + (comparison["double_glazing"] | comparison["secondary_glazing"]) + & (pd.isnull(comparison["double_glazing_from_recs"]) == False) + & (pd.isnull(comparison["secondary_glazing_from_recs"]) == True) +].copy() double_glazing_conflicting["conflict_double_glazing"] = True secondary_glazing_conflicting = comparison[ - (comparison["secondary_glazing"]) & - (pd.isnull(comparison["secondary_glazing_from_recs"]) == False) - ].copy() + (comparison["secondary_glazing"]) + & (pd.isnull(comparison["secondary_glazing_from_recs"]) == False) +].copy() secondary_glazing_conflicting["conflict_secondary_glazing"] = True # ------------ Floors ------------ floors_conflicting = comparison[ - (comparison["suspended_floor_insulation"]) & - (pd.isnull(comparison["suspended_floor_insulation_from_recs"]) == False) - ].copy() + (comparison["suspended_floor_insulation"]) + & (pd.isnull(comparison["suspended_floor_insulation_from_recs"]) == False) +].copy() floors_conflicting["conflict_suspended_floor_insulation"] = True # ------------ Boiler Upgrade ------------ boiler_conflicting = comparison[ - (comparison["boiler_upgrade"]) & - (pd.isnull(comparison["boiler_upgrade_from_recs"]) == False) - ].copy() + (comparison["boiler_upgrade"]) + & (pd.isnull(comparison["boiler_upgrade_from_recs"]) == False) +].copy() boiler_conflicting["conflict_boiler_upgrade"] = True # ------------ ASHP ------------ ashp_conflicting = comparison[ - (comparison["air_source_heat_pump"]) & - (pd.isnull(comparison["air_source_heat_pump_from_recs"]) == False) - ].copy() + (comparison["air_source_heat_pump"]) + & (pd.isnull(comparison["air_source_heat_pump_from_recs"]) == False) +].copy() ashp_conflicting["conflict_air_source_heat_pump"] = True # ------------ heat controls ------------ ttzc_conflicting = comparison[ - (comparison["time_temperature_zone_control"]) & - (pd.isnull(comparison["time_temperature_zone_control_from_recs"]) == False) - ].copy() + (comparison["time_temperature_zone_control"]) + & (pd.isnull(comparison["time_temperature_zone_control_from_recs"]) == False) +].copy() ttzc_conflicting["conflict_time_temperature_zone_control"] = True rst_conflicting = comparison[ - (comparison["roomstat_programmer_trvs"]) & - (pd.isnull(comparison["roomstat_programmer_trvs_from_recs"]) == False) - ].copy() + (comparison["roomstat_programmer_trvs"]) + & (pd.isnull(comparison["roomstat_programmer_trvs_from_recs"]) == False) +].copy() rst_conflicting["conflict_roomstat_programmer_trvs"] = True # ------------ Flat Roof Insulation ----------- flat_roof_conflicting = comparison[ - (comparison["flat_roof_insulation"]) & - (pd.isnull(comparison["flat_roof_insulation_from_recs"]) == False) - ].copy() + (comparison["flat_roof_insulation"]) + & (pd.isnull(comparison["flat_roof_insulation_from_recs"]) == False) +].copy() flat_roof_conflicting["conflict_flat_roof_insulation"] = True # All properties with conflicts @@ -327,22 +367,26 @@ all_conflicts = pd.concat( ashp_conflicting, ttzc_conflicting, rst_conflicting, - flat_roof_conflicting + flat_roof_conflicting, ] ) all_conflicts = all_conflicts[ [ "uprn", - 'conflict_cavity_wall_insulation', - 'conflict_iwi_wall_insulation', - 'conflict_ewi_wall_insulation', - 'conflict_loft_insulation', - 'conflict_double_glazing', - 'conflict_secondary_glazing', - 'conflict_suspended_floor_insulation', 'conflict_boiler_upgrade', - 'conflict_air_source_heat_pump', - 'conflict_time_temperature_zone_control', 'conflict_roomstat_programmer_trvs', 'conflict_flat_roof_insulation'] + "conflict_cavity_wall_insulation", + "conflict_iwi_wall_insulation", + "conflict_ewi_wall_insulation", + "conflict_loft_insulation", + "conflict_double_glazing", + "conflict_secondary_glazing", + "conflict_suspended_floor_insulation", + "conflict_boiler_upgrade", + "conflict_air_source_heat_pump", + "conflict_time_temperature_zone_control", + "conflict_roomstat_programmer_trvs", + "conflict_flat_roof_insulation", + ] ] all_conflicts = all_conflicts.rename( @@ -358,31 +402,29 @@ all_conflicts = all_conflicts.rename( "conflict_air_source_heat_pump": "air_source_heat_pump", "conflict_time_temperature_zone_control": "time_temperature_zone_control", "conflict_roomstat_programmer_trvs": "roomstat_programmer_trvs", - "conflict_flat_roof_insulation": "flat_roof_insulation" - + "conflict_flat_roof_insulation": "flat_roof_insulation", } ) # Reshape by UPRN by melting all_conflicts = all_conflicts.melt( - id_vars=["uprn"], - var_name="measure_type", - value_name="already_installed" + id_vars=["uprn"], var_name="measure_type", value_name="already_installed" ) -recommendations_df["property_id"] = recommendations_df["property_id"].astype(int).astype(str) +recommendations_df["property_id"] = ( + recommendations_df["property_id"].astype(int).astype(str) +) properties_df["property_id"] = properties_df["property_id"].astype(int).astype(str) recs_with_uprn = recommendations_df.merge( properties_df[["property_id", "uprn"]], on="property_id", how="left", - suffixes=("", "_prop") + suffixes=("", "_prop"), ) recs_with_uprn = ( - recs_with_uprn - .sort_values("sap_points", ascending=False) + recs_with_uprn.sort_values("sap_points", ascending=False) .groupby(["uprn", "measure_type"], as_index=False) .first() ) @@ -390,13 +432,24 @@ recs_with_uprn = ( recs_with_uprn["uprn"] = recs_with_uprn["uprn"].astype(str) installed_measures_df = all_conflicts.merge( - recs_with_uprn[["uprn", "measure_type", "sap_points", "heat_demand", "kwh_savings", "co2_equivalent_savings", - "energy_cost_savings"]], + recs_with_uprn[ + [ + "uprn", + "measure_type", + "sap_points", + "heat_demand", + "kwh_savings", + "co2_equivalent_savings", + "energy_cost_savings", + ] + ], how="left", - on=["uprn", "measure_type"] + on=["uprn", "measure_type"], ) -installed_measures_df = installed_measures_df[installed_measures_df["already_installed"] == True] +installed_measures_df = installed_measures_df[ + installed_measures_df["already_installed"] == True +] ## --- Sense checking ---- @@ -423,27 +476,26 @@ def add_mechanical_ventilation_for_fabric(installed_measures_df, recs_with_uprn) recs_with_uprn[ (recs_with_uprn["measure_type"] == "mechanical_ventilation") & (recs_with_uprn["uprn"].isin(fabric_uprns)) - ] + ] .sort_values("sap_points", ascending=False) .drop_duplicates(subset=["uprn"]) ) - mv_installed = mv_recs[[ - "uprn", - "measure_type", - "sap_points", - "heat_demand", - "kwh_savings", - "co2_equivalent_savings", - "energy_cost_savings", - ]].copy() + mv_installed = mv_recs[ + [ + "uprn", + "measure_type", + "sap_points", + "heat_demand", + "kwh_savings", + "co2_equivalent_savings", + "energy_cost_savings", + ] + ].copy() mv_installed["already_installed"] = True - return pd.concat( - [installed_measures_df, mv_installed], - ignore_index=True - ) + return pd.concat([installed_measures_df, mv_installed], ignore_index=True) # installed_measures_df = add_mechanical_ventilation_for_fabric( @@ -453,24 +505,39 @@ def add_mechanical_ventilation_for_fabric(installed_measures_df, recs_with_uprn) assert installed_measures_df[["uprn", "measure_type"]].duplicated().sum() == 0 -for col in ["sap_points", "heat_demand", "kwh_savings", "co2_equivalent_savings", "energy_cost_savings"]: - print(f"n missings for {col}: {pd.isnull(installed_measures_df[col]).sum()}", ) +for col in [ + "sap_points", + "heat_demand", + "kwh_savings", + "co2_equivalent_savings", + "energy_cost_savings", +]: + print( + f"n missings for {col}: {pd.isnull(installed_measures_df[col]).sum()}", + ) # Do some calcs on SAP impact sap_impact = installed_measures_df.groupby(["uprn"])["sap_points"].sum().reset_index() -properties_sap = properties_df[["uprn", "current_sap_points", "current_epc_rating"]].copy() +properties_sap = properties_df[ + ["uprn", "current_sap_points", "current_epc_rating"] +].copy() properties_sap["uprn"] = properties_sap["uprn"].astype(str) -old_sap_vs_new = properties_sap.merge( - sap_impact, how="inner", on="uprn" +old_sap_vs_new = properties_sap.merge(sap_impact, how="inner", on="uprn") +old_sap_vs_new["new_sap_points"] = ( + old_sap_vs_new["current_sap_points"] + old_sap_vs_new["sap_points"] +) +old_sap_vs_new["new_epc_rating"] = old_sap_vs_new["new_sap_points"].apply( + lambda x: sap_to_epc(x) ) -old_sap_vs_new["new_sap_points"] = old_sap_vs_new["current_sap_points"] + old_sap_vs_new["sap_points"] -old_sap_vs_new["new_epc_rating"] = old_sap_vs_new["new_sap_points"].apply(lambda x: sap_to_epc(x)) # How many properties go from below C to above -old_sap_vs_new[old_sap_vs_new["current_sap_points"] < 69]["new_epc_rating"].value_counts() +old_sap_vs_new[old_sap_vs_new["current_sap_points"] < 69][ + "new_epc_rating" +].value_counts() changed = old_sap_vs_new[ - (old_sap_vs_new["current_sap_points"] < 69) & (old_sap_vs_new["new_sap_points"] >= 69) - ] + (old_sap_vs_new["current_sap_points"] < 69) + & (old_sap_vs_new["new_sap_points"] >= 69) +] # What do I need to do: # TODO: - need to get a view of "all" measures for the property, not just recommended. We can do this but just looking @@ -499,22 +566,38 @@ def bulk_insert_installed_measures(installed_measures_df): now = datetime.utcnow() for _, row in installed_measures_df.iterrows(): - records.append({ - "uprn": int(row["uprn"]), - "measure_type": row["measure_type"], - "installed_at": now, - "sap_points": float(row["sap_points"]) if pd.notna(row["sap_points"]) else None, - "carbon_savings": float(row["co2_equivalent_savings"]) if pd.notna(row["co2_equivalent_savings"]) else None, - "kwh_savings": float(row["kwh_savings"]) if pd.notna(row["kwh_savings"]) else None, - "bill_savings": float(row["energy_cost_savings"]) if pd.notna(row["energy_cost_savings"]) else None, - "heat_demand_savings": float(row["heat_demand"]) if pd.notna(row["heat_demand"]) else None, - "source": SOURCE, - "is_active": True, - }) + records.append( + { + "uprn": int(row["uprn"]), + "measure_type": row["measure_type"], + "installed_at": now, + "sap_points": ( + float(row["sap_points"]) if pd.notna(row["sap_points"]) else None + ), + "carbon_savings": ( + float(row["co2_equivalent_savings"]) + if pd.notna(row["co2_equivalent_savings"]) + else None + ), + "kwh_savings": ( + float(row["kwh_savings"]) if pd.notna(row["kwh_savings"]) else None + ), + "bill_savings": ( + float(row["energy_cost_savings"]) + if pd.notna(row["energy_cost_savings"]) + else None + ), + "heat_demand_savings": ( + float(row["heat_demand"]) if pd.notna(row["heat_demand"]) else None + ), + "source": SOURCE, + "is_active": True, + } + ) try: for i in range(0, len(records), BATCH_SIZE): - batch = records[i:i + BATCH_SIZE] + batch = records[i : i + BATCH_SIZE] session.bulk_insert_mappings(InstalledMeasure, batch) session.commit() print(f"✅ Inserted {i + len(batch)} / {len(records)}") @@ -580,9 +663,7 @@ def get_installed_measure_adjustments_by_uprn_for_portfolio( def exclude_ventilation(column): return case( ( - InstalledMeasure.measure_type.notin_( - REBASING_EXCLUDED_MEASURES - ), + InstalledMeasure.measure_type.notin_(REBASING_EXCLUDED_MEASURES), column, ), else_=0.0, @@ -594,33 +675,24 @@ def get_installed_measure_adjustments_by_uprn_for_portfolio( rows = ( session.query( InstalledMeasure.uprn.label("uprn"), - func.coalesce( func.sum(exclude_ventilation(InstalledMeasure.sap_points)), 0.0, ).label("sap_points"), - func.coalesce( func.sum(exclude_ventilation(InstalledMeasure.carbon_savings)), 0.0, ).label("co2"), - func.coalesce( func.sum(exclude_ventilation(InstalledMeasure.kwh_savings)), 0.0, ).label("energy_kwh"), - func.coalesce( func.sum(exclude_ventilation(InstalledMeasure.bill_savings)), 0.0, ).label("energy_bill"), - func.coalesce( - func.sum( - exclude_ventilation( - InstalledMeasure.heat_demand_savings - ) - ), + func.sum(exclude_ventilation(InstalledMeasure.heat_demand_savings)), 0.0, ).label("heat_demand"), ) @@ -657,16 +729,14 @@ def get_installed_measure_types_by_uprn( ) # Convert enums → strings - return { - r[0].value if hasattr(r[0], "value") else r[0] - for r in rows - } + return {r[0].value if hasattr(r[0], "value") else r[0] for r in rows} # ------------------------------------------------------------ # PROPERTY REBASING (READ-ONLY) # ------------------------------------------------------------ + def compute_property_sap_updates( properties: List[PropertyModel], sap_adjustments: Dict[int, float], # keyed by uprn @@ -692,14 +762,16 @@ def compute_property_sap_updates( sap_delta = sap_adjustments[prop.uprn] new_sap = prop.original_sap_points + sap_delta - updates.append({ - "property_id": prop.id, - "uprn": prop.uprn, - "original_sap_points": prop.original_sap_points, - "installed_sap_delta": sap_delta, - "new_sap_points": new_sap, - "is_adjusted": True, - }) + updates.append( + { + "property_id": prop.id, + "uprn": prop.uprn, + "original_sap_points": prop.original_sap_points, + "installed_sap_delta": sap_delta, + "new_sap_points": new_sap, + "is_adjusted": True, + } + ) return updates @@ -708,6 +780,7 @@ def compute_property_sap_updates( # PLAN RECOMPUTATION HELPERS # ------------------------------------------------------------ + def get_effective_plan_recommendations( session, plan_id: int, excluded_measure_types: Set[str] ) -> List[Recommendation]: @@ -715,11 +788,10 @@ def get_effective_plan_recommendations( session.query(Recommendation) .join(PlanRecommendations) .filter(PlanRecommendations.plan_id == plan_id) - .filter(Recommendation.default.is_(True))) + .filter(Recommendation.default.is_(True)) + ) if excluded_measure_types: - q = q.filter( - ~Recommendation.measure_type.in_(excluded_measure_types) - ) + q = q.filter(~Recommendation.measure_type.in_(excluded_measure_types)) return q.all() @@ -791,7 +863,11 @@ def get_installed_measure_types_by_property_id_for_portfolio( installed_by_property[property_id].add(mt) # drag-along rules - if mt in {"cavity_wall_insulation", "internal_wall_insulation", "external_wall_insulation"}: + if mt in { + "cavity_wall_insulation", + "internal_wall_insulation", + "external_wall_insulation", + }: installed_by_property[property_id].add("mechanical_ventilation") return installed_by_property @@ -810,7 +886,9 @@ def get_all_default_plan_recommendations( PlanRecommendations.plan_id, Recommendation, ) - .join(Recommendation, Recommendation.id == PlanRecommendations.recommendation_id) + .join( + Recommendation, Recommendation.id == PlanRecommendations.recommendation_id + ) .filter(PlanRecommendations.plan_id.in_(plan_ids)) .filter(Recommendation.default.is_(True)) .all() @@ -835,9 +913,14 @@ def filter_remaining_recommendations( return recommendations return [ - r for r in recommendations + r + for r in recommendations if ( - (r.measure_type.value if hasattr(r.measure_type, "value") else r.measure_type) + ( + r.measure_type.value + if hasattr(r.measure_type, "value") + else r.measure_type + ) not in installed_types ) ] @@ -845,11 +928,11 @@ def filter_remaining_recommendations( def compute_plan_updates( session, - plans: List[Plan], + plans: List[PlanModel], properties_by_id: Dict[int, PropertyModel], epcs_by_property_id: Dict[int, PropertyDetailsEpcModel], installed_types_by_property_id, - all_ventilation_measures + all_ventilation_measures, ) -> List[dict]: """ Computes plan metrics after marking some recommendations as already installed. @@ -921,39 +1004,34 @@ def compute_plan_updates( # ): # continue - updates.append({ - "plan_id": plan.id, - "property_id": plan.property_id, - - # SAP / EPC - "post_sap_points": post_sap, - "post_epc_rating": sap_to_epc(post_sap), - - # Carbon - "co2_savings": remaining["co2_savings"], - "post_co2_emissions": post_co2, - - # Energy bills - "energy_bill_savings": remaining["energy_bill_savings"], - "post_energy_bill": post_bill, - - # Energy consumption - "energy_consumption_savings": remaining["energy_consumption_savings"], - "post_energy_consumption": post_kwh, - - # Valuation (safe) - "valuation_increase": remaining["valuation_increase"], - "valuation_post_retrofit": ( - prop.current_valuation - + remaining["valuation_increase"] - if prop.current_valuation is not None - else None - ), - - # Costs - "cost_of_works": remaining["cost_of_works"], - "contingency_cost": remaining["contingency_cost"], - }) + updates.append( + { + "plan_id": plan.id, + "property_id": plan.property_id, + # SAP / EPC + "post_sap_points": post_sap, + "post_epc_rating": sap_to_epc(post_sap), + # Carbon + "co2_savings": remaining["co2_savings"], + "post_co2_emissions": post_co2, + # Energy bills + "energy_bill_savings": remaining["energy_bill_savings"], + "post_energy_bill": post_bill, + # Energy consumption + "energy_consumption_savings": remaining["energy_consumption_savings"], + "post_energy_consumption": post_kwh, + # Valuation (safe) + "valuation_increase": remaining["valuation_increase"], + "valuation_post_retrofit": ( + prop.current_valuation + remaining["valuation_increase"] + if prop.current_valuation is not None + else None + ), + # Costs + "cost_of_works": remaining["cost_of_works"], + "contingency_cost": remaining["contingency_cost"], + } + ) property_to_installed_types[prop.id] = installed_types @@ -1065,7 +1143,6 @@ def compute_epc_rebasing_updates( updates[property_id] = { "property_id": property_id, - # Originals (only set once) "original_co2_emissions": ( epc.original_co2_emissions @@ -1087,7 +1164,6 @@ def compute_epc_rebasing_updates( if epc.original_current_energy_demand_heating_hotwater is not None else epc.current_energy_demand_heating_hotwater ), - # Adjustments (always re-applied from originals) "installed_measures_co2_adjustment": adj["co2"], "installed_measures_energy_demand_adjustment": adj["energy_kwh"], @@ -1106,8 +1182,8 @@ def persist_plan_updates(plan_updates: list[dict]): with db_session() as session: plans = ( - session.query(Plan) - .filter(Plan.id.in_([u["plan_id"] for u in plan_updates])) + session.query(PlanModel) + .filter(PlanModel.id.in_([u["plan_id"] for u in plan_updates])) .all() ) @@ -1168,20 +1244,17 @@ def persist_epc_rebasing_updates( # Store originals once epc.original_co2_emissions = u["original_co2_emissions"] - epc.original_primary_energy_consumption = ( - u["original_primary_energy_consumption"] - ) - epc.original_current_energy_demand = ( - u["original_current_energy_demand"] - ) - epc.original_current_energy_demand_heating_hotwater = ( - u["original_current_energy_demand_heating_hotwater"] - ) + epc.original_primary_energy_consumption = u[ + "original_primary_energy_consumption" + ] + epc.original_current_energy_demand = u["original_current_energy_demand"] + epc.original_current_energy_demand_heating_hotwater = u[ + "original_current_energy_demand_heating_hotwater" + ] # Apply rebased values epc.co2_emissions = ( - u["original_co2_emissions"] - - u["installed_measures_co2_adjustment"] + u["original_co2_emissions"] - u["installed_measures_co2_adjustment"] ) epc.primary_energy_consumption = ( @@ -1195,18 +1268,18 @@ def persist_epc_rebasing_updates( ) # Flags + audit fields - epc.installed_measures_co2_adjustment = ( - u["installed_measures_co2_adjustment"] - ) - epc.installed_measures_energy_demand_adjustment = ( - u["installed_measures_energy_demand_adjustment"] - ) - epc.installed_measures_total_energy_bill_adjustment = ( - u["installed_measures_total_energy_bill_adjustment"] - ) - epc.installed_measures_heat_demand_adjustment = ( - u["installed_measures_heat_demand_adjustment"] - ) + epc.installed_measures_co2_adjustment = u[ + "installed_measures_co2_adjustment" + ] + epc.installed_measures_energy_demand_adjustment = u[ + "installed_measures_energy_demand_adjustment" + ] + epc.installed_measures_total_energy_bill_adjustment = u[ + "installed_measures_total_energy_bill_adjustment" + ] + epc.installed_measures_heat_demand_adjustment = u[ + "installed_measures_heat_demand_adjustment" + ] epc.is_epc_adjusted_for_installed_measures = True print(f"✅ Updated {len(epcs)} EPC records") @@ -1254,9 +1327,7 @@ def initialise_original_property_and_epc_values(portfolio_id: int): updated = True if epc.original_primary_energy_consumption is None: - epc.original_primary_energy_consumption = ( - epc.primary_energy_consumption - ) + epc.original_primary_energy_consumption = epc.primary_energy_consumption updated = True if epc.original_current_energy_demand is None: @@ -1314,21 +1385,19 @@ def get_installed_ventilation_adjustments_by_uprn_for_portfolio( rows = ( session.query( InstalledMeasure.uprn.label("uprn"), - - func.coalesce(func.sum(InstalledMeasure.sap_points), 0.0) - .label("sap_points"), - - func.coalesce(func.sum(InstalledMeasure.carbon_savings), 0.0) - .label("co2"), - - func.coalesce(func.sum(InstalledMeasure.kwh_savings), 0.0) - .label("energy_kwh"), - - func.coalesce(func.sum(InstalledMeasure.bill_savings), 0.0) - .label("energy_bill"), - - func.coalesce(func.sum(InstalledMeasure.heat_demand_savings), 0.0) - .label("heat_demand"), + func.coalesce(func.sum(InstalledMeasure.sap_points), 0.0).label( + "sap_points" + ), + func.coalesce(func.sum(InstalledMeasure.carbon_savings), 0.0).label("co2"), + func.coalesce(func.sum(InstalledMeasure.kwh_savings), 0.0).label( + "energy_kwh" + ), + func.coalesce(func.sum(InstalledMeasure.bill_savings), 0.0).label( + "energy_bill" + ), + func.coalesce(func.sum(InstalledMeasure.heat_demand_savings), 0.0).label( + "heat_demand" + ), ) .filter(InstalledMeasure.is_active.is_(True)) .filter(InstalledMeasure.measure_type == "mechanical_ventilation") @@ -1370,8 +1439,9 @@ def mark_recommendations_as_installed( stmt = ( update(Recommendation) .where( - tuple_(Recommendation.property_id, Recommendation.measure_type) - .in_(property_measure_pairs) + tuple_(Recommendation.property_id, Recommendation.measure_type).in_( + property_measure_pairs + ) ) .values(already_installed=True) ) @@ -1400,13 +1470,17 @@ with db_read_session() as session: .all() ) - all_ventilation_measures = get_installed_ventilation_adjustments_by_uprn_for_portfolio(session, PORTFOLIO_ID) - installed_types_by_property_id = get_installed_measure_types_by_property_id_for_portfolio(session, PORTFOLIO_ID) + all_ventilation_measures = ( + get_installed_ventilation_adjustments_by_uprn_for_portfolio( + session, PORTFOLIO_ID + ) + ) + installed_types_by_property_id = ( + get_installed_measure_types_by_property_id_for_portfolio(session, PORTFOLIO_ID) + ) plans = ( - session.query(Plan) - .filter(Plan.portfolio_id == PORTFOLIO_ID) - .all() + session.query(PlanModel).filter(PlanModel.portfolio_id == PORTFOLIO_ID).all() ) epcs = { @@ -1419,23 +1493,17 @@ with db_read_session() as session: ) } - installed_adjustments = ( - get_installed_measure_adjustments_by_uprn_for_portfolio( - session, - PORTFOLIO_ID, - ) + installed_adjustments = get_installed_measure_adjustments_by_uprn_for_portfolio( + session, + PORTFOLIO_ID, ) property_updates = compute_property_sap_updates( - properties, - {uprn: v["sap_points"] for uprn, v in installed_adjustments.items()} + properties, {uprn: v["sap_points"] for uprn, v in installed_adjustments.items()} ) properties_by_id = {p.id: p for p in properties} - property_updates_by_id = { - u["property_id"]: u - for u in property_updates - } + property_updates_by_id = {u["property_id"]: u for u in property_updates} epc_updates = compute_epc_rebasing_updates( epcs, @@ -1453,9 +1521,7 @@ with db_read_session() as session: ) # Used to mark recommendations - pairs = build_installed_recommendation_pairs( - installed_types_by_property_id - ) + pairs = build_installed_recommendation_pairs(installed_types_by_property_id) from copy import deepcopy @@ -1466,36 +1532,33 @@ for u in plan_updates_comparison: if not before: continue - u.update({ - # SAP - "before_sap_points": before.post_sap_points, - "after_sap_points": u["post_sap_points"], - - # Carbon - "before_post_co2_emissions": before.post_co2_emissions, - "after_post_co2_emissions": u["post_co2_emissions"], - - # Costs - "before_cost_of_works": before.cost_of_works, - "after_cost_of_works": u["cost_of_works"], - - "before_contingency_cost": before.contingency_cost, - "after_contingency_cost": u["contingency_cost"], - }) + u.update( + { + # SAP + "before_sap_points": before.post_sap_points, + "after_sap_points": u["post_sap_points"], + # Carbon + "before_post_co2_emissions": before.post_co2_emissions, + "after_post_co2_emissions": u["post_co2_emissions"], + # Costs + "before_cost_of_works": before.cost_of_works, + "after_cost_of_works": u["cost_of_works"], + "before_contingency_cost": before.contingency_cost, + "after_contingency_cost": u["contingency_cost"], + } + ) plan_updates_df = pd.DataFrame(plan_updates_comparison) plan_updates_df["delta_sap_points"] = ( - plan_updates_df["after_sap_points"] - - plan_updates_df["before_sap_points"] + plan_updates_df["after_sap_points"] - plan_updates_df["before_sap_points"] ) plan_updates_df["delta_carbon"] = ( plan_updates_df["after_post_co2_emissions"] - plan_updates_df["before_post_co2_emissions"] ) plan_updates_df["delta_cost_of_works"] = ( - plan_updates_df["after_cost_of_works"] - - plan_updates_df["before_cost_of_works"] + plan_updates_df["after_cost_of_works"] - plan_updates_df["before_cost_of_works"] ) plan_updates_df["delta_contingency_cost"] = ( plan_updates_df["after_contingency_cost"] @@ -1503,12 +1566,14 @@ plan_updates_df["delta_contingency_cost"] = ( ) # High-level sanity checks -summary = plan_updates_df[[ - "delta_sap_points", - "delta_carbon", - "delta_cost_of_works", - "delta_contingency_cost", -]].sum() +summary = plan_updates_df[ + [ + "delta_sap_points", + "delta_carbon", + "delta_cost_of_works", + "delta_contingency_cost", + ] +].sum() print(summary) @@ -1619,17 +1684,15 @@ def apply_appliance_carbon_to_plans( .all() ) - epc_by_property_id = { - e.property_id: e for e in epcs - } + epc_by_property_id = {e.property_id: e for e in epcs} # -------------------------------------------- # Load plans with post carbon # -------------------------------------------- plans = ( - session.query(Plan) - .filter(Plan.portfolio_id == portfolio_id) - .filter(Plan.post_co2_emissions.isnot(None)) + session.query(PlanModel) + .filter(PlanModel.portfolio_id == portfolio_id) + .filter(PlanModel.post_co2_emissions.isnot(None)) .all() ) @@ -1682,13 +1745,7 @@ def apply_appliance_carbon_to_plans( # Get all uprns for entries in already installed, from the database with db_read_session() as session: - db_uprns = { - str(r[0]) - for r in ( - session.query(InstalledMeasure.uprn) - .all() - ) - } + db_uprns = {str(r[0]) for r in (session.query(InstalledMeasure.uprn).all())} # What is the overlap of these properties and the properties in portfolo 430 sal_data = pd.read_excel( diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/h_reset_estimated_epcs.py b/etl/customers/peabody/Nov 2025 Consulting Project/h_reset_estimated_epcs.py index 67ff2c85..e3008f65 100644 --- a/etl/customers/peabody/Nov 2025 Consulting Project/h_reset_estimated_epcs.py +++ b/etl/customers/peabody/Nov 2025 Consulting Project/h_reset_estimated_epcs.py @@ -3,31 +3,41 @@ from sqlalchemy.orm import Session from sqlalchemy import text, select from backend.app.db.connection import db_read_session from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel -from backend.app.db.models.recommendations import Plan +from backend.app.db.models.recommendations import PlanModel PORTFOLIO_ID = 435 with db_read_session() as session: # Get all properties from PropertyDetailsEpcModel, where estimated is True, for portfolio 419 - estimated_epcs = session.query(PropertyDetailsEpcModel).filter( - # PropertyDetailsEpcModel.estimated == True, - PropertyDetailsEpcModel.property_id.in_( - session.query(PropertyModel.id).filter(PropertyModel.portfolio_id == PORTFOLIO_ID) + estimated_epcs = ( + session.query(PropertyDetailsEpcModel) + .filter( + # PropertyDetailsEpcModel.estimated == True, + PropertyDetailsEpcModel.property_id.in_( + session.query(PropertyModel.id).filter( + PropertyModel.portfolio_id == PORTFOLIO_ID + ) + ) ) - ).all() + .all() + ) # Get the ids estimated_epc_ids = [epc.property_id for epc in estimated_epcs] # I want to get the UPRNS for these properties, from the property model with db_read_session() as session: - estimated_uprns = session.query(PropertyModel.uprn).filter( - PropertyModel.id.in_( - session.query(PropertyDetailsEpcModel.property_id).filter( - PropertyDetailsEpcModel.id.in_(estimated_epc_ids) + estimated_uprns = ( + session.query(PropertyModel.uprn) + .filter( + PropertyModel.id.in_( + session.query(PropertyDetailsEpcModel.property_id).filter( + PropertyDetailsEpcModel.id.in_(estimated_epc_ids) + ) ) ) - ).all() + .all() + ) estimated_uprns_list = [uprn for (uprn,) in estimated_uprns] @@ -35,16 +45,16 @@ with db_read_session() as session: sal_1 = pd.read_excel( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/20251213 Model " "data.xlsx", - sheet_name="Standardised Asset List" + sheet_name="Standardised Asset List", ) sal_2 = pd.read_excel( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/20260105 - additional " "UPRNS.xlsx", - sheet_name="Standardised Asset List" + sheet_name="Standardised Asset List", ) sal = pd.concat([sal_1, sal_2]) -sal = sal.drop_duplicates(subset=['epc_os_uprn']) +sal = sal.drop_duplicates(subset=["epc_os_uprn"]) estimated_to_refresh = sal[sal["epc_os_uprn"].isin(estimated_uprns_list)].copy() @@ -55,20 +65,24 @@ SCENARIOS = [ # 861, # EPC C, No EWI/IWI, No Solid Floor, ASHP 3.0 COP # 859, # EPC C - no solid floor, ashp 3.0 # 885, # EPC B - fabric first, no solid floor, ashp 3.0 - 908, 909, 910 + 908, + 909, + 910, ] # Get all plans, associated to these properties - the property IDs are in estimated_epc_ids with db_read_session() as session: result = session.execute( - select(Plan.id, Plan.property_id) - .where(Plan.property_id.in_(estimated_epc_ids)) + select(PlanModel.id, PlanModel.property_id).where( + PlanModel.property_id.in_(estimated_epc_ids) + ) ) plans = [ { "plan_id": row.id, "property_id": row.property_id, - } for row in result + } + for row in result ] df = pd.DataFrame(plans) @@ -96,12 +110,14 @@ def delete_plan_batch(session: Session, plan_ids: list[int]): # recommendation_materials # ---------------------------- session.execute( - text(""" + text( + """ DELETE FROM recommendation_materials rm USING plan_recommendations pr WHERE rm.recommendation_id = pr.recommendation_id AND pr.plan_id = ANY(:plan_ids) - """), + """ + ), params, ) @@ -109,10 +125,12 @@ def delete_plan_batch(session: Session, plan_ids: list[int]): # plan_recommendations # ---------------------------- session.execute( - text(""" + text( + """ DELETE FROM plan_recommendations WHERE plan_id = ANY(:plan_ids) - """), + """ + ), params, ) @@ -120,14 +138,16 @@ def delete_plan_batch(session: Session, plan_ids: list[int]): # recommendations (only those used by these plans) # ---------------------------- session.execute( - text(""" + text( + """ DELETE FROM recommendation r WHERE r.id IN ( SELECT DISTINCT recommendation_id FROM plan_recommendations WHERE plan_id = ANY(:plan_ids) ) - """), + """ + ), params, ) @@ -135,17 +155,21 @@ def delete_plan_batch(session: Session, plan_ids: list[int]): # plans LAST # ---------------------------- session.execute( - text(""" + text( + """ DELETE FROM plan WHERE id = ANY(:plan_ids) - """), + """ + ), params, ) # Store the SAL -filename = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/20260101 " - "sal.xlsx") +filename = ( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/20260101 " + "sal.xlsx" +) with pd.ExcelWriter(filename) as writer: sal.to_excel(writer, sheet_name="Standardised Asset List", index=False) @@ -164,34 +188,36 @@ with pd.ExcelWriter(filename) as writer: b1 = pd.read_excel( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260101 " "sal.xlsx", - sheet_name="batch 1" + sheet_name="batch 1", ) b2 = pd.read_excel( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260101 " "sal.xlsx", - sheet_name="batch 2" + sheet_name="batch 2", ) b3 = pd.read_excel( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260101 " "sal.xlsx", - sheet_name="batch 3" + sheet_name="batch 3", ) b4 = pd.read_excel( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260101 " "sal.xlsx", - sheet_name="batch 4" + sheet_name="batch 4", ) b5 = pd.read_excel( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260101 " "sal.xlsx", - sheet_name="batch 5" + sheet_name="batch 5", ) # Batch 6 should be the remaining total = pd.concat([b1, b2, b3, b4, b5]) remaining = sal[~sal["epc_os_uprn"].isin(total["epc_os_uprn"].values)] # Create new output -filename = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/" - "20260107 corrected batch 6 sal.xlsx") +filename = ( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/" + "20260107 corrected batch 6 sal.xlsx" +) with pd.ExcelWriter(filename) as writer: sal.to_excel(writer, sheet_name="Standardised Asset List", index=False) @@ -206,6 +232,4 @@ with pd.ExcelWriter(filename) as writer: b5.to_excel(writer, sheet_name="batch 5", index=False) remaining.to_excel(writer, sheet_name="batch 6", index=False) -all_together = pd.concat( - [b1, b2, b3, b4, b5, remaining] -) +all_together = pd.concat([b1, b2, b3, b4, b5, remaining]) diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/k_deck_stats.py b/etl/customers/peabody/Nov 2025 Consulting Project/k_deck_stats.py index 68655e80..0ec34e7c 100644 --- a/etl/customers/peabody/Nov 2025 Consulting Project/k_deck_stats.py +++ b/etl/customers/peabody/Nov 2025 Consulting Project/k_deck_stats.py @@ -110,14 +110,17 @@ import pandas as pd # Solar PV savings - we need the amount of solar PV bill savings from sqlalchemy.orm import sessionmaker from backend.app.db.connection import db_engine -from backend.app.db.models.recommendations import Recommendation, Plan, PlanRecommendations, RecommendationMaterials +from backend.app.db.models.recommendations import ( + Recommendation, + PlanModel, + PlanRecommendations, + RecommendationMaterials, +) from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel from collections import defaultdict PORTFOLIO_ID = 485 # Peabody -SCENARIOS = [ - 970 -] +SCENARIOS = [970] scenario_names = { 970: "EPC C - no solid floor, ashp 3.0", } @@ -130,22 +133,26 @@ def get_data(portfolio_id, scenario_ids): # -------------------- # Properties # -------------------- - properties_query = session.query( - PropertyModel, - PropertyDetailsEpcModel - ).join( - PropertyDetailsEpcModel, - PropertyModel.id == PropertyDetailsEpcModel.property_id - ).filter( - PropertyModel.portfolio_id == portfolio_id - ).all() + properties_query = ( + session.query(PropertyModel, PropertyDetailsEpcModel) + .join( + PropertyDetailsEpcModel, + PropertyModel.id == PropertyDetailsEpcModel.property_id, + ) + .filter(PropertyModel.portfolio_id == portfolio_id) + .all() + ) properties_data = [ { - **{col.name: getattr(p.PropertyModel, col.name) - for col in PropertyModel.__table__.columns}, - **{col.name: getattr(p.PropertyDetailsEpcModel, col.name) - for col in PropertyDetailsEpcModel.__table__.columns}, + **{ + col.name: getattr(p.PropertyModel, col.name) + for col in PropertyModel.__table__.columns + }, + **{ + col.name: getattr(p.PropertyDetailsEpcModel, col.name) + for col in PropertyDetailsEpcModel.__table__.columns + }, } for p in properties_query ] @@ -153,12 +160,12 @@ def get_data(portfolio_id, scenario_ids): # -------------------- # Plans # -------------------- - plans_query = session.query(Plan).filter( - Plan.scenario_id.in_(scenario_ids) - ).all() + plans_query = ( + session.query(PlanModel).filter(PlanModel.scenario_id.in_(scenario_ids)).all() + ) plans_data = [ - {col.name: getattr(plan, col.name) for col in Plan.__table__.columns} + {col.name: getattr(plan, col.name) for col in PlanModel.__table__.columns} for plan in plans_query ] @@ -167,27 +174,29 @@ def get_data(portfolio_id, scenario_ids): # -------------------- # Recommendations (NO materials yet) # -------------------- - recommendations_query = session.query( - Recommendation, - Plan.scenario_id - ).join( - PlanRecommendations, - Recommendation.id == PlanRecommendations.recommendation_id - ).join( - Plan, - Plan.id == PlanRecommendations.plan_id - ).filter( - PlanRecommendations.plan_id.in_(plan_ids), - Recommendation.default.is_(True), - Recommendation.already_installed.is_(False) - ).all() + recommendations_query = ( + session.query(Recommendation, PlanModel.scenario_id) + .join( + PlanRecommendations, + Recommendation.id == PlanRecommendations.recommendation_id, + ) + .join(PlanModel, PlanModel.id == PlanRecommendations.plan_id) + .filter( + PlanRecommendations.plan_id.in_(plan_ids), + Recommendation.default.is_(True), + Recommendation.already_installed.is_(False), + ) + .all() + ) recommendations_data = [ { - **{col.name: getattr(r.Recommendation, col.name) - for col in Recommendation.__table__.columns}, + **{ + col.name: getattr(r.Recommendation, col.name) + for col in Recommendation.__table__.columns + }, "scenario_id": r.scenario_id, - "materials": [] # placeholder + "materials": [], # placeholder } for r in recommendations_query ] @@ -197,23 +206,25 @@ def get_data(portfolio_id, scenario_ids): # -------------------- # Recommendation materials (SEPARATE QUERY) # -------------------- - materials_query = session.query( - RecommendationMaterials - ).filter( - RecommendationMaterials.recommendation_id.in_(recommendation_ids) - ).all() + materials_query = ( + session.query(RecommendationMaterials) + .filter(RecommendationMaterials.recommendation_id.in_(recommendation_ids)) + .all() + ) # Group materials by recommendation_id materials_by_recommendation = defaultdict(list) for m in materials_query: - materials_by_recommendation[m.recommendation_id].append({ - "material_id": m.material_id, - "depth": m.depth, - "quantity": m.quantity, - "quantity_unit": m.quantity_unit, - "estimated_cost": m.estimated_cost, - }) + materials_by_recommendation[m.recommendation_id].append( + { + "material_id": m.material_id, + "depth": m.depth, + "quantity": m.quantity, + "quantity_unit": m.quantity_unit, + "estimated_cost": m.estimated_cost, + } + ) # Attach materials safely (no filtering side effects) for r in recommendations_data: @@ -236,12 +247,11 @@ with pd.ExcelWriter("hackney.xlsx", engine="openpyxl") as writer: recommendations_df.to_excel(writer, sheet_name="recommendations", index=False) properties_df.to_excel(writer, sheet_name="properties", index=False) - + # solar_pv_recommendations = recommendations_df[recommendations_df["measure_type"] == "solar_pv"] # average_savings = solar_pv_recommendations.groupby("scenario_id")["energy_cost_savings"].mean().reset_index() - # # Check tenures # initial_asset_data = pd.read_excel( # "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody " diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/m_reduced_sample_revised.py b/etl/customers/peabody/Nov 2025 Consulting Project/m_reduced_sample_revised.py index a18dc315..b7010cf7 100644 --- a/etl/customers/peabody/Nov 2025 Consulting Project/m_reduced_sample_revised.py +++ b/etl/customers/peabody/Nov 2025 Consulting Project/m_reduced_sample_revised.py @@ -4,7 +4,7 @@ import pandas as pd full_sal = pd.read_excel( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final " "SAL/Depracated/20260107 corrected batch 6 sal.xlsx", - sheet_name="Standardised Asset List" + sheet_name="Standardised Asset List", ) # ------Pull in the reduced sample ------ @@ -12,7 +12,7 @@ full_sal = pd.read_excel( reduced_sal = pd.read_excel( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260112 - " "ownership filtered sal.xlsx", - sheet_name="Standardised Asset List" + sheet_name="Standardised Asset List", ) # ------ Pull in the confirmed ownership column from Peabody ------ @@ -20,18 +20,20 @@ new_asset_data = pd.read_excel( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/2025_11_11 " "- Peabody " "- Data Extracts for Domna v2.xlsx", - sheet_name="Properties" + sheet_name="Properties", ) correct_sample = new_asset_data[ ~new_asset_data["AH Tenure"].isin( - ["Commercial", - "Freeholder", - "HOMEBUY / EQUITY LOAN", - "Leaseholder", - "Outright Sale", - "SHARED EQUITY", - "Shared Ownership"] + [ + "Commercial", + "Freeholder", + "HOMEBUY / EQUITY LOAN", + "Leaseholder", + "Outright Sale", + "SHARED EQUITY", + "Shared Ownership", + ] ) ].copy() @@ -41,9 +43,7 @@ stuff_to_add = correct_sample[ ~correct_sample["UPRN"].isin(reduced_sal["landlord_property_id"].values) ]["UPRN"].values -sal_to_add = full_sal[ - full_sal["domna_property_id"].isin(stuff_to_add) -].copy() +sal_to_add = full_sal[full_sal["domna_property_id"].isin(stuff_to_add)].copy() # ------- Stuff to remove ------- stuff_to_remove = reduced_sal[ @@ -88,7 +88,7 @@ from backend.app.db.models.portfolio import PropertyModel from backend.app.db.connection import db_session, db_read_session from sqlalchemy import select, func from sqlalchemy.orm import Session -from backend.app.db.models.recommendations import Plan +from backend.app.db.models.recommendations import PlanModel uprns_to_be_deleted = to_delete["epc_os_uprn"].values.tolist() diff --git a/etl/customers/slide_utils.py b/etl/customers/slide_utils.py index 9170ab17..5e027a56 100644 --- a/etl/customers/slide_utils.py +++ b/etl/customers/slide_utils.py @@ -7,7 +7,7 @@ from sqlalchemy.sql import true from backend.app.db.utils import row2dict from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel from backend.app.db.models.recommendations import Recommendation -from backend.app.db.models.recommendations import Plan +from backend.app.db.models.recommendations import PlanModel from backend.app.utils import sap_to_epc EPC_COLOURS = { @@ -17,7 +17,7 @@ EPC_COLOURS = { "D": "#fdd401", "E": "#fdab67", "F": "#ee8023", - "G": "#e71437" + "G": "#e71437", } @@ -33,22 +33,27 @@ def get_properties_with_default_recommendations(session: Session, portfolio_id: its associated default recommendations if any. """ # Adjust the join to correctly filter recommendations while including all properties - query = session.query(PropertyModel, Recommendation).outerjoin(Recommendation, - (Recommendation.property_id == PropertyModel.id) & ( - Recommendation.default == true())) \ - .filter(PropertyModel.portfolio_id == portfolio_id) \ + query = ( + session.query(PropertyModel, Recommendation) + .outerjoin( + Recommendation, + (Recommendation.property_id == PropertyModel.id) + & (Recommendation.default == true()), + ) + .filter(PropertyModel.portfolio_id == portfolio_id) .all() + ) properties = {} for property, recommendation in query: # Ensure the property is added once with an empty list of recommendations initially if property.id not in properties: properties[property.id] = row2dict(property) - properties[property.id]['recommendations'] = [] + properties[property.id]["recommendations"] = [] # Append recommendations if they exist and meet the criteria (already filtered by the query) if recommendation and recommendation.default: - properties[property.id]['recommendations'].append(row2dict(recommendation)) + properties[property.id]["recommendations"].append(row2dict(recommendation)) return list(properties.values()) @@ -62,11 +67,16 @@ def get_property_details_by_portfolio_id(session: Session, portfolio_id: int): :return: A list of dictionaries, where each dictionary represents a property's details. Returns an empty list if no property details are found. """ - property_details = session.query(PropertyDetailsEpcModel).filter( - PropertyDetailsEpcModel.portfolio_id == portfolio_id).all() + property_details = ( + session.query(PropertyDetailsEpcModel) + .filter(PropertyDetailsEpcModel.portfolio_id == portfolio_id) + .all() + ) # Convert the SQLAlchemy objects to dictionaries - property_details_dict = [row2dict(pd) for pd in property_details] if property_details else [] + property_details_dict = ( + [row2dict(pd) for pd in property_details] if property_details else [] + ) return property_details_dict @@ -80,7 +90,9 @@ def get_plan_by_portfolio_id(session: Session, portfolio_id: int): :return: A list of dictionaries, where each dictionary represents a plan. Returns an empty list if no plans are found. """ - plans = session.query(Plan).filter(Plan.portfolio_id == portfolio_id).all() + plans = ( + session.query(PlanModel).filter(PlanModel.portfolio_id == portfolio_id).all() + ) # Convert the SQLAlchemy objects to dictionaries plans_dict = [row2dict(plan) for plan in plans] if plans else [] @@ -88,7 +100,14 @@ def get_plan_by_portfolio_id(session: Session, portfolio_id: int): return plans_dict -def plot_epc_distribution(df, customer_key, title='Your Units', background_color='white', bar_height=0.4, font_size=15): +def plot_epc_distribution( + df, + customer_key, + title="Your Units", + background_color="white", + bar_height=0.4, + font_size=15, +): """ Plots a horizontal bar chart of EPC rating distribution with adjustable bar thickness and text sizes. Allows setting the plot background color and dynamically adjusts text size and bar spacing. @@ -100,75 +119,113 @@ def plot_epc_distribution(df, customer_key, title='Your Units', background_color :param font_size: Base font size for text annotations (default 15) """ # Calculate dynamic figure size or adjust based on preferences - square_size = max(6, len(df) * 0.6) # Ensure minimum size and adjust based on number of entries + square_size = max( + 6, len(df) * 0.6 + ) # Ensure minimum size and adjust based on number of entries fig, ax = plt.subplots(figsize=(square_size, square_size)) fig.patch.set_facecolor(background_color) # Set figure background color ax.set_facecolor(background_color) # Set axes background color - df['percentage'] = df['percentage'].round(1) # Round the percentage values to 1 decimal place - df_sorted = df.sort_values('percentage', ascending=True) + df["percentage"] = df["percentage"].round( + 1 + ) # Round the percentage values to 1 decimal place + df_sorted = df.sort_values("percentage", ascending=True) # Plot bars with specified height for adjustable thickness - bars = ax.barh(df_sorted['current_epc_rating'], df_sorted['percentage'], - color=df_sorted['current_epc_rating'].map(EPC_COLOURS), edgecolor='none', height=bar_height) + bars = ax.barh( + df_sorted["current_epc_rating"], + df_sorted["percentage"], + color=df_sorted["current_epc_rating"].map(EPC_COLOURS), + edgecolor="none", + height=bar_height, + ) - epc_rating_font_size = font_size * 2 # EPC rating font size larger than base font size - count_percentage_font_size = font_size # Count (percentage) font size as base font size + epc_rating_font_size = ( + font_size * 2 + ) # EPC rating font size larger than base font size + count_percentage_font_size = ( + font_size # Count (percentage) font size as base font size + ) # Annotate bars with EPC ratings inside and count with percentage values outside for index, bar in enumerate(bars): width = bar.get_width() - epc_rating = df_sorted.iloc[index]['current_epc_rating'] - count = df_sorted.iloc[index]['count'] - percentage = df_sorted.iloc[index]['percentage'] + epc_rating = df_sorted.iloc[index]["current_epc_rating"] + count = df_sorted.iloc[index]["count"] + percentage = df_sorted.iloc[index]["percentage"] # EPC rating inside the bar with increased font size - ax.text(width - (width * 0.05), bar.get_y() + bar.get_height() / 2, - f"{epc_rating}", va='center', ha='right', color='white', fontsize=epc_rating_font_size) + ax.text( + width - (width * 0.05), + bar.get_y() + bar.get_height() / 2, + f"{epc_rating}", + va="center", + ha="right", + color="white", + fontsize=epc_rating_font_size, + ) # Count and percentage outside the bar, original font size - ax.text(width + 1, bar.get_y() + bar.get_height() / 2, - f"{count} ({percentage}%)", va='center', color='black', fontsize=count_percentage_font_size) + ax.text( + width + 1, + bar.get_y() + bar.get_height() / 2, + f"{count} ({percentage}%)", + va="center", + color="black", + fontsize=count_percentage_font_size, + ) - ax.set_title(title, fontsize=font_size * 1.2) # Adjust title font size proportionally - ax.tick_params(axis='x', which='both', bottom=False, top=False, - labelbottom=False) # Remove x-axis tick marks and values - ax.tick_params(axis='y', which='both', left=False, right=False, - labelleft=False) # Remove y-axis tick marks and labels - ax.spines['top'].set_visible(False) # Remove top spine - ax.spines['right'].set_visible(False) # Remove right spine - ax.spines['left'].set_visible(False) # Remove left spine - ax.spines['bottom'].set_visible(False) # Remove bottom spine + ax.set_title( + title, fontsize=font_size * 1.2 + ) # Adjust title font size proportionally + ax.tick_params( + axis="x", which="both", bottom=False, top=False, labelbottom=False + ) # Remove x-axis tick marks and values + ax.tick_params( + axis="y", which="both", left=False, right=False, labelleft=False + ) # Remove y-axis tick marks and labels + ax.spines["top"].set_visible(False) # Remove top spine + ax.spines["right"].set_visible(False) # Remove right spine + ax.spines["left"].set_visible(False) # Remove left spine + ax.spines["bottom"].set_visible(False) # Remove bottom spine plt.tight_layout() # Adjust layout plt.show() # Save the figure as an image - figure_path = f'etl/customers/{customer_key}/epc_distribution_plot.png' - fig.savefig(figure_path, bbox_inches='tight') + figure_path = f"etl/customers/{customer_key}/epc_distribution_plot.png" + fig.savefig(figure_path, bbox_inches="tight") plt.close(fig) # Close the figure to free memory return fig, figure_path -def save_plot_to_image(figure, path='plot.png'): +def save_plot_to_image(figure, path="plot.png"): """ Saves a matplotlib figure to an image file for insertion into PowerPoint. """ - figure.savefig(path, bbox_inches='tight') + figure.savefig(path, bbox_inches="tight") plt.close(figure) -def save_figure_as_image(figure, filename='temp_plot.png'): +def save_figure_as_image(figure, filename="temp_plot.png"): """ Saves a matplotlib figure to an image file. """ figure.savefig(filename, dpi=300) - plt.close(figure) # Close the figure to prevent it from displaying in notebooks or Python environments + plt.close( + figure + ) # Close the figure to prevent it from displaying in notebooks or Python environments -def add_commentary_with_bullets(slide, commentary, top_inches, left_inches=Inches(1), width_inches=Inches(8), - height_inches=Inches(2)): +def add_commentary_with_bullets( + slide, + commentary, + top_inches, + left_inches=Inches(1), + width_inches=Inches(8), + height_inches=Inches(2), +): """ Adds commentary with bullet points to a slide. @@ -179,7 +236,9 @@ def add_commentary_with_bullets(slide, commentary, top_inches, left_inches=Inche :param width_inches: The width of the commentary text box. :param height_inches: The height of the commentary text box. """ - txBox = slide.shapes.add_textbox(left_inches, top_inches, width_inches, height_inches) + txBox = slide.shapes.add_textbox( + left_inches, top_inches, width_inches, height_inches + ) tf = txBox.text_frame # Configure text frame @@ -192,7 +251,9 @@ def add_commentary_with_bullets(slide, commentary, top_inches, left_inches=Inche for i, section in enumerate(sections): if i > 0: - p = tf.add_paragraph() # Add a new paragraph for each section after the first + p = ( + tf.add_paragraph() + ) # Add a new paragraph for each section after the first else: p = tf.paragraphs[0] # Use the first paragraph for the first section p.text = section @@ -215,7 +276,9 @@ def add_slide_with_image(prs, title, img_path=None, commentary=None): # Determine the position of the commentary text box based on whether an image is included if img_path: # Add the image - slide.shapes.add_picture(img_path, Inches(1), Inches(1.5), Inches(8), Inches(4.5)) + slide.shapes.add_picture( + img_path, Inches(1), Inches(1.5), Inches(8), Inches(4.5) + ) # Position for commentary when image is present commentary_top = Inches(6) else: @@ -237,16 +300,18 @@ def create_powerpoint(data, save_location): prs = Presentation() for slide, slide_data in data.items(): - slide_figure_path = data[slide].get('image_path') - text = data[slide].get('text') - title = data[slide].get('title', "") + slide_figure_path = data[slide].get("image_path") + text = data[slide].get("text") + title = data[slide].get("title", "") add_slide_with_image(prs, title, slide_figure_path, text) # Save the presentation prs.save(save_location) -def create_recommendations_summary(recommendations_df, properties_df, property_details_df, sap_target): +def create_recommendations_summary( + recommendations_df, properties_df, property_details_df, sap_target +): # Aggregate the impact of the recommendations # We want: # Total number of sap points @@ -254,40 +319,52 @@ def create_recommendations_summary(recommendations_df, properties_df, property_d # total bill savings # total cost # Total Co2 impact - recommendations_summary = recommendations_df.groupby(["property_id"]).agg( - total_sap_points=("sap_points", "sum"), - total_valuation_impact=("property_valuation_increase", "sum"), - total_bill_savings=("energy_cost_savings", "sum"), - total_cost=("estimated_cost", "sum"), - total_carbon=("co2_equivalent_savings", "sum"), - adjusted_heat_demand=("adjusted_heat_demand", "sum") - ).reset_index() + recommendations_summary = ( + recommendations_df.groupby(["property_id"]) + .agg( + total_sap_points=("sap_points", "sum"), + total_valuation_impact=("property_valuation_increase", "sum"), + total_bill_savings=("energy_cost_savings", "sum"), + total_cost=("estimated_cost", "sum"), + total_carbon=("co2_equivalent_savings", "sum"), + adjusted_heat_demand=("adjusted_heat_demand", "sum"), + ) + .reset_index() + ) # Merge on current sap points, current CO2, current adjusted_heat_demand, current annual bill recommendations_summary = recommendations_summary.merge( - properties_df[["id", "uprn", "current_sap_points"]].rename(columns={"id": "property_id"}), on="property_id", - how="left" + properties_df[["id", "uprn", "current_sap_points"]].rename( + columns={"id": "property_id"} + ), + on="property_id", + how="left", ) recommendations_summary["expected_sap_points"] = ( - recommendations_summary["current_sap_points"] + recommendations_summary["total_sap_points"] + recommendations_summary["current_sap_points"] + + recommendations_summary["total_sap_points"] ) - recommendations_summary["expected_epc_rating"] = recommendations_summary["expected_sap_points"].apply( - lambda x: sap_to_epc(x) + recommendations_summary["expected_epc_rating"] = recommendations_summary[ + "expected_sap_points" + ].apply(lambda x: sap_to_epc(x)) + recommendations_summary["sap_difference"] = ( + sap_target - recommendations_summary["expected_sap_points"] ) - recommendations_summary["sap_difference"] = sap_target - recommendations_summary["expected_sap_points"] if property_details_df is not None: recommendations_summary = recommendations_summary.merge( - property_details_df[["uprn", "co2_emissions", "adjusted_energy_consumption", "energy_bill"]].rename( + property_details_df[ + ["uprn", "co2_emissions", "adjusted_energy_consumption", "energy_bill"] + ].rename( columns={ "id": "property_id", "co2_emissions": "current_co2", "adjusted_energy_consumption": "current_energy", - "energy_bill": "current_energy_bill" + "energy_bill": "current_energy_bill", } ), on="uprn", - how="left" + how="left", ) return recommendations_summary diff --git a/sfr/principal_pitch/2_export_data.py b/sfr/principal_pitch/2_export_data.py index a65509d5..d5a81423 100644 --- a/sfr/principal_pitch/2_export_data.py +++ b/sfr/principal_pitch/2_export_data.py @@ -13,7 +13,7 @@ from sqlalchemy.orm import sessionmaker from backend.app.db.connection import db_engine, db_read_session from backend.app.db.models.recommendations import ( Recommendation, - Plan, + PlanModel, PlanRecommendations, RecommendationMaterials, ) @@ -73,12 +73,12 @@ def get_data(portfolio_id, scenario_ids): # -------------------- latest_plans_subq = ( session.query( - Plan.scenario_id, - Plan.property_id, - func.max(Plan.created_at).label("latest_created_at"), + PlanModel.scenario_id, + PlanModel.property_id, + func.max(PlanModel.created_at).label("latest_created_at"), ) - .filter(Plan.scenario_id.in_(scenario_ids)) - .group_by(Plan.scenario_id, Plan.property_id) + .filter(PlanModel.scenario_id.in_(scenario_ids)) + .group_by(PlanModel.scenario_id, PlanModel.property_id) .subquery() ) @@ -87,12 +87,12 @@ def get_data(portfolio_id, scenario_ids): # ).all() plans_query = ( - session.query(Plan) + session.query(PlanModel) .join( latest_plans_subq, - (Plan.scenario_id == latest_plans_subq.c.scenario_id) - & (Plan.property_id == latest_plans_subq.c.property_id) - & (Plan.created_at == latest_plans_subq.c.latest_created_at), + (PlanModel.scenario_id == latest_plans_subq.c.scenario_id) + & (PlanModel.property_id == latest_plans_subq.c.property_id) + & (PlanModel.created_at == latest_plans_subq.c.latest_created_at), ) .all() ) @@ -108,7 +108,7 @@ def get_data(portfolio_id, scenario_ids): # ) plans_data = [ - {col.name: getattr(plan, col.name) for col in Plan.__table__.columns} + {col.name: getattr(plan, col.name) for col in PlanModel.__table__.columns} for plan in plans_query ] @@ -118,12 +118,14 @@ def get_data(portfolio_id, scenario_ids): # Recommendations (NO materials yet) # -------------------- recommendations_query = ( - session.query(Recommendation, Plan.scenario_id, PlanRecommendations.plan_id) + session.query( + Recommendation, PlanModel.scenario_id, PlanRecommendations.plan_id + ) .join( PlanRecommendations, Recommendation.id == PlanRecommendations.recommendation_id, ) - .join(Plan, Plan.id == PlanRecommendations.plan_id) + .join(PlanModel, PlanModel.id == PlanRecommendations.plan_id) .filter( PlanRecommendations.plan_id.in_(plan_ids), Recommendation.default.is_(True),