from pprint import pprint import pandas as pd import numpy as np from backend.app.utils import sap_to_epc from sqlalchemy.orm import sessionmaker from backend.app.db.connection import db_engine from backend.app.db.models.recommendations import ( Recommendation, PlanModel, PlanRecommendations, ) from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel def get_data(portfolio_id, scenario_ids): session = sessionmaker(bind=db_engine)() session.begin() # Get properties and their details for a specific portfolio properties_query = ( session.query(PropertyModel, PropertyDetailsEpcModel) .join( PropertyDetailsEpcModel, PropertyModel.id == PropertyDetailsEpcModel.property_id, ) .filter(PropertyModel.portfolio_id == portfolio_id) # Filter by portfolio ID .all() ) # Transform properties data to include all fields dynamically properties_data = [ { **{ col.name: getattr(prop.PropertyModel, col.name) for col in PropertyModel.__table__.columns }, **{ col.name: getattr(prop.PropertyDetailsEpcModel, col.name) for col in PropertyDetailsEpcModel.__table__.columns }, } for prop in properties_query ] # Get property IDs from fetched properties # Get plans linked to the fetched properties plans_query = ( session.query(PlanModel).filter(PlanModel.scenario_id.in_(scenario_ids)).all() ) # Transform plans data to include all fields dynamically plans_data = [ {col.name: getattr(plan, col.name) for col in PlanModel.__table__.columns} for plan in plans_query ] # Extract plan IDs for filtering recommendations through PlanRecommendations plan_ids = [plan["id"] for plan in plans_data] # Get recommendations through PlanRecommendations for those plans and that are default recommendations_query = ( session.query(Recommendation, PlanModel.scenario_id) .join( PlanRecommendations, Recommendation.id == PlanRecommendations.recommendation_id, ) .join( PlanModel, PlanModel.id == PlanRecommendations.plan_id, # Join with Plan to access scenario_id ) .filter( PlanRecommendations.plan_id.in_(plan_ids), Recommendation.default == True, # Filtering for default recommendations ) .all() ) # Transform recommendations data to include all fields dynamically and include scenario_id recommendations_data = [ { **{ col.name: ( getattr(rec.Recommendation, col.name) if hasattr(rec, "Recommendation") else getattr(rec, col.name) ) for col in Recommendation.__table__.columns }, "Scenario ID": rec.scenario_id, } for rec in recommendations_query ] session.close() return properties_data, plans_data, recommendations_data def app(): """ Given a portfolio and a scenario, this function prepares an excel model to present the data """ # Set the inputs: portfolio_id = 139 scenario_ids = [237, 238] properties_data, plans_data, recommendations_data = get_data( portfolio_id=portfolio_id, scenario_ids=scenario_ids ) properties_df = pd.DataFrame(properties_data) plans_df = pd.DataFrame(plans_data) recommendations_df = pd.DataFrame(recommendations_data) # Merge on the orignal data mod_property_data = pd.read_csv( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MOD/Pilot Programme/MOD property data.csv" ) property_asset_data = properties_df.merge( mod_property_data.drop(columns=["address", "postcode", "tenure"]), how="left", on="uprn", ) property_asset_data["is_pitched"] = property_asset_data["roof"].str.contains( "pitched", case=False ) property_asset_data["pre_1970"] = property_asset_data["BUILD_YEAR"] < 1970 property_asset_data["wall_type"] = ( property_asset_data["walls"].str.split(" ").str[0].str.strip() ) property_asset_data["is_insulated"] = property_asset_data["walls"].str.split( "," ).str[1].str.strip().isin( [ "filled cavity", "with external insulation", "filled cavity and external insulation", ] ) | property_asset_data[ "walls" ].str.split( "," ).str[ 2 ].str.strip().isin( ["insulated"] ) property_asset_data["is_insulated"] = np.where( property_asset_data["is_insulated"], "Insulated", "Uninsulated" ) property_asset_data["is_pitched"] = np.where( property_asset_data["is_pitched"], "Pitched roof", "Not Pitched Roof" ) property_asset_data["pre_1970"] = np.where( property_asset_data["pre_1970"], "Pre 1970", "Post 1970" ) archetype_variables = [ "property_type", "wall_type", "is_insulated", "is_pitched", "pre_1970", ] assigned_archetypes = ( property_asset_data.groupby(archetype_variables) .size() .reset_index() .rename(columns={0: "n_properties"}) .sort_values("n_properties", ascending=False) ) # Make the archetype ID a concatenation of the variables assigned_archetypes["archetype_id"] = assigned_archetypes[ archetype_variables ].apply(lambda x: "_".join(x.astype(str)), axis=1) # Most prominent archetypes prominent_archetypes = assigned_archetypes.head(6) other_archetypes = assigned_archetypes.tail(-6) # 2 or fewer properties in the other archetypes property_asset_data = property_asset_data.merge( assigned_archetypes[archetype_variables + ["archetype_id"]], how="left", on=archetype_variables, ) # Create age bands: # 1960-1969 # 1970-1979 # 1980-1989 # 1990-1999 # 2000+ property_asset_data["age_band"] = pd.cut( property_asset_data["BUILD_YEAR"], bins=[1959, 1969, 1979, 1989, 1999, 2022], labels=["1960-1969", "1970-1979", "1980-1989", "1990-1999", "2000+"], ) # Create floor area bands # 0-73 # 74-97 # 98-199 # 200+ property_asset_data["floor_area_band"] = pd.cut( property_asset_data["total_floor_area"], bins=[0, 73, 97, 199, 10000], labels=["0-73", "74-97", "98-199", "200+"], ) property_asset_data["archetype_group"] = property_asset_data["archetype_id"].copy() property_asset_data["archetype_group"] = np.where( property_asset_data["archetype_id"].isin( other_archetypes["archetype_id"].values ), "other", property_asset_data["archetype_group"], ) # For colour wall_types = ( property_asset_data[["wall_type"]] .value_counts() .to_frame() .reset_index() .rename(columns={"wall_type": "Wall Type"}) ) # Group into age bands ages = ( property_asset_data[["age_band"]] .value_counts() .to_frame() .reset_index() .sort_values("age_band", ascending=True) .rename(columns={"age_band": "Age Band"}) ) floor_area_bands = ( property_asset_data[["floor_area_band"]] .value_counts() .to_frame() .reset_index() .sort_values("floor_area_band", ascending=True) .rename(columns={"floor_area_band": "Floor Area Band"}) ) archetype_counts = ( property_asset_data[["archetype_group"]] .value_counts() .to_frame() .reset_index() .rename(columns={"archetype_group": "Archetype"}) ) property_types = ( ( property_asset_data["property_type"] + ": " + property_asset_data["built_form"] ) .value_counts() .to_frame() .reset_index() .rename(columns={"index": "Property Type", 0: "Count"}) ) # epc breakdown epc_breakdown = ( property_asset_data["current_epc_rating"] .apply(lambda x: x.value) .value_counts() .to_frame() .reset_index() ) # Figures for the deck # Carbon per property totals = property_asset_data[ [ "Total_household_members", "co2_emissions", "current_energy_demand", "current_energy_demand_heating_hotwater", "heating_cost_current", "hot_water_cost_current", "lighting_cost_current", "appliances_cost_current", "gas_standing_charge", "electricity_standing_charge", ] ].copy() totals["total_cost"] = ( totals["heating_cost_current"] + totals["hot_water_cost_current"] + totals["lighting_cost_current"] + totals["appliances_cost_current"] + totals["gas_standing_charge"] + totals["electricity_standing_charge"] ) print( totals[ [ "Total_household_members", "co2_emissions", "current_energy_demand", "total_cost", ] ].mean() ) # Store these to an excel # with pd.ExcelWriter( # "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MOD/Pilot Programme/MOD archetype breakdowns.xlsx" # ) as writer: # wall_types.to_excel(writer, sheet_name="Wall Types", index=False) # ages.to_excel(writer, sheet_name="Ages", index=False) # floor_area_bands.to_excel(writer, sheet_name="Floor Area Bands", index=False) # archetype_counts.to_excel(writer, sheet_name="Archetype Counts", index=False) # epc_breakdown.to_excel(writer, sheet_name="EPC Rating", index=False) contingency = 0.26 # We prepare the outputs, by scenario scenario_data = {} for scenario in scenario_ids: scenario_recommendations_df = recommendations_df[ recommendations_df["Scenario ID"] == scenario ].copy() scenario_recommendations_df["contingency"] = ( contingency * scenario_recommendations_df["estimated_cost"] ) scenario_recommendations_df["total_cost"] = ( scenario_recommendations_df["estimated_cost"] + scenario_recommendations_df["contingency"] ) recommended_measures_df = scenario_recommendations_df[ ["property_id", "measure_type", "estimated_cost", "default"] ] recommended_measures_df = recommended_measures_df[ recommended_measures_df["default"] ] recommended_measures_df = recommended_measures_df.drop(columns=["default"]) # Metrics by property ID aggregated_metrics = scenario_recommendations_df[ [ "property_id", "type", "default", "sap_points", "energy_cost_savings", "kwh_savings", "co2_equivalent_savings", "estimated_cost", "contingency", "total_cost", ] ] aggregated_metrics = aggregated_metrics[aggregated_metrics["default"]] aggregated_metrics = ( aggregated_metrics.groupby("property_id")[ [ "sap_points", "co2_equivalent_savings", "energy_cost_savings", "kwh_savings", "estimated_cost", "total_cost", "contingency", ] ] .sum() .reset_index() ) recommendations_measures_pivot = recommended_measures_df.pivot( index="property_id", columns="measure_type", values="estimated_cost" ) recommendations_measures_pivot = recommendations_measures_pivot.reset_index() recommendations_measures_pivot = recommendations_measures_pivot.fillna(0) # We flag with boolean if the measure is recommended for c in recommendations_measures_pivot.columns: if c == "property_id": continue recommendations_measures_pivot["Recommendation: " + c] = ( recommendations_measures_pivot[c] > 0 ) # We now create a final output df = ( properties_df[ [ "property_id", "uprn", "address", "postcode", "property_type", "walls", "roof", "heating", "windows", "current_epc_rating", "current_sap_points", "total_floor_area", "number_of_rooms", "co2_emissions", "current_energy_demand", "current_energy_demand_heating_hotwater", "heating_cost_current", "hot_water_cost_current", "lighting_cost_current", "appliances_cost_current", "gas_standing_charge", "electricity_standing_charge", ] ] .merge(recommendations_measures_pivot, how="left", on="property_id") .merge(aggregated_metrics, how="left", on="property_id") ) df["bills_total_cost"] = ( df["heating_cost_current"] + df["hot_water_cost_current"] + df["lighting_cost_current"] + df["appliances_cost_current"] + df["gas_standing_charge"] + df["electricity_standing_charge"] ) df = df.drop(columns=["property_id"]) for c in [ "sap_points", "co2_equivalent_savings", "energy_cost_savings", "kwh_savings", ]: df[c] = df[c].fillna(0) df = df.rename( columns={ "uprn": "UPRN", "address": "Address", "postcode": "Postcode", "walls": "Walls", "roof": "Roof", "heating": "Heating", "windows": "Windows", "current_epc_rating": "Current EPC Rating", "current_sap_points": "Current SAP Points", "total_floor_area": "Total Floor Area", "number_of_rooms": "Number of Habitable Rooms", "floor_height": "Floor Height", } ) # Calculate post SAP df["Predicted Post Works SAP"] = df["Current SAP Points"] + df["sap_points"] df["Predicted Post Works SAP"] = df["Predicted Post Works SAP"].round() df["Predicted Post Works EPC"] = df["Predicted Post Works SAP"].apply( lambda x: sap_to_epc(x) ) # Calculate the relative savings on carbon, kwh, and bills df["relative_carbon_savings"] = ( df["co2_equivalent_savings"] / df["co2_emissions"] ) df["relative_kwh_savings"] = df["kwh_savings"] / df["current_energy_demand"] df["relative_bill_savings"] = df["energy_cost_savings"] / df["bills_total_cost"] # Add on the archetype df = df.merge( property_asset_data[["uprn", "archetype_group"]], how="left", left_on="UPRN", right_on="uprn", ) # For properties that don't make it to EPC B, check why. E.g. for a property that has an oil boiler, it # the bills go up recommending HHRSH, so it doesn't make it to EPC B # For mid-terrace units, use the ordnance survey API to check if there is space for a heat pump? # DO it manually??? # Doesn't make it # misses = df[df["Predicted Post Works EPC"] == "C"] # # 5 of them are flats and so are difficult to get to EPC B without renewables. Possibly not worth it from an # # ROI perspective # # misses[["UPRN", "Address", "Postcode", "property_type"]] # UPRN Address Postcode property_type # 2 100120988937 13 Sidbury Circular Road SP9 7HX Flat No further action # 3 100120988998 74 Sidbury Circular Road SP9 7JA Flat No further action # 4 100120989416 47 Zouch Avenue SP9 7LR Flat No further action # 6 100060585002 42, Muscott Close, Shipton Bellinger SP9 7TX House Can probably take a heat pump # 37 10000801072 34 Luffenham Place, Chicksands SG17 5XH House Already surveyed as having # an ASHP - should be looked at # 121 100120988259 8, Karachi Close SP9 7LW Flat # 122 100121101217 599, Pepper Place BA12 0DW Flat # 140 100021455241 33 Blenheim Crescent, Ruislip HA4 7HA House - Solar isnt recommended # due to bug # 149 100120915656 10 Bower Green, Shrivenham SN6 8TU House - Solar isn't recommended # due to bug scenario_data[scenario] = df printing_scenario_id = scenario_ids[0] # EPC breakdown print( scenario_data[printing_scenario_id]["Predicted Post Works EPC"].value_counts() ) # Cost # Total cost print(scenario_data[printing_scenario_id]["total_cost"].sum()) # Base cost print(scenario_data[printing_scenario_id]["estimated_cost"].sum()) # Contingency print(scenario_data[printing_scenario_id]["contingency"].sum()) # Costs averaged per unit print(scenario_data[printing_scenario_id]["total_cost"].mean()) print(scenario_data[printing_scenario_id]["estimated_cost"].mean()) print(scenario_data[printing_scenario_id]["contingency"].mean()) # Average relative savings print(scenario_data[printing_scenario_id]["relative_carbon_savings"].mean()) print(scenario_data[printing_scenario_id]["relative_kwh_savings"].mean()) print(scenario_data[printing_scenario_id]["relative_bill_savings"].mean()) measure_details = {} for scenario in scenario_ids: measure_details[scenario] = {} recommendation_cols = [ c for c in scenario_data[scenario].columns if "Recommendation:" in c ] measure_details[scenario]["count"] = ( scenario_data[scenario][recommendation_cols].sum().to_dict() ) # Get average cost per measure measure_columns = [ c.split("Recommendation: ")[1] for c in scenario_data[scenario].columns if "Recommendation:" in c ] # Take the mean, drop zero columns measure_costs = {} for m in measure_columns: measure_costs[m] = float( scenario_data[scenario][scenario_data[scenario][m] > 0][m].mean() ) measure_details[scenario]["cost_per_measure"] = measure_costs pprint(measure_details[scenario_ids[0]]["count"]) pprint(measure_details[scenario_ids[1]]["count"]) # Cost per measures pprint(measure_details[scenario_ids[0]]["cost_per_measure"]) pprint(measure_details[scenario_ids[1]]["cost_per_measure"]) # Do not get to EPC B: # 5 are flats # 1) 34 Luffenham Place, Chicksands SG17 5XH, has been surveyed as having a low performing heat pump - # should be looked at but several surrounding properties have been surveyed in a similar fashion # 2) 42, Muscott Close, Shipton Bellinger SP9 7TX, has an oil boiler and the bills go up recommending HHRSH. # we could non-intrusively recommend a heat pump. # 3) 33 Blenheim Crescent, Ruislip, HA4 7HA, 100021455241 Solar potential modelling returned nothing - # manual review indicates that there are multiple trees surrouding the south facing side of the property # 4) 10 Bower Green, Shrivenham, SN6 8TU - Solar isn't recommended without further survey due to the local # area being surrounded by trees # Scenario adjustments: # Exclude: boiler_upgrade # Make ASHP COP 3.5 # Metrics we need by scenario: # Cost # contingency # Carbon # kwh # bill savings scenario_metrics = {} for scenario in scenario_ids: df = scenario_data[scenario].copy() avg_savings = ( df[ [ "sap_points", "co2_equivalent_savings", "energy_cost_savings", "kwh_savings", "estimated_cost", "total_cost", "contingency", ] ] .mean() .to_dict() ) avg_savings["cost_per_sap_point"] = ( avg_savings["total_cost"] / avg_savings["sap_points"] ) avg_savings["cost_per_carbon"] = ( avg_savings["total_cost"] / avg_savings["co2_equivalent_savings"] ) scenario_metrics[scenario] = avg_savings pprint(scenario_metrics[scenario_ids[0]]) pprint(scenario_metrics[scenario_ids[1]]) scenario_data[scenario_ids[0]]["loft_insulation"][ scenario_data[scenario_ids[0]]["loft_insulation"] > 0 ].mean() scenario_data[scenario_ids[0]]["cavity_wall_insulation"][ scenario_data[scenario_ids[0]]["cavity_wall_insulation"] > 0 ].mean() # Testing checking floor risk import requests def get_flood_risk(lat, lon, radius_km=1): url = "https://environment.data.gov.uk/flood-monitoring/id/floods" params = {"lat": lat, "long": lon, "dist": radius_km} # search radius in km response = requests.get(url, params=params) response.raise_for_status() data = response.json() flood_warnings = data.get("items", []) if not flood_warnings: print("No active flood warnings near this location.") else: print(f"{len(flood_warnings)} warning(s) found near the location:") for warning in flood_warnings: print(f"- Area: {warning.get('description')}") print( f" Severity: {warning.get('severity')} (Level {warning.get('severityLevel')})" ) print(f" Message changed at: {warning.get('timeMessageChanged')}") print() return flood_warnings from shapely.geometry import shape, Point def get_flood_areas_near_point(lat, lon, radius_km=2): url = "https://environment.data.gov.uk/flood-monitoring/id/floodAreas" params = {"lat": lat, "long": lon, "dist": radius_km} response = requests.get(url, params=params) response.raise_for_status() return response.json().get("items", []) def point_in_flood_area(lat, lon): flood_areas = get_flood_areas_near_point(lat, lon, radius_km=1) point = Point(lon, lat) # GeoJSON uses (lon, lat) format for area in flood_areas: polygon_url = area.get("polygon") if not polygon_url: continue polygon_response = requests.get(polygon_url) polygon_response.raise_for_status() polygon_geojson = polygon_response.json() features = polygon_geojson.get("features", []) if not features: continue flood_polygon = shape(features[0]["geometry"]) try: is_inside = flood_polygon.contains(point) except: is_inside = False if is_inside: print( f"📍 Point is inside flood area: {area['label']} ({area['notation']})" ) return area from tqdm import tqdm floor_warnings_data = [] for _, property in tqdm( property_asset_data.iterrows(), total=len(property_asset_data) ): # warnings = floor_warnings_data.extend( # get_flood_risk(lat=property["LATITUDE"], lon=property["LONGITUDE"], radius_km=1) # ) resp = point_in_flood_area(lat=property["LATITUDE"], lon=property["LONGITUDE"]) if resp: floor_warnings_data.append( { "uprn": property["uprn"], "address": property["address"], "postcode": property["postcode"], "area": resp, } ) continue import plotly.graph_objects as go labels = [ "House_Cavity_Insulated_Pitched roof_Pre 1970", "House_Cavity_Insulated_Pitched roof_Post 1970", "House_Cavity_Uninsulated_Pitched roof_Pre 1970", "House_Cavity_Uninsulated_Pitched roof_Post 1970", "other", "House_System_Uninsulated_Pitched roof_Pre 1970", "House_Solid_Uninsulated_Not Pitched Roof_Pre 1970", ] values = [62, 36, 21, 16, 16, 4, 2] hovertext = [ "Loft insulation, draft proofing", "Top-up loft insulation", "Cavity wall insulation, loft insulation", "Cavity wall insulation, ventilation", "Bespoke retrofit measures", "External wall insulation, roof insulation", "Flat roof insulation, internal wall insulation", ] fig = go.Figure( go.Treemap( labels=labels, parents=[""] * len(labels), # No root values=values, hovertext=hovertext, hoverinfo="text", textinfo="none", marker=dict( line=dict(color="white", width=4), colors=values, colorscale="Blues" ), ) ) fig.update_layout( margin=dict(t=10, l=10, r=10, b=10), plot_bgcolor="white", paper_bgcolor="white" ) fig.show() # Get the recommended measures by scenario id recommendation_cols = [ c for c in scenario_data[scenario_ids[1]].columns if "Recommendation:" in c ] measure_counts_by_scenario = ( scenario_data[scenario_ids[1]] .groupby("archetype_group")[recommendation_cols] .sum() .reset_index() ) measure_counts_by_scenario.to_csv( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MOD/Pilot Programme/measure_counts_by_scenario.csv" ) # Estimate average valuation improvment by scenarios valuation_data = pd.read_csv( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MOD/Pilot Programme/property_valuation.csv" ) from backend.ml_models.Valuation import PropertyValuation uplift = [] for _, x in valuation_data.iterrows(): uprn = x["uprn"] to_append = {"uprn": uprn} for _id in scenario_ids: scenario = scenario_data[_id][scenario_data[_id]["uprn"] == uprn].squeeze() val = PropertyValuation.estimate_valuation_improvement( current_value=x["valuation"], current_epc=scenario["Current EPC Rating"].value, target_epc=scenario["Predicted Post Works EPC"], total_cost=None, ) to_append[_id] = val["average_increase"] uplift.append(to_append) uplift = pd.DataFrame(uplift) print(uplift[scenario_ids[0]].mean()) # £8,161 print(uplift[scenario_ids[1]].mean()) # £16,938