implementing new sequenced scoring approach for recommendations

This commit is contained in:
Khalim Conn-Kowlessar 2024-02-15 19:32:33 +00:00
parent b39e9c9899
commit 82d19fc6fc
7 changed files with 94 additions and 45 deletions

2
.idea/Model.iml generated
View file

@ -7,7 +7,7 @@
<sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
</content>
<orderEntry type="jdk" jdkName="Python 3.10 (model_data)" jdkType="Python SDK" />
<orderEntry type="jdk" jdkName="Python 3.10 (backend)" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
<component name="PyNamespacePackagesService">

2
.idea/misc.xml generated
View file

@ -3,7 +3,7 @@
<component name="Black">
<option name="sdkName" value="Python 3.10 (backend)" />
</component>
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (model_data)" project-jdk-type="Python SDK" />
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (backend)" project-jdk-type="Python SDK" />
<component name="PythonCompatibilityInspectionAdvertiser">
<option name="version" value="3" />
</component>

View file

@ -1,20 +1,15 @@
from datetime import datetime
import re
import os
import numpy as np
from itertools import groupby
import pandas as pd
from etl.epc.DataProcessor import EPCDataProcessor
from etl.epc.Dataset import TrainingDataset
from etl.epc.settings import LATEST_FIELD, MANDATORY_FIXED_FEATURES, POTENTIAL_COLUMNS, EFFICIENCY_FEATURES, \
BUILT_FORM_REMAP
from etl.epc.settings import LATEST_FIELD, MANDATORY_FIXED_FEATURES
from etl.epc_clean.epc_attributes.all_cleaners import all_cleaner_map
from etl.solar.SolarPhotoSupply import SolarPhotoSupply
from utils.logger import setup_logger
from utils.s3 import read_dataframe_from_s3_parquet
from etl.epc.settings import DATA_ANOMALY_MATCHES
from recommendations.rdsap_tables import england_wales_age_band_lookup, FLOOR_LEVEL_MAP
from recommendations.rdsap_tables import FLOOR_LEVEL_MAP
from recommendations.recommendation_utils import (
estimate_perimeter, get_wall_type, estimate_external_wall_area, esimtate_pitched_roof_area, estimate_windows
)
@ -172,11 +167,28 @@ class Property:
previous_phase_representatives = [
r for r in property_representative_recommendations if r["phase"] in previous_phases
]
# For solid wall insulation, we will actually have 2 representative recommendations, since we consider
# both internal and external wall insulation as possible measures. We will use the representative that
# has the lowest efficiency.
# Take the representative with the lowest efficiency, by phase
# To be safe, we sort by phase
previous_phase_representatives = sorted(previous_phase_representatives, key=lambda x: x['phase'])
previous_phase_representatives = [
min(group, key=lambda x: x['efficiency']) for _, group in groupby(
previous_phase_representatives, key=lambda x: x['phase']
)
]
recommendation_record = self.base_difference_record.df.to_dict("records")[0].copy()
for rec in property_recommendations_by_phase:
# We simulate the impact of the recommendation at this current phase, and all of the prior phases
if rec["type"] == "mechanical_ventilation":
continue
scoring_dict = self.create_recommendation_scoring_data(
property_id=self.id,
recommendation_record=recommendation_record,
@ -230,9 +242,10 @@ class Property:
output["external_insulation"] = False
output["internal_insulation"] = True
# TODO: perhaps detrimental
# When making a recommendation for the wall, we will also update the ventilation
if output["mechanical_ventilation_ending"] == 'natural':
output["mechanical_ventilation_ending"] = 'mechanical, extract only'
# if output["mechanical_ventilation_ending"] == 'natural':
# output["mechanical_ventilation_ending"] = 'mechanical, extract only'
else:
if output["walls_thermal_transmittance_ending"] is None:
@ -328,7 +341,7 @@ class Property:
output["photo_supply_ending"] = recommendation["photo_supply"]
if recommendation["type"] not in [
"mechanical_ventilation", "sealing_open_fireplace", "low_energy_lighting",
"sealing_open_fireplace", "low_energy_lighting",
"internal_wall_insulation", "external_wall_insulation", "cavity_wall_insulation",
"loft_insulation", "room_roof_insulation", "flat_roof_insulation",
"solid_floor_insulation", "suspended_floor_insulation", "exposed_floor_insulation",

View file

@ -130,8 +130,14 @@ class ModelApi:
)
)
predictions_df["predictions"] = predictions_df["predictions"].astype(float).round(1)
predictions_df['predictions'] = predictions_df["predictions"].astype(float).round(1)
predictions_df[['property_id', 'recommendation_id']] = predictions_df['id'].str.split('+', expand=True)
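# To grab the phase, we pull the integer after "phase=" in the recommendation_id. We do this with a
# string split on "phase=" and then take the second element of the resulting list. We could also use a
# regular expression, but we use the string split method here for safety.
# NOTE(review): the trailing .str[0] keeps only the first character after "phase=", so this assumes phases
# are single digits (0-9) - confirm, or extract all of the digits if phases can reach 10 or more.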
predictions_df['phase'] = predictions_df['recommendation_id'].str.split('phase=').str[1].str[0]
# Convert back to int
predictions_df['phase'] = predictions_df['phase'].astype(int)
predictions[model_prefix] = predictions_df

View file

@ -4,6 +4,9 @@ from recommendations.Costs import Costs
class LightingRecommendations:
# We introduce a SAP limit to lighting, which is based on empirical findings. We do see cases where lighting is
# worth more than 2 points, but this is unlikely in the context of other upgrades that can be made to the property
SAP_LIMIT = 2
def __init__(self, property_instance: Property, materials: List):
"""

View file

@ -1,5 +1,6 @@
from backend.Property import Property
from typing import List
from itertools import groupby
from recommendations.FloorRecommendations import FloorRecommendations
from recommendations.WallRecommendations import WallRecommendations
from recommendations.RoofRecommendations import RoofRecommendations
@ -134,23 +135,32 @@ class Recommendations:
has_u_value = recommendations_by_type[0].get("new_u_value") is not None
has_sap_points = recommendations_by_type[0].get("sap_points") is not None
if has_u_value:
# We sort by the cost per U-value improvement - the lower the better
recommendations_by_type.sort(
key=lambda x: x["total"] / x["starting_u_value"] - x["new_u_value"]
)
elif not has_u_value and has_sap_points:
# Sort the options by the cost per SAP point improvement - the lower the better
recommendations_by_type.sort(
key=lambda x: x["total"] / x["sap_points"]
)
else:
# Sort the options by cost - the lower the better
recommendations_by_type.sort(
key=lambda x: x["total"]
)
# We check whether these recommendations have two different types, such as solid wall insulation
# If we have multiple types, we group by type and then select the best recommendation for each type
property_representative_recommendations.append(recommendations_by_type[0])
recommendations_by_type = sorted(recommendations_by_type, key=lambda x: x["type"])
representative_recommendations = []
for type, recommendations in groupby(recommendations_by_type, key=lambda x: x["type"]):
recommendations = list(recommendations)
# We also create an efficiency key, which is used to sort the recommendations
if has_u_value:
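# We sort by the cost per U-value improvement - the lower the better
# NOTE(review): as written, the expression below evaluates as (total / starting_u_value) - new_u_value because
# division binds tighter than subtraction; cost per U-value improvement would be
# total / (starting_u_value - new_u_value) - confirm which is intended.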
for rec in recommendations:
rec["efficiency"] = rec["total"] / rec["starting_u_value"] - rec["new_u_value"]
elif not has_u_value and has_sap_points:
# Sort the options by the cost per SAP point improvement - the lower the better
for rec in recommendations:
rec["efficiency"] = rec["total"] / rec["sap_points"]
else:
# Sort the options by cost - the lower the better
for rec in recommendations:
rec["efficiency"] = rec["total"]
recommendations.sort(
key=lambda x: x["efficiency"]
)
representative_recommendations.append(recommendations[0])
property_representative_recommendations.extend(representative_recommendations)
return property_representative_recommendations
@ -168,7 +178,7 @@ class Recommendations:
for recs in property_recommendations:
for rec in recs:
rec["recommendation_id"] = idx
rec["recommendation_id"] = f"{str(idx)}_phase={str(rec['phase'])}"
idx += 1
return property_recommendations
@ -198,11 +208,16 @@ class Recommendations:
property_recommendations = recommendations[property_instance.id].copy()
# We calculate the impact by phase
sap_phase_impact = property_sap_predictions.groupby("phase")["predictions"].median().reset_index()
heat_phase_impact = property_heat_predictions.groupby("phase")["predictions"].median().reset_index()
carbon_phase_impact = property_carbon_predictions.groupby("phase")["predictions"].median().reset_index()
for recommendations_by_type in property_recommendations:
for rec in recommendations_by_type:
# We don't use the model for low energy lighting at the moment
if rec["type"] == "low_energy_lighting":
if rec["type"] == "mechanical_ventilation":
# We don't have a perceived SAP impact of mechanical ventilation
continue
new_heat_demand = property_heat_predictions[property_heat_predictions["recommendation_id"] == str(
@ -216,22 +231,37 @@ class Recommendations:
new_sap = property_sap_predictions[property_sap_predictions["recommendation_id"] == str(
rec["recommendation_id"]
)]["predictions"].values[0]
rec["sap_points"] = new_sap - float(property_instance.data["current-energy-efficiency"])
if rec["type"] == "mechanical_ventilation":
if rec["phase"] == 0:
rec["sap_points"] = new_sap - float(property_instance.data["current-energy-efficiency"])
rec["co2_equivalent_savings"] = float(property_instance.data["co2-emissions-current"]) - new_carbon
rec["heat_demand"] = property_instance.floor_area * (
float(property_instance.data["energy-consumption-current"]) - new_heat_demand
)
else:
previous_phase = rec["phase"] - 1
rec["sap_points"] = (
new_sap - sap_phase_impact[sap_phase_impact["phase"] == previous_phase]["predictions"].values[0]
)
rec["co2_equivalent_savings"] = (
carbon_phase_impact[carbon_phase_impact["phase"] == previous_phase]["predictions"].values[0] -
new_carbon
)
rec["heat_demand"] = property_instance.floor_area * (
heat_phase_impact[heat_phase_impact["phase"] == previous_phase]["predictions"].values[0] -
new_heat_demand
)
if rec["type"] == "low_energy_lighting":
# For the moment, we cap the number of SAP points that can be achieved by lighting at 2
rec["sap_points"] = min(rec["sap_points"], VentilationRecommendations.SAP_LIMIT)
rec["sap_points"] = min(rec["sap_points"], LightingRecommendations.SAP_LIMIT)
# Round to 2 decimal places
rec["sap_points"] = round(rec["sap_points"], 2)
rec["co2_equivalent_savings"] = float(property_instance.data["co2-emissions-current"]) - new_carbon
# Energy consumption current is per meter squared, so we need to multiply by the floor area to get
# an absolute figure for the home
rec["heat_demand"] = (
(float(property_instance.data["energy-consumption-current"]) - new_heat_demand
) * property_instance.floor_area)
rec["energy_cost_savings"] = AnnualBillSavings.estimate(rec["heat_demand"])
if (rec["sap_points"] is None) and (rec["co2_equivalent_savings"] is None) or (

View file

@ -15,9 +15,6 @@ class VentilationRecommendations(Definitions):
'mechanical, supply and extract'
]
# We introduce a SAP limit, to prevent over-predicting the SAP impact of mechanical ventilation
SAP_LIMIT = 2
def __init__(
self,
property_instance: Property,
@ -68,7 +65,7 @@ class VentilationRecommendations(Definitions):
"description": f"Install {n_units} {part[0]['description']} units",
"starting_u_value": None,
"new_u_value": None,
"sap_points": None,
"sap_points": 0,
"total": estimated_cost,
# We use a very simple and rough estimate of 4 hours per unit
"labour_hours": 4 * n_units,