mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
implementing new sequenced scoring approach for recommendations
This commit is contained in:
parent
b39e9c9899
commit
82d19fc6fc
7 changed files with 94 additions and 45 deletions
2
.idea/Model.iml
generated
2
.idea/Model.iml
generated
|
|
@ -7,7 +7,7 @@
|
|||
<sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
|
||||
</content>
|
||||
<orderEntry type="jdk" jdkName="Python 3.10 (model_data)" jdkType="Python SDK" />
|
||||
<orderEntry type="jdk" jdkName="Python 3.10 (backend)" jdkType="Python SDK" />
|
||||
<orderEntry type="sourceFolder" forTests="false" />
|
||||
</component>
|
||||
<component name="PyNamespacePackagesService">
|
||||
|
|
|
|||
2
.idea/misc.xml
generated
2
.idea/misc.xml
generated
|
|
@ -3,7 +3,7 @@
|
|||
<component name="Black">
|
||||
<option name="sdkName" value="Python 3.10 (backend)" />
|
||||
</component>
|
||||
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (model_data)" project-jdk-type="Python SDK" />
|
||||
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (backend)" project-jdk-type="Python SDK" />
|
||||
<component name="PythonCompatibilityInspectionAdvertiser">
|
||||
<option name="version" value="3" />
|
||||
</component>
|
||||
|
|
|
|||
|
|
@ -1,20 +1,15 @@
|
|||
from datetime import datetime
|
||||
import re
|
||||
import os
|
||||
|
||||
import numpy as np
|
||||
from itertools import groupby
|
||||
import pandas as pd
|
||||
|
||||
from etl.epc.DataProcessor import EPCDataProcessor
|
||||
from etl.epc.Dataset import TrainingDataset
|
||||
from etl.epc.settings import LATEST_FIELD, MANDATORY_FIXED_FEATURES, POTENTIAL_COLUMNS, EFFICIENCY_FEATURES, \
|
||||
BUILT_FORM_REMAP
|
||||
from etl.epc.settings import LATEST_FIELD, MANDATORY_FIXED_FEATURES
|
||||
from etl.epc_clean.epc_attributes.all_cleaners import all_cleaner_map
|
||||
from etl.solar.SolarPhotoSupply import SolarPhotoSupply
|
||||
from utils.logger import setup_logger
|
||||
from utils.s3 import read_dataframe_from_s3_parquet
|
||||
from etl.epc.settings import DATA_ANOMALY_MATCHES
|
||||
from recommendations.rdsap_tables import england_wales_age_band_lookup, FLOOR_LEVEL_MAP
|
||||
from recommendations.rdsap_tables import FLOOR_LEVEL_MAP
|
||||
from recommendations.recommendation_utils import (
|
||||
estimate_perimeter, get_wall_type, estimate_external_wall_area, esimtate_pitched_roof_area, estimate_windows
|
||||
)
|
||||
|
|
@ -172,11 +167,28 @@ class Property:
|
|||
previous_phase_representatives = [
|
||||
r for r in property_representative_recommendations if r["phase"] in previous_phases
|
||||
]
|
||||
# For solid wall insulation, we will actually have 2 representative recommendations, since we consider
|
||||
# both internal and external wall insulation as possible measures. We will use the representative that
|
||||
# has the lowest efficiency.
|
||||
# Take the representative with the lowest efficiency, by phase
|
||||
|
||||
# To be safe, we sort by phase
|
||||
previous_phase_representatives = sorted(previous_phase_representatives, key=lambda x: x['phase'])
|
||||
|
||||
previous_phase_representatives = [
|
||||
min(group, key=lambda x: x['efficiency']) for _, group in groupby(
|
||||
previous_phase_representatives, key=lambda x: x['phase']
|
||||
)
|
||||
]
|
||||
|
||||
recommendation_record = self.base_difference_record.df.to_dict("records")[0].copy()
|
||||
|
||||
for rec in property_recommendations_by_phase:
|
||||
# We simulate the impact of the recommendation at this current phase, and all of the prior phases
|
||||
|
||||
if rec["type"] == "mechanical_ventilation":
|
||||
continue
|
||||
|
||||
scoring_dict = self.create_recommendation_scoring_data(
|
||||
property_id=self.id,
|
||||
recommendation_record=recommendation_record,
|
||||
|
|
@ -230,9 +242,10 @@ class Property:
|
|||
output["external_insulation"] = False
|
||||
output["internal_insulation"] = True
|
||||
|
||||
# TODO: perhaps detrimental
|
||||
# When making a recommendation for the wall, we will also update the ventilation
|
||||
if output["mechanical_ventilation_ending"] == 'natural':
|
||||
output["mechanical_ventilation_ending"] = 'mechanical, extract only'
|
||||
# if output["mechanical_ventilation_ending"] == 'natural':
|
||||
# output["mechanical_ventilation_ending"] = 'mechanical, extract only'
|
||||
|
||||
else:
|
||||
if output["walls_thermal_transmittance_ending"] is None:
|
||||
|
|
@ -328,7 +341,7 @@ class Property:
|
|||
output["photo_supply_ending"] = recommendation["photo_supply"]
|
||||
|
||||
if recommendation["type"] not in [
|
||||
"mechanical_ventilation", "sealing_open_fireplace", "low_energy_lighting",
|
||||
"sealing_open_fireplace", "low_energy_lighting",
|
||||
"internal_wall_insulation", "external_wall_insulation", "cavity_wall_insulation",
|
||||
"loft_insulation", "room_roof_insulation", "flat_roof_insulation",
|
||||
"solid_floor_insulation", "suspended_floor_insulation", "exposed_floor_insulation",
|
||||
|
|
|
|||
|
|
@ -130,8 +130,14 @@ class ModelApi:
|
|||
)
|
||||
)
|
||||
|
||||
predictions_df["predictions"] = predictions_df["predictions"].astype(float).round(1)
|
||||
predictions_df['predictions'] = predictions_df["predictions"].astype(float).round(1)
|
||||
predictions_df[['property_id', 'recommendation_id']] = predictions_df['id'].str.split('+', expand=True)
|
||||
# To grab the phase, we pull the integer after "phase=" in the recommendation_id. We can do this with a
|
||||
# string split on phase= and then grab the second element of the resulting list. We could also use a
|
||||
# regular expression to do this but we use the string split method here, for safety.
|
||||
predictions_df['phase'] = predictions_df['recommendation_id'].str.split('phase=').str[1].str[0]
|
||||
# Convert back to int
|
||||
predictions_df['phase'] = predictions_df['phase'].astype(int)
|
||||
|
||||
predictions[model_prefix] = predictions_df
|
||||
|
||||
|
|
|
|||
|
|
@ -4,6 +4,9 @@ from recommendations.Costs import Costs
|
|||
|
||||
|
||||
class LightingRecommendations:
|
||||
# We introduce a SAP limit to lighting, which is based on empirical findings. We do see cases where lighting is
|
||||
# worth more than 2 points, but this is unlikely in the context of other upgrades that can be made to the property
|
||||
SAP_LIMIT = 2
|
||||
|
||||
def __init__(self, property_instance: Property, materials: List):
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
from backend.Property import Property
|
||||
from typing import List
|
||||
from itertools import groupby
|
||||
from recommendations.FloorRecommendations import FloorRecommendations
|
||||
from recommendations.WallRecommendations import WallRecommendations
|
||||
from recommendations.RoofRecommendations import RoofRecommendations
|
||||
|
|
@ -134,23 +135,32 @@ class Recommendations:
|
|||
has_u_value = recommendations_by_type[0].get("new_u_value") is not None
|
||||
has_sap_points = recommendations_by_type[0].get("sap_points") is not None
|
||||
|
||||
if has_u_value:
|
||||
# We sort by the cost per U-value improvement - the lower the better
|
||||
recommendations_by_type.sort(
|
||||
key=lambda x: x["total"] / x["starting_u_value"] - x["new_u_value"]
|
||||
)
|
||||
elif not has_u_value and has_sap_points:
|
||||
# Sort the options by the cost per SAP point improvement - the lower the better
|
||||
recommendations_by_type.sort(
|
||||
key=lambda x: x["total"] / x["sap_points"]
|
||||
)
|
||||
else:
|
||||
# Sort the options by cost - the lower the better
|
||||
recommendations_by_type.sort(
|
||||
key=lambda x: x["total"]
|
||||
)
|
||||
# We check if these recommendations have two different types, such as solid wall insulation
|
||||
# If we have multiple types, we group by type and then select the best recommendation for each type
|
||||
|
||||
property_representative_recommendations.append(recommendations_by_type[0])
|
||||
recommendations_by_type = sorted(recommendations_by_type, key=lambda x: x["type"])
|
||||
representative_recommendations = []
|
||||
for type, recommendations in groupby(recommendations_by_type, key=lambda x: x["type"]):
|
||||
recommendations = list(recommendations)
|
||||
# We also create an efficiency key, which is used to sort the recommendations
|
||||
if has_u_value:
|
||||
# We sort by the cost per U-value improvement - the lower the better
|
||||
for rec in recommendations:
|
||||
rec["efficiency"] = rec["total"] / rec["starting_u_value"] - rec["new_u_value"]
|
||||
elif not has_u_value and has_sap_points:
|
||||
# Sort the options by the cost per SAP point improvement - the lower the better
|
||||
for rec in recommendations:
|
||||
rec["efficiency"] = rec["total"] / rec["sap_points"]
|
||||
else:
|
||||
# Sort the options by cost - the lower the better
|
||||
for rec in recommendations:
|
||||
rec["efficiency"] = rec["total"]
|
||||
|
||||
recommendations.sort(
|
||||
key=lambda x: x["efficiency"]
|
||||
)
|
||||
representative_recommendations.append(recommendations[0])
|
||||
property_representative_recommendations.extend(representative_recommendations)
|
||||
|
||||
return property_representative_recommendations
|
||||
|
||||
|
|
@ -168,7 +178,7 @@ class Recommendations:
|
|||
|
||||
for recs in property_recommendations:
|
||||
for rec in recs:
|
||||
rec["recommendation_id"] = idx
|
||||
rec["recommendation_id"] = f"{str(idx)}_phase={str(rec['phase'])}"
|
||||
idx += 1
|
||||
|
||||
return property_recommendations
|
||||
|
|
@ -198,11 +208,16 @@ class Recommendations:
|
|||
|
||||
property_recommendations = recommendations[property_instance.id].copy()
|
||||
|
||||
# We calculate the impact by phase
|
||||
sap_phase_impact = property_sap_predictions.groupby("phase")["predictions"].median().reset_index()
|
||||
heat_phase_impact = property_heat_predictions.groupby("phase")["predictions"].median().reset_index()
|
||||
carbon_phase_impact = property_carbon_predictions.groupby("phase")["predictions"].median().reset_index()
|
||||
|
||||
for recommendations_by_type in property_recommendations:
|
||||
for rec in recommendations_by_type:
|
||||
|
||||
# We don't use the model for low energy lighting at the moment
|
||||
if rec["type"] == "low_energy_lighting":
|
||||
if rec["type"] == "mechanical_ventilation":
|
||||
# We don't have a perceived SAP impact of mechanical ventilation
|
||||
continue
|
||||
|
||||
new_heat_demand = property_heat_predictions[property_heat_predictions["recommendation_id"] == str(
|
||||
|
|
@ -216,22 +231,37 @@ class Recommendations:
|
|||
new_sap = property_sap_predictions[property_sap_predictions["recommendation_id"] == str(
|
||||
rec["recommendation_id"]
|
||||
)]["predictions"].values[0]
|
||||
rec["sap_points"] = new_sap - float(property_instance.data["current-energy-efficiency"])
|
||||
|
||||
if rec["type"] == "mechanical_ventilation":
|
||||
if rec["phase"] == 0:
|
||||
rec["sap_points"] = new_sap - float(property_instance.data["current-energy-efficiency"])
|
||||
rec["co2_equivalent_savings"] = float(property_instance.data["co2-emissions-current"]) - new_carbon
|
||||
rec["heat_demand"] = property_instance.floor_area * (
|
||||
float(property_instance.data["energy-consumption-current"]) - new_heat_demand
|
||||
)
|
||||
else:
|
||||
|
||||
previous_phase = rec["phase"] - 1
|
||||
rec["sap_points"] = (
|
||||
new_sap - sap_phase_impact[sap_phase_impact["phase"] == previous_phase]["predictions"].values[0]
|
||||
)
|
||||
rec["co2_equivalent_savings"] = (
|
||||
carbon_phase_impact[carbon_phase_impact["phase"] == previous_phase]["predictions"].values[0] -
|
||||
new_carbon
|
||||
)
|
||||
rec["heat_demand"] = property_instance.floor_area * (
|
||||
heat_phase_impact[heat_phase_impact["phase"] == previous_phase]["predictions"].values[0] -
|
||||
new_heat_demand
|
||||
)
|
||||
|
||||
if rec["type"] == "low_energy_lighting":
|
||||
# For the moment, we cap the number of SAP points that can be achieved by ventilation at 2
|
||||
rec["sap_points"] = min(rec["sap_points"], VentilationRecommendations.SAP_LIMIT)
|
||||
rec["sap_points"] = min(rec["sap_points"], LightingRecommendations.SAP_LIMIT)
|
||||
|
||||
# Round to 2 decimal places
|
||||
rec["sap_points"] = round(rec["sap_points"], 2)
|
||||
rec["co2_equivalent_savings"] = float(property_instance.data["co2-emissions-current"]) - new_carbon
|
||||
|
||||
# Energy consumption current is per meter squared, so we need to multiply by the floor area to get
|
||||
# an absolute figure for the home
|
||||
rec["heat_demand"] = (
|
||||
(float(property_instance.data["energy-consumption-current"]) - new_heat_demand
|
||||
) * property_instance.floor_area)
|
||||
|
||||
rec["energy_cost_savings"] = AnnualBillSavings.estimate(rec["heat_demand"])
|
||||
|
||||
if (rec["sap_points"] is None) and (rec["co2_equivalent_savings"] is None) or (
|
||||
|
|
|
|||
|
|
@ -15,9 +15,6 @@ class VentilationRecommendations(Definitions):
|
|||
'mechanical, supply and extract'
|
||||
]
|
||||
|
||||
# We introduce a SAP limit, to prevent over-predicting the SAP impact of mechanical ventilation
|
||||
SAP_LIMIT = 2
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
property_instance: Property,
|
||||
|
|
@ -68,7 +65,7 @@ class VentilationRecommendations(Definitions):
|
|||
"description": f"Install {n_units} {part[0]['description']} units",
|
||||
"starting_u_value": None,
|
||||
"new_u_value": None,
|
||||
"sap_points": None,
|
||||
"sap_points": 0,
|
||||
"total": estimated_cost,
|
||||
# We use a very simple and rough estimate of 4 hours per unit
|
||||
"labour_hours": 4 * n_units,
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue