mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
added recommendation scoring setup process
This commit is contained in:
parent
f551fda186
commit
efdef5eb46
9 changed files with 211 additions and 25 deletions
|
|
@ -569,3 +569,89 @@ class Property(Definitions):
|
|||
:return:
|
||||
"""
|
||||
self.wall_type = get_wall_type(**self.walls)
|
||||
|
||||
@staticmethod
def _extract_component(component_data, component_rename_cols, component_drop_cols, rename_prefix=None):
    """
    Build a model-ready copy of a cleaned component dictionary.

    Every key listed in component_rename_cols is re-exposed as "<rename_prefix>_<key>"; the un-prefixed
    originals are then removed together with everything listed in component_drop_cols.

    The dictionary passed in is never modified. Previously the prefixed keys were written straight into
    component_data, so extracting model data left extra keys behind on the caller's dictionary.

    :param component_data: dictionary of attributes for a single component
    :param component_rename_cols: keys to copy under the prefixed name; each must exist in component_data
    :param component_drop_cols: keys to leave out of the result
    :param rename_prefix: prefix for the renamed keys. It is formatted as-is, so a value should be
        supplied whenever component_rename_cols is non-empty
    :return: new dictionary holding the retained and renamed attributes
    :raises KeyError: if a key in component_rename_cols is missing from component_data
    """
    # Work on a shallow copy so the source dictionary keeps exactly the keys it came in with
    extracted = dict(component_data)
    for k in component_rename_cols:
        extracted[f"{rename_prefix}_{k}"] = extracted[k]

    # Build the exclusion collection once, rather than concatenating the lists for every key below
    excluded = frozenset(component_drop_cols) | frozenset(component_rename_cols)

    return {k: v for k, v in extracted.items() if k not in excluded}
|
||||
|
||||
def get_model_data(self):
    """
    Assemble the flat dictionary of features that is scored by our machine learning models.

    The values are drawn from the component dictionaries held on this property (walls, roof, floor,
    windows, fuel, heating, heating controls and hot water), from entries in self.data, and from
    attributes already stored on the object.

    For future iterations of this, we probably want to implement a singular method in DataProcessor,
    which can be used in the etl code and in here

    :return: dictionary of model data to be scored in the model
    """
    extract = self._extract_component

    description_cols = ["original_description", "clean_description"]
    insulation_renames = ["thermal_transmittance", "insulation_thickness"]
    insulation_drops = ["thermal_transmittance_unit", "is_assumed", "is_valid"] + description_cols

    # The fabric components share the same rename/drop rules and only differ in their prefix
    walls = extract(self.walls, insulation_renames, insulation_drops, "walls")
    roof = extract(self.roof, insulation_renames, insulation_drops, "roof")
    floor = extract(self.floor, insulation_renames, insulation_drops, "floor")

    windows = extract(self.windows, [], description_cols + ["no_data"])
    fuel = extract(self.main_fuel, ["tariff_type"], description_cols + ["tariff_type"], "main-fuel")
    main_heating = extract(self.main_heating, [], description_cols + ["has_assumed"])
    main_heating_controls = extract(self.main_heating_controls, [], description_cols)
    hotwater = extract(self.hotwater, ["tariff_type"], description_cols + ['assumed'], "hotwater")

    # We'll need to clean second heating
    second_heating = self.data["secondheat-description"]

    raw_epc_fields = (
        'TRANSACTION_TYPE',
        'ENERGY_TARIFF',
        'PROPERTY_TYPE',
        'UPRN',
        'NUMBER_OPEN_FIREPLACES',
        'FIXED_LIGHTING_OUTLETS_COUNT',
        'MULTI_GLAZE_PROPORTION',
        'MECHANICAL_VENTILATION',
        'PHOTO_SUPPLY',
        'LOW_ENERGY_LIGHTING',
        'SOLAR_WATER_HEATING_FLAG',
        'BUILT_FORM',
        'GLAZED_TYPE',
        'CONSTITUENCY',
        'NUMBER_HEATED_ROOMS',
        'EXTENSION_COUNT',
    )
    # self.data is keyed by the lower-case, hyphenated form of each field name
    epc_raw_data = {}
    for field in raw_epc_fields:
        epc_raw_data[field] = self.data[field.lower().replace("_", "-")]

    # Merge order matters: a later source overrides an earlier one on a shared key
    property_data = {}
    for component in (walls, roof, floor, fuel, main_heating, main_heating_controls, hotwater, windows):
        property_data.update(component)

    property_data["SECONDHEAT_DESCRIPTION"] = second_heating
    property_data["DAYS_TO"] = DataProcessor.calculate_days_to(self.data["lodgement-date"])
    property_data["SAP"] = self.data["current-energy-efficiency"]
    property_data["CARBON"] = self.data["co2-emissions-current"]
    property_data["HEAT_DEMAND"] = self.data["energy-consumption-current"]
    property_data["estimated_perimeter"] = self.perimeter
    property_data["CONSTRUCTION_AGE_BAND"] = self.age_band
    property_data["FLOOR_HEIGHT"] = self.floor_height
    property_data["NUMBER_HABITABLE_ROOMS"] = self.number_of_rooms
    property_data["TOTAL_FLOOR_AREA"] = self.floor_area
    property_data.update(epc_raw_data)

    return property_data
|
||||
|
|
|
|||
|
|
@ -7,6 +7,11 @@ def get_materials(session):
|
|||
"""
|
||||
This function will retrieve all materials from the database.
|
||||
:return: A list of Material objects if successful, an empty list otherwise.
|
||||
|
||||
|
||||
TODO: It might not be the best choice to store the materials data in a database table since this
|
||||
table probably won't be very large and won't be updated that often. It might be better to
|
||||
store this data in s3 and load it into memory when the app starts up. We will test this.
|
||||
"""
|
||||
|
||||
materials = session.query(Material).filter(Material.is_active).all()
|
||||
|
|
|
|||
|
|
@ -106,10 +106,6 @@ async def trigger_plan(body: PlanTriggerRequest):
|
|||
# The materials data could be cached or local so we don't need to make
|
||||
# consistent requests to the backend for
|
||||
# the same data
|
||||
# TODO: It might not be the best choice to store the materials data in a database table since thi
|
||||
# table probably won't be very large and won't be updated that often. It might be better to
|
||||
# store this data in s3 load it into memory when the app starts up. We will test this
|
||||
|
||||
logger.info("Reading in materials and cleaned datasets")
|
||||
materials = get_materials(session)
|
||||
materials_by_type = filter_materials(materials)
|
||||
|
|
@ -158,20 +154,15 @@ async def trigger_plan(body: PlanTriggerRequest):
|
|||
recommendations[p.id] = property_recommendations
|
||||
|
||||
# Finally, we'll prepare data for predicting the impact on SAP
|
||||
# TODO: We should use the cleaned data from get_components in the data rather than the raw
|
||||
# values. We should create a method in Property which takes the EPC data and inserts the cleaned
|
||||
# data
|
||||
|
||||
data_processor = DataProcessor(None, newdata=True)
|
||||
data_processor.insert_data(pd.DataFrame([p.data.copy()]))
|
||||
data_processor.pre_process()
|
||||
data_processor.insert_data(pd.DataFrame([p.get_model_data()]))
|
||||
|
||||
starting_epc_data = data_processor.get_component_features(suffix="_STARTING")
|
||||
ending_epc_data = data_processor.get_component_features(suffix="_ENDING")
|
||||
fixed_data = data_processor.get_fixed_features()
|
||||
|
||||
# We update the ending record with the recommended updates and we set lodgement date to today
|
||||
ending_epc_data["LODGEMENT_DATE_ENDING"] = created_at
|
||||
ending_epc_data["LODGEMENT_DATE_ENDING"] = data_processor.calculate_days_to(created_at)
|
||||
|
||||
for recommendations_by_type in property_recommendations:
|
||||
for rec in recommendations_by_type:
|
||||
|
|
|
|||
|
|
@ -2,7 +2,10 @@ import pandas as pd
|
|||
from backend.Property import Property
|
||||
from collections import defaultdict
|
||||
from utils.s3 import read_from_s3
|
||||
|
||||
from recommendations.config import UPGRADES_MAP
|
||||
from recommendations.recommendation_utils import get_wall_u_value, get_floor_u_value
|
||||
|
||||
from backend.app.db.utils import row2dict
|
||||
from backend.app.config import get_settings
|
||||
import msgpack
|
||||
|
|
@ -85,8 +88,27 @@ def create_recommendation_scoring_data(
|
|||
|
||||
# We update the description to indicate it's insulated
|
||||
if recommendation["type"] == "wall_insulation":
|
||||
scoring_dict["WALLS_DESCRIPTION_ENDING"] = UPGRADES_MAP[property.walls["clean_description"]]
|
||||
# The upgrade made here is to the u-value of the walls and the description of the
|
||||
# insulation thickness
|
||||
# We may not have the u-value initially, so we calculate it
|
||||
if not scoring_dict["walls_thermal_transmittance"]:
|
||||
scoring_dict["walls_thermal_transmittance"] = get_wall_u_value(
|
||||
clean_description=property.walls["clean_description"],
|
||||
age_band=property.age_band,
|
||||
is_granite_or_whinstone=property.walls["is_granite_or_whinstone"],
|
||||
is_sandstone_or_limestone=property.walls["is_sandstone_or_limestone"]
|
||||
)
|
||||
|
||||
scoring_dict["walls_thermal_transmittance_ENDING"] = get_wall_u_value(
|
||||
clean_description=UPGRADES_MAP[property.walls["clean_description"]],
|
||||
age_band=property.age_band,
|
||||
is_granite_or_whinstone=property.walls["is_granite_or_whinstone"],
|
||||
is_sandstone_or_limestone=property.walls["is_sandstone_or_limestone"]
|
||||
)
|
||||
scoring_dict["walls_insulation_thickness_ENDING"] = "above average"
|
||||
|
||||
elif recommendation["type"] == "floor_insulation":
|
||||
blah
|
||||
scoring_dict["FLOOR_DESCRIPTION_ENDING"] = UPGRADES_MAP[property.floor["clean_description"]]
|
||||
else:
|
||||
raise NotImplementedError("Implement me")
|
||||
|
|
|
|||
0
backend/tests/test_sap_model_prep.py
Normal file
0
backend/tests/test_sap_model_prep.py
Normal file
|
|
@ -9,12 +9,14 @@ from etl.epc.settings import (
|
|||
AVERAGE_FIXED_FEATURES,
|
||||
BUILT_FORM_REMAP,
|
||||
COLUMNS_TO_MERGE_ON,
|
||||
COMPONENT_FEATURES,
|
||||
FIXED_FEATURES,
|
||||
COLUMNTYPES,
|
||||
RDSAP_RESPONSE,
|
||||
MAX_SAP_SCORE,
|
||||
fill_na_map,
|
||||
STARTING_SUFFIX_COMPONENT_COLS,
|
||||
NO_SUFFIX_COMPONENT_COLS,
|
||||
ENDING_SUFFIX_COMPONENT_COLS
|
||||
)
|
||||
from recommendations.rdsap_tables import FLOOR_LEVEL_MAP
|
||||
|
||||
|
|
@ -500,9 +502,15 @@ class DataProcessor:
|
|||
"""
|
||||
|
||||
if suffix not in ["_STARTING", "_ENDING"]:
|
||||
raise Exception("Suffix should be one of _STARTING or _ENFING")
|
||||
raise Exception("Suffix should be one of _STARTING or _ENDING")
|
||||
|
||||
return self.data[COMPONENT_FEATURES + ["LODGEMENT_DATE"]].copy().add_suffix(suffix)
|
||||
if suffix == "_STARTING":
|
||||
starting_cols = self.data[STARTING_SUFFIX_COMPONENT_COLS].copy().add_suffix(suffix)
|
||||
fixed_cols = self.data[NO_SUFFIX_COMPONENT_COLS].copy()
|
||||
|
||||
return pd.concat([starting_cols, fixed_cols], axis=1)
|
||||
|
||||
return self.data[ENDING_SUFFIX_COMPONENT_COLS].copy().add_suffix(suffix)
|
||||
|
||||
def get_fixed_features(self) -> pd.DataFrame:
|
||||
"""
|
||||
|
|
@ -531,3 +539,15 @@ class DataProcessor:
|
|||
df[column] = df[column].astype(bool)
|
||||
|
||||
return df
|
||||
|
||||
@staticmethod
def calculate_days_to(lodgement_date):
    """
    Count the whole days between EARLIEST_EPC_DATE and a lodgement date.

    Measuring every record from the same fixed starting point gives the model a comparable temporal
    feature.

    :param lodgement_date: either a pandas Series of dates, or a single date supplied as a string,
        datetime or Timestamp
    :return: a Series of day counts when a Series is given, otherwise the day count for the single date
    """
    earliest = pd.to_datetime(EARLIEST_EPC_DATE)

    if isinstance(lodgement_date, pd.Series):
        return (pd.to_datetime(lodgement_date) - earliest).dt.days

    # Everything else is treated as a single date. Only a Series has the .dt accessor, so sending a
    # datetime/Timestamp down the Series path (as happened before for any non-string) raised an
    # AttributeError. Timezone information is stripped before subtracting, as was already done for
    # strings - presumably because EARLIEST_EPC_DATE is timezone-naive (TODO confirm).
    return (pd.to_datetime(lodgement_date).tz_localize(None) - earliest).days
|
||||
|
|
|
|||
|
|
@ -514,12 +514,11 @@ def app():
|
|||
|
||||
# Add some temporal features - we look at the days from the standard starting point in time
|
||||
# for the starting and ending date so all records are from a fixed point
|
||||
data_by_urpn_df["DAYS_TO_STARTING"] = (
|
||||
pd.to_datetime(data_by_urpn_df["LODGEMENT_DATE_STARTING"]) - pd.to_datetime(EARLIEST_EPC_DATE)
|
||||
).dt.days
|
||||
data_by_urpn_df["DAYS_TO_ENDING"] = (
|
||||
pd.to_datetime(data_by_urpn_df["LODGEMENT_DATE_ENDING"]) - pd.to_datetime(EARLIEST_EPC_DATE)
|
||||
).dt.days
|
||||
data_by_urpn_df["DAYS_TO_STARTING"] = DataProcessor.calculate_days_to(
|
||||
data_by_urpn_df["LODGEMENT_DATE_STARTING"])
|
||||
|
||||
data_by_urpn_df["DAYS_TO_ENDING"] = DataProcessor.calculate_days_to(
|
||||
data_by_urpn_df["LODGEMENT_DATE_ENDING"])
|
||||
|
||||
data_by_urpn_df = data_by_urpn_df.drop(columns=["LODGEMENT_DATE_STARTING", "LODGEMENT_DATE_ENDING"])
|
||||
|
||||
|
|
|
|||
|
|
@ -189,3 +189,67 @@ fill_na_map = {
|
|||
"EXTENSION_COUNT": 0,
|
||||
"NUMBER_OPEN_FIREPLACES": 0
|
||||
}
|
||||
|
||||
################################################################################################
|
||||
# These are the features we need for scoring
|
||||
# We'll likely change how we do this in the future
|
||||
################################################################################################
|
||||
|
||||
STARTING_SUFFIX_COMPONENT_COLS = [
|
||||
"SAP", "HEAT_DEMAND", "CARBON", "TRANSACTION_TYPE", "MECHANICAL_VENTILATION",
|
||||
"SECONDHEAT_DESCRIPTION", "ENERGY_TARIFF", "SOLAR_WATER_HEATING_FLAG", "PHOTO_SUPPLY",
|
||||
"GLAZED_TYPE", "MULTI_GLAZE_PROPORTION", "LOW_ENERGY_LIGHTING", "NUMBER_OPEN_FIREPLACES",
|
||||
"EXTENSION_COUNT", "TOTAL_FLOOR_AREA", "FLOOR_HEIGHT", "DAYS_TO", "estimated_perimeter"
|
||||
]
|
||||
NO_SUFFIX_COMPONENT_COLS = ['walls_thermal_transmittance', 'is_cavity_wall',
|
||||
'is_filled_cavity', 'is_solid_brick', 'is_system_built', 'is_timber_frame',
|
||||
'is_granite_or_whinstone', 'is_as_built', 'is_cob', 'is_sandstone_or_limestone',
|
||||
'is_park_home', 'walls_insulation_thickness', 'external_insulation', 'internal_insulation',
|
||||
'floor_thermal_transmittance', 'is_to_unheated_space', 'is_to_external_air', 'is_suspended',
|
||||
'is_solid', 'another_property_below', 'floor_insulation_thickness',
|
||||
'roof_thermal_transmittance', 'is_pitched', 'is_roof_room', 'is_loft', 'is_flat',
|
||||
'is_thatched', 'is_at_rafters', 'has_dwelling_above', 'roof_insulation_thickness',
|
||||
'heater_type', 'system_type', 'thermostat_characteristics', 'heating_scope',
|
||||
'energy_recovery',
|
||||
'hotwater_tariff_type', 'extra_features', 'chp_systems', 'distribution_system',
|
||||
'no_system_present', 'appliance', 'has_radiators', 'has_fan_coil_units',
|
||||
'has_pipes_in_screed_above_insulation', 'has_pipes_in_insulated_timber_floor',
|
||||
'has_pipes_in_concrete_slab', 'has_boiler', 'has_air_source_heat_pump', 'has_room_heaters',
|
||||
'has_electric_storage_heaters', 'has_warm_air', 'has_electric_underfloor_heating',
|
||||
'has_electric_ceiling_heating', 'has_community_scheme', 'has_ground_source_heat_pump',
|
||||
'has_no_system_present', 'has_portable_electric_heaters', 'has_water_source_heat_pump',
|
||||
'has_electric_heat_pump', 'has_micro-cogeneration', 'has_solar_assisted_heat_pump',
|
||||
'has_exhaust_source_heat_pump', 'has_community_heat_pump', 'has_electric', 'has_mains_gas',
|
||||
'has_wood_logs', 'has_coal', 'has_oil', 'has_wood_pellets', 'has_anthracite',
|
||||
'has_dual_fuel_mineral_and_wood', 'has_smokeless_fuel', 'has_lpg', 'has_b30k',
|
||||
'has_electricaire', 'has_assumed_for_most_rooms', 'has_underfloor_heating',
|
||||
'thermostatic_control', 'charging_system', 'switch_system', 'no_control', 'dhw_control',
|
||||
'community_heating', 'multiple_room_thermostats', 'auxiliary_systems', 'trvs',
|
||||
'rate_control',
|
||||
'glazing_type', 'fuel_type', 'main-fuel_tariff_type', 'is_community',
|
||||
'no_individual_heating_or_community_network', 'complex_fuel_type',
|
||||
]
|
||||
|
||||
ENDING_SUFFIX_COMPONENT_COLS = [
|
||||
'SAP', 'HEAT_DEMAND', 'CARBON', 'TRANSACTION_TYPE', 'MECHANICAL_VENTILATION', 'SECONDHEAT_DESCRIPTION',
|
||||
'ENERGY_TARIFF', 'SOLAR_WATER_HEATING_FLAG', 'PHOTO_SUPPLY', 'GLAZED_TYPE', 'MULTI_GLAZE_PROPORTION',
|
||||
'LOW_ENERGY_LIGHTING', 'NUMBER_OPEN_FIREPLACES', 'EXTENSION_COUNT', 'TOTAL_FLOOR_AREA', 'FLOOR_HEIGHT',
|
||||
'DAYS_TO', 'walls_thermal_transmittance', 'is_park_home', 'walls_insulation_thickness',
|
||||
'external_insulation', 'internal_insulation', 'floor_thermal_transmittance', 'floor_insulation_thickness',
|
||||
'roof_thermal_transmittance', 'roof_insulation_thickness', 'heater_type', 'system_type',
|
||||
'thermostat_characteristics', 'heating_scope', 'energy_recovery', 'hotwater_tariff_type', 'extra_features',
|
||||
'chp_systems', 'distribution_system', 'no_system_present', 'appliance', 'has_radiators',
|
||||
'has_fan_coil_units', 'has_pipes_in_screed_above_insulation', 'has_pipes_in_insulated_timber_floor',
|
||||
'has_pipes_in_concrete_slab', 'has_boiler', 'has_air_source_heat_pump', 'has_room_heaters',
|
||||
'has_electric_storage_heaters', 'has_warm_air', 'has_electric_underfloor_heating',
|
||||
'has_electric_ceiling_heating', 'has_community_scheme', 'has_ground_source_heat_pump',
|
||||
'has_no_system_present', 'has_portable_electric_heaters', 'has_water_source_heat_pump',
|
||||
'has_electric_heat_pump', 'has_micro-cogeneration', 'has_solar_assisted_heat_pump',
|
||||
'has_exhaust_source_heat_pump', 'has_community_heat_pump', 'has_electric', 'has_mains_gas', 'has_wood_logs',
|
||||
'has_coal', 'has_oil', 'has_wood_pellets', 'has_anthracite', 'has_dual_fuel_mineral_and_wood',
|
||||
'has_smokeless_fuel', 'has_lpg', 'has_b30k', 'has_electricaire', 'has_assumed_for_most_rooms',
|
||||
'has_underfloor_heating', 'thermostatic_control', 'charging_system', 'switch_system', 'no_control',
|
||||
'dhw_control', 'community_heating', 'multiple_room_thermostats', 'auxiliary_systems', 'trvs',
|
||||
'rate_control', 'glazing_type', 'fuel_type', 'main-fuel_tariff_type', 'is_community',
|
||||
'no_individual_heating_or_community_network', 'complex_fuel_type', 'estimated_perimeter'
|
||||
]
|
||||
|
|
|
|||
|
|
@ -1,10 +1,9 @@
|
|||
# This map defines the upgrades that are possible to be recommended by the recommendation engine
|
||||
# For example,
|
||||
# TODO: once we use cleaned descriptions, this should be updated using the cleaned descriptions
|
||||
UPGRADES_MAP = {
|
||||
'Solid brick, as built, no insulation (assumed)': 'Solid brick, as built, insulated (assumed)',
|
||||
'Suspended, no insulation (assumed)': 'Suspended, insulated (assumed)',
|
||||
'Solid, no insulation (assumed)': 'Solid, insulated (assumed)',
|
||||
'Solid brick, as built, no insulation': 'Solid brick, as built, insulated',
|
||||
'Suspended, no insulation': 'Suspended, insulated',
|
||||
'Solid, no insulation': 'Solid, insulated',
|
||||
}
|
||||
|
||||
PARTIAL_CAVITY_DESCRIPTIONS = [
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue