added recommendation scoring setup process

This commit is contained in:
Khalim Conn-Kowlessar 2023-10-10 05:41:25 +08:00
parent f551fda186
commit efdef5eb46
9 changed files with 211 additions and 25 deletions

View file

@ -569,3 +569,89 @@ class Property(Definitions):
:return:
"""
self.wall_type = get_wall_type(**self.walls)
@staticmethod
def _extract_component(component_data, component_rename_cols, component_drop_cols, rename_prefix=None):
for k in component_rename_cols:
component_data[f"{rename_prefix}_{k}"] = component_data[k]
component_data = {
k: v for k, v in component_data.items() if k not in component_drop_cols + component_rename_cols
}
return component_data
def get_model_data(self):
    """
    Assemble the flat record of property data that is scored by our machine learning models.

    The record merges the component dictionaries held on the property (walls, roof, floor, main fuel,
    main heating, main heating controls, hot water and windows) with a handful of values read from
    the property itself and a selection of raw EPC fields taken from self.data.
    For future iterations we probably want a single method in DataProcessor which can be used both
    in the etl code and in here.
    :return: dictionary of model data to be scored in the model
    """
    description_cols = ["original_description", "clean_description"]
    fabric_rename_cols = ["thermal_transmittance", "insulation_thickness"]
    fabric_drop_cols = ["thermal_transmittance_unit", "is_assumed", "is_valid"] + description_cols
    extract = self._extract_component

    # Walls, roof and floor share the same rename/drop columns and differ only in their prefix
    walls_data = extract(self.walls, fabric_rename_cols, fabric_drop_cols, "walls")
    roof_data = extract(self.roof, fabric_rename_cols, fabric_drop_cols, "roof")
    floor_data = extract(self.floor, fabric_rename_cols, fabric_drop_cols, "floor")
    windows_data = extract(self.windows, [], description_cols + ["no_data"])
    fuel_data = extract(self.main_fuel, ["tariff_type"], description_cols + ["tariff_type"], "main-fuel")
    heating_data = extract(self.main_heating, [], description_cols + ["has_assumed"])
    controls_data = extract(self.main_heating_controls, [], description_cols)
    hotwater_data = extract(self.hotwater, ["tariff_type"], description_cols + ['assumed'], "hotwater")

    # We'll need to clean second heating
    secondheat_description = self.data["secondheat-description"]

    # Raw EPC fields are stored in self.data under lower-case, hyphenated keys
    raw_epc_values = {}
    for column in (
        'TRANSACTION_TYPE',
        'ENERGY_TARIFF',
        'PROPERTY_TYPE',
        'UPRN',
        'NUMBER_OPEN_FIREPLACES',
        'FIXED_LIGHTING_OUTLETS_COUNT',
        'MULTI_GLAZE_PROPORTION',
        'MECHANICAL_VENTILATION',
        'PHOTO_SUPPLY',
        'LOW_ENERGY_LIGHTING',
        'SOLAR_WATER_HEATING_FLAG',
        'BUILT_FORM',
        'GLAZED_TYPE',
        'CONSTITUENCY',
        'NUMBER_HEATED_ROOMS',
        'EXTENSION_COUNT',
    ):
        raw_epc_values[column] = self.data[column.lower().replace("_", "-")]

    # Merge order matters: later sources overwrite earlier ones when keys collide
    model_data = {}
    for component in (
        walls_data,
        roof_data,
        floor_data,
        fuel_data,
        heating_data,
        controls_data,
        hotwater_data,
        windows_data,
    ):
        model_data.update(component)
    model_data.update({
        "SECONDHEAT_DESCRIPTION": secondheat_description,
        "DAYS_TO": DataProcessor.calculate_days_to(self.data["lodgement-date"]),
        "SAP": self.data["current-energy-efficiency"],
        "CARBON": self.data["co2-emissions-current"],
        "HEAT_DEMAND": self.data["energy-consumption-current"],
        "estimated_perimeter": self.perimeter,
        "CONSTRUCTION_AGE_BAND": self.age_band,
        "FLOOR_HEIGHT": self.floor_height,
        "NUMBER_HABITABLE_ROOMS": self.number_of_rooms,
        "TOTAL_FLOOR_AREA": self.floor_area,
    })
    model_data.update(raw_epc_values)
    return model_data

View file

@ -7,6 +7,11 @@ def get_materials(session):
"""
This function will retrieve all materials from the database.
:return: A list of Material objects if successful, an empty list otherwise.
TODO: It might not be the best choice to store the materials data in a database table since this
table probably won't be very large and won't be updated that often. It might be better to
store this data in s3 and load it into memory when the app starts up. We will test this
"""
materials = session.query(Material).filter(Material.is_active).all()

View file

@ -106,10 +106,6 @@ async def trigger_plan(body: PlanTriggerRequest):
# The materials data could be cached or local so we don't need to make
# consistent requests to the backend for
# the same data
# TODO: It might not be the best choice to store the materials data in a database table since thi
# table probably won't be very large and won't be updated that often. It might be better to
# store this data in s3 load it into memory when the app starts up. We will test this
logger.info("Reading in materials and cleaned datasets")
materials = get_materials(session)
materials_by_type = filter_materials(materials)
@ -158,20 +154,15 @@ async def trigger_plan(body: PlanTriggerRequest):
recommendations[p.id] = property_recommendations
# Finally, we'll prepare data for predicting the impact on SAP
# TODO: We should use the cleaned data from get_components in the data rather than the raw
# values. We should create a method in Property which takes the EPC data and inserts the cleaned
# data
data_processor = DataProcessor(None, newdata=True)
data_processor.insert_data(pd.DataFrame([p.data.copy()]))
data_processor.pre_process()
data_processor.insert_data(pd.DataFrame([p.get_model_data()]))
starting_epc_data = data_processor.get_component_features(suffix="_STARTING")
ending_epc_data = data_processor.get_component_features(suffix="_ENDING")
fixed_data = data_processor.get_fixed_features()
# We update the ending record with the recommended updates and we set lodgement date to today
ending_epc_data["LODGEMENT_DATE_ENDING"] = created_at
ending_epc_data["LODGEMENT_DATE_ENDING"] = data_processor.calculate_days_to(created_at)
for recommendations_by_type in property_recommendations:
for rec in recommendations_by_type:

View file

@ -2,7 +2,10 @@ import pandas as pd
from backend.Property import Property
from collections import defaultdict
from utils.s3 import read_from_s3
from recommendations.config import UPGRADES_MAP
from recommendations.recommendation_utils import get_wall_u_value, get_floor_u_value
from backend.app.db.utils import row2dict
from backend.app.config import get_settings
import msgpack
@ -85,8 +88,27 @@ def create_recommendation_scoring_data(
# We update the description to indicate it's insulated
if recommendation["type"] == "wall_insulation":
scoring_dict["WALLS_DESCRIPTION_ENDING"] = UPGRADES_MAP[property.walls["clean_description"]]
# The upgrade made here is to the u-value of the walls and the description of the
# insulation thickness
# We may not have the u-value initially, so we calculate it
if not scoring_dict["walls_thermal_transmittance"]:
scoring_dict["walls_thermal_transmittance"] = get_wall_u_value(
clean_description=property.walls["clean_description"],
age_band=property.age_band,
is_granite_or_whinstone=property.walls["is_granite_or_whinstone"],
is_sandstone_or_limestone=property.walls["is_sandstone_or_limestone"]
)
scoring_dict["walls_thermal_transmittance_ENDING"] = get_wall_u_value(
clean_description=UPGRADES_MAP[property.walls["clean_description"]],
age_band=property.age_band,
is_granite_or_whinstone=property.walls["is_granite_or_whinstone"],
is_sandstone_or_limestone=property.walls["is_sandstone_or_limestone"]
)
scoring_dict["walls_insulation_thickness_ENDING"] = "above average"
elif recommendation["type"] == "floor_insulation":
blah
scoring_dict["FLOOR_DESCRIPTION_ENDING"] = UPGRADES_MAP[property.floor["clean_description"]]
else:
raise NotImplementedError("Implement me")

View file

View file

@ -9,12 +9,14 @@ from etl.epc.settings import (
AVERAGE_FIXED_FEATURES,
BUILT_FORM_REMAP,
COLUMNS_TO_MERGE_ON,
COMPONENT_FEATURES,
FIXED_FEATURES,
COLUMNTYPES,
RDSAP_RESPONSE,
MAX_SAP_SCORE,
fill_na_map,
STARTING_SUFFIX_COMPONENT_COLS,
NO_SUFFIX_COMPONENT_COLS,
ENDING_SUFFIX_COMPONENT_COLS
)
from recommendations.rdsap_tables import FLOOR_LEVEL_MAP
@ -500,9 +502,15 @@ class DataProcessor:
"""
if suffix not in ["_STARTING", "_ENDING"]:
raise Exception("Suffix should be one of _STARTING or _ENFING")
raise Exception("Suffix should be one of _STARTING or _ENDING")
return self.data[COMPONENT_FEATURES + ["LODGEMENT_DATE"]].copy().add_suffix(suffix)
if suffix == "_STARTING":
starting_cols = self.data[STARTING_SUFFIX_COMPONENT_COLS].copy().add_suffix(suffix)
fixed_cols = self.data[NO_SUFFIX_COMPONENT_COLS].copy()
return pd.concat([starting_cols, fixed_cols], axis=1)
return self.data[ENDING_SUFFIX_COMPONENT_COLS].copy().add_suffix(suffix)
def get_fixed_features(self) -> pd.DataFrame:
"""
@ -531,3 +539,15 @@ class DataProcessor:
df[column] = df[column].astype(bool)
return df
@staticmethod
def calculate_days_to(lodgement_date):
    """
    Calculate the number of days between EARLIEST_EPC_DATE and a lodgement date.

    Accepts either a single date or a pandas Series of dates, so the same method can be used when
    scoring a single property and when processing a whole dataframe column.

    :param lodgement_date: a single date (string, datetime or pandas Timestamp) or a pandas Series
        of dates
    :return: number of days for a single date, or a Series of day counts for a Series
    """
    parsed = pd.to_datetime(lodgement_date)
    baseline = pd.to_datetime(EARLIEST_EPC_DATE)

    if isinstance(parsed, pd.Series):
        # Element-wise differences are exposed through the .dt accessor
        return (parsed - baseline).dt.days

    # A single date parses to a Timestamp, whose difference exposes .days directly (there is no
    # .dt accessor on scalars). Any timezone is dropped so the subtraction is between naive values
    return (parsed.tz_localize(None) - baseline).days

View file

@ -514,12 +514,11 @@ def app():
# Add some temporal features - we look at the days from the standard starting point in time
# for the starting and ending date so all records are from a fixed point
data_by_urpn_df["DAYS_TO_STARTING"] = (
pd.to_datetime(data_by_urpn_df["LODGEMENT_DATE_STARTING"]) - pd.to_datetime(EARLIEST_EPC_DATE)
).dt.days
data_by_urpn_df["DAYS_TO_ENDING"] = (
pd.to_datetime(data_by_urpn_df["LODGEMENT_DATE_ENDING"]) - pd.to_datetime(EARLIEST_EPC_DATE)
).dt.days
data_by_urpn_df["DAYS_TO_STARTING"] = DataProcessor.calculate_days_to(
data_by_urpn_df["LODGEMENT_DATE_STARTING"])
data_by_urpn_df["DAYS_TO_ENDING"] = DataProcessor.calculate_days_to(
data_by_urpn_df["LODGEMENT_DATE_ENDING"])
data_by_urpn_df = data_by_urpn_df.drop(columns=["LODGEMENT_DATE_STARTING", "LODGEMENT_DATE_ENDING"])

View file

@ -189,3 +189,67 @@ fill_na_map = {
"EXTENSION_COUNT": 0,
"NUMBER_OPEN_FIREPLACES": 0
}
################################################################################################
# These are the features we need for scoring
# We'll likely change how we do this in the future
################################################################################################
# Columns which DataProcessor.get_component_features selects and suffixes when it is called with
# suffix="_STARTING"
STARTING_SUFFIX_COMPONENT_COLS = [
"SAP", "HEAT_DEMAND", "CARBON", "TRANSACTION_TYPE", "MECHANICAL_VENTILATION",
"SECONDHEAT_DESCRIPTION", "ENERGY_TARIFF", "SOLAR_WATER_HEATING_FLAG", "PHOTO_SUPPLY",
"GLAZED_TYPE", "MULTI_GLAZE_PROPORTION", "LOW_ENERGY_LIGHTING", "NUMBER_OPEN_FIREPLACES",
"EXTENSION_COUNT", "TOTAL_FLOOR_AREA", "FLOOR_HEIGHT", "DAYS_TO", "estimated_perimeter"
]
# Columns which DataProcessor.get_component_features returns without a suffix, alongside the
# suffixed starting columns, when it is called with suffix="_STARTING"
NO_SUFFIX_COMPONENT_COLS = ['walls_thermal_transmittance', 'is_cavity_wall',
'is_filled_cavity', 'is_solid_brick', 'is_system_built', 'is_timber_frame',
'is_granite_or_whinstone', 'is_as_built', 'is_cob', 'is_sandstone_or_limestone',
'is_park_home', 'walls_insulation_thickness', 'external_insulation', 'internal_insulation',
'floor_thermal_transmittance', 'is_to_unheated_space', 'is_to_external_air', 'is_suspended',
'is_solid', 'another_property_below', 'floor_insulation_thickness',
'roof_thermal_transmittance', 'is_pitched', 'is_roof_room', 'is_loft', 'is_flat',
'is_thatched', 'is_at_rafters', 'has_dwelling_above', 'roof_insulation_thickness',
'heater_type', 'system_type', 'thermostat_characteristics', 'heating_scope',
'energy_recovery',
'hotwater_tariff_type', 'extra_features', 'chp_systems', 'distribution_system',
'no_system_present', 'appliance', 'has_radiators', 'has_fan_coil_units',
'has_pipes_in_screed_above_insulation', 'has_pipes_in_insulated_timber_floor',
'has_pipes_in_concrete_slab', 'has_boiler', 'has_air_source_heat_pump', 'has_room_heaters',
'has_electric_storage_heaters', 'has_warm_air', 'has_electric_underfloor_heating',
'has_electric_ceiling_heating', 'has_community_scheme', 'has_ground_source_heat_pump',
'has_no_system_present', 'has_portable_electric_heaters', 'has_water_source_heat_pump',
'has_electric_heat_pump', 'has_micro-cogeneration', 'has_solar_assisted_heat_pump',
'has_exhaust_source_heat_pump', 'has_community_heat_pump', 'has_electric', 'has_mains_gas',
'has_wood_logs', 'has_coal', 'has_oil', 'has_wood_pellets', 'has_anthracite',
'has_dual_fuel_mineral_and_wood', 'has_smokeless_fuel', 'has_lpg', 'has_b30k',
'has_electricaire', 'has_assumed_for_most_rooms', 'has_underfloor_heating',
'thermostatic_control', 'charging_system', 'switch_system', 'no_control', 'dhw_control',
'community_heating', 'multiple_room_thermostats', 'auxiliary_systems', 'trvs',
'rate_control',
'glazing_type', 'fuel_type', 'main-fuel_tariff_type', 'is_community',
'no_individual_heating_or_community_network', 'complex_fuel_type',
]
# Columns which DataProcessor.get_component_features selects and suffixes when it is called with
# suffix="_ENDING"
ENDING_SUFFIX_COMPONENT_COLS = [
'SAP', 'HEAT_DEMAND', 'CARBON', 'TRANSACTION_TYPE', 'MECHANICAL_VENTILATION', 'SECONDHEAT_DESCRIPTION',
'ENERGY_TARIFF', 'SOLAR_WATER_HEATING_FLAG', 'PHOTO_SUPPLY', 'GLAZED_TYPE', 'MULTI_GLAZE_PROPORTION',
'LOW_ENERGY_LIGHTING', 'NUMBER_OPEN_FIREPLACES', 'EXTENSION_COUNT', 'TOTAL_FLOOR_AREA', 'FLOOR_HEIGHT',
'DAYS_TO', 'walls_thermal_transmittance', 'is_park_home', 'walls_insulation_thickness',
'external_insulation', 'internal_insulation', 'floor_thermal_transmittance', 'floor_insulation_thickness',
'roof_thermal_transmittance', 'roof_insulation_thickness', 'heater_type', 'system_type',
'thermostat_characteristics', 'heating_scope', 'energy_recovery', 'hotwater_tariff_type', 'extra_features',
'chp_systems', 'distribution_system', 'no_system_present', 'appliance', 'has_radiators',
'has_fan_coil_units', 'has_pipes_in_screed_above_insulation', 'has_pipes_in_insulated_timber_floor',
'has_pipes_in_concrete_slab', 'has_boiler', 'has_air_source_heat_pump', 'has_room_heaters',
'has_electric_storage_heaters', 'has_warm_air', 'has_electric_underfloor_heating',
'has_electric_ceiling_heating', 'has_community_scheme', 'has_ground_source_heat_pump',
'has_no_system_present', 'has_portable_electric_heaters', 'has_water_source_heat_pump',
'has_electric_heat_pump', 'has_micro-cogeneration', 'has_solar_assisted_heat_pump',
'has_exhaust_source_heat_pump', 'has_community_heat_pump', 'has_electric', 'has_mains_gas', 'has_wood_logs',
'has_coal', 'has_oil', 'has_wood_pellets', 'has_anthracite', 'has_dual_fuel_mineral_and_wood',
'has_smokeless_fuel', 'has_lpg', 'has_b30k', 'has_electricaire', 'has_assumed_for_most_rooms',
'has_underfloor_heating', 'thermostatic_control', 'charging_system', 'switch_system', 'no_control',
'dhw_control', 'community_heating', 'multiple_room_thermostats', 'auxiliary_systems', 'trvs',
'rate_control', 'glazing_type', 'fuel_type', 'main-fuel_tariff_type', 'is_community',
'no_individual_heating_or_community_network', 'complex_fuel_type', 'estimated_perimeter'
]

View file

@ -1,10 +1,9 @@
# This map defines the upgrades that are possible to be recommended by the recommendation engine
# For example, 'Solid, no insulation' can be upgraded to 'Solid, insulated'
# TODO: once we use cleaned descriptions, this should be updated using the cleaned descriptions
# Each key is the current description of a component and each value is the description of the
# same component once the upgrade has been applied. The recommendation scoring code looks this
# map up with the "clean_description" of the property's walls and floor
UPGRADES_MAP = {
'Solid brick, as built, no insulation (assumed)': 'Solid brick, as built, insulated (assumed)',
'Suspended, no insulation (assumed)': 'Suspended, insulated (assumed)',
'Solid, no insulation (assumed)': 'Solid, insulated (assumed)',
'Solid brick, as built, no insulation': 'Solid brick, as built, insulated',
'Suspended, no insulation': 'Suspended, insulated',
'Solid, no insulation': 'Solid, insulated',
}
PARTIAL_CAVITY_DESCRIPTIONS = [