# Using a simple Python file as settings for now
# TODO: migrate to dynaconf
from pathlib import Path

METRIC_FILENAME = "metrics.csv"
OPTIMISE_METRIC = "mean_absolute_error"
BEST_MODEL_COLUMN_NAME = "best_model"

# TODO: remove these settings elsewhere for CML
RESIDUAL_TRUE_LABEL = "true"
RESIDUAL_PREDICTION_LABEL = "pred"
RESIDUAL_FILE = "residual.png"
SEABORN_RESIDUAL_AXIS_FONTSIZE = 12
SEABORN_RESIDUAL_TITLE_FONTSIZE = 22
SEABORN_RESIDUAL_STYLE = "whitegrid"
SEABORN_RESIDUAL_ASPECT_RATIO = "equal"
SEABORN_RESIDUAL_PLOT_DPI = 120
SEABORN_RESIDUAL_RANGE = [-100, 100]
SEABORN_RESIDUAL_LINE_COLOUR = "black"
SEABORN_RESIDUAL_LINE_WIDTH = 1

# Can move to a hyperparameters file
# If anything we might want to have a file that can be loaded and sent to this script
MODEL_HYPERPARAMETERS = {
    "autogluon": {
        "problem_type": "regression",
        "eval_metric": "mean_absolute_error",
        "time_limit": 45,
        "presets": "medium_quality",
        "excluded_model_types": None,
    }
}

TIMESTAMP_FORMAT = "%Y_%m_%d_%H_%M_%S"

RANDOM_SEED = 0
SUBSAMPLE_FACTOR = 200

TRAIN_AND_VALIDATION_DATA_NAME = "train_validation_data.parquet"
TEST_DATA_NAME = "test_data.parquet"
REGISTRY_FILE = "model_registry.csv"
MODEL_DIRECTORY = "model_directory"
# Resolved relative to this file: two directory levels up, then MODEL_DIRECTORY
BASE_REGISTRY_PATH = Path(__file__).parent.parent / MODEL_DIRECTORY

PREDICTION_LOCATION = Path("predictions")
PREDICTION_FILE = "prediction.json"
METADATA_FILE = "metadata.json"
MODEL_FOLDER = "model"
METRICS_FOLDER = "metrics"
DEPLOYMENT_FOLDER = "deployment"

TOTAL_FLOOR_AREA_NATIONAL_AVERAGE = 70
FLOOR_HEIGHT_NATIONAL_AVERAGE = 2.45

COLUMNS_TO_MERGE_ON = [
    "PROPERTY_TYPE",
    "BUILT_FORM",
    "CONSTRUCTION_AGE_BAND",
    "NUMBER_HABITABLE_ROOMS",
    "NUMBER_HEATED_ROOMS",
]

FULLY_GLAZED_DESCRIPTIONS = [
    "Fully double glazed",
    "High performance glazing",
    "Fully triple glazed",
    "Full secondary glazing",
    "Multiple glazing throughout",
]

FIXED_FEATURES = [
    "PROPERTY_TYPE",
    "BUILT_FORM",
    "CONSTRUCTION_AGE_BAND",
    "NUMBER_HABITABLE_ROOMS",
    "CONSTITUENCY",
    "NUMBER_HEATED_ROOMS",
    "FIXED_LIGHTING_OUTLETS_COUNT",
    "FLOOR_HEIGHT",
    "FLOOR_LEVEL",
    "TOTAL_FLOOR_AREA",
]

# Each feature must appear only once in this list
COMPONENT_FEATURES = [
    "TRANSACTION_TYPE",
    "WALLS_DESCRIPTION",
    "FLOOR_DESCRIPTION",
    "LIGHTING_DESCRIPTION",
    "ROOF_DESCRIPTION",
    "MAINHEAT_DESCRIPTION",
    "HOTWATER_DESCRIPTION",
    "MAIN_FUEL",
    "MECHANICAL_VENTILATION",
    "SECONDHEAT_DESCRIPTION",
    "ENERGY_TARIFF",  # Not sure if this is relevant
    "SOLAR_WATER_HEATING_FLAG",
    "PHOTO_SUPPLY",
    "WINDOWS_DESCRIPTION",
    "GLAZED_TYPE",
    "MULTI_GLAZE_PROPORTION",
    "LOW_ENERGY_LIGHTING",
    "NUMBER_OPEN_FIREPLACES",
    "MAINHEATCONT_DESCRIPTION",
    "EXTENSION_COUNT",
    # 'GLAZED_AREA',  # May not need this since we have MULTI_GLAZE_PROPORTION
]

# For these fields, we take an average if we have multiple values
AVERAGE_FIXED_FEATURES = ["TOTAL_FLOOR_AREA", "FLOOR_HEIGHT"]

# For these fields, we take the latest value if we have multiple values.
# Since more recent EPCs have been conducted with more rigour, we assume that the latest value is
# the most accurate
LATEST_FIELD = [
    "NUMBER_HABITABLE_ROOMS",
    "NUMBER_HEATED_ROOMS",
    "FIXED_LIGHTING_OUTLETS_COUNT",
    "FLOOR_LEVEL",
    "CONSTRUCTION_AGE_BAND",  # This is a field we probably want to use verisk data for
]

# If we see these features changing, we don't use the EPC, since we deem it not to be reliable
MANDATORY_FIXED_FEATURES = ["PROPERTY_TYPE", "BUILT_FORM", "CONSTITUENCY"]

# For particularly old EPC data, we have inconsistent records, so we only include EPCs from the
# cut-off date below. SAP09 was introduced in 2009 and later SAP12 was introduced in England
# and Wales from 31 July 2014, so the cut-off sits just after the SAP12 introduction
EARLIEST_EPC_DATE = "2014-08-01"

RDSAP_RESPONSE = "CURRENT_ENERGY_EFFICIENCY"
HEAT_DEMAND_RESPONSE = "ENERGY_CONSUMPTION_CURRENT"


def ordinal(n: int) -> str:
    """Return ``n`` followed by its English ordinal suffix, e.g. ``1 -> "1st"``.

    Values whose last two digits fall in 10..20 always take "th", which covers
    the irregular teens (11th, 12th, 13th). Otherwise the last digit selects
    "st", "nd" or "rd", defaulting to "th".

    Python's ``%`` yields a non-negative result for a positive modulus, so a
    negative input such as ``-1`` produces ``"-1th"``.
    """
    if 10 <= n % 100 <= 20:
        suffix = "th"
    else:
        suffix = {1: "st", 2: "nd", 3: "rd"}.get(n % 10, "th")
    return str(n) + suffix


# Maps the different spellings of a floor level onto an integer level.
# Later entries win if a key is repeated; overlapping keys here map to the same value.
FLOOR_LEVEL_MAP = {
    "Basement": -1,
    "Ground": 0,
    "ground floor": 0,
    "20+": 20,
    "21st or above": 21,
    # Zero-padded strings: "00", "01", ..., "20"
    **{str(i).zfill(2): i for i in range(0, 21)},
    # Ordinal strings: "-1th", "0th", "1st", ..., "20th"
    **{ordinal(i): i for i in range(-1, 21)},
    # Plain numeric strings: "-1", "0", ..., "20"
    **{str(i): i for i in range(-1, 21)},
    # Values that are already integers: -1, 0, ..., 20
    **{i: i for i in range(-1, 21)},
}

BUILT_FORM_REMAP = {
    "Enclosed End-Terrace": "End-Terrace",
    "Enclosed Mid-Terrace": "Mid-Terrace",
}

DATA_PROCESSOR_SETTINGS = {
    "low_memory": False,
    "epc_minimum_count": 1,
    "column_mappings": {"UPRN": [int, str]},
}