# Using a simple Python file as settings for now
# TODO: migrate to dynaconf
from pathlib import Path

# Can move to a hyperparameters file.
# If anything we might want to have a file that can be loaded and sent to this script.
MODEL_HYPERPARAMETERS = {
    "autogluon": {
        "problem_type": "regression",
        "eval_metric": "mean_absolute_error",
        "time_limit": 30,
        "presets": "medium_quality",
        "excluded_model_types": None,
    }
}

RANDOM_SEED = 0
SUBSAMPLE_FACTOR = 200

# File and folder names used for datasets, the model registry and model artefacts.
TRAIN_AND_VALIDATION_DATA_NAME = "train_validation_data.parquet"
TEST_DATA_NAME = "test_data.parquet"
REGISTRY_FILE = "model_registry.csv"
MODEL_DIRECTORY = "model_directory"
# Resolved relative to this file: <parent of this file's directory>/model_directory
BASE_REGISTRY_PATH = Path(__file__).parent.parent / MODEL_DIRECTORY
PREDICTION_LOCATION = Path("predictions")
PREDICTION_FILE = "prediction.json"
METADATA_FILE = "metadata.json"
MODEL_FOLDER = "model"
METRICS_FOLDER = "metrics"
DEPLOYMENT_FOLDER = "deployment"

TOTAL_FLOOR_AREA_NATIONAL_AVERAGE = 70
FLOOR_HEIGHT_NATIONAL_AVERAGE = 2.45

COLUMNS_TO_MERGE_ON = [
    "PROPERTY_TYPE",
    "BUILT_FORM",
    "CONSTRUCTION_AGE_BAND",
    "NUMBER_HABITABLE_ROOMS",
    "NUMBER_HEATED_ROOMS",
]

FULLY_GLAZED_DESCRIPTIONS = [
    "Fully double glazed",
    "High performance glazing",
    "Fully triple glazed",
    "Full secondary glazing",
    "Multiple glazing throughout",
]

FIXED_FEATURES = [
    "PROPERTY_TYPE",
    "BUILT_FORM",
    "CONSTRUCTION_AGE_BAND",
    "NUMBER_HABITABLE_ROOMS",
    "CONSTITUENCY",
    "NUMBER_HEATED_ROOMS",
    "FIXED_LIGHTING_OUTLETS_COUNT",
    "FLOOR_HEIGHT",
    "FLOOR_LEVEL",
    "TOTAL_FLOOR_AREA",
]

# Each column name is listed exactly once. NOTE(review): a repeated name would
# presumably produce a repeated column wherever this list is used for column
# selection -- confirm against the callers.
COMPONENT_FEATURES = [
    "TRANSACTION_TYPE",
    "WALLS_DESCRIPTION",
    "FLOOR_DESCRIPTION",
    "LIGHTING_DESCRIPTION",
    "ROOF_DESCRIPTION",
    "MAINHEAT_DESCRIPTION",
    "HOTWATER_DESCRIPTION",
    "MAIN_FUEL",
    "MECHANICAL_VENTILATION",
    "SECONDHEAT_DESCRIPTION",
    "ENERGY_TARIFF",  # Not sure if this is relevant
    "SOLAR_WATER_HEATING_FLAG",
    "PHOTO_SUPPLY",
    "WINDOWS_DESCRIPTION",
    "GLAZED_TYPE",
    "MULTI_GLAZE_PROPORTION",
    "LOW_ENERGY_LIGHTING",
    "NUMBER_OPEN_FIREPLACES",
    "MAINHEATCONT_DESCRIPTION",
    "EXTENSION_COUNT",
    # "GLAZED_AREA",  # May not need this since we have MULTI_GLAZE_PROPORTION
]

# For these fields, we take an average if we have multiple values.
AVERAGE_FIXED_FEATURES = [
    "TOTAL_FLOOR_AREA",
    "FLOOR_HEIGHT",
]

# For these fields, we take the latest value if we have multiple values.
# Since more recent EPCs have been conducted with more rigour, we assume that the
# latest value is the most accurate.
LATEST_FIELD = [
    "NUMBER_HABITABLE_ROOMS",
    "NUMBER_HEATED_ROOMS",
    "FIXED_LIGHTING_OUTLETS_COUNT",
    "FLOOR_LEVEL",
    "CONSTRUCTION_AGE_BAND",  # This is a field we probably want to use Verisk data for
]

# If we see these features changing, we don't use the EPC, since we deem it not to
# be reliable.
MANDATORY_FIXED_FEATURES = [
    "PROPERTY_TYPE",
    "BUILT_FORM",
    "CONSTITUENCY",
]

# For particularly old EPC data we have inconsistent records, so older EPCs are
# excluded. SAP09 was introduced in 2009 and SAP12 was later introduced in England
# and Wales from 31 July 2014; the cutoff below is the day after the SAP12
# introduction (an earlier version of this note referred to a 2010 cutoff).
EARLIEST_EPC_DATE = "2014-08-01"

RDSAP_RESPONSE = "CURRENT_ENERGY_EFFICIENCY"
HEAT_DEMAND_RESPONSE = "ENERGY_CONSUMPTION_CURRENT"


def ordinal(n: int) -> str:
    """Return ``n`` followed by its English ordinal suffix, e.g. ``1 -> "1st"``.

    Numbers whose last two digits fall in 10..20 always take "th" (11th, 12th,
    13th); otherwise the last digit selects "st", "nd", "rd" or "th".

    The suffix is chosen with Python's modulo operator, which is non-negative
    for negative ``n``; ``ordinal(-1)`` is therefore ``"-1th"``.
    """
    if 10 <= n % 100 <= 20:
        suffix = "th"
    else:
        suffix = {1: "st", 2: "nd", 3: "rd"}.get(n % 10, "th")
    return str(n) + suffix


# Maps the raw floor-level spellings listed below to an integer floor number.
# Accepted spellings: named levels, zero-padded strings ("00".."20"), ordinals
# ("-1th".."20th"), plain numeric strings ("-1".."20") and the integers -1..20.
FLOOR_LEVEL_MAP = {
    "Basement": -1,
    "Ground": 0,
    "ground floor": 0,
    "20+": 20,
    "21st or above": 21,
    **{str(i).zfill(2): i for i in range(0, 21)},
    **{ordinal(i): i for i in range(-1, 21)},
    **{str(i): i for i in range(-1, 21)},
    **{i: i for i in range(-1, 21)},
}

# Collapses the "Enclosed" terrace variants onto their plain equivalents.
BUILT_FORM_REMAP = {
    "Enclosed End-Terrace": "End-Terrace",
    "Enclosed Mid-Terrace": "Mid-Terrace",
}

DATA_PROCESSOR_SETTINGS = {
    "low_memory": False,
    "epc_minimum_count": 1,
    "column_mappings": {"UPRN": [int, str]},
}