Refactor HyperOpt to use RayTune (#1994)
jimthompson5802 authored May 23, 2022
1 parent 37db773 commit cba6d5c
Showing 24 changed files with 682 additions and 779 deletions.
4 changes: 2 additions & 2 deletions ludwig/automl/auto_tune_config.py
@@ -269,7 +269,7 @@ def memory_tune_config(config, dataset, model_category, row_count):
modified_config = copy.deepcopy(config)

modified_config[HYPEROPT]["parameters"] = modified_hyperparam_search_space
-modified_config[HYPEROPT]["sampler"]["num_samples"] = _update_num_samples(
-    modified_config[HYPEROPT]["sampler"]["num_samples"], modified_hyperparam_search_space
+modified_config[HYPEROPT]["executor"]["num_samples"] = _update_num_samples(
+    modified_config[HYPEROPT]["executor"]["num_samples"], modified_hyperparam_search_space
)
return modified_config, fits_in_memory
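
With the sampler section gone, num_samples is read from and written back to the hyperopt executor section. The helper below is a rough stand-in for _update_num_samples (its real logic is not shown in this diff) and the parameter spec format is illustrative; it only sketches the assumption that the trial budget gets capped when the remaining search space is purely categorical:

    import copy

    HYPEROPT, EXECUTOR = "hyperopt", "executor"  # mirror ludwig.constants

    def _update_num_samples_sketch(num_samples, search_space):
        # Assumed behavior: with only categorical parameters left, there is no point
        # in running more trials than there are distinct configurations.
        grid_size = 1
        for param in search_space.values():
            if param.get("space") != "choice":
                return num_samples  # a continuous dimension: keep the requested budget
            grid_size *= len(param["categories"])
        return min(num_samples, grid_size)

    config = {
        HYPEROPT: {
            "parameters": {"trainer.batch_size": {"space": "choice", "categories": [64, 128]}},
            EXECUTOR: {"type": "ray", "num_samples": 10},  # num_samples now lives under the executor
        }
    }
    modified_config = copy.deepcopy(config)
    modified_config[HYPEROPT][EXECUTOR]["num_samples"] = _update_num_samples_sketch(
        modified_config[HYPEROPT][EXECUTOR]["num_samples"], modified_config[HYPEROPT]["parameters"]
    )
    print(modified_config[HYPEROPT][EXECUTOR]["num_samples"])  # -> 2
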
4 changes: 2 additions & 2 deletions ludwig/automl/automl.py
@@ -204,7 +204,7 @@ def train_with_config(
# TODO (ASN): Decide how we want to proceed if at least one trial has
# completed
for trial in hyperopt_results.ordered_trials:
-if np.isnan(trial.metric_score):
+if isinstance(trial.metric_score, str) or np.isnan(trial.metric_score):
warnings.warn(
"There was an error running the experiment. "
"A trial failed to start. "
@@ -250,7 +250,7 @@ def _model_select(
model_category = TEXT
input_feature["encoder"] = AUTOML_DEFAULT_TEXT_ENCODER
base_config = merge_dict(base_config, default_configs[TEXT][AUTOML_DEFAULT_TEXT_ENCODER])
-base_config[HYPEROPT]["sampler"]["num_samples"] = 5 # set for small hyperparameter search space
+base_config[HYPEROPT]["executor"]["num_samples"] = 5 # set for small hyperparameter search space

# TODO (ASN): add image heuristics
if input_feature["type"] == IMAGE:
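
The widened condition in train_with_config matters because np.isnan raises a TypeError when handed a string, and the assumption here is that a failed Ray Tune trial can surface its error as a non-numeric metric score. A minimal standalone sketch of the same short-circuit pattern (not Ludwig's actual trial object):

    import warnings

    import numpy as np

    def trial_failed(metric_score):
        # The isinstance check must come first: np.isnan("some error") raises TypeError,
        # so `or` short-circuits before np.isnan ever sees a string.
        return isinstance(metric_score, str) or np.isnan(metric_score)

    for score in [0.91, float("nan"), "RayTaskError"]:
        if trial_failed(score):
            warnings.warn(f"Trial produced no usable metric score: {score!r}")
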
30 changes: 22 additions & 8 deletions ludwig/automl/base_config.py
@@ -21,7 +21,21 @@

from ludwig.automl.data_source import DataframeSource, DataSource
from ludwig.automl.utils import _ray_init, FieldConfig, FieldInfo, FieldMetadata, get_available_resources
-from ludwig.constants import AUDIO, BINARY, CATEGORY, DATE, IMAGE, NUMBER, TEXT
+from ludwig.constants import (
+    AUDIO,
+    BINARY,
+    CATEGORY,
+    COMBINER,
+    DATE,
+    EXECUTOR,
+    HYPEROPT,
+    IMAGE,
+    NUMBER,
+    SCHEDULER,
+    SEARCH_ALG,
+    TEXT,
+    TYPE,
+)
from ludwig.utils import strings_utils
from ludwig.utils.data_utils import load_dataset, load_yaml
from ludwig.utils.defaults import default_random_seed
@@ -121,18 +135,18 @@ def _create_default_config(
dataset_info.fields, dataset_info.row_count, resources, target_name
)
# create set of all feature types appearing in the dataset
feature_types = [[feat["type"] for feat in features] for features in input_and_output_feature_config.values()]
feature_types = [[feat[TYPE] for feat in features] for features in input_and_output_feature_config.values()]
feature_types = set(sum(feature_types, []))

model_configs = {}

# read in base config and update with experiment resources
base_automl_config = load_yaml(BASE_AUTOML_CONFIG)
base_automl_config["hyperopt"]["executor"].update(experiment_resources)
base_automl_config["hyperopt"]["executor"]["time_budget_s"] = time_limit_s
base_automl_config[HYPEROPT][EXECUTOR].update(experiment_resources)
base_automl_config[HYPEROPT][EXECUTOR]["time_budget_s"] = time_limit_s
if time_limit_s is not None:
base_automl_config["hyperopt"]["sampler"]["scheduler"]["max_t"] = time_limit_s
base_automl_config["hyperopt"]["sampler"]["search_alg"]["random_state_seed"] = random_seed
base_automl_config[HYPEROPT][EXECUTOR][SCHEDULER]["max_t"] = time_limit_s
base_automl_config[HYPEROPT][SEARCH_ALG]["random_state_seed"] = random_seed
base_automl_config.update(input_and_output_feature_config)

model_configs["base_config"] = base_automl_config
@@ -146,10 +160,10 @@ def _create_default_config(
model_configs[feat_type][encoder_name] = load_yaml(encoder_config_path)

# read in all combiner configs
model_configs["combiner"] = {}
model_configs[COMBINER] = {}
for combiner_type, default_config in combiner_defaults.items():
combiner_config = load_yaml(default_config)
model_configs["combiner"][combiner_type] = combiner_config
model_configs[COMBINER][combiner_type] = combiner_config

return model_configs

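
Taken together, these edits assume the hyperopt section now exposes search_alg at the top level while scheduler and num_samples hang off the executor. A rough sketch of what _create_default_config ends up doing to the base config (constant values mirror ludwig.constants; the resource keys and numbers are illustrative only):

    HYPEROPT, EXECUTOR, SCHEDULER, SEARCH_ALG = "hyperopt", "executor", "scheduler", "search_alg"

    # Illustrative stand-in for the dict loaded from base_automl_config.yaml.
    base_automl_config = {
        HYPEROPT: {
            SEARCH_ALG: {"type": "hyperopt"},
            EXECUTOR: {
                "type": "ray",
                "num_samples": 10,
                "time_budget_s": 7200,
                SCHEDULER: {"type": "async_hyperband", "max_t": 7200},
            },
        }
    }

    time_limit_s, random_seed = 3600, 42
    experiment_resources = {"cpu_resources_per_trial": 4}  # illustrative key name

    base_automl_config[HYPEROPT][EXECUTOR].update(experiment_resources)
    base_automl_config[HYPEROPT][EXECUTOR]["time_budget_s"] = time_limit_s
    base_automl_config[HYPEROPT][EXECUTOR][SCHEDULER]["max_t"] = time_limit_s  # applied when a time limit is set
    base_automl_config[HYPEROPT][SEARCH_ALG]["random_state_seed"] = random_seed
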
15 changes: 6 additions & 9 deletions ludwig/automl/defaults/base_automl_config.yaml
@@ -4,20 +4,17 @@ trainer:
   # validation_metric: accuracy
 
 hyperopt:
-  sampler:
+  search_alg:
+    # Gives results like default + supports random_state_seed for sample sequence repeatability
+    type: hyperopt
+  executor:
+    type: ray
-    search_alg:
-      # Gives results like default + supports random_state_seed for sample sequence repeatability
-      type: hyperopt
+    num_samples: 10
+    time_budget_s: 7200
     scheduler:
       type: async_hyperband
       time_attr: time_total_s
       max_t: 7200
       grace_period: 72
       # Increased over default to get more pruning/exploration
       reduction_factor: 5
-    num_samples: 10
-
-  executor:
-    type: ray
-    time_budget_s: 7200
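
The same reshuffling applies to any user config that still carries a sampler section: search_alg moves to the top of hyperopt, while scheduler and num_samples move under the executor. A hand-rolled migration sketch (not a utility shipped in this commit):

    def migrate_hyperopt_section(hyperopt_config):
        """Rewrite a pre-refactor hyperopt dict into the search_alg/executor layout."""
        sampler = hyperopt_config.pop("sampler", None)
        if sampler is None:
            return hyperopt_config  # already in the new layout
        executor = hyperopt_config.setdefault("executor", {})
        if "search_alg" in sampler:
            hyperopt_config["search_alg"] = sampler["search_alg"]  # now a top-level key
        for key in ("scheduler", "num_samples"):
            if key in sampler:
                executor[key] = sampler[key]  # now nested under the executor
        return hyperopt_config

    old_layout = {
        "sampler": {
            "search_alg": {"type": "hyperopt"},
            "scheduler": {"type": "async_hyperband", "max_t": 7200},
            "num_samples": 10,
        },
        "executor": {"type": "ray", "time_budget_s": 7200},
    }
    print(migrate_hyperopt_section(old_layout))
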
4 changes: 2 additions & 2 deletions ludwig/automl/utils.py
@@ -8,7 +8,7 @@
from numpy import nan_to_num
from pandas import Series

-from ludwig.constants import COMBINER, CONFIG, HYPEROPT, NAME, NUMBER, PARAMETERS, SAMPLER, TRAINER, TYPE
+from ludwig.constants import COMBINER, CONFIG, HYPEROPT, NAME, NUMBER, PARAMETERS, SEARCH_ALG, TRAINER, TYPE
from ludwig.features.feature_registries import output_type_registry
from ludwig.modules.metric_registry import metric_registry
from ludwig.utils.defaults import default_combiner_type
@@ -127,7 +127,7 @@ def _add_transfer_config(base_config: Dict, ref_configs: Dict) -> Dict:
point_to_evaluate = {}
_add_option_to_evaluate(point_to_evaluate, min_dataset_config, hyperopt_params, COMBINER)
_add_option_to_evaluate(point_to_evaluate, min_dataset_config, hyperopt_params, TRAINER)
-base_config[HYPEROPT][SAMPLER]["search_alg"]["points_to_evaluate"] = [point_to_evaluate]
+base_config[HYPEROPT][SEARCH_ALG]["points_to_evaluate"] = [point_to_evaluate]
return base_config


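
points_to_evaluate is passed through to the Ray Tune search algorithm (a list of configurations to try before sampling new ones), which is why the transfer-learning hook now writes into search_alg instead of the removed sampler block. A minimal sketch with illustrative hyperparameter names:

    HYPEROPT, SEARCH_ALG = "hyperopt", "search_alg"  # mirror ludwig.constants

    base_config = {HYPEROPT: {SEARCH_ALG: {"type": "hyperopt", "random_state_seed": 42}}}

    # Seed the search with the best configuration found on a similar reference dataset.
    point_to_evaluate = {
        "combiner.type": "concat",       # illustrative hyperparameter names
        "trainer.learning_rate": 0.001,
    }
    base_config[HYPEROPT][SEARCH_ALG]["points_to_evaluate"] = [point_to_evaluate]
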
2 changes: 2 additions & 0 deletions ludwig/constants.py
@@ -123,6 +123,8 @@
MINIMIZE = "minimize"
MAXIMIZE = "maximize"
SAMPLER = "sampler"
+SEARCH_ALG = "search_alg"
+SCHEDULER = "scheduler"
PARAMETERS = "parameters"

NAME = "name"