Merge pull request #663 from Epistimio/release-v0.1.17rc1
Release candidate v0.1.17rc1
bouthilx authored Sep 14, 2021
2 parents 6bc3b79 + bd33c32 commit 0ef3eea
Showing 51 changed files with 900 additions and 415 deletions.
4 changes: 2 additions & 2 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ If you use Oríon for published work, please cite our work using the following b
Dendi Suhubdy and
Reyhane Askari and
Michael Noukhovitch and
Chao Xua and
Chao Xue and
Satya Ortiz-Gagné and
Olivier Breuleux and
Arnaud Bergeron and
Expand All @@ -145,7 +145,7 @@ If you use Oríon for published work, please cite our work using the following b
month = may,
year = 2021,
publisher = {Zenodo},
version = {v0.1.15},
version = {v0.1.17},
doi = {10.5281/zenodo.3478592},
url = {https://doi.org/10.5281/zenodo.3478592}
}
6 changes: 1 addition & 5 deletions ROADMAP.md
Original file line number Diff line number Diff line change
@@ -1,12 +1,8 @@
# Roadmap
Last update May 19th, 2021
Last update Sep 14th, 2021

## Next releases - Short-Term

### v0.1.16

#### Quick release for bug fixes

### v0.2

#### Generic `Optimizer` interface supporting various types of algorithms
7 changes: 4 additions & 3 deletions docs/src/install/gettingstarted.rst
Original file line number Diff line number Diff line change
Expand Up @@ -62,9 +62,10 @@ the values for the ``lr`` hyper-parameter in a log uniform distribution between
trial will be stored in the database that you configured during the installation process (which can
be in-memory, a file, or a local or remote MongoDB instance).

Additionally, the experiments are versioned -- think of it as a git for scientific experimentation
-- enabling you to keep track of all your trials with their parameters. This guarantees that you can
reproduce or trace back the steps in your work for free.
Additionally, the experiments can be versioned -- think of it as a git for scientific
experimentation -- enabling you to keep track of all your trials with their parameters. This
guarantees that you can reproduce or trace back the steps in your work for free. See the
configuration options for the :ref:`config_evc` to enable the versioning of experiments.

You can fine-tune the distribution and algorithm with many options either with more arguments or by
using a configuration file. Learn more at :doc:`/user/api`.
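For instance, a minimal configuration file could be sketched as follows (values are hypothetical, reusing only options documented elsewhere in this repository; see :doc:`/user/api` for the full reference):

```yaml
# Hypothetical sketch of an Oríon configuration file.
experiment:
  max_trials: 100   # stop the experiment after 100 completed trials
worker:
  n_workers: 2      # spawn two workers per `orion hunt` call
```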
73 changes: 0 additions & 73 deletions docs/src/tutorials/pytorch-mnist.rst
Original file line number Diff line number Diff line change
Expand Up @@ -162,76 +162,3 @@ don't use ``--debug`` you will likely quickly fill your database with broken exp
.. code-block:: bash
$ orion --debug hunt -n orion-tutorial python main.py --lr~'loguniform(1e-5, 1.0)'
Hunting Options
---------------

.. code-block:: console
$ orion hunt --help
Oríon arguments (optional):
These arguments determine orion's behaviour
-n stringID, --name stringID
experiment's unique name; (default: None - specified
either here or in a config)
  -u USER, --user USER  user associated to experiment's unique name; (default:
                        $USER - can be overridden either here or in a config)
-c path-to-config, --config path-to-config
user provided orion configuration file
--max-trials # number of trials to be completed for the experiment.
This value will be saved within the experiment
configuration and reused across all workers to
determine experiment's completion. (default: inf/until
preempted)
--worker-trials # number of trials to be completed for this worker. If
the experiment is completed, the worker will die even
if it did not reach its maximum number of trials
(default: inf/until preempted)
--working-dir WORKING_DIR
Set working directory for running experiment.
  --pool-size #         number of simultaneous trials the algorithm should
                        suggest. This is useful if many workers are executed
                        in parallel and the algorithm has a strategy to sample
                        non-independent trials simultaneously. Otherwise, it
                        is better to leave `pool_size` at 1 and set a Strategy
                        for Oríon's producer. Note that this option is not useful unless you
                        know the algorithm has a strategy to produce multiple trials
                        simultaneously. If you have any doubt, leave it at 1.
                        (default: 1)
``name``

The unique name of the experiment.

``user``

Username used to identify the experiments of a user. The default value is the system's username
$USER.

``config``

Configuration file for Oríon which may define the database, the algorithm and all options of the
command hunt, including ``name``, ``pool-size`` and ``max-trials``.

``max-trials``

The maximum number of trials tried during an experiment.

``worker-trials``

The maximum number of trials to be executed by a worker (a single call to ``orion hunt [...]``).

``working-dir``

The directory where configuration files are created. If not specified, Oríon will create a
temporary directory that is removed at the end of the trial's execution.

``pool-size``

The number of trials generated by the algorithm each time it is queried. This is
useful if many workers are executed in parallel and the algorithm has a strategy to sample
non-independent trials simultaneously. Otherwise, it is better to leave ``pool_size`` at its default
value of 1. Note that this option is not useful unless you know the algorithm has a strategy
to produce multiple trials simultaneously. If you have any doubt, leave it at 1. :)
54 changes: 36 additions & 18 deletions docs/src/user/config.rst
Original file line number Diff line number Diff line change
Expand Up @@ -97,14 +97,14 @@ Full Example of Global Configuration
seed: None
max_broken: 3
max_trials: 1000000000
pool_size: 1
strategy:
MaxParallelStrategy
worker_trials: 1000000000
working_dir:
worker:
n_workers: 1
pool_size: 0
executor: joblib
executor_configuration: {}
heartbeat: 120
Expand Down Expand Up @@ -211,7 +211,6 @@ Experiment
seed: None
max_broken: 3
max_trials: 1000000000
pool_size: 1
strategy:
MaxParallelStrategy
worker_trials: 1000000000
Expand Down Expand Up @@ -322,22 +321,6 @@ working_dir



.. _config_experiment_pool_size:

pool_size
~~~~~~~~~

.. warning::

**DEPRECATED.** This argument will be removed in v0.3.

:Type: int
:Default: 1
:Env var:
:Description:
(DEPRECATED) This argument will be removed in v0.3.


.. _config_experiment_algorithms:

algorithms
Expand Down Expand Up @@ -376,6 +359,7 @@ Worker
worker:
n_workers: 1
pool_size: 0
executor: joblib
executor_configuration: {}
heartbeat: 120
Expand All @@ -400,6 +384,20 @@ n_workers
It is possible to run many `orion hunt` in parallel, and each will spawn
``n_workers``.

.. _config_worker_pool_size:

pool_size
~~~~~~~~~

:Type: int
:Default: 0
:Env var:
:Description:
Number of trials to sample at a time. If 0, defaults to the number of workers.
Increase it to improve sampling speed if workers spend too much time
waiting for the algorithm to sample points. An algorithm will try to sample `pool_size`
trials but may return fewer.

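As a sketch, a global configuration raising the sampling batch could look like this (values are hypothetical):

```yaml
# Hypothetical sketch: sample 8 trials per call instead of one per worker.
worker:
  n_workers: 4
  pool_size: 8
```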

.. _config_worker_executor:

Expand Down Expand Up @@ -520,6 +518,7 @@ Experiment Version Control
.. code-block:: yaml
evc:
enable: False
algorithm_change: False
auto_resolution: True
cli_change_type: break
Expand All @@ -531,6 +530,25 @@ Experiment Version Control
non_monitored_arguments: []
.. _config_evc_enable:

enable
~~~~~~~~~~~~~~~

.. note::

New in version v0.1.16. Previously the EVC was always enabled. It is now disabled by default
and can be enabled using this option.

:Type: bool
:Default: False
:Env var: ORION_EVC_ENABLE
:Description:
Enable the Experiment Version Control. Defaults to False. When disabled, running
an experiment that differs from an earlier one while sharing the same name will
overwrite the previous one in the database. Trials of the previous experiment
will still point to the experiment but may be inconsistent with the new search space.

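A minimal sketch of turning the EVC on in a configuration file (or equivalently via the ``ORION_EVC_ENABLE`` environment variable):

```yaml
# Hypothetical sketch: enable the Experiment Version Control.
evc:
  enable: True
```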

.. _config_evc_auto_resolution:

3 changes: 2 additions & 1 deletion docs/src/user/evc.rst
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@ could pre-train on all prior data resulting in a much more efficient optimizatio
advantage of the EVC system is that it provides a systematic way to organize research and the
possibility to go back in time and compare the evolution of performance throughout your research.

Experiments inside the EVC are organized by version. By default, every time an experiment has
Experiments inside the EVC are organized by version. When enabled (see :ref:`config_evc_enable`),
every time an experiment has changed but has not been explicitly renamed, its version number will
automatically increment and this new version will appear as a new branch for that experiment.

2 changes: 2 additions & 0 deletions src/orion/algo/space.py
Original file line number Diff line number Diff line change
Expand Up @@ -787,6 +787,8 @@ def get_prior_string(self):

args = [prior]

if self._shape is not None:
args += ["shape={}".format(self._shape)]
if self.default_value is not self.NO_DEFAULT_VALUE:
args += ["default_value={}".format(repr(self.default_value))]

71 changes: 56 additions & 15 deletions src/orion/algo/tpe.py
Original file line number Diff line number Diff line change
Expand Up @@ -412,7 +412,7 @@ def sample_one_dimension(

def _sample_real_dimension(self, dimension, shape_size, below_points, above_points):
"""Sample values for real dimension"""
if dimension.prior_name in ["uniform", "reciprocal"]:
if any(map(dimension.prior_name.endswith, ["uniform", "reciprocal"])):
return self.sample_one_dimension(
dimension,
shape_size,
Expand All @@ -421,7 +421,9 @@ def _sample_real_dimension(self, dimension, shape_size, below_points, above_poin
self._sample_real_point,
)
else:
raise NotImplementedError()
raise NotImplementedError(
f"Prior {dimension.prior_name} is not supported for real values"
)

def _sample_loguniform_real_point(self, dimension, below_points, above_points):
"""Sample one value for real dimension in a loguniform way"""
Expand Down Expand Up @@ -555,10 +557,31 @@ class GMMSampler:
weights: list
Weights for each Gaussian components in the GMM
Default: ``None``
base_attempts: int, optional
Base number of attempts to sample points within the `low` and `high` bounds.
Defaults to 10.
attempts_factor: int, optional
If sampling always falls out of bounds, try again with `attempts * attempts_factor`.
Defaults to 10.
max_attempts: int, optional
If sampling always falls out of bounds, try again with `attempts * attempts_factor`,
up to `max_attempts` (inclusive).
Defaults to 10000.
"""

def __init__(self, tpe, mus, sigmas, low, high, weights=None):
def __init__(
self,
tpe,
mus,
sigmas,
low,
high,
weights=None,
base_attempts=10,
attempts_factor=10,
max_attempts=10000,
):
self.tpe = tpe

self.mus = mus
Expand All @@ -567,6 +590,10 @@ def __init__(self, tpe, mus, sigmas, low, high, weights=None):
self.high = high
self.weights = weights if weights is not None else len(mus) * [1.0 / len(mus)]

self.base_attempts = base_attempts
self.attempts_factor = attempts_factor
self.max_attempts = max_attempts

self.pdfs = []
self._build_mixture()

Expand All @@ -575,24 +602,38 @@ def _build_mixture(self):
for mu, sigma in zip(self.mus, self.sigmas):
self.pdfs.append(norm(mu, sigma))

def sample(self, num=1, attempts=10):
def sample(self, num=1, attempts=None):
"""Sample required number of points"""
if attempts is None:
attempts = self.base_attempts

point = []
for _ in range(num):
pdf = numpy.argmax(self.tpe.rng.multinomial(1, self.weights))
new_points = list(
self.pdfs[pdf].rvs(size=attempts, random_state=self.tpe.rng)
)
while True:
if not new_points:
raise RuntimeError(
f"Failed to sample in interval ({self.low}, {self.high})"
)
pt = new_points.pop(0)
if self.low <= pt <= self.high:
point.append(pt)
attempts_tried = 0
while attempts_tried < attempts:
new_points = self.pdfs[pdf].rvs(
size=attempts, random_state=self.tpe.rng
)
valid_points = (self.low <= new_points) * (self.high >= new_points)

if any(valid_points):
index = numpy.argmax(valid_points)
point.append(float(new_points[index]))
break

index = None
attempts_tried += 1

if index is None and attempts >= self.max_attempts:
raise RuntimeError(
f"Failed to sample in interval ({self.low}, {self.high})"
)
elif index is None:
point.append(
self.sample(num=1, attempts=attempts * self.attempts_factor)[0]
)

return point

def get_loglikelis(self, points):
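The retry logic added to ``GMMSampler.sample`` above follows a common rejection-sampling pattern: draw a batch of candidates, keep the first in-bounds one, and grow the batch size geometrically before giving up. A standalone sketch of that pattern (simplified, with hypothetical names, using a single Gaussian instead of the mixture):

```python
import random


def sample_bounded(mu, sigma, low, high, attempts=10, factor=10, max_attempts=10000):
    """Rejection-sample one point from N(mu, sigma) restricted to [low, high].

    Draw `attempts` candidates; if none lands in bounds, retry with
    `attempts * factor`, giving up once `attempts` reaches `max_attempts`.
    """
    rng = random.Random(42)
    while True:
        for _ in range(attempts):
            point = rng.gauss(mu, sigma)
            if low <= point <= high:
                return point
        if attempts >= max_attempts:
            raise RuntimeError(f"Failed to sample in interval ({low}, {high})")
        attempts *= factor


print(sample_bounded(0.0, 1.0, -0.5, 0.5))
```

Growing the batch geometrically keeps the common case cheap while still terminating quickly when the bounds sit far in the distribution's tail.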