| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383 |
- """BOHB (Bayesian Optimization with HyperBand)"""
- import copy
- import logging
- import math
- from typing import Dict, List, Optional, Union
- # use cloudpickle instead of pickle to make BOHB obj
- # pickleable
- from ray import cloudpickle
- from ray.tune.result import DEFAULT_METRIC
- from ray.tune.search import (
- UNDEFINED_METRIC_MODE,
- UNDEFINED_SEARCH_SPACE,
- UNRESOLVED_SEARCH_SPACE,
- Searcher,
- )
- from ray.tune.search.sample import (
- Categorical,
- Domain,
- Float,
- Integer,
- LogUniform,
- Normal,
- Quantized,
- Uniform,
- )
- from ray.tune.search.variant_generator import parse_spec_vars
- from ray.tune.utils.util import flatten_dict, unflatten_list_dict
- try:
- import ConfigSpace
- from hpbandster.optimizers.config_generators.bohb import BOHB
- except ImportError:
- BOHB = ConfigSpace = None
- logger = logging.getLogger(__name__)
- class _BOHBJobWrapper:
- """Mock object for HpBandSter to process."""
- def __init__(self, loss: float, budget: float, config: Dict):
- self.result = {"loss": loss}
- self.kwargs = {"budget": budget, "config": config.copy()}
- self.exception = None
class TuneBOHB(Searcher):
    """BOHB suggestion component.

    Requires HpBandSter and ConfigSpace to be installed. You can install
    HpBandSter and ConfigSpace with: ``pip install hpbandster ConfigSpace``.

    This should be used in conjunction with HyperBandForBOHB.

    Args:
        space: Continuous ConfigSpace search space.
            Parameters will be sampled from this space which will be used
            to run trials.
        bohb_config: configuration for HpBandSter BOHB algorithm
        metric: The training result objective value attribute. If None
            but a mode was passed, the anonymous metric `_metric` will be used
            per default.
        mode: One of {min, max}. Determines whether objective is
            minimizing or maximizing the metric attribute.
        points_to_evaluate: Initial parameter suggestions to be run
            first. This is for when you already have some good parameters
            you want to run first to help the algorithm make better suggestions
            for future parameters. Needs to be a list of dicts containing the
            configurations. The list is deep-copied on construction, so the
            caller's list is not modified when suggestions are consumed.
        seed: Optional random seed to initialize the random number
            generator. Setting this should lead to identical initial
            configurations at each run.
        max_concurrent: Number of maximum concurrent trials.
            If this Searcher is used in a ``ConcurrencyLimiter``, the
            ``max_concurrent`` value passed to it will override the
            value passed here. Set to <= 0 for no limit on concurrency.

    Tune automatically converts search spaces to TuneBOHB's format:

    .. code-block:: python

        config = {
            "width": tune.uniform(0, 20),
            "height": tune.uniform(-100, 100),
            "activation": tune.choice(["relu", "tanh"])
        }

        algo = TuneBOHB(metric="mean_loss", mode="min")
        bohb = HyperBandForBOHB(
            time_attr="training_iteration",
            metric="mean_loss",
            mode="min",
            max_t=100)
        run(my_trainable, config=config, scheduler=bohb, search_alg=algo)

    If you would like to pass the search space manually, the code would
    look like this:

    .. code-block:: python

        import ConfigSpace as CS

        config_space = CS.ConfigurationSpace()
        config_space.add_hyperparameter(
            CS.UniformFloatHyperparameter("width", lower=0, upper=20))
        config_space.add_hyperparameter(
            CS.UniformFloatHyperparameter("height", lower=-100, upper=100))
        config_space.add_hyperparameter(
            CS.CategoricalHyperparameter(
                name="activation", choices=["relu", "tanh"]))

        algo = TuneBOHB(
            config_space, metric="mean_loss", mode="min")
        bohb = HyperBandForBOHB(
            time_attr="training_iteration",
            metric="mean_loss",
            mode="min",
            max_t=100)
        run(my_trainable, scheduler=bohb, search_alg=algo)

    """

    def __init__(
        self,
        space: Optional[Union[Dict, "ConfigSpace.ConfigurationSpace"]] = None,
        bohb_config: Optional[Dict] = None,
        metric: Optional[str] = None,
        mode: Optional[str] = None,
        points_to_evaluate: Optional[List[Dict]] = None,
        seed: Optional[int] = None,
        max_concurrent: int = 0,
    ):
        assert (
            BOHB is not None
        ), """HpBandSter must be installed!
            You can install HpBandSter with the command:
            `pip install hpbandster ConfigSpace`."""
        if mode:
            assert mode in ["min", "max"], "`mode` must be 'min' or 'max'."

        # Maps trial_id -> the (flat) config that was suggested for it.
        self.trial_to_params = {}
        self._metric = metric
        self._bohb_config = bohb_config

        if isinstance(space, dict) and space:
            # A Tune-style dict was passed: warn if it still contains
            # unresolved domains / grid entries, then convert it.
            _, domain_vars, grid_vars = parse_spec_vars(space)
            if domain_vars or grid_vars:
                logger.warning(
                    UNRESOLVED_SEARCH_SPACE.format(par="space", cls=type(self))
                )
                space = self.convert_search_space(space)

        self._space = space
        self._seed = seed

        # Trial ids currently counted against the concurrency limit / paused.
        self.running = set()
        self.paused = set()

        self._max_concurrent = max_concurrent
        # Deep-copy: ``suggest`` pops entries off this list, which must not
        # empty (or otherwise alter) the list object owned by the caller.
        self._points_to_evaluate = copy.deepcopy(points_to_evaluate)

        super().__init__(
            metric=self._metric,
            mode=mode,
        )

        if self._space:
            self._setup_bohb()

    def set_max_concurrency(self, max_concurrent: int) -> bool:
        """Override the concurrency limit used by ``suggest``.

        Always returns ``True``.
        """
        self._max_concurrent = max_concurrent
        return True

    def _setup_bohb(self):
        """Create the HpBandSter BOHB config generator for ``self._space``.

        Also resolves the default metric, sets the sign applied to reported
        metrics, seeds the space if a seed was given, and resets the
        running/paused bookkeeping.
        """
        from hpbandster.optimizers.config_generators.bohb import BOHB

        if self._metric is None and self._mode:
            # If only a mode was passed, use anonymous metric
            self._metric = DEFAULT_METRIC

        # ``to_wrapper`` multiplies the metric by this factor to obtain the
        # "loss" handed to BOHB; maximization is expressed by negating it.
        if self._mode == "max":
            self._metric_op = -1.0
        elif self._mode == "min":
            self._metric_op = 1.0

        if self._seed is not None:
            self._space.seed(self._seed)

        self.running = set()
        self.paused = set()

        bohb_config = self._bohb_config or {}
        self.bohber = BOHB(self._space, **bohb_config)

    def set_search_properties(
        self, metric: Optional[str], mode: Optional[str], config: Dict, **spec
    ) -> bool:
        """Configure space, metric and mode after construction.

        Returns ``False`` without changing anything if a search space is
        already set. Otherwise converts ``config`` into a ConfigSpace search
        space, applies ``metric``/``mode`` when given, (re)builds the BOHB
        generator and returns ``True``.
        """
        if self._space:
            return False

        self._space = self.convert_search_space(config)

        if metric:
            self._metric = metric
        if mode:
            self._mode = mode

        self._setup_bohb()
        return True

    def suggest(self, trial_id: str) -> Optional[Dict]:
        """Return the next configuration to try for ``trial_id``.

        Returns ``None`` when the concurrency limit has been reached.

        Raises:
            RuntimeError: If no search space, or no metric/mode, is set.
        """
        if not self._space:
            raise RuntimeError(
                UNDEFINED_SEARCH_SPACE.format(
                    cls=self.__class__.__name__, space="space"
                )
            )

        if not self._metric or not self._mode:
            raise RuntimeError(
                UNDEFINED_METRIC_MODE.format(
                    cls=self.__class__.__name__, metric=self._metric, mode=self._mode
                )
            )

        # A non-positive limit means "unlimited".
        max_concurrent = (
            self._max_concurrent if self._max_concurrent > 0 else float("inf")
        )
        if len(self.running) >= max_concurrent:
            return None

        if self._points_to_evaluate:
            # User-supplied points are served first, in order.
            config = self._points_to_evaluate.pop(0)
        else:
            # This parameter is not used in hpbandster implementation.
            config, _ = self.bohber.get_config(None)
        self.trial_to_params[trial_id] = copy.deepcopy(config)
        self.running.add(trial_id)
        return unflatten_list_dict(config)

    def on_trial_result(self, trial_id: str, result: Dict):
        """Forward an intermediate result to BOHB.

        The result is only passed on when it carries
        ``result["hyperband_info"]["budget"]``; a warning is logged when
        ``"hyperband_info"`` is missing entirely.
        """
        if trial_id not in self.paused:
            self.running.add(trial_id)
        if "hyperband_info" not in result:
            logger.warning(
                "BOHB Info not detected in result. Are you using "
                "HyperBandForBOHB as a scheduler?"
            )
        elif "budget" in result.get("hyperband_info", {}):
            hbs_wrapper = self.to_wrapper(trial_id, result)
            self.bohber.new_result(hbs_wrapper)

    def on_trial_complete(
        self, trial_id: str, result: Optional[Dict] = None, error: bool = False
    ):
        """Drop all bookkeeping kept for ``trial_id``.

        ``result`` and ``error`` are accepted but not used here.
        """
        # Tolerate trial ids this searcher has no record of (e.g. never
        # suggested here, or completed twice) instead of raising KeyError,
        # consistent with the ``discard`` calls below.
        self.trial_to_params.pop(trial_id, None)
        self.paused.discard(trial_id)
        self.running.discard(trial_id)

    def to_wrapper(self, trial_id: str, result: Dict) -> _BOHBJobWrapper:
        """Wrap a trial result into the job object passed to BOHB.

        The loss is the reported metric multiplied by ``self._metric_op``;
        the budget is read from ``result["hyperband_info"]["budget"]`` and
        the config is the one recorded for ``trial_id`` in ``suggest``.
        """
        return _BOHBJobWrapper(
            self._metric_op * result[self.metric],
            result["hyperband_info"]["budget"],
            self.trial_to_params[trial_id],
        )

    # BOHB Specific.
    # TODO(team-ml): Refactor alongside HyperBandForBOHB
    def on_pause(self, trial_id: str):
        """Mark ``trial_id`` as paused and no longer running."""
        self.paused.add(trial_id)
        self.running.discard(trial_id)

    def on_unpause(self, trial_id: str):
        """Mark ``trial_id`` as running and no longer paused."""
        self.paused.discard(trial_id)
        self.running.add(trial_id)

    @staticmethod
    def convert_search_space(spec: Dict) -> "ConfigSpace.ConfigurationSpace":
        """Convert a Tune search space dict into a ConfigSpace search space.

        Nested keys are flattened and joined with ``"/"`` to form the
        hyperparameter names.

        Raises:
            ValueError: If ``spec`` contains grid search parameters, or a
                domain/sampler combination that is not handled below.
        """
        _, _, grid_vars = parse_spec_vars(spec)

        if grid_vars:
            raise ValueError(
                "Grid search parameters cannot be automatically converted "
                "to a TuneBOHB search space."
            )

        # Flatten and resolve again after checking for grid search.
        spec = flatten_dict(spec, prevent_delimiter=True)
        _, domain_vars, _ = parse_spec_vars(spec)

        def resolve_bounds(domain: Domain, quantize, exclusive_upper: bool):
            """Return ``(lower, upper)`` for a bounded domain.

            With a quantization step, the bounds are snapped inwards to
            multiples of the step. Without one, ``exclusive_upper`` shifts
            the upper bound down by one.
            """
            lower = domain.lower
            upper = domain.upper
            if quantize:
                lower = math.ceil(domain.lower / quantize) * quantize
                upper = math.floor(domain.upper / quantize) * quantize
            elif exclusive_upper:
                # Tune search space integers are exclusive
                upper -= 1
            return lower, upper

        def resolve_value(
            par: str, domain: Domain
        ) -> "ConfigSpace.hyperparameters.Hyperparameter":
            """Build the ConfigSpace hyperparameter named ``par``."""
            quantize = None

            sampler = domain.get_sampler()
            if isinstance(sampler, Quantized):
                # Unwrap the quantizer and remember its step size.
                quantize = sampler.q
                sampler = sampler.sampler

            # ``log`` is True/False for (log-)uniform samplers and None for
            # anything else. LogUniform is tested before Uniform, as in the
            # branch order this replaces.
            if isinstance(sampler, LogUniform):
                log = True
            elif isinstance(sampler, Uniform):
                log = False
            else:
                log = None

            if isinstance(domain, Float):
                if log is not None:
                    lower, upper = resolve_bounds(
                        domain, quantize, exclusive_upper=False
                    )
                    return ConfigSpace.UniformFloatHyperparameter(
                        par, lower=lower, upper=upper, q=quantize, log=log
                    )
                if isinstance(sampler, Normal):
                    return ConfigSpace.hyperparameters.NormalFloatHyperparameter(
                        par, mu=sampler.mean, sigma=sampler.sd, q=quantize, log=False
                    )

            elif isinstance(domain, Integer):
                if log is not None:
                    lower, upper = resolve_bounds(
                        domain, quantize, exclusive_upper=True
                    )
                    return ConfigSpace.UniformIntegerHyperparameter(
                        par, lower=lower, upper=upper, q=quantize, log=log
                    )

            elif isinstance(domain, Categorical):
                if isinstance(sampler, Uniform):
                    return ConfigSpace.CategoricalHyperparameter(
                        par, choices=domain.categories
                    )

            raise ValueError(
                "TuneBOHB does not support parameters of type "
                "`{}` with samplers of type `{}`".format(
                    type(domain).__name__, type(domain.sampler).__name__
                )
            )

        cs = ConfigSpace.ConfigurationSpace()
        for path, domain in domain_vars:
            par = "/".join(str(p) for p in path)
            value = resolve_value(par, domain)
            cs.add_hyperparameter(value)

        return cs

    def save(self, checkpoint_path: str):
        """Serialize this searcher's ``__dict__`` to ``checkpoint_path``."""
        save_object = self.__dict__
        with open(checkpoint_path, "wb") as output_file:
            cloudpickle.dump(save_object, output_file)

    def restore(self, checkpoint_path: str):
        """Load state written by ``save`` and merge it into ``__dict__``.

        NOTE(review): this unpickles the file contents; only restore from
        checkpoint files you trust.
        """
        with open(checkpoint_path, "rb") as input_file:
            save_object = cloudpickle.load(input_file)
        self.__dict__.update(save_object)
|