| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263 |
- import json
- import os
- import time
- import numpy as np
- from ray.tune import Trainable
# Name under which register_mock_trainable() registers MyTrainableClass.
- MOCK_TRAINABLE_NAME = "mock_trainable"
# Config key that MyTrainableClass.setup() reads to enable deliberate failures.
- MOCK_ERROR_KEY = "mock_error"
class MyTrainableClass(Trainable):
    """Mock trainable whose reported reward follows a scaled ``tanh`` curve.

    Every step reports ``height * tanh(timestep / width)``, where the
    "width" and "height" hyperparameters are read from the config and both
    default to 1. The config can additionally request a per-step sleep
    ("sleep") and a deliberate failure (``MOCK_ERROR_KEY``, optionally made
    permanent with "persistent_error").
    """

    def setup(self, config):
        """Read the mock settings from ``config`` and reset the counters."""
        lookup = config.get
        self._sleep_time = lookup("sleep", 0)
        self._mock_error = lookup(MOCK_ERROR_KEY, False)
        self._persistent_error = lookup("persistent_error", False)
        self.timestep = 0
        self.restored = False

    def step(self):
        """Advance one timestep and return the reward for it.

        Returns:
            A dict with the single key "episode_reward_mean".

        Raises:
            RuntimeError: If mock errors are enabled, at least one step has
                already completed, and either the error is persistent or
                this instance has not loaded a checkpoint.
        """
        # Loading a checkpoint clears the failure unless it is persistent.
        recovered = self.restored and not self._persistent_error
        # Requiring timestep > 0 allows at least one successful checkpoint.
        if self._mock_error and self.timestep > 0 and not recovered:
            raise RuntimeError(f"Failing on purpose! {self.timestep=}")

        pause = self._sleep_time
        if pause > 0:
            time.sleep(pause)

        self.timestep += 1
        slope_input = float(self.timestep) / self.config.get("width", 1)
        reward = np.tanh(slope_input) * self.config.get("height", 1)

        # `episode_reward_mean` is the objective reported here; other
        # objectives such as loss or accuracy could be reported instead.
        return {"episode_reward_mean": reward}

    def save_checkpoint(self, checkpoint_dir):
        """Write the current timestep as JSON to ``<checkpoint_dir>/checkpoint``."""
        target = os.path.join(checkpoint_dir, "checkpoint")
        with open(target, "w") as handle:
            json.dump({"timestep": self.timestep}, handle)

    def load_checkpoint(self, checkpoint_dir):
        """Restore the timestep from ``<checkpoint_dir>/checkpoint``.

        Also marks the instance as restored, which stops non-persistent
        mock errors from being raised in ``step``.
        """
        source = os.path.join(checkpoint_dir, "checkpoint")
        with open(source, "r") as handle:
            state = json.load(handle)
        self.timestep = state["timestep"]
        self.restored = True
def register_mock_trainable():
    """Register ``MyTrainableClass`` with Ray Tune as ``MOCK_TRAINABLE_NAME``.

    The Ray Tune import is performed inside the function body rather than
    at module level.
    """
    from ray import tune

    tune.register_trainable(MOCK_TRAINABLE_NAME, MyTrainableClass)
|