import json
import os
import time

import numpy as np

from ray.tune import Trainable

MOCK_TRAINABLE_NAME = "mock_trainable"
MOCK_ERROR_KEY = "mock_error"


class MyTrainableClass(Trainable):
    """Mock trainable whose reported reward follows a scaled tanh curve.

    Each step reports ``height * tanh(timestep / width)``, where the dummy
    hyperparameters "width" and "height" are read from ``self.config`` (both
    default to 1). "width" therefore controls how fast the curve rises and
    "height" the maximum reward it approaches.

    Other config keys read in ``setup``:
        "sleep": seconds to sleep in every step (default 0).
        MOCK_ERROR_KEY: if truthy, ``step`` raises on purpose once at least
            one step has completed (default False).
        "persistent_error": if truthy, the injected failure keeps firing even
            after the trainable was restored from a checkpoint (default
            False).
    """

    def setup(self, config):
        """Initialize the step counter and read the mock settings."""
        self.timestep = 0
        self.restored = False

        self._sleep_time = config.get("sleep", 0)
        self._mock_error = config.get(MOCK_ERROR_KEY, False)
        self._persistent_error = config.get("persistent_error", False)

    def step(self):
        """Advance one timestep and report the current reward.

        Returns:
            A dict with the single key "episode_reward_mean".

        Raises:
            RuntimeError: when error injection is enabled, at least one step
                has already completed, and either the error is persistent or
                the trainable has not been restored from a checkpoint.
        """
        # Never fail on the very first step: allow at least 1 successful
        # checkpoint.
        past_first_step = self.timestep > 0
        failure_armed = self._persistent_error or not self.restored
        if self._mock_error and past_first_step and failure_armed:
            raise RuntimeError(f"Failing on purpose! {self.timestep=}")

        if self._sleep_time > 0:
            time.sleep(self._sleep_time)

        self.timestep += 1

        width = self.config.get("width", 1)
        height = self.config.get("height", 1)
        reward = np.tanh(float(self.timestep) / width)
        reward *= height

        # `episode_reward_mean` is used here, but other objectives such as
        # loss or accuracy could be reported instead.
        return {"episode_reward_mean": reward}

    def save_checkpoint(self, checkpoint_dir):
        """Write the current timestep as JSON to ``<checkpoint_dir>/checkpoint``."""
        checkpoint_file = os.path.join(checkpoint_dir, "checkpoint")
        with open(checkpoint_file, "w") as out:
            json.dump({"timestep": self.timestep}, out)

    def load_checkpoint(self, checkpoint_dir):
        """Restore the timestep from ``<checkpoint_dir>/checkpoint``.

        Also sets ``self.restored`` to True, which disables the injected
        failure in ``step`` unless "persistent_error" was configured.
        """
        checkpoint_file = os.path.join(checkpoint_dir, "checkpoint")
        with open(checkpoint_file, "r") as src:
            state = json.load(src)
        self.timestep = state["timestep"]
        self.restored = True


def register_mock_trainable():
    """Register ``MyTrainableClass`` with Ray Tune under MOCK_TRAINABLE_NAME."""
    # Imported locally, as in the original module.
    from ray.tune import register_trainable

    register_trainable(MOCK_TRAINABLE_NAME, MyTrainableClass)