# If you want to use checkpointing with a custom training function (not a Ray
# integration like PyTorch or TensorFlow), your function can write checkpoints
# through the ``ray.tune.report(metrics, checkpoint=...)`` API and read the
# latest one back through ``ray.tune.get_checkpoint()``.

import argparse
import json
import os
import tempfile
import time

from ray import tune
from ray.tune import Checkpoint

# Name of the JSON state file stored inside every checkpoint directory.
# Shared by the save and restore paths so they cannot drift apart.
_CHECKPOINT_FILENAME = "checkpoint.json"


def evaluation_fn(step: int, width: float, height: float) -> float:
    """Return a synthetic loss for ``step`` given ``width`` and ``height``.

    Sleeps for 0.1 s on every call to imitate a non-trivial evaluation.

    Args:
        step: Index of the current training step.
        width: Hyperparameter scaling how quickly the first term shrinks
            as ``step`` grows.
        height: Hyperparameter adding a constant ``height * 0.1`` offset.

    Returns:
        ``1 / (0.1 + width * step / 100) + height * 0.1``.
    """
    time.sleep(0.1)
    return (0.1 + width * step / 100) ** (-1) + height * 0.1


def train_func(config: dict) -> None:
    """Run up to 100 training steps, checkpointing and reporting each one.

    If ``tune.get_checkpoint()`` returns a checkpoint, the step counter stored
    in it is loaded and training continues from the step after it. After each
    step the current step index is written to a fresh temporary directory and
    handed to ``tune.report`` together with the metrics.

    Args:
        config: Trial configuration; must contain ``"width"`` and
            ``"height"``.

    Raises:
        KeyError: If ``config`` lacks ``"width"`` or ``"height"``, or the
            restored checkpoint state has no ``"step"`` entry.
    """
    step = 0
    width, height = config["width"], config["height"]

    checkpoint = tune.get_checkpoint()
    if checkpoint:
        with checkpoint.as_directory() as checkpoint_dir:
            state_path = os.path.join(checkpoint_dir, _CHECKPOINT_FILENAME)
            with open(state_path, encoding="utf-8") as f:
                state = json.load(f)
        # The checkpoint records the last step that was reported, so resume
        # with the one after it.
        step = state["step"] + 1

    for current_step in range(step, 100):
        intermediate_score = evaluation_fn(current_step, width, height)

        with tempfile.TemporaryDirectory() as temp_checkpoint_dir:
            state_path = os.path.join(temp_checkpoint_dir, _CHECKPOINT_FILENAME)
            with open(state_path, "w", encoding="utf-8") as f:
                json.dump({"step": current_step}, f)
            # Report after the state file is closed (fully flushed) but while
            # the temporary directory still exists; the directory is removed
            # as soon as this ``with`` block exits.
            tune.report(
                {"iterations": current_step, "mean_loss": intermediate_score},
                checkpoint=Checkpoint.from_directory(temp_checkpoint_dir),
            )


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--smoke-test", action="store_true", help="Finish quickly for testing"
    )
    # parse_known_args() tolerates extra command-line arguments instead of
    # exiting with an error on them.
    args, _ = parser.parse_known_args()

    tuner = tune.Tuner(
        train_func,
        run_config=tune.RunConfig(
            name="hyperband_test",
            # Stop each trial after a single iteration in smoke-test mode,
            # otherwise after ten.
            stop={"training_iteration": 1 if args.smoke_test else 10},
        ),
        tune_config=tune.TuneConfig(
            metric="mean_loss",
            mode="min",
            num_samples=5,
        ),
        param_space={
            # NOTE(review): "steps" is not read by train_func, which always
            # loops up to 100; kept so the trial config is unchanged.
            "steps": 10,
            "width": tune.randint(10, 100),
            "height": tune.loguniform(10, 100),
        },
    )
    results = tuner.fit()

    best_result = results.get_best_result()
    print("Best hyperparameters: ", best_result.config)

    best_checkpoint = best_result.checkpoint
    print("Best checkpoint: ", best_checkpoint)