# constants.py
  1. # Key to denote the preprocessor in the checkpoint dict.
  2. PREPROCESSOR_KEY = "_preprocessor"
  3. # Key to denote the model in the checkpoint dict.
  4. MODEL_KEY = "model"
  5. # Key to denote which dataset is the evaluation dataset.
  6. # Only used in trainers which do not support multiple
  7. # evaluation datasets.
  8. EVALUATION_DATASET_KEY = "evaluation"
  9. # Key to denote which dataset is the training dataset.
  10. # This is the dataset that the preprocessor is fit on.
  11. TRAIN_DATASET_KEY = "train"
  12. # Name to use for the column when representing tensors in table format.
  13. TENSOR_COLUMN_NAME = "__value__"
  14. # The maximum length of strings returned by `__repr__` for AIR objects constructed with
  15. # default values.
  16. MAX_REPR_LENGTH = int(80 * 1.5)
  17. # Timeout used when putting exceptions raised by runner thread into the queue.
  18. _ERROR_REPORT_TIMEOUT = 10
  19. # Timeout when fetching new results after signaling the training function to continue.
  20. _RESULT_FETCH_TIMEOUT = 0.2
  21. # Timeout for fetching exceptions raised by the training function.
  22. _ERROR_FETCH_TIMEOUT = 1
  23. # The key used to identify whether we have already warned about ray.air.session
  24. # functions being used outside of the session
  25. SESSION_MISUSE_LOG_ONCE_KEY = "air_warn_session_misuse"
  26. # Name of attribute in Checkpoint storing current Tune ID for restoring
  27. # training with Ray Train
  28. CHECKPOINT_ID_ATTR = "_current_checkpoint_id"
  29. # Name of the marker dropped by the Trainable. If a worker detects
  30. # the presence of the marker in the trial dir, it will use lazy
  31. # checkpointing.
  32. LAZY_CHECKPOINT_MARKER_FILE = ".lazy_checkpoint_marker"
  33. # The timestamp of when the result is generated.
  34. # Default to when the result is processed by tune.
  35. TIMESTAMP = "timestamp"
  36. # (Auto-filled) Time in seconds this iteration took to run.
  37. # This may be overridden to override the system-computed time difference.
  38. TIME_THIS_ITER_S = "time_this_iter_s"
  39. # (Auto-filled) The index of this training iteration.
  40. TRAINING_ITERATION = "training_iteration"
  41. # File that stores parameters of the trial.
  42. EXPR_PARAM_FILE = "params.json"
  43. # Pickle File that stores parameters of the trial.
  44. EXPR_PARAM_PICKLE_FILE = "params.pkl"
  45. # File that stores the progress of the trial.
  46. EXPR_PROGRESS_FILE = "progress.csv"
  47. # File that stores results of the trial.
  48. EXPR_RESULT_FILE = "result.json"
  49. # File that stores the pickled error file
  50. EXPR_ERROR_PICKLE_FILE = "error.pkl"
  51. # File that stores the error file
  52. EXPR_ERROR_FILE = "error.txt"
  53. # File that stores the checkpoint metadata
  54. CHECKPOINT_TUNE_METADATA_FILE = ".tune_metadata"
  55. # ==================================================
  56. # Environment Variables
  57. # ==================================================
  58. # Integer value which if set will copy files in reported AIR directory
  59. # checkpoints instead of moving them (if worker is on the same node as Trainable)
  60. COPY_DIRECTORY_CHECKPOINTS_INSTEAD_OF_MOVING_ENV = (
  61. "TRAIN_COPY_DIRECTORY_CHECKPOINTS_INSTEAD_OF_MOVING"
  62. )
  63. # NOTE: When adding a new environment variable, please track it in this list.
  64. # TODO(ml-team): Most env var constants should get moved here.
  65. AIR_ENV_VARS = {
  66. COPY_DIRECTORY_CHECKPOINTS_INSTEAD_OF_MOVING_ENV,
  67. "RAY_AIR_FULL_TRACEBACKS",
  68. "RAY_AIR_NEW_OUTPUT",
  69. }