defaults.yaml 5.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123
  1. # A unique identifier for the head node and workers of this cluster.
  2. cluster_name: ray-cluster
  3. # The maximum number of worker nodes to launch in addition to the head
  4. # node.
  5. max_workers: 5
  6. # The autoscaler will scale up the cluster faster with higher upscaling speed.
  7. # E.g., if the task requires adding more nodes then the autoscaler will gradually
  8. # scale up the cluster in chunks of upscaling_speed*currently_running_nodes.
  9. # This number should be > 0.
  10. upscaling_speed: 1.0
  11. # This executes all commands on all nodes in the docker container,
  12. # and opens all the necessary ports to support the Ray cluster.
  13. # Empty string means disabled.
  14. docker:
  15. image: "rayproject/ray-ml:latest"
  16. # image: rayproject/ray:latest # use this one if you don't need ML dependencies, it's faster to pull
  17. container_name: "ray_container"
  18. # If true, pulls the latest version of the image. Otherwise, `docker run` will only pull the image
  19. # if no cached version is present.
  20. pull_before_run: True
  21. run_options: # Extra options to pass into "docker run"
  22. - --ulimit nofile=65536:65536
  23. # If a node is idle for this many minutes, it will be removed.
  24. idle_timeout_minutes: 5
  25. # Cloud-provider specific configuration.
  26. provider:
  27. type: vsphere
  28. # How Ray will authenticate with newly launched nodes.
  29. auth:
  30. ssh_user: ray
  31. # By default Ray creates a new private keypair, but you can also use your own.
  32. # If you do so, make sure to also set "KeyName" in the head and worker node
  33. # configurations below.
  34. ssh_private_key: ~/ray-bootstrap-key.pem
  35. # Tell the autoscaler the allowed node types and the resources they provide.
  36. # The key is the name of the node type, which is just for debugging purposes.
  37. # The node config specifies the launch config and physical instance type.
  38. available_node_types:
  39. ray.head.default:
  40. # You can override the resources here. Adding GPU to the head node is not recommended.
  41. # resources: { "CPU": 2, "Memory": 4096}
  42. resources: {}
  43. worker:
  44. # The minimum number of nodes of this type to launch.
  45. # This number should be >= 0.
  46. min_workers: 1
  47. max_workers: 3
  48. # You can override the resources here. For GPUs, currently only NVIDIA GPUs are supported. If no ESXi host can
  49. # fulfill the requirement, the Ray node creation will fail. The number of created nodes may not meet the desired
  50. # minimum number. The vSphere node provider does not distinguish between GPU types. It just counts the quantity:
  51. # it mounts the first k random available NVIDIA GPUs to the VM if the user sets {"GPU": k}.
  52. # resources: {"CPU": 2, "Memory": 4096, "GPU": 1}
  53. resources: {}
  54. worker_2:
  55. # The minimum number of nodes of this type to launch.
  56. # This number should be >= 0.
  57. min_workers: 1
  58. max_workers: 2
  59. # You can override the resources here. For GPUs, currently only NVIDIA GPUs are supported. If no ESXi host can
  60. # fulfill the requirement, the Ray node creation will fail. The number of created nodes may not meet the desired
  61. # minimum number. The vSphere node provider does not distinguish between GPU types. It just counts the quantity:
  62. # it mounts the first k random available NVIDIA GPUs to the VM if the user sets {"GPU": k}.
  63. # resources: {"CPU": 2, "Memory": 4096, "GPU": 1}
  64. resources: {}
  65. # Specify the node type of the head node (as configured above).
  66. head_node_type: ray.head.default
  67. # Files or directories to copy to the head and worker nodes. The format is a
  68. # dictionary from REMOTE_PATH: LOCAL_PATH, e.g.
  69. file_mounts: {
  70. # "/path1/on/remote/machine": "/path1/on/local/machine",
  71. # "/path2/on/remote/machine": "/path2/on/local/machine",
  72. }
  73. # Files or directories to copy from the head node to the worker nodes. The format is a
  74. # list of paths. The same path on the head node will be copied to the worker node.
  75. # This behavior is a subset of the file_mounts behavior. In the vast majority of cases
  76. # you should just use file_mounts. Only use this if you know what you're doing!
  77. cluster_synced_files: []
  78. # Whether changes to directories in file_mounts or cluster_synced_files on the head node
  79. # should be synced to the worker nodes continuously.
  80. file_mounts_sync_continuously: False
  81. # Patterns for files to exclude when running rsync up or rsync down
  82. rsync_exclude: []
  83. # Pattern files to use for filtering out files when running rsync up or rsync down. The file is searched for
  84. # in the source directory and recursively through all subdirectories. For example, if .gitignore is provided
  85. # as a value, the behavior will match git's behavior for finding and using .gitignore files.
  86. rsync_filter: []
  87. # List of commands that will be run before `setup_commands`. If docker is
  88. # enabled, these commands will run outside the container and before docker
  89. # is set up.
  90. initialization_commands: []
  91. # List of shell commands to run to set up nodes.
  92. setup_commands: []
  93. # Custom commands that will be run on the head node after common setup.
  94. head_setup_commands: []
  95. # Custom commands that will be run on worker nodes after common setup.
  96. worker_setup_commands: []
  97. # Commands to start Ray on the head node. You don't need to change these.
  98. head_start_ray_commands:
  99. - ray stop
  100. - ulimit -n 65536; ray start --head --port=6379 --autoscaling-config=~/ray_bootstrap_config.yaml --dashboard-host=0.0.0.0
  101. # Commands to start Ray on worker nodes. You don't need to change these.
  102. worker_start_ray_commands:
  103. - ray stop
  104. - ulimit -n 65536; ray start --address=$RAY_HEAD_IP:6379