| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596 |
- # Copyright (c) Facebook, Inc. and its affiliates.
- # All rights reserved.
- #
- # This source code is licensed under the BSD-style license found in the
- # LICENSE file in the root directory of this source tree.
- import logging
- from importlib.metadata import entry_points
- from .api import (
- rendezvous_handler_registry as handler_registry,
- RendezvousHandler,
- RendezvousParameters,
- )
- from .dynamic_rendezvous import create_handler
- log = logging.getLogger(__name__)
- __all__ = ["get_rendezvous_handler"]
def _create_static_handler(params: RendezvousParameters) -> RendezvousHandler:
    """Create the rendezvous handler for the ``static`` backend.

    The ``static_tcp_rendezvous`` module is imported inside the function, so
    it is only loaded when this factory is actually called.

    Args:
        params: Rendezvous parameters, forwarded unchanged to
            ``static_tcp_rendezvous.create_rdzv_handler``.

    Returns:
        Whatever handler ``static_tcp_rendezvous.create_rdzv_handler`` builds.
    """
    from .static_tcp_rendezvous import create_rdzv_handler as make_handler

    return make_handler(params)
def _create_etcd_handler(params: RendezvousParameters) -> RendezvousHandler:
    """Create the rendezvous handler for the ``etcd`` backend.

    The ``etcd_rendezvous`` module is imported inside the function, so it is
    only loaded when this factory is actually called.

    Args:
        params: Rendezvous parameters, forwarded unchanged to
            ``etcd_rendezvous.create_rdzv_handler``.

    Returns:
        Whatever handler ``etcd_rendezvous.create_rdzv_handler`` builds.
    """
    from .etcd_rendezvous import create_rdzv_handler as make_handler

    return make_handler(params)
def _create_etcd_v2_handler(params: RendezvousParameters) -> RendezvousHandler:
    """Create the rendezvous handler for the ``etcd-v2`` backend.

    Builds a backend/store pair with ``etcd_rendezvous_backend.create_backend``
    and wraps it with the dynamic-rendezvous ``create_handler`` factory. The
    backend module is imported inside the function, so it is only loaded when
    this factory is actually called.

    Args:
        params: Rendezvous parameters, used both to create the backend and to
            configure the resulting handler.

    Returns:
        The handler produced by ``create_handler``.
    """
    from . import etcd_rendezvous_backend

    rdzv_backend, rdzv_store = etcd_rendezvous_backend.create_backend(params)
    # Note the argument order: create_handler takes the store first.
    return create_handler(rdzv_store, rdzv_backend, params)
def _create_c10d_handler(params: RendezvousParameters) -> RendezvousHandler:
    """Create the rendezvous handler for the ``c10d`` backend.

    Builds a backend/store pair with ``c10d_rendezvous_backend.create_backend``
    and wraps it with the dynamic-rendezvous ``create_handler`` factory. The
    backend module is imported inside the function, so it is only loaded when
    this factory is actually called.

    Args:
        params: Rendezvous parameters, used both to create the backend and to
            configure the resulting handler.

    Returns:
        The handler produced by ``create_handler``.
    """
    from . import c10d_rendezvous_backend

    rdzv_backend, rdzv_store = c10d_rendezvous_backend.create_backend(params)
    # Note the argument order: create_handler takes the store first.
    return create_handler(rdzv_store, rdzv_backend, params)
def _register_default_handlers() -> None:
    """Register the built-in rendezvous backends with the handler registry.

    The backends are registered in a fixed order: ``etcd``, ``etcd-v2``,
    ``c10d`` and ``static``, each mapped to its module-private factory.
    """
    builtin_creators = (
        ("etcd", _create_etcd_handler),
        ("etcd-v2", _create_etcd_v2_handler),
        ("c10d", _create_c10d_handler),
        ("static", _create_static_handler),
    )
    for backend_name, creator in builtin_creators:
        handler_registry.register(backend_name, creator)
def _register_out_of_tree_handlers() -> None:
    """Register rendezvous handlers published by third-party packages.

    Iterates over the entry points in the ``torchrun.handlers`` group. Each
    entry point is loaded, the loaded object is called with no arguments, and
    the value it returns is registered with the handler registry under the
    entry point's name.

    Registration is best effort: any exception raised while loading or
    registering a plugin is logged as a warning (with traceback) and the
    remaining plugins are still processed.
    """
    for plugin in entry_points(group="torchrun.handlers"):
        try:
            # Load the entry point we are iterating over directly. Looking it
            # up again by name would resolve to the first entry point with
            # that name, so a later entry point sharing the name would never
            # be loaded.
            get_handler = plugin.load()
            handler_registry.register(plugin.name, get_handler())
        except Exception:
            log.warning(
                "Exception while registering out of tree plugin %s: ",
                plugin.name,
                exc_info=True,
            )
def get_rendezvous_handler(params: RendezvousParameters) -> RendezvousHandler:
    """
    Obtain a reference to a :py:class:`RendezvousHandler`.

    Custom rendezvous handlers can be registered by

    ::

      from torch.distributed.elastic.rendezvous import rendezvous_handler_registry
      from torch.distributed.elastic.rendezvous.registry import get_rendezvous_handler


      def create_my_rdzv(params: RendezvousParameters):
          return MyCustomRdzv(params)


      rendezvous_handler_registry.register("my_rdzv_backend_name", create_my_rdzv)

      # ``params`` is a RendezvousParameters instance; presumably it selects
      # "my_rdzv_backend_name" as its backend (confirm against
      # RendezvousParameters and the registry).
      my_rdzv_handler = get_rendezvous_handler(params)

    Args:
        params: Rendezvous parameters, passed unchanged to the registry's
            ``create_handler``.

    Returns:
        The handler created by the registry for ``params``.
    """
    return handler_registry.create_handler(params)
|