CUDAGuard.h 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311
  1. #if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION)
  2. #pragma once
  3. #include <c10/core/DeviceType.h>
  4. #include <c10/core/impl/InlineDeviceGuard.h>
  5. #include <c10/core/impl/InlineStreamGuard.h>
  6. #include <c10/cuda/CUDAMacros.h>
  7. #include <c10/cuda/impl/CUDAGuardImpl.h>
  8. namespace c10::cuda {
  9. // This code is kind of boilerplatey. See Note [Whither the DeviceGuard
  10. // boilerplate]
  11. /// A variant of DeviceGuard that is specialized for CUDA. It accepts
  12. /// integer indices (interpreting them as CUDA devices) and is a little
  13. /// more efficient than DeviceGuard (it compiles to straight line
  14. /// cudaSetDevice/cudaGetDevice calls); however, it can only be used
  15. /// from code that links against CUDA directly.
  16. struct CUDAGuard {
  17. /// No default constructor; see Note [Omitted default constructor from RAII]
  18. explicit CUDAGuard() = delete;
  19. /// Set the current CUDA device to the passed device index.
  20. explicit CUDAGuard(DeviceIndex device_index) : guard_(device_index) {}
  21. /// Sets the current CUDA device to the passed device. Errors if the passed
  22. /// device is not a CUDA device.
  23. explicit CUDAGuard(Device device) : guard_(device) {}
  24. // Copy is not allowed
  25. CUDAGuard(const CUDAGuard&) = delete;
  26. CUDAGuard& operator=(const CUDAGuard&) = delete;
  27. // Move is not allowed (there is no uninitialized state)
  28. CUDAGuard(CUDAGuard&& other) = delete;
  29. CUDAGuard& operator=(CUDAGuard&& other) = delete;
  30. ~CUDAGuard() = default;
  31. /// Sets the CUDA device to the given device. Errors if the given device
  32. /// is not a CUDA device.
  33. void set_device(Device device) {
  34. guard_.set_device(device);
  35. }
  36. /// Sets the CUDA device to the given device. Errors if the given device
  37. /// is not a CUDA device. (This method is provided for uniformity with
  38. /// DeviceGuard).
  39. void reset_device(Device device) {
  40. guard_.reset_device(device);
  41. }
  42. /// Sets the CUDA device to the given device index.
  43. void set_index(DeviceIndex device_index) {
  44. guard_.set_index(device_index);
  45. }
  46. /// Returns the device that was set upon construction of the guard
  47. Device original_device() const {
  48. return guard_.original_device();
  49. }
  50. /// Returns the last device that was set via `set_device`, if any, otherwise
  51. /// the device passed during construction.
  52. Device current_device() const {
  53. return guard_.current_device();
  54. }
  55. private:
  56. /// The guard for the current device.
  57. c10::impl::InlineDeviceGuard<impl::CUDAGuardImpl> guard_;
  58. };
  59. /// A variant of OptionalDeviceGuard that is specialized for CUDA. See
  60. /// CUDAGuard for when you can use this.
  61. struct OptionalCUDAGuard {
  62. /// Create an uninitialized OptionalCUDAGuard.
  63. explicit OptionalCUDAGuard() = default;
  64. /// Set the current CUDA device to the passed Device, if it is not nullopt.
  65. explicit OptionalCUDAGuard(std::optional<Device> device_opt)
  66. : guard_(device_opt) {}
  67. /// Set the current CUDA device to the passed device index, if it is not
  68. /// nullopt
  69. explicit OptionalCUDAGuard(std::optional<DeviceIndex> device_index_opt)
  70. : guard_(device_index_opt) {}
  71. // Copy is not allowed
  72. OptionalCUDAGuard(const OptionalCUDAGuard&) = delete;
  73. OptionalCUDAGuard& operator=(const OptionalCUDAGuard&) = delete;
  74. // See Note [Move construction for RAII guards is tricky]
  75. OptionalCUDAGuard(OptionalCUDAGuard&& other) = delete;
  76. // See Note [Move assignment for RAII guards is tricky]
  77. OptionalCUDAGuard& operator=(OptionalCUDAGuard&& other) = delete;
  78. ~OptionalCUDAGuard() = default;
  79. /// Sets the CUDA device to the given device, initializing the guard if it
  80. /// is not already initialized. Errors if the given device is not a CUDA
  81. /// device.
  82. void set_device(Device device) {
  83. guard_.set_device(device);
  84. }
  85. /// Sets the CUDA device to the given device, initializing the guard if it is
  86. /// not already initialized. Errors if the given device is not a CUDA device.
  87. /// (This method is provided for uniformity with OptionalDeviceGuard).
  88. void reset_device(Device device) {
  89. guard_.reset_device(device);
  90. }
  91. /// Sets the CUDA device to the given device index, initializing the guard if
  92. /// it is not already initialized.
  93. void set_index(DeviceIndex device_index) {
  94. guard_.set_index(device_index);
  95. }
  96. /// Returns the device that was set immediately prior to initialization of the
  97. /// guard, or nullopt if the guard is uninitialized.
  98. std::optional<Device> original_device() const {
  99. return guard_.original_device();
  100. }
  101. /// Returns the most recent device that was set using this device guard,
  102. /// either from construction, or via set_device, if the guard is initialized,
  103. /// or nullopt if the guard is uninitialized.
  104. std::optional<Device> current_device() const {
  105. return guard_.current_device();
  106. }
  107. /// Restore the original CUDA device, resetting this guard to uninitialized
  108. /// state.
  109. void reset() {
  110. guard_.reset();
  111. }
  112. private:
  113. c10::impl::InlineOptionalDeviceGuard<impl::CUDAGuardImpl> guard_;
  114. };
  115. /// A variant of StreamGuard that is specialized for CUDA. See CUDAGuard
  116. /// for when you can use this.
  117. struct CUDAStreamGuard {
  118. /// No default constructor, see Note [Omitted default constructor from RAII]
  119. explicit CUDAStreamGuard() = delete;
  120. /// Set the current CUDA device to the device associated with the passed
  121. /// stream, and set the current CUDA stream on that device to the passed
  122. /// stream. Errors if the Stream is not a CUDA stream.
  123. explicit CUDAStreamGuard(Stream stream) : guard_(stream) {}
  124. ~CUDAStreamGuard() = default;
  125. /// Copy is disallowed
  126. CUDAStreamGuard(const CUDAStreamGuard&) = delete;
  127. CUDAStreamGuard& operator=(const CUDAStreamGuard&) = delete;
  128. /// Move is disallowed, as CUDAStreamGuard does not have an uninitialized
  129. /// state, which is required for moves on types with nontrivial destructors.
  130. CUDAStreamGuard(CUDAStreamGuard&& other) = delete;
  131. CUDAStreamGuard& operator=(CUDAStreamGuard&& other) = delete;
  132. /// Resets the currently set stream to the original stream and
  133. /// the currently set device to the original device. Then,
  134. /// set the current device to the device associated with the passed stream,
  135. /// and set the current stream on that device to the passed stream.
  136. /// Errors if the stream passed is not a CUDA stream.
  137. ///
  138. /// NOTE: this implementation may skip some stream/device setting if
  139. /// it can prove that it is unnecessary.
  140. ///
  141. /// WARNING: reset_stream does NOT preserve previously set streams on
  142. /// different devices. If you need to set streams on multiple devices
  143. /// on CUDA, use CUDAMultiStreamGuard instead.
  144. void reset_stream(Stream stream) {
  145. guard_.reset_stream(stream);
  146. }
  147. /// Returns the CUDA stream that was set at the time the guard was
  148. /// constructed.
  149. CUDAStream original_stream() const {
  150. return CUDAStream(CUDAStream::UNCHECKED, guard_.original_stream());
  151. }
  152. /// Returns the most recent CUDA stream that was set using this device guard,
  153. /// either from construction, or via set_stream.
  154. CUDAStream current_stream() const {
  155. return CUDAStream(CUDAStream::UNCHECKED, guard_.current_stream());
  156. }
  157. /// Returns the most recent CUDA device that was set using this device guard,
  158. /// either from construction, or via set_device/reset_device/set_index.
  159. Device current_device() const {
  160. return guard_.current_device();
  161. }
  162. /// Returns the CUDA device that was set at the most recent reset_stream(),
  163. /// or otherwise the device at construction time.
  164. Device original_device() const {
  165. return guard_.original_device();
  166. }
  167. private:
  168. c10::impl::InlineStreamGuard<impl::CUDAGuardImpl> guard_;
  169. };
  170. /// A variant of OptionalStreamGuard that is specialized for CUDA. See
  171. /// CUDAGuard for when you can use this.
  172. struct OptionalCUDAStreamGuard {
  173. /// Create an uninitialized guard.
  174. explicit OptionalCUDAStreamGuard() = default;
  175. /// Set the current CUDA device to the device associated with the passed
  176. /// stream, and set the current CUDA stream on that device to the passed
  177. /// stream. Errors if the Stream is not a CUDA stream.
  178. explicit OptionalCUDAStreamGuard(Stream stream) : guard_(stream) {}
  179. /// Set the current device to the device associated with the passed stream,
  180. /// and set the current stream on that device to the passed stream,
  181. /// if the passed stream is not nullopt.
  182. explicit OptionalCUDAStreamGuard(std::optional<Stream> stream_opt)
  183. : guard_(stream_opt) {}
  184. /// Copy is disallowed
  185. OptionalCUDAStreamGuard(const OptionalCUDAStreamGuard&) = delete;
  186. OptionalCUDAStreamGuard& operator=(const OptionalCUDAStreamGuard&) = delete;
  187. // See Note [Move construction for RAII guards is tricky]
  188. OptionalCUDAStreamGuard(OptionalCUDAStreamGuard&& other) = delete;
  189. // See Note [Move assignment for RAII guards is tricky]
  190. OptionalCUDAStreamGuard& operator=(OptionalCUDAStreamGuard&& other) = delete;
  191. ~OptionalCUDAStreamGuard() = default;
  192. /// Resets the currently set CUDA stream to the original stream and
  193. /// the currently set device to the original device. Then,
  194. /// set the current device to the device associated with the passed stream,
  195. /// and set the current stream on that device to the passed stream.
  196. /// Initializes the guard if it was not previously initialized.
  197. void reset_stream(Stream stream) {
  198. guard_.reset_stream(stream);
  199. }
  200. /// Returns the CUDA stream that was set at the time the guard was most
  201. /// recently initialized, or nullopt if the guard is uninitialized.
  202. std::optional<CUDAStream> original_stream() const {
  203. auto r = guard_.original_stream();
  204. if (r.has_value()) {
  205. return CUDAStream(CUDAStream::UNCHECKED, r.value());
  206. } else {
  207. return std::nullopt;
  208. }
  209. }
  210. /// Returns the most recent CUDA stream that was set using this stream guard,
  211. /// either from construction, or via reset_stream, if the guard is
  212. /// initialized, or nullopt if the guard is uninitialized.
  213. std::optional<CUDAStream> current_stream() const {
  214. auto r = guard_.current_stream();
  215. if (r.has_value()) {
  216. return CUDAStream(CUDAStream::UNCHECKED, r.value());
  217. } else {
  218. return std::nullopt;
  219. }
  220. }
  221. /// Restore the original CUDA device and stream, resetting this guard to
  222. /// uninitialized state.
  223. void reset() {
  224. guard_.reset();
  225. }
  226. private:
  227. c10::impl::InlineOptionalStreamGuard<impl::CUDAGuardImpl> guard_;
  228. };
  229. /// A variant of MultiStreamGuard that is specialized for CUDA.
  230. struct CUDAMultiStreamGuard {
  231. explicit CUDAMultiStreamGuard(ArrayRef<CUDAStream> streams)
  232. : guard_(unwrapStreams(streams)) {}
  233. /// Copy is disallowed
  234. CUDAMultiStreamGuard(const CUDAMultiStreamGuard&) = delete;
  235. CUDAMultiStreamGuard& operator=(const CUDAMultiStreamGuard&) = delete;
  236. // See Note [Move construction for RAII guards is tricky]
  237. CUDAMultiStreamGuard(CUDAMultiStreamGuard&& other) = delete;
  238. // See Note [Move assignment for RAII guards is tricky]
  239. CUDAMultiStreamGuard& operator=(CUDAMultiStreamGuard&& other) = delete;
  240. ~CUDAMultiStreamGuard() = default;
  241. private:
  242. c10::impl::InlineMultiStreamGuard<impl::CUDAGuardImpl> guard_;
  243. static std::vector<Stream> unwrapStreams(ArrayRef<CUDAStream> cudaStreams) {
  244. std::vector<Stream> streams;
  245. streams.reserve(cudaStreams.size());
  246. for (const CUDAStream& cudaStream : cudaStreams) {
  247. streams.push_back(cudaStream);
  248. }
  249. return streams;
  250. }
  251. };
  252. } // namespace c10::cuda
  253. #else
  254. #error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined."
  255. #endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION)