| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114 |
- #if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION)
- #pragma once
- // Light-weight version of CUDAContext.h with fewer transitive includes
#include <cstddef>
#include <cstdint>
#include <map>
#include <shared_mutex>
#include <tuple>

#include <cuda_runtime_api.h>
#include <cusparse.h>
#include <cublas_v2.h>

// cublasLT was introduced in CUDA 10.1 but we enable only for 11.1 that also
// added bf16 support
#include <cublasLt.h>

#ifdef CUDART_VERSION
#include <cusolverDn.h>
#endif

#if defined(USE_CUDSS)
#include <cudss.h>
#endif

#if defined(USE_ROCM)
#include <hipsolver/hipsolver.h>
#endif

#include <c10/core/Allocator.h>
#include <c10/cuda/CUDAFunctions.h>
// Forward declaration of c10::Allocator; getCUDADeviceAllocator() in
// at::cuda is declared as returning a pointer to it.
namespace c10 {

struct Allocator;

} // namespace c10
- namespace at::cuda {
- /*
- A common CUDA interface for ATen.
- This interface is distinct from CUDAHooks, which defines an interface that links
- to both CPU-only and CUDA builds. That interface is intended for runtime
- dispatch and should be used from files that are included in both CPU-only and
- CUDA builds.
- CUDAContext, on the other hand, should be preferred by files only included in
- CUDA builds. It is intended to expose CUDA functionality in a consistent
- manner.
- This means there is some overlap between the CUDAContext and CUDAHooks, but
- the choice of which to use is simple: use CUDAContext when in a CUDA-only file,
- use CUDAHooks otherwise.
- Note that CUDAContext simply defines an interface with no associated class.
- It is expected that the modules whose functions compose this interface will
- manage their own state. There is only a single CUDA context/state.
- */
- /**
- * DEPRECATED: use device_count() instead
- */
- inline int64_t getNumGPUs() {
- return c10::cuda::device_count();
- }
- /**
- * CUDA is available if we compiled with CUDA, and there are one or more
- * devices. If we compiled with CUDA but there is a driver problem, etc.,
- * this function will report CUDA is not available (rather than raise an error.)
- */
- inline bool is_available() {
- return c10::cuda::device_count() > 0;
- }
- TORCH_CUDA_CPP_API cudaDeviceProp* getCurrentDeviceProperties();
- TORCH_CUDA_CPP_API int warp_size();
- TORCH_CUDA_CPP_API cudaDeviceProp* getDeviceProperties(c10::DeviceIndex device);
- TORCH_CUDA_CPP_API bool canDeviceAccessPeer(
- c10::DeviceIndex device,
- c10::DeviceIndex peer_device);
- TORCH_CUDA_CPP_API c10::Allocator* getCUDADeviceAllocator();
- /* Handles */
- TORCH_CUDA_CPP_API cusparseHandle_t getCurrentCUDASparseHandle();
- TORCH_CUDA_CPP_API cublasHandle_t getCurrentCUDABlasHandle();
- TORCH_CUDA_CPP_API cublasLtHandle_t getCurrentCUDABlasLtHandle();
- TORCH_CUDA_CPP_API void clearCublasWorkspaces();
- struct WorkspaceMapWithMutex {
- std::map<std::tuple<void*, void*>, at::DataPtr> map;
- std::shared_mutex mutex;
- };
- TORCH_CUDA_CPP_API WorkspaceMapWithMutex& cublas_handle_stream_to_workspace();
- TORCH_CUDA_CPP_API WorkspaceMapWithMutex& cublaslt_handle_stream_to_workspace();
- TORCH_CUDA_CPP_API size_t getChosenWorkspaceSize();
- TORCH_CUDA_CPP_API size_t getCUDABlasLtWorkspaceSize();
- TORCH_CUDA_CPP_API void* getCUDABlasLtWorkspace();
- TORCH_CUDA_CPP_API cusolverDnHandle_t getCurrentCUDASolverDnHandle();
- #if defined(USE_CUDSS)
- TORCH_CUDA_CPP_API cudssHandle_t getCurrentCudssHandle();
- #endif
- } // namespace at::cuda
- #else
- #error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined."
- #endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION)
|