CUDAMathCompat.h 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157
  1. #if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION)
  2. #pragma once
  /* This file defines math functions that are compatible across different GPU
   * platforms (currently CUDA and HIP).
   */
  6. #if defined(__CUDACC__) || defined(__HIPCC__)
  7. #include <c10/macros/Macros.h>
  8. #include <c10/util/Exception.h>
  // Declaration prefix placed in front of every wrapper in this header.
  //   - __HIPCC__ defined:       `inline C10_DEVICE`
  //   - __CUDACC_RTC__ defined:  `C10_HOST_DEVICE` (no `inline`)
  //   - otherwise:               `inline C10_HOST_DEVICE`
  // C10_DEVICE / C10_HOST_DEVICE are not defined here; presumably they come
  // from <c10/macros/Macros.h>.
  #if defined(__HIPCC__)
  #define __MATH_FUNCTIONS_DECL__ inline C10_DEVICE
  #elif defined(__CUDACC_RTC__)
  #define __MATH_FUNCTIONS_DECL__ C10_HOST_DEVICE
  #else /* neither __HIPCC__ nor __CUDACC_RTC__ */
  #define __MATH_FUNCTIONS_DECL__ inline C10_HOST_DEVICE
  #endif /* __HIPCC__ / __CUDACC_RTC__ */
  18. namespace c10::cuda::compat {
  19. __MATH_FUNCTIONS_DECL__ float abs(float x) {
  20. return ::fabsf(x);
  21. }
  22. __MATH_FUNCTIONS_DECL__ double abs(double x) {
  23. return ::fabs(x);
  24. }
  25. __MATH_FUNCTIONS_DECL__ float exp(float x) {
  26. return ::expf(x);
  27. }
  28. __MATH_FUNCTIONS_DECL__ double exp(double x) {
  29. return ::exp(x);
  30. }
  31. __MATH_FUNCTIONS_DECL__ float ceil(float x) {
  32. return ::ceilf(x);
  33. }
  34. __MATH_FUNCTIONS_DECL__ double ceil(double x) {
  35. return ::ceil(x);
  36. }
  37. __MATH_FUNCTIONS_DECL__ float copysign(float x, float y) {
  38. #if defined(__CUDA_ARCH__) || defined(__HIPCC__)
  39. return ::copysignf(x, y);
  40. #else
  41. // std::copysign gets ICE/Segfaults with gcc 7.5/8 on arm64
  42. // (e.g. Jetson), see PyTorch PR #51834
  43. // This host function needs to be here for the compiler but is never used
  44. TORCH_INTERNAL_ASSERT(
  45. false, "CUDAMathCompat copysign should not run on the CPU");
  46. #endif
  47. }
  48. __MATH_FUNCTIONS_DECL__ double copysign(double x, double y) {
  49. #if defined(__CUDA_ARCH__) || defined(__HIPCC__)
  50. return ::copysign(x, y);
  51. #else
  52. // see above
  53. TORCH_INTERNAL_ASSERT(
  54. false, "CUDAMathCompat copysign should not run on the CPU");
  55. #endif
  56. }
  57. __MATH_FUNCTIONS_DECL__ float floor(float x) {
  58. return ::floorf(x);
  59. }
  60. __MATH_FUNCTIONS_DECL__ double floor(double x) {
  61. return ::floor(x);
  62. }
  63. __MATH_FUNCTIONS_DECL__ float log(float x) {
  64. return ::logf(x);
  65. }
  66. __MATH_FUNCTIONS_DECL__ double log(double x) {
  67. return ::log(x);
  68. }
  69. __MATH_FUNCTIONS_DECL__ float log1p(float x) {
  70. return ::log1pf(x);
  71. }
  72. __MATH_FUNCTIONS_DECL__ double log1p(double x) {
  73. return ::log1p(x);
  74. }
  75. __MATH_FUNCTIONS_DECL__ float max(float x, float y) {
  76. return ::fmaxf(x, y);
  77. }
  78. __MATH_FUNCTIONS_DECL__ double max(double x, double y) {
  79. return ::fmax(x, y);
  80. }
  81. __MATH_FUNCTIONS_DECL__ float min(float x, float y) {
  82. return ::fminf(x, y);
  83. }
  84. __MATH_FUNCTIONS_DECL__ double min(double x, double y) {
  85. return ::fmin(x, y);
  86. }
  87. __MATH_FUNCTIONS_DECL__ float pow(float x, float y) {
  88. return ::powf(x, y);
  89. }
  90. __MATH_FUNCTIONS_DECL__ double pow(double x, double y) {
  91. return ::pow(x, y);
  92. }
  93. __MATH_FUNCTIONS_DECL__ void sincos(float x, float* sptr, float* cptr) {
  94. return ::sincosf(x, sptr, cptr);
  95. }
  96. __MATH_FUNCTIONS_DECL__ void sincos(double x, double* sptr, double* cptr) {
  97. return ::sincos(x, sptr, cptr);
  98. }
  99. __MATH_FUNCTIONS_DECL__ float sqrt(float x) {
  100. return ::sqrtf(x);
  101. }
  102. __MATH_FUNCTIONS_DECL__ double sqrt(double x) {
  103. return ::sqrt(x);
  104. }
  105. __MATH_FUNCTIONS_DECL__ float rsqrt(float x) {
  106. return ::rsqrtf(x);
  107. }
  108. __MATH_FUNCTIONS_DECL__ double rsqrt(double x) {
  109. return ::rsqrt(x);
  110. }
  111. __MATH_FUNCTIONS_DECL__ float tan(float x) {
  112. return ::tanf(x);
  113. }
  114. __MATH_FUNCTIONS_DECL__ double tan(double x) {
  115. return ::tan(x);
  116. }
  117. __MATH_FUNCTIONS_DECL__ float tanh(float x) {
  118. return ::tanhf(x);
  119. }
  120. __MATH_FUNCTIONS_DECL__ double tanh(double x) {
  121. return ::tanh(x);
  122. }
  123. __MATH_FUNCTIONS_DECL__ float normcdf(float x) {
  124. return ::normcdff(x);
  125. }
  126. __MATH_FUNCTIONS_DECL__ double normcdf(double x) {
  127. return ::normcdf(x);
  128. }
  129. } // namespace c10::cuda::compat
  130. #endif
  131. #else
  132. #error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined."
  133. #endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION)