// XPUDeviceProp.h
  1. #if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION)
  2. #pragma once
  3. #include <c10/xpu/XPUMacros.h>
  4. #include <sycl/sycl.hpp>
  5. namespace c10::xpu {
  // X-macro over the standard SYCL device information descriptors.
  // The argument `_` is invoked once per descriptor name, in the order listed
  // here. DeviceProp (later in this header) expands it to declare one member
  // per descriptor, typed as sycl::info::device::<name>::return_type, so the
  // order of this list is also the order of those members.
  #define AT_FORALL_XPU_DEVICE_PROPERTIES(_) \
    /* the device name of this SYCL device. */ \
    _(name) \
    \
    /* the device type associated with the device. */ \
    _(device_type) \
    \
    /* the vendor of this SYCL device. */ \
    _(vendor) \
    \
    /* a backend-defined driver version, as a std::string. */ \
    _(driver_version) \
    \
    /* the SYCL version, as a std::string of the form <major>.<minor>. */ \
    _(version) \
    \
    /* true if the SYCL device is available, false otherwise. */ \
    _(is_available) \
    \
    /* the maximum size, in bytes, of the arguments that can be passed to a \
     * kernel. */ \
    _(max_parameter_size) \
    \
    /* the number of parallel compute units available to the device. */ \
    _(max_compute_units) \
    \
    /* the maximum number of dimensions that specify the global and local \
     * work-item IDs used by the data parallel execution model. */ \
    _(max_work_item_dimensions) \
    \
    /* the maximum number of work-items permitted in a work-group executing a \
     * kernel on a single compute unit. */ \
    _(max_work_group_size) \
    \
    /* the maximum number of sub-groups in a work-group for any kernel \
     * executed on the device. */ \
    _(max_num_sub_groups) \
    \
    /* a std::vector of size_t holding the set of sub-group sizes supported by \
     * the device. */ \
    _(sub_group_sizes) \
    \
    /* the maximum configured clock frequency of this SYCL device, in MHz. */ \
    _(max_clock_frequency) \
    \
    /* the default compute device address space size, as an unsigned integer \
     * value in bits. Must be either 32 or 64. */ \
    _(address_bits) \
    \
    /* the maximum size of a memory object allocation, in bytes. */ \
    _(max_mem_alloc_size) \
    \
    /* the minimum value, in bits, of the largest supported SYCL built-in data \
     * type if this SYCL device is not of device type \
     * sycl::info::device_type::custom. */ \
    _(mem_base_addr_align) \
    \
    /* a std::vector of info::fp_config describing the half/single/double \
     * precision floating-point capability of this SYCL device. */ \
    _(half_fp_config) \
    _(single_fp_config) \
    _(double_fp_config) \
    \
    /* the size of global device memory, in bytes. */ \
    _(global_mem_size) \
    \
    /* the type of global memory cache supported. */ \
    _(global_mem_cache_type) \
    \
    /* the size of the global memory cache, in bytes. */ \
    _(global_mem_cache_size) \
    \
    /* the size of a global memory cache line, in bytes. */ \
    _(global_mem_cache_line_size) \
    \
    /* the type of local memory supported. */ \
    _(local_mem_type) \
    \
    /* the size of the local memory arena, in bytes. */ \
    _(local_mem_size) \
    \
    /* the maximum number of sub-devices that can be created when this device \
     * is partitioned. */ \
    _(partition_max_sub_devices) \
    \
    /* the resolution of the device timer, in nanoseconds. */ \
    _(profiling_timer_resolution) \
    \
    /* the preferred native vector width size for built-in scalar types that \
     * can be put into vectors. */ \
    _(preferred_vector_width_char) \
    _(preferred_vector_width_short) \
    _(preferred_vector_width_int) \
    _(preferred_vector_width_long) \
    _(preferred_vector_width_float) \
    _(preferred_vector_width_double) \
    _(preferred_vector_width_half) \
    \
    /* the native ISA vector width, defined as the number of scalar elements \
     * that can be stored in the vector. */ \
    _(native_vector_width_char) \
    _(native_vector_width_short) \
    _(native_vector_width_int) \
    _(native_vector_width_long) \
    _(native_vector_width_float) \
    _(native_vector_width_double) \
    _(native_vector_width_half)
  // X-macro over Intel extension device properties. `_` is invoked once per
  // entry with three arguments. The DeviceProp declaration in this header only
  // uses the first one (the descriptor name under
  // sycl::ext::intel::info::device, which is also the member name).
  // NOTE(review): the second and third arguments look like an aspect name and
  // a fallback value consumed by code outside this header -- confirm there.
  #define AT_FORALL_XPU_EXT_DEVICE_PROPERTIES(_) \
    /* the number of EUs associated with the Intel GPU. */ \
    _(gpu_eu_count, gpu_eu_count, 512) \
    \
    /* the number of EUs in a subslice. */ \
    _(gpu_eu_count_per_subslice, gpu_eu_count_per_subslice, 8) \
    \
    /* the SIMD width of an EU of the GPU. */ \
    _(gpu_eu_simd_width, gpu_eu_simd_width, 8) \
    \
    /* the number of hardware threads per EU of the GPU. */ \
    _(gpu_hw_threads_per_eu, gpu_hw_threads_per_eu, 8) \
    \
    /* the device identifier of the Intel GPU, also known as the product ID. */ \
    _(device_id, device_id, 0) \
    \
    /* the device descriptor for the device Universal Unique ID, 16 bytes. */ \
    _(uuid, device_info_uuid, (std::array<unsigned char, 16>{}))
  // X-macro over the device aspects tracked per device. `_` is invoked once
  // per aspect name; DeviceProp (later in this header) expands it to declare a
  // `bool has_<aspect>` member for each entry.
  #define AT_FORALL_XPU_DEVICE_ASPECT(_) \
    /* sycl::half is supported on the device. */ \
    _(fp16) \
    \
    /* double is supported on the device. */ \
    _(fp64) \
    \
    /* 64-bit atomic operations are supported on the device. */ \
    _(atomic64)
  // X-macro over experimental capability flags. `_` is invoked once per name;
  // DeviceProp (later in this header) expands it to declare a
  // `bool has_<name>` member for each entry.
  #define AT_FORALL_XPU_EXP_CL_ASPECT(_) \
    /* conversion between single-precision 32-bit floating-point values and \
     * 16-bit bfloat16 values is supported on the device. */ \
    _(bfloat16_conversions) \
    \
    /* specialized hardware to compute MMA is supported on the device. */ \
    _(subgroup_matrix_multiply_accumulate) \
    \
    /* specialized hardware to compute MMA for 32-bit floating-point is \
     * supported on the device. */ \
    _(subgroup_matrix_multiply_accumulate_tensor_float32) \
    \
    /* block read operations for efficient matrix multiplication are supported \
     * on the device. */ \
    _(subgroup_2d_block_io)
  // X-macro over experimental oneAPI device properties. `_` is invoked once
  // per descriptor name. DeviceProp (later in this header) only expands it
  // when SYCL_COMPILER_VERSION >= 20250000, declaring a member typed as
  // sycl::ext::oneapi::experimental::info::device::<name>::return_type.
  #define AT_FORALL_XPU_EXP_DEVICE_PROPERTIES(_) \
    /* the device architecture of this SYCL device. */ \
    _(architecture)
  158. #define _DEFINE_SYCL_PROP(ns, property, member) \
  159. ns::property::return_type member;
  160. #define DEFINE_DEVICE_PROP(property) \
  161. _DEFINE_SYCL_PROP(sycl::info::device, property, property)
  162. #define DEFINE_PLATFORM_PROP(property, member) \
  163. _DEFINE_SYCL_PROP(sycl::info::platform, property, member)
  164. #define DEFINE_EXT_DEVICE_PROP(property, ...) \
  165. _DEFINE_SYCL_PROP(sycl::ext::intel::info::device, property, property)
  166. #define DEFINE_DEVICE_ASPECT(member) bool has_##member;
  167. #define DEFINE_EXP_DEVICE_PROP(property) \
  168. _DEFINE_SYCL_PROP( \
  169. sycl::ext::oneapi::experimental::info::device, property, property)
  170. struct C10_XPU_API DeviceProp {
  171. AT_FORALL_XPU_DEVICE_PROPERTIES(DEFINE_DEVICE_PROP);
  172. // the platform name.
  173. DEFINE_PLATFORM_PROP(name, platform_name);
  174. AT_FORALL_XPU_EXT_DEVICE_PROPERTIES(DEFINE_EXT_DEVICE_PROP);
  175. AT_FORALL_XPU_DEVICE_ASPECT(DEFINE_DEVICE_ASPECT);
  176. AT_FORALL_XPU_EXP_CL_ASPECT(DEFINE_DEVICE_ASPECT);
  177. #if SYCL_COMPILER_VERSION >= 20250000
  178. AT_FORALL_XPU_EXP_DEVICE_PROPERTIES(DEFINE_EXP_DEVICE_PROP);
  179. #endif
  180. };
  181. #undef _DEFINE_SYCL_PROP
  182. #undef DEFINE_DEVICE_PROP
  183. #undef DEFINE_PLATFORM_PROP
  184. #undef DEFINE_EXT_DEVICE_PROP
  185. #undef DEFINE_DEVICE_ASPECT
  186. #undef DEFINE_EXP_DEVICE_PROP
  187. } // namespace c10::xpu
  188. #else
  189. #error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined."
  190. #endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION)