__init__.py 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418
  1. # -------------------------------------------------------------------------
  2. # Copyright (c) Microsoft Corporation. All rights reserved.
  3. # Licensed under the MIT License.
  4. # --------------------------------------------------------------------------
  5. """
  6. ONNX Runtime is a performance-focused scoring engine for Open Neural Network Exchange (ONNX) models.
  7. For more information on ONNX Runtime, please see `aka.ms/onnxruntime <https://aka.ms/onnxruntime/>`_
  8. or the `Github project <https://github.com/microsoft/onnxruntime/>`_.
  9. """
  10. import contextlib
  11. __version__ = "1.24.4"
  12. __author__ = "Microsoft"
# We need to do device version validation (for example, checking the CUDA version for an
# onnxruntime-training package).
# To know whether the onnxruntime package is a training package, onnxruntime.training.ortmodule
# has to be imported first, and onnxruntime.capi._pybind_state is required before importing
# onnxruntime.training.ortmodule.
# However, importing onnxruntime.capi._pybind_state already raises an exception if a required CUDA
# version is not found.
# So the exception is saved here and CUDA version validation continues, in order to post
# meaningful messages to the user.
# The saved exception is raised after device version validation.
  22. try:
  23. from onnxruntime.capi._pybind_state import (
  24. ExecutionMode, # noqa: F401
  25. ExecutionOrder, # noqa: F401
  26. GraphOptimizationLevel, # noqa: F401
  27. LoraAdapter, # noqa: F401
  28. ModelMetadata, # noqa: F401
  29. NodeArg, # noqa: F401
  30. OrtAllocatorType, # noqa: F401
  31. OrtArenaCfg, # noqa: F401
  32. OrtCompileApiFlags, # noqa: F401
  33. OrtDeviceMemoryType, # noqa: F401
  34. OrtEpAssignedNode, # noqa: F401
  35. OrtEpAssignedSubgraph, # noqa: F401
  36. OrtEpDevice, # noqa: F401
  37. OrtExecutionProviderDevicePolicy, # noqa: F401
  38. OrtExternalInitializerInfo, # noqa: F401
  39. OrtHardwareDevice, # noqa: F401
  40. OrtHardwareDeviceType, # noqa: F401
  41. OrtMemoryInfo, # noqa: F401
  42. OrtMemoryInfoDeviceType, # noqa: F401
  43. OrtMemType, # noqa: F401
  44. OrtSparseFormat, # noqa: F401
  45. OrtSyncStream, # noqa: F401
  46. RunOptions, # noqa: F401
  47. SessionIOBinding, # noqa: F401
  48. SessionOptions, # noqa: F401
  49. create_and_register_allocator, # noqa: F401
  50. create_and_register_allocator_v2, # noqa: F401
  51. disable_telemetry_events, # noqa: F401
  52. enable_telemetry_events, # noqa: F401
  53. get_all_providers, # noqa: F401
  54. get_available_providers, # noqa: F401
  55. get_build_info, # noqa: F401
  56. get_device, # noqa: F401
  57. get_ep_devices, # noqa: F401
  58. get_version_string, # noqa: F401
  59. has_collective_ops, # noqa: F401
  60. register_execution_provider_library, # noqa: F401
  61. set_default_logger_severity, # noqa: F401
  62. set_default_logger_verbosity, # noqa: F401
  63. set_global_thread_pool_sizes, # noqa: F401
  64. set_seed, # noqa: F401
  65. unregister_execution_provider_library, # noqa: F401
  66. )
  67. import_capi_exception = None
  68. except Exception as e:
  69. import_capi_exception = e
  70. from onnxruntime.capi import onnxruntime_validation
  71. if import_capi_exception:
  72. raise import_capi_exception
  73. from onnxruntime.capi.onnxruntime_inference_collection import (
  74. AdapterFormat, # noqa: F401
  75. InferenceSession, # noqa: F401
  76. IOBinding, # noqa: F401
  77. ModelCompiler, # noqa: F401
  78. OrtDevice, # noqa: F401
  79. OrtValue, # noqa: F401
  80. SparseTensor, # noqa: F401
  81. copy_tensors, # noqa: F401
  82. )
  83. # TODO: thiagofc: Temporary experimental namespace for new PyTorch front-end
  84. try: # noqa: SIM105
  85. from . import experimental # noqa: F401
  86. except ImportError:
  87. pass
  88. package_name, version, cuda_version = onnxruntime_validation.get_package_name_and_version_info()
  89. if version:
  90. __version__ = version
  91. onnxruntime_validation.check_distro_info()
  92. def _get_package_version(package_name: str):
  93. from importlib.metadata import PackageNotFoundError, version # noqa: PLC0415
  94. try:
  95. package_version = version(package_name)
  96. except PackageNotFoundError:
  97. package_version = None
  98. return package_version
  99. def _get_package_root(package_name: str, directory_name: str | None = None):
  100. from importlib.metadata import PackageNotFoundError, distribution # noqa: PLC0415
  101. root_directory_name = directory_name or package_name
  102. try:
  103. dist = distribution(package_name)
  104. files = dist.files or []
  105. for file in files:
  106. if file.name.endswith("__init__.py") and root_directory_name in file.parts:
  107. return file.locate().parent
  108. # Fallback to the first __init__.py
  109. if not directory_name:
  110. for file in files:
  111. if file.name.endswith("__init__.py"):
  112. return file.locate().parent
  113. except PackageNotFoundError:
  114. # package not found, do nothing
  115. pass
  116. return None
  117. def _extract_cuda_major_version(version_str: str) -> str:
  118. """Extract CUDA major version from version string (e.g., '12.1' -> '12').
  119. Args:
  120. version_str: CUDA version string to parse
  121. Returns:
  122. Major version as string, or "12" if parsing fails
  123. """
  124. return version_str.split(".")[0] if version_str else "12"
  125. def _get_cufft_version(cuda_major: str) -> str:
  126. """Get cufft library version based on CUDA major version.
  127. Args:
  128. cuda_major: CUDA major version as string (e.g., "12", "13")
  129. Returns:
  130. cufft version as string
  131. """
  132. # cufft versions: CUDA 12.x -> 11, CUDA 13.x -> 12
  133. return "12" if cuda_major == "13" else "11"
  134. def _get_nvidia_dll_paths(is_windows: bool, cuda: bool = True, cudnn: bool = True):
  135. # Dynamically determine CUDA major version from build info
  136. cuda_major_version = _extract_cuda_major_version(cuda_version)
  137. cufft_version = _get_cufft_version(cuda_major_version)
  138. if is_windows:
  139. # Path is relative to site-packages directory.
  140. cuda_dll_paths = [
  141. ("nvidia", "cublas", "bin", f"cublasLt64_{cuda_major_version}.dll"),
  142. ("nvidia", "cublas", "bin", f"cublas64_{cuda_major_version}.dll"),
  143. ("nvidia", "cufft", "bin", f"cufft64_{cufft_version}.dll"),
  144. ("nvidia", "cuda_runtime", "bin", f"cudart64_{cuda_major_version}.dll"),
  145. ]
  146. cudnn_dll_paths = [
  147. ("nvidia", "cudnn", "bin", "cudnn_engines_runtime_compiled64_9.dll"),
  148. ("nvidia", "cudnn", "bin", "cudnn_engines_precompiled64_9.dll"),
  149. ("nvidia", "cudnn", "bin", "cudnn_heuristic64_9.dll"),
  150. ("nvidia", "cudnn", "bin", "cudnn_ops64_9.dll"),
  151. ("nvidia", "cudnn", "bin", "cudnn_adv64_9.dll"),
  152. ("nvidia", "cudnn", "bin", "cudnn_graph64_9.dll"),
  153. ("nvidia", "cudnn", "bin", "cudnn64_9.dll"),
  154. ]
  155. else: # Linux
  156. # cublas64 depends on cublasLt64, so cublasLt64 should be loaded first.
  157. cuda_dll_paths = [
  158. ("nvidia", "cublas", "lib", f"libcublasLt.so.{cuda_major_version}"),
  159. ("nvidia", "cublas", "lib", f"libcublas.so.{cuda_major_version}"),
  160. ("nvidia", "cuda_nvrtc", "lib", f"libnvrtc.so.{cuda_major_version}"),
  161. ("nvidia", "curand", "lib", "libcurand.so.10"),
  162. ("nvidia", "cufft", "lib", f"libcufft.so.{cufft_version}"),
  163. ("nvidia", "cuda_runtime", "lib", f"libcudart.so.{cuda_major_version}"),
  164. ]
  165. # Do not load cudnn sub DLLs (they will be dynamically loaded later) to be consistent with PyTorch in Linux.
  166. cudnn_dll_paths = [
  167. ("nvidia", "cudnn", "lib", "libcudnn.so.9"),
  168. ]
  169. return (cuda_dll_paths if cuda else []) + (cudnn_dll_paths if cudnn else [])
  170. def print_debug_info():
  171. """Print information to help debugging."""
  172. import importlib.util # noqa: PLC0415
  173. import os # noqa: PLC0415
  174. import platform # noqa: PLC0415
  175. from importlib.metadata import distributions # noqa: PLC0415
  176. print(f"{package_name} version: {__version__}")
  177. if cuda_version:
  178. print(f"CUDA version used in build: {cuda_version}")
  179. print("platform:", platform.platform())
  180. print("\nPython package, version and location:")
  181. ort_packages = []
  182. for dist in distributions():
  183. package = dist.metadata["Name"]
  184. if package == "onnxruntime" or package.startswith(("onnxruntime-", "ort-")):
  185. # Exclude packages whose root directory name is not onnxruntime.
  186. location = _get_package_root(package, "onnxruntime")
  187. if location and (package not in ort_packages):
  188. ort_packages.append(package)
  189. print(f"{package}=={dist.version} at {location}")
  190. if len(ort_packages) > 1:
  191. print(
  192. "\033[33mWARNING: multiple onnxruntime packages are installed to the same location. "
  193. "Please 'pip uninstall` all above packages, then `pip install` only one of them.\033[0m"
  194. )
  195. if cuda_version:
  196. # Print version of installed packages that is related to CUDA or cuDNN DLLs.
  197. cuda_major = _extract_cuda_major_version(cuda_version)
  198. packages = [
  199. "torch",
  200. f"nvidia-cuda-runtime-cu{cuda_major}",
  201. f"nvidia-cudnn-cu{cuda_major}",
  202. f"nvidia-cublas-cu{cuda_major}",
  203. f"nvidia-cufft-cu{cuda_major}",
  204. f"nvidia-curand-cu{cuda_major}",
  205. f"nvidia-cuda-nvrtc-cu{cuda_major}",
  206. f"nvidia-nvjitlink-cu{cuda_major}",
  207. ]
  208. for package in packages:
  209. directory_name = "nvidia" if package.startswith("nvidia-") else None
  210. version = _get_package_version(package)
  211. if version:
  212. print(f"{package}=={version} at {_get_package_root(package, directory_name)}")
  213. else:
  214. print(f"{package} not installed")
  215. if platform.system() == "Windows":
  216. print(f"\nEnvironment variable:\nPATH={os.environ.get('PATH', '(unset)')}")
  217. elif platform.system() == "Linux":
  218. print(f"\nEnvironment variable:\nLD_LIBRARY_PATH={os.environ.get('LD_LIBRARY_PATH', '(unset)')}")
  219. if importlib.util.find_spec("psutil"):
  220. def is_target_dll(path: str):
  221. target_keywords = ["vcruntime140", "msvcp140"]
  222. if cuda_version:
  223. target_keywords = ["cufft", "cublas", "cudart", "nvrtc", "curand", "cudnn", *target_keywords]
  224. return any(keyword in path for keyword in target_keywords)
  225. import psutil # noqa: PLC0415
  226. p = psutil.Process(os.getpid())
  227. print("\nList of loaded DLLs:")
  228. for lib in p.memory_maps():
  229. if is_target_dll(lib.path.lower()):
  230. print(lib.path)
  231. if cuda_version:
  232. if importlib.util.find_spec("cpuinfo") and importlib.util.find_spec("py3nvml"):
  233. from .transformers.machine_info import get_device_info # noqa: PLC0415
  234. print("\nDevice information:")
  235. print(get_device_info())
  236. else:
  237. print("please `pip install py-cpuinfo py3nvml` to show device information.")
  238. else:
  239. print("please `pip install psutil` to show loaded DLLs.")
  240. def preload_dlls(cuda: bool = True, cudnn: bool = True, msvc: bool = True, directory=None):
  241. """Preload CUDA 12.x+ and cuDNN 9.x DLLs in Windows or Linux, and MSVC runtime DLLs in Windows.
  242. When the installed PyTorch is compatible (using same major version of CUDA and cuDNN),
  243. there is no need to call this function if `import torch` is done before `import onnxruntime`.
  244. Args:
  245. cuda (bool, optional): enable loading CUDA DLLs. Defaults to True.
  246. cudnn (bool, optional): enable loading cuDNN DLLs. Defaults to True.
  247. msvc (bool, optional): enable loading MSVC DLLs in Windows. Defaults to True.
  248. directory(str, optional): a directory contains CUDA or cuDNN DLLs. It can be an absolute path,
  249. or a path relative to the directory of this file.
  250. If directory is None (default value), the search order: the lib directory of compatible PyTorch in Windows,
  251. nvidia site packages, default DLL loading paths.
  252. If directory is empty string (""), the search order: nvidia site packages, default DLL loading paths.
  253. If directory is a path, the search order: the directory, default DLL loading paths.
  254. """
  255. import ctypes # noqa: PLC0415
  256. import os # noqa: PLC0415
  257. import platform # noqa: PLC0415
  258. import sys # noqa: PLC0415
  259. if platform.system() not in ["Windows", "Linux"]:
  260. return
  261. is_windows = platform.system() == "Windows"
  262. if is_windows and msvc:
  263. try:
  264. ctypes.CDLL("vcruntime140.dll")
  265. ctypes.CDLL("msvcp140.dll")
  266. if platform.machine() != "ARM64":
  267. ctypes.CDLL("vcruntime140_1.dll")
  268. except OSError:
  269. print("Microsoft Visual C++ Redistributable is not installed, this may lead to the DLL load failure.")
  270. print("It can be downloaded at https://aka.ms/vs/17/release/vc_redist.x64.exe.")
  271. # Check if CUDA version is supported (12.x or 13.x+)
  272. ort_cuda_major = None
  273. if cuda_version:
  274. try:
  275. ort_cuda_major = int(cuda_version.split(".")[0])
  276. if ort_cuda_major < 12 and (cuda or cudnn):
  277. print(
  278. f"\033[33mWARNING: {package_name} is built with CUDA {cuda_version}, which is not supported for preloading. "
  279. f"CUDA 12.x or newer is required. Call preload_dlls with cuda=False and cudnn=False.\033[0m"
  280. )
  281. return
  282. except ValueError:
  283. print(
  284. f"\033[33mWARNING: Unable to parse CUDA version '{cuda_version}'. "
  285. "Skipping DLL preloading. Call preload_dlls with cuda=False and cudnn=False.\033[0m"
  286. )
  287. return
  288. elif cuda or cudnn:
  289. # No CUDA version info available but CUDA/cuDNN preloading requested
  290. return
  291. is_cuda_cudnn_imported_by_torch = False
  292. if is_windows:
  293. torch_version = _get_package_version("torch")
  294. # Check if torch CUDA version matches onnxruntime CUDA version
  295. torch_cuda_major = None
  296. if torch_version and "+cu" in torch_version:
  297. with contextlib.suppress(ValueError):
  298. # Extract CUDA version from torch (e.g., "2.0.0+cu121" -> 12)
  299. cu_part = torch_version.split("+cu")[1]
  300. torch_cuda_major = int(cu_part[:2]) # First 2 digits are major version
  301. is_torch_cuda_compatible = (
  302. torch_cuda_major == ort_cuda_major if (torch_cuda_major and ort_cuda_major) else False
  303. )
  304. if "torch" in sys.modules:
  305. is_cuda_cudnn_imported_by_torch = is_torch_cuda_compatible
  306. if torch_cuda_major and ort_cuda_major and torch_cuda_major != ort_cuda_major:
  307. print(
  308. f"\033[33mWARNING: The installed PyTorch {torch_version} uses CUDA {torch_cuda_major}.x, "
  309. f"but {package_name} is built with CUDA {ort_cuda_major}.x. "
  310. f"Please install PyTorch for CUDA {ort_cuda_major}.x to be compatible.\033[0m"
  311. )
  312. if is_torch_cuda_compatible and directory is None:
  313. torch_root = _get_package_root("torch", "torch")
  314. if torch_root:
  315. directory = os.path.join(torch_root, "lib")
  316. base_directory = directory or ".."
  317. if not os.path.isabs(base_directory):
  318. base_directory = os.path.join(os.path.dirname(__file__), base_directory)
  319. base_directory = os.path.normpath(base_directory)
  320. if not os.path.isdir(base_directory):
  321. raise RuntimeError(f"Invalid parameter of directory={directory}. The directory does not exist!")
  322. if is_cuda_cudnn_imported_by_torch:
  323. # In Windows, PyTorch has loaded CUDA and cuDNN DLLs during `import torch`, no need to load them again.
  324. print("Skip loading CUDA and cuDNN DLLs since torch is imported.")
  325. return
  326. # Try load DLLs from nvidia site packages.
  327. dll_paths = _get_nvidia_dll_paths(is_windows, cuda, cudnn)
  328. loaded_dlls = []
  329. for relative_path in dll_paths:
  330. dll_path = (
  331. os.path.join(base_directory, relative_path[-1])
  332. if directory
  333. else os.path.join(base_directory, *relative_path)
  334. )
  335. if os.path.isfile(dll_path):
  336. try:
  337. _ = ctypes.CDLL(dll_path)
  338. loaded_dlls.append(relative_path[-1])
  339. except Exception as e:
  340. print(f"Failed to load {dll_path}: {e}")
  341. # Try load DLLs with default path settings.
  342. has_failure = False
  343. for relative_path in dll_paths:
  344. dll_filename = relative_path[-1]
  345. if dll_filename not in loaded_dlls:
  346. try:
  347. _ = ctypes.CDLL(dll_filename)
  348. except Exception as e:
  349. has_failure = True
  350. print(f"Failed to load {dll_filename}: {e}")
  351. if has_failure:
  352. print("Please follow https://onnxruntime.ai/docs/install/#cuda-and-cudnn to install CUDA and CuDNN.")