internals.h 32 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804
  1. #if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION)
  2. /*
  3. pybind11/detail/internals.h: Internal data structure and related functions
  4. Copyright (c) 2017 Wenzel Jakob <wenzel.jakob@epfl.ch>
  5. All rights reserved. Use of this source code is governed by a
  6. BSD-style license that can be found in the LICENSE file.
  7. */
  8. #pragma once
  9. #include <pybind11/conduit/pybind11_platform_abi_id.h>
  10. #include <pybind11/gil_simple.h>
  11. #include <pybind11/pytypes.h>
  12. #include <pybind11/trampoline_self_life_support.h>
  13. #include "common.h"
  14. #include "struct_smart_holder.h"
  15. #include <atomic>
  16. #include <cstdint>
  17. #include <exception>
  18. #include <limits>
  19. #include <mutex>
  20. #include <thread>
  /// ABI version of the `internals` and `type_info` structures. It is tracked
  /// independently of the main library version.
  ///
  /// Parts of the code choose their ABI depending on this number, which makes it
  /// possible to "pre-implement" ABI-breaking changes. After the default value
  /// has been raised, the conditional logic for older versions can be removed.
  /// Users that do not need to maintain ABI compatibility may raise the value
  /// themselves to benefit from functionality/efficiency improvements that
  /// depend on the newer ABI.
  ///
  /// WARNING: pybind11 may not be tested as thoroughly with a manually raised
  /// (non-default) ABI version, and further ABI-incompatible changes may still
  /// be made before that version officially becomes the default.
  #if !defined(PYBIND11_INTERNALS_VERSION)
  #    define PYBIND11_INTERNALS_VERSION 11
  #endif

  // Anything older than version 11 is rejected at compile time.
  #if PYBIND11_INTERNALS_VERSION < 11
  #    error "PYBIND11_INTERNALS_VERSION 11 is the minimum for all platforms for pybind11v3."
  #endif
  41. PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
  /// Signature of an exception translator callback: it receives the in-flight exception.
  using ExceptionTranslator = void (*)(std::exception_ptr);

  // Per-thread state is kept via Python's Thread Specific Storage (TSS) API; the older Thread
  // Local Storage (TLS) API is deprecated as of Python 3.7.
  // The key is held by value as a `Py_tss_t` to avoid an unnecessary allocation, which is
  // possible because `Py_LIMITED_API` cannot be used anyway.
  #define PYBIND11_TLS_KEY_REF Py_tss_t &

  // PYBIND11_TLS_KEY_INIT(var) declares `Py_tss_t var = Py_tss_NEEDS_INIT;`. For clang and GCC
  // the declaration is wrapped in pragmas that silence -Wmissing-field-initializers.
  #if defined(__clang__)
  #    define PYBIND11_TLS_KEY_INIT(var) \
          _Pragma("clang diagnostic push") /**/ \
          _Pragma("clang diagnostic ignored \"-Wmissing-field-initializers\"") /**/ \
          Py_tss_t var \
              = Py_tss_NEEDS_INIT; \
          _Pragma("clang diagnostic pop")
  #elif defined(__GNUC__) && !defined(__INTEL_COMPILER)
  #    define PYBIND11_TLS_KEY_INIT(var) \
          _Pragma("GCC diagnostic push") /**/ \
          _Pragma("GCC diagnostic ignored \"-Wmissing-field-initializers\"") /**/ \
          Py_tss_t var \
              = Py_tss_NEEDS_INIT; \
          _Pragma("GCC diagnostic pop")
  #else
  #    define PYBIND11_TLS_KEY_INIT(var) Py_tss_t var = Py_tss_NEEDS_INIT;
  #endif

  // Thin wrappers over the PyThread_tss_* functions. CREATE evaluates to true on success.
  #define PYBIND11_TLS_KEY_CREATE(var) (PyThread_tss_create(&(var)) == 0)
  #define PYBIND11_TLS_GET_VALUE(key) PyThread_tss_get(&(key))
  #define PYBIND11_TLS_REPLACE_VALUE(key, value) PyThread_tss_set(&(key), (value))
  #define PYBIND11_TLS_DELETE_VALUE(key) PyThread_tss_set(&(key), nullptr)
  #define PYBIND11_TLS_FREE(key) PyThread_tss_delete(&(key))
  70. /// A smart-pointer-like wrapper around a thread-specific value. get/set of the pointer applies to
  71. /// the current thread only.
  72. template <typename T>
  73. class thread_specific_storage {
  74. public:
  75. thread_specific_storage() {
  76. // NOLINTNEXTLINE(bugprone-assignment-in-if-condition)
  77. if (!PYBIND11_TLS_KEY_CREATE(key_)) {
  78. pybind11_fail(
  79. "thread_specific_storage constructor: could not initialize the TSS key!");
  80. }
  81. }
  82. ~thread_specific_storage() {
  83. // This destructor is often called *after* Py_Finalize(). That *SHOULD BE* fine on most
  84. // platforms. The following details what happens when PyThread_tss_free is called in
  85. // CPython. PYBIND11_TLS_FREE is PyThread_tss_free on python 3.7+. On older python, it does
  86. // nothing. PyThread_tss_free calls PyThread_tss_delete and PyMem_RawFree.
  87. // PyThread_tss_delete just calls TlsFree (on Windows) or pthread_key_delete (on *NIX).
  88. // Neither of those have anything to do with CPython internals. PyMem_RawFree *requires*
  89. // that the `key` be allocated with the CPython allocator (as it is by
  90. // PyThread_tss_create).
  91. // However, in GraalPy (as of v24.2 or older), TSS is implemented by Java and this call
  92. // requires a living Python interpreter.
  93. #ifdef GRAALVM_PYTHON
  94. if (!Py_IsInitialized() || _Py_IsFinalizing()) {
  95. return;
  96. }
  97. #endif
  98. PYBIND11_TLS_FREE(key_);
  99. }
  100. thread_specific_storage(thread_specific_storage const &) = delete;
  101. thread_specific_storage(thread_specific_storage &&) = delete;
  102. thread_specific_storage &operator=(thread_specific_storage const &) = delete;
  103. thread_specific_storage &operator=(thread_specific_storage &&) = delete;
  104. T *get() const { return reinterpret_cast<T *>(PYBIND11_TLS_GET_VALUE(key_)); }
  105. T &operator*() const { return *get(); }
  106. explicit operator T *() const { return get(); }
  107. explicit operator bool() const { return get() != nullptr; }
  108. void set(T *val) { PYBIND11_TLS_REPLACE_VALUE(key_, reinterpret_cast<void *>(val)); }
  109. void reset(T *p = nullptr) { set(p); }
  110. thread_specific_storage &operator=(T *pval) {
  111. set(pval);
  112. return *this;
  113. }
  114. private:
  115. PYBIND11_TLS_KEY_INIT(mutable key_)
  116. };
  117. PYBIND11_NAMESPACE_BEGIN(detail)
  118. // This does NOT actually exist as a module.
  119. #define PYBIND11_DUMMY_MODULE_NAME "pybind11_builtins"
  120. // Forward declarations
  121. inline PyTypeObject *make_static_property_type();
  122. inline PyTypeObject *make_default_metaclass();
  123. inline PyObject *make_object_base_type(PyTypeObject *metaclass);
  124. inline void translate_exception(std::exception_ptr p);
  // By default Python loads extension modules via dlopen with RTLD_LOCAL. Under libc++ (and
  // possibly other STLs) `typeid(A)` obtained in one module therefore does not compare equal to
  // `typeid(A)` from another module, even if `A` is the very same non-hidden-visibility type
  // (e.g. from a common include). libstdc++ does not have the problem because its equality and
  // type_index hash are based on the type name. So: under libc++ use our own name-based hash
  // and equality, otherwise rely on the standard ones.
  #if defined(_LIBCPP_VERSION)
  /// True if both type_info objects carry the same type name (pointer or string equality).
  inline bool same_type(const std::type_info &lhs, const std::type_info &rhs) {
      const char *const lhs_name = lhs.name();
      const char *const rhs_name = rhs.name();
      return lhs_name == rhs_name || std::strcmp(lhs_name, rhs_name) == 0;
  }

  /// Hashes the characters of the type name (multiply by 33, xor with each byte; seed 5381).
  struct type_hash {
      size_t operator()(const std::type_index &t) const {
          size_t hash = 5381;
          for (const char *p = t.name(); *p != '\0'; ++p) {
              hash = (hash * 33) ^ static_cast<unsigned char>(*p);
          }
          return hash;
      }
  };

  /// Name-based equality to go with `type_hash`.
  struct type_equal_to {
      bool operator()(const std::type_index &lhs, const std::type_index &rhs) const {
          const char *const lhs_name = lhs.name();
          const char *const rhs_name = rhs.name();
          return lhs_name == rhs_name || std::strcmp(lhs_name, rhs_name) == 0;
      }
  };
  #else
  /// True if both type_info objects compare equal.
  inline bool same_type(const std::type_info &lhs, const std::type_info &rhs) { return lhs == rhs; }
  using type_hash = std::hash<std::type_index>;
  using type_equal_to = std::equal_to<std::type_index>;
  #endif

  /// Hash map keyed by C++ type, using the hash/equality selected above.
  template <typename value_type>
  using type_map = std::unordered_map<std::type_index, value_type, type_hash, type_equal_to>;
  157. struct override_hash {
  158. inline size_t operator()(const std::pair<const PyObject *, const char *> &v) const {
  159. size_t value = std::hash<const void *>()(v.first);
  160. value ^= std::hash<const void *>()(v.second) + 0x9e3779b9 + (value << 6) + (value >> 2);
  161. return value;
  162. }
  163. };
  164. using instance_map = std::unordered_multimap<const void *, instance *>;
  165. #ifdef Py_GIL_DISABLED
  166. // Wrapper around PyMutex to provide BasicLockable semantics
  167. class pymutex {
  168. PyMutex mutex;
  169. public:
  170. pymutex() : mutex({}) {}
  171. void lock() { PyMutex_Lock(&mutex); }
  172. void unlock() { PyMutex_Unlock(&mutex); }
  173. };
  174. // Instance map shards are used to reduce mutex contention in free-threaded Python.
  175. struct instance_map_shard {
  176. instance_map registered_instances;
  177. pymutex mutex;
  178. // alignas(64) would be better, but causes compile errors in macOS before 10.14 (see #5200)
  179. char padding[64 - (sizeof(instance_map) + sizeof(pymutex)) % 64];
  180. };
  181. static_assert(sizeof(instance_map_shard) % 64 == 0,
  182. "instance_map_shard size is not a multiple of 64 bytes");
  /// Rounds `x` up to the next power of two; a power of two is returned unchanged.
  /// The arithmetic wraps: 0 and values above 2^63 produce 0.
  inline uint64_t round_up_to_next_pow2(uint64_t x) {
      // See https://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2
      // Copy the highest set bit of (x - 1) into every lower position, then add one.
      uint64_t smeared = x - 1;
      for (unsigned shift = 1; shift < 64; shift *= 2) {
          smeared |= (smeared >> shift);
      }
      return smeared + 1;
  }
  196. #endif
  197. class loader_life_support;
  198. /// Internal data structure used to track registered instances and types.
  199. /// Whenever binary incompatible changes are made to this structure,
  200. /// `PYBIND11_INTERNALS_VERSION` must be incremented.
  201. struct internals {
  202. #ifdef Py_GIL_DISABLED
  203. pymutex mutex;
  204. pymutex exception_translator_mutex;
  205. #endif
  206. // std::type_index -> pybind11's type information
  207. type_map<type_info *> registered_types_cpp;
  208. // PyTypeObject* -> base type_info(s)
  209. std::unordered_map<PyTypeObject *, std::vector<type_info *>> registered_types_py;
  210. #ifdef Py_GIL_DISABLED
  211. std::unique_ptr<instance_map_shard[]> instance_shards; // void * -> instance*
  212. size_t instance_shards_mask = 0;
  213. #else
  214. instance_map registered_instances; // void * -> instance*
  215. #endif
  216. std::unordered_set<std::pair<const PyObject *, const char *>, override_hash>
  217. inactive_override_cache;
  218. type_map<std::vector<bool (*)(PyObject *, void *&)>> direct_conversions;
  219. std::unordered_map<const PyObject *, std::vector<PyObject *>> patients;
  220. std::forward_list<ExceptionTranslator> registered_exception_translators;
  221. std::unordered_map<std::string, void *> shared_data; // Custom data to be shared across
  222. // extensions
  223. std::forward_list<std::string> static_strings; // Stores the std::strings backing
  224. // detail::c_str()
  225. PyTypeObject *static_property_type = nullptr;
  226. PyTypeObject *default_metaclass = nullptr;
  227. PyObject *instance_base = nullptr;
  228. // Unused if PYBIND11_SIMPLE_GIL_MANAGEMENT is defined:
  229. thread_specific_storage<PyThreadState> tstate;
  230. thread_specific_storage<loader_life_support> loader_life_support_tls;
  231. // Unused if PYBIND11_SIMPLE_GIL_MANAGEMENT is defined:
  232. PyInterpreterState *istate = nullptr;
  233. type_map<PyObject *> native_enum_type_map;
  234. internals()
  235. : static_property_type(make_static_property_type()),
  236. default_metaclass(make_default_metaclass()) {
  237. PyThreadState *cur_tstate = PyThreadState_Get();
  238. tstate = cur_tstate;
  239. istate = cur_tstate->interp;
  240. registered_exception_translators.push_front(&translate_exception);
  241. #ifdef Py_GIL_DISABLED
  242. // Scale proportional to the number of cores. 2x is a heuristic to reduce contention.
  243. // Make sure the number isn't unreasonable by limiting it to 16 bits (65K)
  244. auto num_shards = static_cast<std::uint16_t>(
  245. std::min<std::size_t>(round_up_to_next_pow2(2 * std::thread::hardware_concurrency()),
  246. std::numeric_limits<std::uint16_t>::max()));
  247. if (num_shards == 0) {
  248. num_shards = 1;
  249. }
  250. instance_shards.reset(new instance_map_shard[num_shards]);
  251. instance_shards_mask = num_shards - 1;
  252. #endif
  253. }
  254. internals(const internals &other) = delete;
  255. internals(internals &&other) = delete;
  256. internals &operator=(const internals &other) = delete;
  257. internals &operator=(internals &&other) = delete;
  258. ~internals() = default;
  259. };
  260. // the internals struct (above) is shared between all the modules. local_internals are only
  261. // for a single module. Any changes made to internals may require an update to
  262. // PYBIND11_INTERNALS_VERSION, breaking backwards compatibility. local_internals is, by design,
  263. // restricted to a single module. Whether a module has local internals or not should not
  264. // impact any other modules, because the only things accessing the local internals is the
  265. // module that contains them.
  266. struct local_internals {
  267. type_map<type_info *> registered_types_cpp;
  268. std::forward_list<ExceptionTranslator> registered_exception_translators;
  269. PyTypeObject *function_record_py_type = nullptr;
  270. };
  /// Holder kind, as recorded in `type_info::holder_enum_v`.
  enum class holder_enum_t : uint8_t {
      undefined = 0,
      std_unique_ptr = 1, // Default, lacking interop with std::shared_ptr.
      std_shared_ptr = 2, // Lacking interop with std::unique_ptr.
      smart_holder = 3,   // Full std::unique_ptr / std::shared_ptr interop.
      custom_holder = 4,
  };
  278. /// Additional type information which does not fit into the PyTypeObject.
  279. /// Changes to this struct also require bumping `PYBIND11_INTERNALS_VERSION`.
  280. struct type_info {
  281. PyTypeObject *type;
  282. const std::type_info *cpptype;
  283. size_t type_size, type_align, holder_size_in_ptrs;
  284. void *(*operator_new)(size_t);
  285. void (*init_instance)(instance *, const void *);
  286. void (*dealloc)(value_and_holder &v_h);
  287. // Cross-DSO-safe function pointers, to sidestep cross-DSO RTTI issues
  288. // on platforms like macOS (see PR #5728 for details):
  289. memory::get_guarded_delete_fn get_memory_guarded_delete = memory::get_guarded_delete;
  290. get_trampoline_self_life_support_fn get_trampoline_self_life_support = nullptr;
  291. std::vector<PyObject *(*) (PyObject *, PyTypeObject *)> implicit_conversions;
  292. std::vector<std::pair<const std::type_info *, void *(*) (void *)>> implicit_casts;
  293. std::vector<bool (*)(PyObject *, void *&)> *direct_conversions;
  294. buffer_info *(*get_buffer)(PyObject *, void *) = nullptr;
  295. void *get_buffer_data = nullptr;
  296. void *(*module_local_load)(PyObject *, const type_info *) = nullptr;
  297. holder_enum_t holder_enum_v = holder_enum_t::undefined;
  298. /* A simple type never occurs as a (direct or indirect) parent
  299. * of a class that makes use of multiple inheritance.
  300. * A type can be simple even if it has non-simple ancestors as long as it has no descendants.
  301. */
  302. bool simple_type : 1;
  303. /* True if there is no multiple inheritance in this type's inheritance tree */
  304. bool simple_ancestors : 1;
  305. /* true if this is a type registered with py::module_local */
  306. bool module_local : 1;
  307. };
  // String keys built from a fixed prefix, the internals version, the compiler-type and
  // platform ABI identifiers, and a closing "__". PYBIND11_INTERNALS_ID names the shared
  // internals; PYBIND11_MODULE_LOCAL_ID is the prefix of the per-module key.
  #define PYBIND11_INTERNALS_ID \
      "__pybind11_internals_v" PYBIND11_TOSTRING(PYBIND11_INTERNALS_VERSION) \
          PYBIND11_COMPILER_TYPE_LEADING_UNDERSCORE PYBIND11_PLATFORM_ABI_ID "__"

  #define PYBIND11_MODULE_LOCAL_ID \
      "__pybind11_module_local_v" PYBIND11_TOSTRING(PYBIND11_INTERNALS_VERSION) \
          PYBIND11_COMPILER_TYPE_LEADING_UNDERSCORE PYBIND11_PLATFORM_ABI_ID "__"
  314. inline PyThreadState *get_thread_state_unchecked() {
  315. #if defined(PYPY_VERSION) || defined(GRAALVM_PYTHON)
  316. return PyThreadState_GET();
  317. #elif PY_VERSION_HEX < 0x030D0000
  318. return _PyThreadState_UncheckedGet();
  319. #else
  320. return PyThreadState_GetUnchecked();
  321. #endif
  322. }
  /// Accessor for the counter that tells whether multiple subinterpreters are, or ever have
  /// been, active. The counter must never decrease while any interpreter may be running in any
  /// thread!
  inline std::atomic<int> &get_num_interpreters_seen() {
      static std::atomic<int> seen_count{0};
      return seen_count;
  }
  329. template <class T,
  330. enable_if_t<std::is_same<std::nested_exception, remove_cvref_t<T>>::value, int> = 0>
  331. bool handle_nested_exception(const T &exc, const std::exception_ptr &p) {
  332. std::exception_ptr nested = exc.nested_ptr();
  333. if (nested != nullptr && nested != p) {
  334. translate_exception(nested);
  335. return true;
  336. }
  337. return false;
  338. }
  339. template <class T,
  340. enable_if_t<!std::is_same<std::nested_exception, remove_cvref_t<T>>::value, int> = 0>
  341. bool handle_nested_exception(const T &exc, const std::exception_ptr &p) {
  342. if (const auto *nep = dynamic_cast<const std::nested_exception *>(std::addressof(exc))) {
  343. return handle_nested_exception(*nep, p);
  344. }
  345. return false;
  346. }
  347. inline bool raise_err(PyObject *exc_type, const char *msg) {
  348. if (PyErr_Occurred()) {
  349. raise_from(exc_type, msg);
  350. return true;
  351. }
  352. set_error(exc_type, msg);
  353. return false;
  354. }
  355. inline void translate_exception(std::exception_ptr p) {
  356. if (!p) {
  357. return;
  358. }
  359. try {
  360. std::rethrow_exception(p);
  361. } catch (error_already_set &e) {
  362. handle_nested_exception(e, p);
  363. e.restore();
  364. return;
  365. } catch (const builtin_exception &e) {
  366. // Could not use template since it's an abstract class.
  367. if (const auto *nep = dynamic_cast<const std::nested_exception *>(std::addressof(e))) {
  368. handle_nested_exception(*nep, p);
  369. }
  370. e.set_error();
  371. return;
  372. } catch (const std::bad_alloc &e) {
  373. handle_nested_exception(e, p);
  374. raise_err(PyExc_MemoryError, e.what());
  375. return;
  376. } catch (const std::domain_error &e) {
  377. handle_nested_exception(e, p);
  378. raise_err(PyExc_ValueError, e.what());
  379. return;
  380. } catch (const std::invalid_argument &e) {
  381. handle_nested_exception(e, p);
  382. raise_err(PyExc_ValueError, e.what());
  383. return;
  384. } catch (const std::length_error &e) {
  385. handle_nested_exception(e, p);
  386. raise_err(PyExc_ValueError, e.what());
  387. return;
  388. } catch (const std::out_of_range &e) {
  389. handle_nested_exception(e, p);
  390. raise_err(PyExc_IndexError, e.what());
  391. return;
  392. } catch (const std::range_error &e) {
  393. handle_nested_exception(e, p);
  394. raise_err(PyExc_ValueError, e.what());
  395. return;
  396. } catch (const std::overflow_error &e) {
  397. handle_nested_exception(e, p);
  398. raise_err(PyExc_OverflowError, e.what());
  399. return;
  400. } catch (const std::exception &e) {
  401. handle_nested_exception(e, p);
  402. raise_err(PyExc_RuntimeError, e.what());
  403. return;
  404. } catch (const std::nested_exception &e) {
  405. handle_nested_exception(e, p);
  406. raise_err(PyExc_RuntimeError, "Caught an unknown nested exception!");
  407. return;
  408. } catch (...) {
  409. raise_err(PyExc_RuntimeError, "Caught an unknown exception!");
  410. return;
  411. }
  412. }
  #if !defined(__GLIBCXX__)
  /// Translator handling only this module's `error_already_set` and `builtin_exception`.
  /// A null `p` is ignored; any other exception type propagates out of this function.
  inline void translate_local_exception(std::exception_ptr p) {
      if (!p) {
          return;
      }
      try {
          std::rethrow_exception(p);
      } catch (error_already_set &e) {
          e.restore();
      } catch (const builtin_exception &e) {
          e.set_error();
      }
  }
  #endif
  428. inline object get_python_state_dict() {
  429. object state_dict;
  430. #if defined(PYPY_VERSION) || defined(GRAALVM_PYTHON)
  431. state_dict = reinterpret_borrow<object>(PyEval_GetBuiltins());
  432. #else
  433. # if PY_VERSION_HEX < 0x03090000
  434. PyInterpreterState *istate = _PyInterpreterState_Get();
  435. # else
  436. PyInterpreterState *istate = PyInterpreterState_Get();
  437. # endif
  438. if (istate) {
  439. state_dict = reinterpret_borrow<object>(PyInterpreterState_GetDict(istate));
  440. }
  441. #endif
  442. if (!state_dict) {
  443. raise_from(PyExc_SystemError, "pybind11::detail::get_python_state_dict() FAILED");
  444. throw error_already_set();
  445. }
  446. return state_dict;
  447. }
  448. template <typename InternalsType>
  449. class internals_pp_manager {
  450. public:
  451. using on_fetch_function = void(InternalsType *);
  452. internals_pp_manager(char const *id, on_fetch_function *on_fetch)
  453. : holder_id_(id), on_fetch_(on_fetch) {}
  454. /// Get the current pointer-to-pointer, allocating it if it does not already exist. May
  455. /// acquire the GIL. Will never return nullptr.
  456. std::unique_ptr<InternalsType> *get_pp() {
  457. #ifdef PYBIND11_HAS_SUBINTERPRETER_SUPPORT
  458. if (get_num_interpreters_seen() > 1) {
  459. // Whenever the interpreter changes on the current thread we need to invalidate the
  460. // internals_pp so that it can be pulled from the interpreter's state dict. That is
  461. // slow, so we use the current PyThreadState to check if it is necessary.
  462. auto *tstate = get_thread_state_unchecked();
  463. if (!tstate || tstate->interp != last_istate_.get()) {
  464. gil_scoped_acquire_simple gil;
  465. if (!tstate) {
  466. tstate = get_thread_state_unchecked();
  467. }
  468. last_istate_ = tstate->interp;
  469. internals_tls_p_ = get_or_create_pp_in_state_dict();
  470. }
  471. return internals_tls_p_.get();
  472. }
  473. #endif
  474. if (!internals_singleton_pp_) {
  475. gil_scoped_acquire_simple gil;
  476. internals_singleton_pp_ = get_or_create_pp_in_state_dict();
  477. }
  478. return internals_singleton_pp_;
  479. }
  480. /// Drop all the references we're currently holding.
  481. void unref() {
  482. #ifdef PYBIND11_HAS_SUBINTERPRETER_SUPPORT
  483. if (get_num_interpreters_seen() > 1) {
  484. last_istate_.reset();
  485. internals_tls_p_.reset();
  486. return;
  487. }
  488. #endif
  489. internals_singleton_pp_ = nullptr;
  490. }
  491. void destroy() {
  492. #ifdef PYBIND11_HAS_SUBINTERPRETER_SUPPORT
  493. if (get_num_interpreters_seen() > 1) {
  494. auto *tstate = get_thread_state_unchecked();
  495. // this could be called without an active interpreter, just use what was cached
  496. if (!tstate || tstate->interp == last_istate_.get()) {
  497. auto tpp = internals_tls_p_.get();
  498. if (tpp) {
  499. delete tpp;
  500. }
  501. }
  502. unref();
  503. return;
  504. }
  505. #endif
  506. delete internals_singleton_pp_;
  507. unref();
  508. }
  509. private:
  510. std::unique_ptr<InternalsType> *get_or_create_pp_in_state_dict() {
  511. error_scope err_scope;
  512. dict state_dict = get_python_state_dict();
  513. auto internals_obj
  514. = reinterpret_steal<object>(dict_getitemstringref(state_dict.ptr(), holder_id_));
  515. std::unique_ptr<InternalsType> *pp = nullptr;
  516. if (internals_obj) {
  517. void *raw_ptr = PyCapsule_GetPointer(internals_obj.ptr(), /*name=*/nullptr);
  518. if (!raw_ptr) {
  519. raise_from(PyExc_SystemError,
  520. "pybind11::detail::internals_pp_manager::get_pp_from_dict() FAILED");
  521. throw error_already_set();
  522. }
  523. pp = reinterpret_cast<std::unique_ptr<InternalsType> *>(raw_ptr);
  524. if (on_fetch_ && pp) {
  525. on_fetch_(pp->get());
  526. }
  527. } else {
  528. pp = new std::unique_ptr<InternalsType>;
  529. // NOLINTNEXTLINE(bugprone-casting-through-void)
  530. state_dict[holder_id_] = capsule(reinterpret_cast<void *>(pp));
  531. }
  532. return pp;
  533. }
  534. char const *holder_id_ = nullptr;
  535. on_fetch_function *on_fetch_ = nullptr;
  536. #ifdef PYBIND11_HAS_SUBINTERPRETER_SUPPORT
  537. thread_specific_storage<PyInterpreterState> last_istate_;
  538. thread_specific_storage<std::unique_ptr<InternalsType>> internals_tls_p_;
  539. #endif
  540. std::unique_ptr<InternalsType> *internals_singleton_pp_;
  541. };
  // When the internals were loaded through `state_dict`, our `error_already_set` and
  // `builtin_exception` may be different local classes than the ones the initial exception
  // translator was set up with, so a further translator is added for our local exception
  // classes.
  //
  // libstdc++ doesn't require this (types there are identified only by name)
  // libc++ with CPython doesn't require this (types are explicitly exported)
  // libc++ with PyPy still need it, awaiting further investigation
  #if !defined(__GLIBCXX__)
  /// Registers `translate_local_exception` at the front of the translator list of
  /// `internals_ptr`, unless it is already in the list. A null `internals_ptr` is ignored.
  inline void check_internals_local_exception_translator(internals *internals_ptr) {
      if (internals_ptr == nullptr) {
          return;
      }
      auto &translators = internals_ptr->registered_exception_translators;
      for (auto it = translators.begin(); it != translators.end(); ++it) {
          if (*it == &translate_local_exception) {
              return; // already registered
          }
      }
      translators.push_front(&translate_local_exception);
  }
  #endif
  561. inline internals_pp_manager<internals> &get_internals_pp_manager() {
  562. #if defined(__GLIBCXX__)
  563. # define ON_FETCH_FN nullptr
  564. #else
  565. # define ON_FETCH_FN &check_internals_local_exception_translator
  566. #endif
  567. static internals_pp_manager<internals> internals_pp_manager(PYBIND11_INTERNALS_ID,
  568. ON_FETCH_FN);
  569. #undef ON_FETCH_FN
  570. return internals_pp_manager;
  571. }
  572. /// Return a reference to the current `internals` data
  573. PYBIND11_NOINLINE internals &get_internals() {
  574. auto &ppmgr = get_internals_pp_manager();
  575. auto &internals_ptr = *ppmgr.get_pp();
  576. if (!internals_ptr) {
  577. // Slow path, something needs fetched from the state dict or created
  578. gil_scoped_acquire_simple gil;
  579. error_scope err_scope;
  580. internals_ptr.reset(new internals());
  581. if (!internals_ptr->instance_base) {
  582. // This calls get_internals, so cannot be called from within the internals constructor
  583. // called above because internals_ptr must be set before get_internals is called again
  584. internals_ptr->instance_base = make_object_base_type(internals_ptr->default_metaclass);
  585. }
  586. }
  587. return *internals_ptr;
  588. }
  589. inline internals_pp_manager<local_internals> &get_local_internals_pp_manager() {
  590. // Use the address of this static itself as part of the key, so that the value is uniquely tied
  591. // to where the module is loaded in memory
  592. static const std::string this_module_idstr
  593. = PYBIND11_MODULE_LOCAL_ID
  594. + std::to_string(reinterpret_cast<uintptr_t>(&this_module_idstr));
  595. static internals_pp_manager<local_internals> local_internals_pp_manager(
  596. this_module_idstr.c_str(), nullptr);
  597. return local_internals_pp_manager;
  598. }
  599. /// Works like `get_internals`, but for things which are locally registered.
  600. inline local_internals &get_local_internals() {
  601. auto &ppmgr = get_local_internals_pp_manager();
  602. auto &internals_ptr = *ppmgr.get_pp();
  603. if (!internals_ptr) {
  604. internals_ptr.reset(new local_internals());
  605. }
  606. return *internals_ptr;
  607. }
  608. #ifdef Py_GIL_DISABLED
  609. # define PYBIND11_LOCK_INTERNALS(internals) std::unique_lock<pymutex> lock((internals).mutex)
  610. #else
  611. # define PYBIND11_LOCK_INTERNALS(internals)
  612. #endif
  613. template <typename F>
  614. inline auto with_internals(const F &cb) -> decltype(cb(get_internals())) {
  615. auto &internals = get_internals();
  616. PYBIND11_LOCK_INTERNALS(internals);
  617. return cb(internals);
  618. }
  619. template <typename F>
  620. inline auto with_exception_translators(const F &cb)
  621. -> decltype(cb(get_internals().registered_exception_translators,
  622. get_local_internals().registered_exception_translators)) {
  623. auto &internals = get_internals();
  624. #ifdef Py_GIL_DISABLED
  625. std::unique_lock<pymutex> lock((internals).exception_translator_mutex);
  626. #endif
  627. auto &local_internals = get_local_internals();
  628. return cb(internals.registered_exception_translators,
  629. local_internals.registered_exception_translators);
  630. }
  /// 64-bit bit mixer: David Stafford's variant 13 of the MurmurHash3 finalizer, popularized
  /// by the SplitMix PRNG.
  /// https://zimbry.blogspot.com/2011/09/better-bit-mixing-improving-on.html
  inline std::uint64_t mix64(std::uint64_t z) {
      constexpr std::uint64_t first_multiplier = 0xbf58476d1ce4e5b9;
      constexpr std::uint64_t second_multiplier = 0x94d049bb133111eb;
      z ^= z >> 30;
      z *= first_multiplier;
      z ^= z >> 27;
      z *= second_multiplier;
      z ^= z >> 31;
      return z;
  }
  639. template <typename F>
  640. inline auto with_instance_map(const void *ptr, const F &cb)
  641. -> decltype(cb(std::declval<instance_map &>())) {
  642. auto &internals = get_internals();
  643. #ifdef Py_GIL_DISABLED
  644. // Hash address to compute shard, but ignore low bits. We'd like allocations
  645. // from the same thread/core to map to the same shard and allocations from
  646. // other threads/cores to map to other shards. Using the high bits is a good
  647. // heuristic because memory allocators often have a per-thread
  648. // arena/superblock/segment from which smaller allocations are served.
  649. auto addr = reinterpret_cast<std::uintptr_t>(ptr);
  650. auto hash = mix64(static_cast<std::uint64_t>(addr >> 20));
  651. auto idx = static_cast<size_t>(hash & internals.instance_shards_mask);
  652. auto &shard = internals.instance_shards[idx];
  653. std::unique_lock<pymutex> lock(shard.mutex);
  654. return cb(shard.registered_instances);
  655. #else
  656. (void) ptr;
  657. return cb(internals.registered_instances);
  658. #endif
  659. }
  660. // Returns the number of registered instances for testing purposes. The result may not be
  661. // consistent if other threads are registering or unregistering instances concurrently.
  662. inline size_t num_registered_instances() {
  663. auto &internals = get_internals();
  664. #ifdef Py_GIL_DISABLED
  665. size_t count = 0;
  666. for (size_t i = 0; i <= internals.instance_shards_mask; ++i) {
  667. auto &shard = internals.instance_shards[i];
  668. std::unique_lock<pymutex> lock(shard.mutex);
  669. count += shard.registered_instances.size();
  670. }
  671. return count;
  672. #else
  673. return internals.registered_instances.size();
  674. #endif
  675. }
  676. /// Constructs a std::string with the given arguments, stores it in `internals`, and returns its
  677. /// `c_str()`. Such strings objects have a long storage duration -- the internal strings are only
  678. /// cleared when the program exits or after interpreter shutdown (when embedding), and so are
  679. /// suitable for c-style strings needed by Python internals (such as PyTypeObject's tp_name).
  680. template <typename... Args>
  681. const char *c_str(Args &&...args) {
  682. // GCC 4.8 doesn't like parameter unpack within lambda capture, so use
  683. // PYBIND11_LOCK_INTERNALS.
  684. auto &internals = get_internals();
  685. PYBIND11_LOCK_INTERNALS(internals);
  686. auto &strings = internals.static_strings;
  687. strings.emplace_front(std::forward<Args>(args)...);
  688. return strings.front().c_str();
  689. }
  690. PYBIND11_NAMESPACE_END(detail)
  691. /// Returns a named pointer that is shared among all extension modules (using the same
  692. /// pybind11 version) running in the current interpreter. Names starting with underscores
  693. /// are reserved for internal usage. Returns `nullptr` if no matching entry was found.
  694. PYBIND11_NOINLINE void *get_shared_data(const std::string &name) {
  695. return detail::with_internals([&](detail::internals &internals) {
  696. auto it = internals.shared_data.find(name);
  697. return it != internals.shared_data.end() ? it->second : nullptr;
  698. });
  699. }
  700. /// Set the shared data that can be later recovered by `get_shared_data()`.
  701. PYBIND11_NOINLINE void *set_shared_data(const std::string &name, void *data) {
  702. return detail::with_internals([&](detail::internals &internals) {
  703. internals.shared_data[name] = data;
  704. return data;
  705. });
  706. }
  707. /// Returns a typed reference to a shared data entry (by using `get_shared_data()`) if
  708. /// such entry exists. Otherwise, a new object of default-constructible type `T` is
  709. /// added to the shared data under the given name and a reference to it is returned.
  710. template <typename T>
  711. T &get_or_create_shared_data(const std::string &name) {
  712. return *detail::with_internals([&](detail::internals &internals) {
  713. auto it = internals.shared_data.find(name);
  714. T *ptr = (T *) (it != internals.shared_data.end() ? it->second : nullptr);
  715. if (!ptr) {
  716. ptr = new T();
  717. internals.shared_data[name] = ptr;
  718. }
  719. return ptr;
  720. });
  721. }
  722. PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE)
  723. #else
  724. #error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined."
  725. #endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION)