spawn.py 8.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244
  1. ###############################################################################
  2. # Prepares and processes the data to setup the new process environment
  3. #
  4. # author: Thomas Moreau and Olivier Grisel
  5. #
  6. # adapted from multiprocessing/spawn.py (17/02/2017)
  7. # * Improve logging data
  8. #
  9. import os
  10. import sys
  11. import runpy
  12. import textwrap
  13. import types
  14. from multiprocessing import process, util
  15. if sys.platform != "win32":
  16. WINEXE = False
  17. WINSERVICE = False
  18. else:
  19. import msvcrt
  20. from multiprocessing.reduction import duplicate
  21. WINEXE = sys.platform == "win32" and getattr(sys, "frozen", False)
  22. WINSERVICE = sys.executable.lower().endswith("pythonservice.exe")
  23. if WINSERVICE:
  24. _python_exe = os.path.join(sys.exec_prefix, "python.exe")
  25. else:
  26. _python_exe = sys.executable
  27. def get_executable():
  28. return _python_exe
  29. def _check_not_importing_main():
  30. if getattr(process.current_process(), "_inheriting", False):
  31. raise RuntimeError(
  32. textwrap.dedent(
  33. """\
  34. An attempt has been made to start a new process before the
  35. current process has finished its bootstrapping phase.
  36. This probably means that you are not using fork to start your
  37. child processes and you have forgotten to use the proper idiom
  38. in the main module:
  39. if __name__ == '__main__':
  40. freeze_support()
  41. ...
  42. The "freeze_support()" line can be omitted if the program
  43. is not going to be frozen to produce an executable."""
  44. )
  45. )
  46. def get_preparation_data(name, init_main_module=True):
  47. """Return info about parent needed by child to unpickle process object."""
  48. _check_not_importing_main()
  49. d = dict(
  50. log_to_stderr=util._log_to_stderr,
  51. authkey=bytes(process.current_process().authkey),
  52. name=name,
  53. sys_argv=sys.argv,
  54. orig_dir=process.ORIGINAL_DIR,
  55. dir=os.getcwd(),
  56. )
  57. # Send sys_path and make sure the current directory will not be changed
  58. d["sys_path"] = [p if p != "" else process.ORIGINAL_DIR for p in sys.path]
  59. # Make sure to pass the information if the multiprocessing logger is active
  60. if util._logger is not None:
  61. d["log_level"] = util._logger.getEffectiveLevel()
  62. if util._logger.handlers:
  63. h = util._logger.handlers[0]
  64. d["log_fmt"] = h.formatter._fmt
  65. # Tell the child how to communicate with the resource_tracker
  66. from .resource_tracker import _resource_tracker
  67. _resource_tracker.ensure_running()
  68. if sys.platform == "win32":
  69. d["tracker_fd"] = msvcrt.get_osfhandle(_resource_tracker._fd)
  70. else:
  71. d["tracker_fd"] = _resource_tracker._fd
  72. if os.name == "posix":
  73. # joblib/loky#242: allow loky processes to retrieve the resource
  74. # tracker of their parent in case the child processes depickles
  75. # shared_memory objects, that are still tracked by multiprocessing's
  76. # resource_tracker by default.
  77. # XXX: this is a workaround that may be error prone: in the future, it
  78. # would be better to have loky subclass multiprocessing's shared_memory
  79. # to force registration of shared_memory segments via loky's
  80. # resource_tracker.
  81. from multiprocessing.resource_tracker import (
  82. _resource_tracker as mp_resource_tracker,
  83. )
  84. # multiprocessing's resource_tracker must be running before loky
  85. # process is created (othewise the child won't be able to use it if it
  86. # is created later on)
  87. mp_resource_tracker.ensure_running()
  88. d["mp_tracker_fd"] = mp_resource_tracker._fd
  89. # Figure out whether to initialise main in the subprocess as a module
  90. # or through direct execution (or to leave it alone entirely)
  91. if init_main_module:
  92. main_module = sys.modules["__main__"]
  93. try:
  94. main_mod_name = getattr(main_module.__spec__, "name", None)
  95. except BaseException:
  96. main_mod_name = None
  97. if main_mod_name is not None:
  98. d["init_main_from_name"] = main_mod_name
  99. elif sys.platform != "win32" or (not WINEXE and not WINSERVICE):
  100. main_path = getattr(main_module, "__file__", None)
  101. if main_path is not None:
  102. if (
  103. not os.path.isabs(main_path)
  104. and process.ORIGINAL_DIR is not None
  105. ):
  106. main_path = os.path.join(process.ORIGINAL_DIR, main_path)
  107. d["init_main_from_path"] = os.path.normpath(main_path)
  108. return d
  109. #
  110. # Prepare current process
  111. #
# Previous ``__main__`` modules: the _fixup_main_* helpers append the module
# they are about to replace to this list.
old_main_modules = []
  113. def prepare(data, parent_sentinel=None):
  114. """Try to get current process ready to unpickle process object."""
  115. if "name" in data:
  116. process.current_process().name = data["name"]
  117. if "authkey" in data:
  118. process.current_process().authkey = data["authkey"]
  119. if "log_to_stderr" in data and data["log_to_stderr"]:
  120. util.log_to_stderr()
  121. if "log_level" in data:
  122. util.get_logger().setLevel(data["log_level"])
  123. if "log_fmt" in data:
  124. import logging
  125. util.get_logger().handlers[0].setFormatter(
  126. logging.Formatter(data["log_fmt"])
  127. )
  128. if "sys_path" in data:
  129. sys.path = data["sys_path"]
  130. if "sys_argv" in data:
  131. sys.argv = data["sys_argv"]
  132. if "dir" in data:
  133. os.chdir(data["dir"])
  134. if "orig_dir" in data:
  135. process.ORIGINAL_DIR = data["orig_dir"]
  136. if "mp_tracker_fd" in data:
  137. from multiprocessing.resource_tracker import (
  138. _resource_tracker as mp_resource_tracker,
  139. )
  140. mp_resource_tracker._fd = data["mp_tracker_fd"]
  141. if "tracker_fd" in data:
  142. from .resource_tracker import _resource_tracker
  143. if sys.platform == "win32":
  144. handle = data["tracker_fd"]
  145. handle = duplicate(handle, source_process=parent_sentinel)
  146. _resource_tracker._fd = msvcrt.open_osfhandle(handle, os.O_RDONLY)
  147. else:
  148. _resource_tracker._fd = data["tracker_fd"]
  149. if "init_main_from_name" in data:
  150. _fixup_main_from_name(data["init_main_from_name"])
  151. elif "init_main_from_path" in data:
  152. _fixup_main_from_path(data["init_main_from_path"])
  153. # Multiprocessing module helpers to fix up the main module in
  154. # spawned subprocesses
  155. def _fixup_main_from_name(mod_name):
  156. # __main__.py files for packages, directories, zip archives, etc, run
  157. # their "main only" code unconditionally, so we don't even try to
  158. # populate anything in __main__, nor do we make any changes to
  159. # __main__ attributes
  160. current_main = sys.modules["__main__"]
  161. if mod_name == "__main__" or mod_name.endswith(".__main__"):
  162. return
  163. # If this process was forked, __main__ may already be populated
  164. if getattr(current_main.__spec__, "name", None) == mod_name:
  165. return
  166. # Otherwise, __main__ may contain some non-main code where we need to
  167. # support unpickling it properly. We rerun it as __mp_main__ and make
  168. # the normal __main__ an alias to that
  169. old_main_modules.append(current_main)
  170. main_module = types.ModuleType("__mp_main__")
  171. main_content = runpy.run_module(
  172. mod_name, run_name="__mp_main__", alter_sys=True
  173. )
  174. main_module.__dict__.update(main_content)
  175. sys.modules["__main__"] = sys.modules["__mp_main__"] = main_module
  176. def _fixup_main_from_path(main_path):
  177. # If this process was forked, __main__ may already be populated
  178. current_main = sys.modules["__main__"]
  179. # Unfortunately, the main ipython launch script historically had no
  180. # "if __name__ == '__main__'" guard, so we work around that
  181. # by treating it like a __main__.py file
  182. # See https://github.com/ipython/ipython/issues/4698
  183. main_name = os.path.splitext(os.path.basename(main_path))[0]
  184. if main_name == "ipython":
  185. return
  186. # Otherwise, if __file__ already has the setting we expect,
  187. # there's nothing more to do
  188. if getattr(current_main, "__file__", None) == main_path:
  189. return
  190. # If the parent process has sent a path through rather than a module
  191. # name we assume it is an executable script that may contain
  192. # non-main code that needs to be executed
  193. old_main_modules.append(current_main)
  194. main_module = types.ModuleType("__mp_main__")
  195. main_content = runpy.run_path(main_path, run_name="__mp_main__")
  196. main_module.__dict__.update(main_content)
  197. sys.modules["__main__"] = sys.modules["__mp_main__"] = main_module