cache.py 8.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275
  1. import time
  2. import os
  3. import sys
  4. import hashlib
  5. import gc
  6. import shutil
  7. import platform
  8. import logging
  9. import warnings
  10. import pickle
  11. from pathlib import Path
  12. from typing import Dict, Any
  13. LOG = logging.getLogger(__name__)
  14. _CACHED_FILE_MINIMUM_SURVIVAL = 60 * 10 # 10 minutes
  15. """
  16. Cached files should survive at least a few minutes.
  17. """
  18. _CACHED_FILE_MAXIMUM_SURVIVAL = 60 * 60 * 24 * 30
  19. """
  20. Maximum time for a cached file to survive if it is not
  21. accessed within.
  22. """
  23. _CACHED_SIZE_TRIGGER = 600
  24. """
  25. This setting limits the amount of cached files. It's basically a way to start
  26. garbage collection.
  27. The reasoning for this limit being as big as it is, is the following:
  28. Numpy, Pandas, Matplotlib and Tensorflow together use about 500 files. This
  29. makes Jedi use ~500mb of memory. Since we might want a bit more than those few
  30. libraries, we just increase it a bit.
  31. """
  32. _PICKLE_VERSION = 33
  33. """
  34. Version number (integer) for file system cache.
  35. Increment this number when there are any incompatible changes in
  36. the parser tree classes. For example, the following changes
  37. are regarded as incompatible.
  38. - A class name is changed.
  39. - A class is moved to another module.
  40. - A __slot__ of a class is changed.
  41. """
  42. _VERSION_TAG = '%s-%s%s-%s' % (
  43. platform.python_implementation(),
  44. sys.version_info[0],
  45. sys.version_info[1],
  46. _PICKLE_VERSION
  47. )
  48. """
  49. Short name for distinguish Python implementations and versions.
  50. It's a bit similar to `sys.implementation.cache_tag`.
  51. See: http://docs.python.org/3/library/sys.html#sys.implementation
  52. """
  53. def _get_default_cache_path():
  54. if platform.system().lower() == 'windows':
  55. dir_ = Path(os.getenv('LOCALAPPDATA') or '~', 'Parso', 'Parso')
  56. elif platform.system().lower() == 'darwin':
  57. dir_ = Path('~', 'Library', 'Caches', 'Parso')
  58. else:
  59. dir_ = Path(os.getenv('XDG_CACHE_HOME') or '~/.cache', 'parso')
  60. return dir_.expanduser()
  61. _default_cache_path = _get_default_cache_path()
  62. """
  63. The path where the cache is stored.
  64. On Linux, this defaults to ``~/.cache/parso/``, on OS X to
  65. ``~/Library/Caches/Parso/`` and on Windows to ``%LOCALAPPDATA%\\Parso\\Parso\\``.
  66. On Linux, if environment variable ``$XDG_CACHE_HOME`` is set,
  67. ``$XDG_CACHE_HOME/parso`` is used instead of the default one.
  68. """
  69. _CACHE_CLEAR_THRESHOLD = 60 * 60 * 24
  70. def _get_cache_clear_lock_path(cache_path=None):
  71. """
  72. The path where the cache lock is stored.
  73. Cache lock will prevent continous cache clearing and only allow garbage
  74. collection once a day (can be configured in _CACHE_CLEAR_THRESHOLD).
  75. """
  76. cache_path = cache_path or _default_cache_path
  77. return cache_path.joinpath("PARSO-CACHE-LOCK")
# In-memory cache. The outer key is the hashed grammar; each value is a dict
# that maps a file path to the cache item stored for that file.
parser_cache: Dict[str, Any] = {}
  79. class _NodeCacheItem:
  80. def __init__(self, node, lines, change_time=None):
  81. self.node = node
  82. self.lines = lines
  83. if change_time is None:
  84. change_time = time.time()
  85. self.change_time = change_time
  86. self.last_used = change_time
  87. def load_module(hashed_grammar, file_io, cache_path=None):
  88. """
  89. Returns a module or None, if it fails.
  90. """
  91. p_time = file_io.get_last_modified()
  92. if p_time is None:
  93. return None
  94. try:
  95. module_cache_item = parser_cache[hashed_grammar][file_io.path]
  96. if p_time <= module_cache_item.change_time:
  97. module_cache_item.last_used = time.time()
  98. return module_cache_item.node
  99. except KeyError:
  100. return _load_from_file_system(
  101. hashed_grammar,
  102. file_io.path,
  103. p_time,
  104. cache_path=cache_path
  105. )
  106. def _load_from_file_system(hashed_grammar, path, p_time, cache_path=None):
  107. cache_path = _get_hashed_path(hashed_grammar, path, cache_path=cache_path)
  108. try:
  109. if p_time > os.path.getmtime(cache_path):
  110. # Cache is outdated
  111. return None
  112. with open(cache_path, 'rb') as f:
  113. gc.disable()
  114. try:
  115. module_cache_item = pickle.load(f)
  116. finally:
  117. gc.enable()
  118. except FileNotFoundError:
  119. return None
  120. else:
  121. _set_cache_item(hashed_grammar, path, module_cache_item)
  122. LOG.debug('pickle loaded: %s', path)
  123. return module_cache_item.node
  124. def _set_cache_item(hashed_grammar, path, module_cache_item):
  125. if sum(len(v) for v in parser_cache.values()) >= _CACHED_SIZE_TRIGGER:
  126. # Garbage collection of old cache files.
  127. # We are basically throwing everything away that hasn't been accessed
  128. # in 10 minutes.
  129. cutoff_time = time.time() - _CACHED_FILE_MINIMUM_SURVIVAL
  130. for key, path_to_item_map in parser_cache.items():
  131. parser_cache[key] = {
  132. path: node_item
  133. for path, node_item in path_to_item_map.items()
  134. if node_item.last_used > cutoff_time
  135. }
  136. parser_cache.setdefault(hashed_grammar, {})[path] = module_cache_item
  137. def try_to_save_module(hashed_grammar, file_io, module, lines, pickling=True, cache_path=None):
  138. path = file_io.path
  139. try:
  140. p_time = None if path is None else file_io.get_last_modified()
  141. except OSError:
  142. p_time = None
  143. pickling = False
  144. item = _NodeCacheItem(module, lines, p_time)
  145. _set_cache_item(hashed_grammar, path, item)
  146. if pickling and path is not None:
  147. try:
  148. _save_to_file_system(hashed_grammar, path, item, cache_path=cache_path)
  149. except PermissionError:
  150. # It's not really a big issue if the cache cannot be saved to the
  151. # file system. It's still in RAM in that case. However we should
  152. # still warn the user that this is happening.
  153. warnings.warn(
  154. 'Tried to save a file to %s, but got permission denied.' % path,
  155. Warning
  156. )
  157. else:
  158. _remove_cache_and_update_lock(cache_path=cache_path)
  159. def _save_to_file_system(hashed_grammar, path, item, cache_path=None):
  160. with open(_get_hashed_path(hashed_grammar, path, cache_path=cache_path), 'wb') as f:
  161. pickle.dump(item, f, pickle.HIGHEST_PROTOCOL)
  162. def clear_cache(cache_path=None):
  163. if cache_path is None:
  164. cache_path = _default_cache_path
  165. shutil.rmtree(cache_path)
  166. parser_cache.clear()
  167. def clear_inactive_cache(
  168. cache_path=None,
  169. inactivity_threshold=_CACHED_FILE_MAXIMUM_SURVIVAL,
  170. ):
  171. if cache_path is None:
  172. cache_path = _default_cache_path
  173. if not cache_path.exists():
  174. return False
  175. for dirname in os.listdir(cache_path):
  176. version_path = cache_path.joinpath(dirname)
  177. if not version_path.is_dir():
  178. continue
  179. for file in os.scandir(version_path):
  180. if file.stat().st_atime + _CACHED_FILE_MAXIMUM_SURVIVAL <= time.time():
  181. try:
  182. os.remove(file.path)
  183. except OSError: # silently ignore all failures
  184. continue
  185. else:
  186. return True
  187. def _touch(path):
  188. try:
  189. os.utime(path, None)
  190. except FileNotFoundError:
  191. try:
  192. file = open(path, 'a')
  193. file.close()
  194. except (OSError, IOError): # TODO Maybe log this?
  195. return False
  196. return True
  197. def _remove_cache_and_update_lock(cache_path=None):
  198. lock_path = _get_cache_clear_lock_path(cache_path=cache_path)
  199. try:
  200. clear_lock_time = os.path.getmtime(lock_path)
  201. except FileNotFoundError:
  202. clear_lock_time = None
  203. if (
  204. clear_lock_time is None # first time
  205. or clear_lock_time + _CACHE_CLEAR_THRESHOLD <= time.time()
  206. ):
  207. if not _touch(lock_path):
  208. # First make sure that as few as possible other cleanup jobs also
  209. # get started. There is still a race condition but it's probably
  210. # not a big problem.
  211. return False
  212. clear_inactive_cache(cache_path=cache_path)
  213. def _get_hashed_path(hashed_grammar, path, cache_path=None):
  214. directory = _get_cache_directory_path(cache_path=cache_path)
  215. file_hash = hashlib.sha256(str(path).encode("utf-8")).hexdigest()
  216. return os.path.join(directory, '%s-%s.pkl' % (hashed_grammar, file_hash))
  217. def _get_cache_directory_path(cache_path=None):
  218. if cache_path is None:
  219. cache_path = _default_cache_path
  220. directory = cache_path.joinpath(_VERSION_TAG)
  221. if not directory.exists():
  222. os.makedirs(directory)
  223. return directory