| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275 |
- import time
- import os
- import sys
- import hashlib
- import gc
- import shutil
- import platform
- import logging
- import warnings
- import pickle
- from pathlib import Path
- from typing import Dict, Any
LOG = logging.getLogger(__name__)

_CACHED_FILE_MINIMUM_SURVIVAL = 10 * 60  # seconds (ten minutes)
"""
In-memory cache entries are kept for at least this many seconds after
their last use.
"""

_CACHED_FILE_MAXIMUM_SURVIVAL = 30 * 24 * 60 * 60  # seconds (thirty days)
"""
Longest time a cached file may stay on disk without being accessed.
"""

_CACHED_SIZE_TRIGGER = 600
"""
Upper bound on the number of cached files; reaching it is what starts
garbage collection of the in-memory cache.

Why the limit is this big: Numpy, Pandas, Matplotlib and Tensorflow together
use about 500 files, which makes Jedi use ~500mb of memory. Since a few more
libraries than those may be wanted, the limit is set slightly higher.
"""

_PICKLE_VERSION = 33
"""
Version number (integer) of the file system cache format.

Increment this number whenever the parser tree classes change in an
incompatible way. Examples of incompatible changes:

- A class name is changed.
- A class is moved to another module.
- A __slot__ of a class is changed.
"""

_VERSION_TAG = '%s-%s%s-%s' % (
    platform.python_implementation(),
    *sys.version_info[:2],
    _PICKLE_VERSION,
)
"""
Short name to distinguish Python implementations and versions.

It's a bit similar to `sys.implementation.cache_tag`.
See: http://docs.python.org/3/library/sys.html#sys.implementation
"""
def _get_default_cache_path():
    """
    Work out the platform specific default directory of the on-disk cache.

    Windows uses ``%LOCALAPPDATA%`` (or the home directory if that variable
    is unset or empty), macOS uses ``~/Library/Caches`` and every other
    system uses ``$XDG_CACHE_HOME`` (or ``~/.cache`` if that variable is
    unset or empty).

    Returns a ``pathlib.Path`` with a leading ``~`` already expanded.
    """
    system_name = platform.system().lower()
    if system_name == 'windows':
        parts = (os.getenv('LOCALAPPDATA') or '~', 'Parso', 'Parso')
    elif system_name == 'darwin':
        parts = ('~', 'Library', 'Caches', 'Parso')
    else:
        parts = (os.getenv('XDG_CACHE_HOME') or '~/.cache', 'parso')
    return Path(*parts).expanduser()
# Resolved once, when this module is imported.
_default_cache_path = _get_default_cache_path()
"""
The path where the cache is stored.
On Linux, this defaults to ``~/.cache/parso/``, on OS X to
``~/Library/Caches/Parso/`` and on Windows to ``%LOCALAPPDATA%\\Parso\\Parso\\``.
On Linux, if environment variable ``$XDG_CACHE_HOME`` is set,
``$XDG_CACHE_HOME/parso`` is used instead of the default one.
"""
# Minimum number of seconds between two clean-up runs of the on-disk cache
# (one day). It is compared against the modification time of the cache lock
# file.
_CACHE_CLEAR_THRESHOLD = 60 * 60 * 24
def _get_cache_clear_lock_path(cache_path=None):
    """
    Return the location of the cache lock file.

    The cache lock prevents continuous cache clearing and only allows
    garbage collection once a day (configurable through
    _CACHE_CLEAR_THRESHOLD).

    ``cache_path`` is the cache directory; a falsy value selects the default
    cache directory.
    """
    if not cache_path:
        cache_path = _default_cache_path
    lock_file_name = "PARSO-CACHE-LOCK"
    return cache_path.joinpath(lock_file_name)
# In-memory cache. Maps a hashed grammar to a dict which in turn maps a file
# path to the cache item stored for that file.
parser_cache: Dict[str, Any] = {}
class _NodeCacheItem:
    """
    A cached parser tree together with the timestamps needed to validate
    and expire it.

    Instances are what gets pickled to the file system cache, so the
    attribute names are part of the cache format.
    """

    def __init__(self, node, lines, change_time=None):
        self.node = node
        self.lines = lines
        # Without an explicit timestamp the item counts as changed right now.
        self.change_time = time.time() if change_time is None else change_time
        # A new item starts out as "used at the time it changed".
        self.last_used = self.change_time
def load_module(hashed_grammar, file_io, cache_path=None):
    """
    Returns a module or None, if it fails.

    The in-memory cache is consulted first. Only when it has no entry for
    the file is the file system cache tried. An in-memory entry that is older
    than the file's last modification yields None.
    """
    modified_at = file_io.get_last_modified()
    if modified_at is None:
        return None

    try:
        cached_item = parser_cache[hashed_grammar][file_io.path]
    except KeyError:
        # Not in memory, fall back to the pickled copy on disk.
        return _load_from_file_system(
            hashed_grammar,
            file_io.path,
            modified_at,
            cache_path=cache_path,
        )

    if modified_at <= cached_item.change_time:
        cached_item.last_used = time.time()
        return cached_item.node
    # The file changed after it was cached.
    return None
def _load_from_file_system(hashed_grammar, path, p_time, cache_path=None):
    """
    Load the pickled cache item for ``path`` from the file system cache.

    ``p_time`` is the last modification time of the source file. A pickle
    that is older than that counts as outdated.

    Returns the cached node, or None if there is no pickle or it is
    outdated. On success the item is also put into the in-memory cache.
    """
    cache_path = _get_hashed_path(hashed_grammar, path, cache_path=cache_path)
    try:
        if p_time > os.path.getmtime(cache_path):
            # Cache is outdated
            return None

        with open(cache_path, 'rb') as f:
            # Remember the collector state: it must only be switched back on
            # if it was on before, otherwise a caller that runs with the
            # garbage collector disabled would get it re-enabled here.
            gc_was_enabled = gc.isenabled()
            gc.disable()
            try:
                # NOTE(security): unpickling executes code contained in the
                # file, so the cache directory must only be writable by
                # trusted users.
                module_cache_item = pickle.load(f)
            finally:
                if gc_was_enabled:
                    gc.enable()
    except FileNotFoundError:
        return None

    _set_cache_item(hashed_grammar, path, module_cache_item)
    LOG.debug('pickle loaded: %s', path)
    return module_cache_item.node
def _set_cache_item(hashed_grammar, path, module_cache_item):
    """
    Put ``module_cache_item`` into the in-memory cache for ``path``.

    If the cache already holds _CACHED_SIZE_TRIGGER or more items, every
    item that was not used within the last _CACHED_FILE_MINIMUM_SURVIVAL
    seconds is dropped before the new item is inserted.
    """
    cached_count = sum(map(len, parser_cache.values()))
    if cached_count >= _CACHED_SIZE_TRIGGER:
        # Garbage collection of old cache entries: everything that has not
        # been accessed in the last ten minutes is thrown away.
        oldest_allowed = time.time() - _CACHED_FILE_MINIMUM_SURVIVAL
        for grammar_key, items_by_path in list(parser_cache.items()):
            survivors = {}
            for item_path, cached_item in items_by_path.items():
                if cached_item.last_used > oldest_allowed:
                    survivors[item_path] = cached_item
            parser_cache[grammar_key] = survivors

    per_grammar = parser_cache.setdefault(hashed_grammar, {})
    per_grammar[path] = module_cache_item
def try_to_save_module(hashed_grammar, file_io, module, lines, pickling=True, cache_path=None):
    """
    Cache a parsed module in memory and, where possible, on the file system.

    The module is always stored in the in-memory cache. It is pickled to
    disk only if ``pickling`` is true, the file has a path and its
    modification time could be read. After a successful save the clean-up of
    the on-disk cache is triggered.
    """
    path = file_io.path
    p_time = None
    if path is not None:
        try:
            p_time = file_io.get_last_modified()
        except OSError:
            # Without a modification time nothing is written to disk.
            pickling = False

    item = _NodeCacheItem(module, lines, p_time)
    _set_cache_item(hashed_grammar, path, item)

    if not pickling or path is None:
        return

    try:
        _save_to_file_system(hashed_grammar, path, item, cache_path=cache_path)
    except PermissionError:
        # Failing to write the cache to disk is not a big issue, the module
        # is still cached in RAM. The user should nevertheless be told that
        # this is happening.
        warnings.warn(
            'Tried to save a file to %s, but got permission denied.' % path,
            Warning
        )
        return

    _remove_cache_and_update_lock(cache_path=cache_path)
def _save_to_file_system(hashed_grammar, path, item, cache_path=None):
    """
    Pickle ``item`` into the file system cache entry that belongs to the
    given grammar hash and source ``path``.
    """
    pickle_path = _get_hashed_path(hashed_grammar, path, cache_path=cache_path)
    with open(pickle_path, 'wb') as pickle_file:
        pickle.dump(item, pickle_file, protocol=pickle.HIGHEST_PROTOCOL)
def clear_cache(cache_path=None):
    """
    Delete the whole on-disk cache directory and empty the in-memory cache.

    ``cache_path`` is the cache directory to delete; None selects the default
    cache directory. A directory that does not exist is not an error: the
    in-memory cache is still emptied.
    """
    if cache_path is None:
        cache_path = _default_cache_path
    try:
        shutil.rmtree(cache_path)
    except FileNotFoundError:
        # Nothing was ever written to disk (or it is already gone). The
        # in-memory cache must still be cleared below.
        pass
    parser_cache.clear()
def clear_inactive_cache(
    cache_path=None,
    inactivity_threshold=_CACHED_FILE_MAXIMUM_SURVIVAL,
):
    """
    Remove cached files that have not been accessed for a while.

    Every sub-directory of ``cache_path`` is scanned and each file whose
    last access time lies ``inactivity_threshold`` seconds or more in the
    past is deleted. Files that cannot be inspected or removed are skipped.

    ``cache_path`` defaults to the default cache directory when None.

    Returns False if the cache directory does not exist, True otherwise.
    """
    if cache_path is None:
        cache_path = _default_cache_path
    if not cache_path.exists():
        return False

    now = time.time()
    for dirname in os.listdir(cache_path):
        version_path = cache_path.joinpath(dirname)
        if not version_path.is_dir():
            continue
        # The context manager closes the directory iterator deterministically.
        with os.scandir(version_path) as entries:
            for entry in entries:
                try:
                    # stat() is inside the try as well: the file may vanish
                    # between being listed and being inspected.
                    if entry.stat().st_atime + inactivity_threshold <= now:
                        os.remove(entry.path)
                except OSError:  # silently ignore all failures
                    continue
    return True
def _touch(path):
    """
    Set the access and modification time of ``path`` to now, creating the
    file if it does not exist yet.

    Returns True on success and False if the file could neither be updated
    nor created.
    """
    try:
        os.utime(path, None)
    except FileNotFoundError:
        try:
            # Opening in append mode creates the file without truncating it.
            with open(path, 'a'):
                pass
        except OSError:  # TODO Maybe log this?
            return False
    except OSError:
        # E.g. missing permissions: report failure instead of raising, just
        # like a failed creation.
        return False
    return True
def _remove_cache_and_update_lock(cache_path=None):
    """
    Clean up inactive cache files, at most once per _CACHE_CLEAR_THRESHOLD
    seconds.

    The modification time of the cache lock file records the last clean-up.
    Returns False if the lock file could not be touched and None in every
    other case.
    """
    lock_path = _get_cache_clear_lock_path(cache_path=cache_path)
    try:
        last_cleared = os.path.getmtime(lock_path)
    except FileNotFoundError:
        last_cleared = None  # first time

    if last_cleared is not None and last_cleared + _CACHE_CLEAR_THRESHOLD > time.time():
        # The previous clean-up is recent enough, nothing to do.
        return None

    # Touch the lock first so that as few other clean-up jobs as possible
    # get started as well. There is still a race condition, but it is
    # probably not a big problem.
    if not _touch(lock_path):
        return False

    clear_inactive_cache(cache_path=cache_path)
    return None
def _get_hashed_path(hashed_grammar, path, cache_path=None):
    """
    Return the file name of the pickle that caches ``path`` for the given
    grammar hash.

    The name is built from the grammar hash and the SHA-256 digest of the
    path and lives in the version specific cache directory.
    """
    directory = _get_cache_directory_path(cache_path=cache_path)
    # File names that are not valid in the file system encoding show up as
    # lone surrogates in ``str``. "surrogatepass" lets those be hashed
    # instead of raising UnicodeEncodeError. The digest of every other path
    # is unchanged.
    path_bytes = str(path).encode("utf-8", errors="surrogatepass")
    file_hash = hashlib.sha256(path_bytes).hexdigest()
    return os.path.join(directory, '%s-%s.pkl' % (hashed_grammar, file_hash))
def _get_cache_directory_path(cache_path=None):
    """
    Return the version specific cache directory, creating it if necessary.

    ``cache_path`` is the root cache directory; None selects the default
    cache directory. The returned directory is ``cache_path`` joined with
    _VERSION_TAG.
    """
    if cache_path is None:
        cache_path = _default_cache_path
    directory = cache_path.joinpath(_VERSION_TAG)
    # exist_ok avoids the check-then-create race: another process may create
    # the directory between an existence test and the makedirs call.
    os.makedirs(directory, exist_ok=True)
    return directory
|