util.py 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600
  1. # encoding: utf-8
  2. """
  3. This module provides utility methods for dealing with path-specs.
  4. """
  5. import os
  6. import os.path
  7. import posixpath
  8. import stat
  9. from .compat import Collection, Iterable, string_types, unicode
  10. NORMALIZE_PATH_SEPS = [sep for sep in [os.sep, os.altsep] if sep and sep != posixpath.sep]
  11. """
  12. *NORMALIZE_PATH_SEPS* (:class:`list` of :class:`str`) contains the path
  13. separators that need to be normalized to the POSIX separator for the
  14. current operating system. The separators are determined by examining
  15. :data:`os.sep` and :data:`os.altsep`.
  16. """
  17. _registered_patterns = {}
  18. """
  19. *_registered_patterns* (:class:`dict`) maps a name (:class:`str`) to the
  20. registered pattern factory (:class:`~collections.abc.Callable`).
  21. """
  22. def detailed_match_files(patterns, files, all_matches=None):
  23. """
  24. Matches the files to the patterns, and returns which patterns matched
  25. the files.
  26. *patterns* (:class:`~collections.abc.Iterable` of :class:`~pathspec.pattern.Pattern`)
  27. contains the patterns to use.
  28. *files* (:class:`~collections.abc.Iterable` of :class:`str`) contains
  29. the normalized file paths to be matched against *patterns*.
  30. *all_matches* (:class:`boot` or :data:`None`) is whether to return all
  31. matches patterns (:data:`True`), or only the last matched pattern
  32. (:data:`False`). Default is :data:`None` for :data:`False`.
  33. Returns the matched files (:class:`dict`) which maps each matched file
  34. (:class:`str`) to the patterns that matched in order (:class:`.MatchDetail`).
  35. """
  36. all_files = files if isinstance(files, Collection) else list(files)
  37. return_files = {}
  38. for pattern in patterns:
  39. if pattern.include is not None:
  40. result_files = pattern.match(all_files)
  41. if pattern.include:
  42. # Add files and record pattern.
  43. for result_file in result_files:
  44. if result_file in return_files:
  45. if all_matches:
  46. return_files[result_file].patterns.append(pattern)
  47. else:
  48. return_files[result_file].patterns[0] = pattern
  49. else:
  50. return_files[result_file] = MatchDetail([pattern])
  51. else:
  52. # Remove files.
  53. for file in result_files:
  54. del return_files[file]
  55. return return_files
  56. def _is_iterable(value):
  57. """
  58. Check whether the value is an iterable (excludes strings).
  59. *value* is the value to check,
  60. Returns whether *value* is a iterable (:class:`bool`).
  61. """
  62. return isinstance(value, Iterable) and not isinstance(value, (unicode, bytes))
  63. def iter_tree_entries(root, on_error=None, follow_links=None):
  64. """
  65. Walks the specified directory for all files and directories.
  66. *root* (:class:`str`) is the root directory to search.
  67. *on_error* (:class:`~collections.abc.Callable` or :data:`None`)
  68. optionally is the error handler for file-system exceptions. It will be
  69. called with the exception (:exc:`OSError`). Reraise the exception to
  70. abort the walk. Default is :data:`None` to ignore file-system
  71. exceptions.
  72. *follow_links* (:class:`bool` or :data:`None`) optionally is whether
  73. to walk symbolic links that resolve to directories. Default is
  74. :data:`None` for :data:`True`.
  75. Raises :exc:`RecursionError` if recursion is detected.
  76. Returns an :class:`~collections.abc.Iterable` yielding each file or
  77. directory entry (:class:`.TreeEntry`) relative to *root*.
  78. """
  79. if on_error is not None and not callable(on_error):
  80. raise TypeError("on_error:{!r} is not callable.".format(on_error))
  81. if follow_links is None:
  82. follow_links = True
  83. for entry in _iter_tree_entries_next(os.path.abspath(root), '', {}, on_error, follow_links):
  84. yield entry
  85. def iter_tree_files(root, on_error=None, follow_links=None):
  86. """
  87. Walks the specified directory for all files.
  88. *root* (:class:`str`) is the root directory to search for files.
  89. *on_error* (:class:`~collections.abc.Callable` or :data:`None`)
  90. optionally is the error handler for file-system exceptions. It will be
  91. called with the exception (:exc:`OSError`). Reraise the exception to
  92. abort the walk. Default is :data:`None` to ignore file-system
  93. exceptions.
  94. *follow_links* (:class:`bool` or :data:`None`) optionally is whether
  95. to walk symbolic links that resolve to directories. Default is
  96. :data:`None` for :data:`True`.
  97. Raises :exc:`RecursionError` if recursion is detected.
  98. Returns an :class:`~collections.abc.Iterable` yielding the path to
  99. each file (:class:`str`) relative to *root*.
  100. """
  101. if on_error is not None and not callable(on_error):
  102. raise TypeError("on_error:{!r} is not callable.".format(on_error))
  103. if follow_links is None:
  104. follow_links = True
  105. for entry in _iter_tree_entries_next(os.path.abspath(root), '', {}, on_error, follow_links):
  106. if not entry.is_dir(follow_links):
  107. yield entry.path
  108. # Alias `iter_tree_files()` as `iter_tree()`.
  109. iter_tree = iter_tree_files
  110. def _iter_tree_entries_next(root_full, dir_rel, memo, on_error, follow_links):
  111. """
  112. Scan the directory for all descendant files.
  113. *root_full* (:class:`str`) the absolute path to the root directory.
  114. *dir_rel* (:class:`str`) the path to the directory to scan relative to
  115. *root_full*.
  116. *memo* (:class:`dict`) keeps track of ancestor directories
  117. encountered. Maps each ancestor real path (:class:`str`) to relative
  118. path (:class:`str`).
  119. *on_error* (:class:`~collections.abc.Callable` or :data:`None`)
  120. optionally is the error handler for file-system exceptions.
  121. *follow_links* (:class:`bool`) is whether to walk symbolic links that
  122. resolve to directories.
  123. Yields each entry (:class:`.TreeEntry`).
  124. """
  125. dir_full = os.path.join(root_full, dir_rel)
  126. dir_real = os.path.realpath(dir_full)
  127. # Remember each encountered ancestor directory and its canonical
  128. # (real) path. If a canonical path is encountered more than once,
  129. # recursion has occurred.
  130. if dir_real not in memo:
  131. memo[dir_real] = dir_rel
  132. else:
  133. raise RecursionError(real_path=dir_real, first_path=memo[dir_real], second_path=dir_rel)
  134. for node_name in os.listdir(dir_full):
  135. node_rel = os.path.join(dir_rel, node_name)
  136. node_full = os.path.join(root_full, node_rel)
  137. # Inspect child node.
  138. try:
  139. node_lstat = os.lstat(node_full)
  140. except OSError as e:
  141. if on_error is not None:
  142. on_error(e)
  143. continue
  144. if stat.S_ISLNK(node_lstat.st_mode):
  145. # Child node is a link, inspect the target node.
  146. is_link = True
  147. try:
  148. node_stat = os.stat(node_full)
  149. except OSError as e:
  150. if on_error is not None:
  151. on_error(e)
  152. continue
  153. else:
  154. is_link = False
  155. node_stat = node_lstat
  156. if stat.S_ISDIR(node_stat.st_mode) and (follow_links or not is_link):
  157. # Child node is a directory, recurse into it and yield its
  158. # descendant files.
  159. yield TreeEntry(node_name, node_rel, node_lstat, node_stat)
  160. for entry in _iter_tree_entries_next(root_full, node_rel, memo, on_error, follow_links):
  161. yield entry
  162. elif stat.S_ISREG(node_stat.st_mode) or is_link:
  163. # Child node is either a file or an unfollowed link, yield it.
  164. yield TreeEntry(node_name, node_rel, node_lstat, node_stat)
  165. # NOTE: Make sure to remove the canonical (real) path of the directory
  166. # from the ancestors memo once we are done with it. This allows the
  167. # same directory to appear multiple times. If this is not done, the
  168. # second occurrence of the directory will be incorrectly interpreted
  169. # as a recursion. See <https://github.com/cpburnz/python-path-specification/pull/7>.
  170. del memo[dir_real]
  171. def lookup_pattern(name):
  172. """
  173. Lookups a registered pattern factory by name.
  174. *name* (:class:`str`) is the name of the pattern factory.
  175. Returns the registered pattern factory (:class:`~collections.abc.Callable`).
  176. If no pattern factory is registered, raises :exc:`KeyError`.
  177. """
  178. return _registered_patterns[name]
  179. def match_file(patterns, file):
  180. """
  181. Matches the file to the patterns.
  182. *patterns* (:class:`~collections.abc.Iterable` of :class:`~pathspec.pattern.Pattern`)
  183. contains the patterns to use.
  184. *file* (:class:`str`) is the normalized file path to be matched
  185. against *patterns*.
  186. Returns :data:`True` if *file* matched; otherwise, :data:`False`.
  187. """
  188. matched = False
  189. for pattern in patterns:
  190. if pattern.include is not None:
  191. if file in pattern.match((file,)):
  192. matched = pattern.include
  193. return matched
  194. def match_files(patterns, files):
  195. """
  196. Matches the files to the patterns.
  197. *patterns* (:class:`~collections.abc.Iterable` of :class:`~pathspec.pattern.Pattern`)
  198. contains the patterns to use.
  199. *files* (:class:`~collections.abc.Iterable` of :class:`str`) contains
  200. the normalized file paths to be matched against *patterns*.
  201. Returns the matched files (:class:`set` of :class:`str`).
  202. """
  203. all_files = files if isinstance(files, Collection) else list(files)
  204. return_files = set()
  205. for pattern in patterns:
  206. if pattern.include is not None:
  207. result_files = pattern.match(all_files)
  208. if pattern.include:
  209. return_files.update(result_files)
  210. else:
  211. return_files.difference_update(result_files)
  212. return return_files
  213. def _normalize_entries(entries, separators=None):
  214. """
  215. Normalizes the entry paths to use the POSIX path separator.
  216. *entries* (:class:`~collections.abc.Iterable` of :class:`.TreeEntry`)
  217. contains the entries to be normalized.
  218. *separators* (:class:`~collections.abc.Collection` of :class:`str`; or
  219. :data:`None`) optionally contains the path separators to normalize.
  220. See :func:`normalize_file` for more information.
  221. Returns a :class:`dict` mapping the each normalized file path (:class:`str`)
  222. to the entry (:class:`.TreeEntry`)
  223. """
  224. norm_files = {}
  225. for entry in entries:
  226. norm_files[normalize_file(entry.path, separators=separators)] = entry
  227. return norm_files
  228. def normalize_file(file, separators=None):
  229. """
  230. Normalizes the file path to use the POSIX path separator (i.e., ``'/'``).
  231. *file* (:class:`str` or :class:`pathlib.PurePath`) is the file path.
  232. *separators* (:class:`~collections.abc.Collection` of :class:`str`; or
  233. :data:`None`) optionally contains the path separators to normalize.
  234. This does not need to include the POSIX path separator (``'/'``), but
  235. including it will not affect the results. Default is :data:`None` for
  236. :data:`NORMALIZE_PATH_SEPS`. To prevent normalization, pass an empty
  237. container (e.g., an empty tuple ``()``).
  238. Returns the normalized file path (:class:`str`).
  239. """
  240. # Normalize path separators.
  241. if separators is None:
  242. separators = NORMALIZE_PATH_SEPS
  243. # Convert path object to string.
  244. norm_file = str(file)
  245. for sep in separators:
  246. norm_file = norm_file.replace(sep, posixpath.sep)
  247. # Remove current directory prefix.
  248. if norm_file.startswith('./'):
  249. norm_file = norm_file[2:]
  250. return norm_file
  251. def normalize_files(files, separators=None):
  252. """
  253. Normalizes the file paths to use the POSIX path separator.
  254. *files* (:class:`~collections.abc.Iterable` of :class:`str` or
  255. :class:`pathlib.PurePath`) contains the file paths to be normalized.
  256. *separators* (:class:`~collections.abc.Collection` of :class:`str`; or
  257. :data:`None`) optionally contains the path separators to normalize.
  258. See :func:`normalize_file` for more information.
  259. Returns a :class:`dict` mapping the each normalized file path (:class:`str`)
  260. to the original file path (:class:`str`)
  261. """
  262. norm_files = {}
  263. for path in files:
  264. norm_files[normalize_file(path, separators=separators)] = path
  265. return norm_files
  266. def register_pattern(name, pattern_factory, override=None):
  267. """
  268. Registers the specified pattern factory.
  269. *name* (:class:`str`) is the name to register the pattern factory
  270. under.
  271. *pattern_factory* (:class:`~collections.abc.Callable`) is used to
  272. compile patterns. It must accept an uncompiled pattern (:class:`str`)
  273. and return the compiled pattern (:class:`.Pattern`).
  274. *override* (:class:`bool` or :data:`None`) optionally is whether to
  275. allow overriding an already registered pattern under the same name
  276. (:data:`True`), instead of raising an :exc:`AlreadyRegisteredError`
  277. (:data:`False`). Default is :data:`None` for :data:`False`.
  278. """
  279. if not isinstance(name, string_types):
  280. raise TypeError("name:{!r} is not a string.".format(name))
  281. if not callable(pattern_factory):
  282. raise TypeError("pattern_factory:{!r} is not callable.".format(pattern_factory))
  283. if name in _registered_patterns and not override:
  284. raise AlreadyRegisteredError(name, _registered_patterns[name])
  285. _registered_patterns[name] = pattern_factory
  286. class AlreadyRegisteredError(Exception):
  287. """
  288. The :exc:`AlreadyRegisteredError` exception is raised when a pattern
  289. factory is registered under a name already in use.
  290. """
  291. def __init__(self, name, pattern_factory):
  292. """
  293. Initializes the :exc:`AlreadyRegisteredError` instance.
  294. *name* (:class:`str`) is the name of the registered pattern.
  295. *pattern_factory* (:class:`~collections.abc.Callable`) is the
  296. registered pattern factory.
  297. """
  298. super(AlreadyRegisteredError, self).__init__(name, pattern_factory)
  299. @property
  300. def message(self):
  301. """
  302. *message* (:class:`str`) is the error message.
  303. """
  304. return "{name!r} is already registered for pattern factory:{pattern_factory!r}.".format(
  305. name=self.name,
  306. pattern_factory=self.pattern_factory,
  307. )
  308. @property
  309. def name(self):
  310. """
  311. *name* (:class:`str`) is the name of the registered pattern.
  312. """
  313. return self.args[0]
  314. @property
  315. def pattern_factory(self):
  316. """
  317. *pattern_factory* (:class:`~collections.abc.Callable`) is the
  318. registered pattern factory.
  319. """
  320. return self.args[1]
  321. class RecursionError(Exception):
  322. """
  323. The :exc:`RecursionError` exception is raised when recursion is
  324. detected.
  325. """
  326. def __init__(self, real_path, first_path, second_path):
  327. """
  328. Initializes the :exc:`RecursionError` instance.
  329. *real_path* (:class:`str`) is the real path that recursion was
  330. encountered on.
  331. *first_path* (:class:`str`) is the first path encountered for
  332. *real_path*.
  333. *second_path* (:class:`str`) is the second path encountered for
  334. *real_path*.
  335. """
  336. super(RecursionError, self).__init__(real_path, first_path, second_path)
  337. @property
  338. def first_path(self):
  339. """
  340. *first_path* (:class:`str`) is the first path encountered for
  341. :attr:`self.real_path <RecursionError.real_path>`.
  342. """
  343. return self.args[1]
  344. @property
  345. def message(self):
  346. """
  347. *message* (:class:`str`) is the error message.
  348. """
  349. return "Real path {real!r} was encountered at {first!r} and then {second!r}.".format(
  350. real=self.real_path,
  351. first=self.first_path,
  352. second=self.second_path,
  353. )
  354. @property
  355. def real_path(self):
  356. """
  357. *real_path* (:class:`str`) is the real path that recursion was
  358. encountered on.
  359. """
  360. return self.args[0]
  361. @property
  362. def second_path(self):
  363. """
  364. *second_path* (:class:`str`) is the second path encountered for
  365. :attr:`self.real_path <RecursionError.real_path>`.
  366. """
  367. return self.args[2]
  368. class MatchDetail(object):
  369. """
  370. The :class:`.MatchDetail` class contains information about
  371. """
  372. #: Make the class dict-less.
  373. __slots__ = ('patterns',)
  374. def __init__(self, patterns):
  375. """
  376. Initialize the :class:`.MatchDetail` instance.
  377. *patterns* (:class:`~collections.abc.Sequence` of :class:`~pathspec.pattern.Pattern`)
  378. contains the patterns that matched the file in the order they were
  379. encountered.
  380. """
  381. self.patterns = patterns
  382. """
  383. *patterns* (:class:`~collections.abc.Sequence` of :class:`~pathspec.pattern.Pattern`)
  384. contains the patterns that matched the file in the order they were
  385. encountered.
  386. """
  387. class TreeEntry(object):
  388. """
  389. The :class:`.TreeEntry` class contains information about a file-system
  390. entry.
  391. """
  392. #: Make the class dict-less.
  393. __slots__ = ('_lstat', 'name', 'path', '_stat')
  394. def __init__(self, name, path, lstat, stat):
  395. """
  396. Initialize the :class:`.TreeEntry` instance.
  397. *name* (:class:`str`) is the base name of the entry.
  398. *path* (:class:`str`) is the relative path of the entry.
  399. *lstat* (:class:`~os.stat_result`) is the stat result of the direct
  400. entry.
  401. *stat* (:class:`~os.stat_result`) is the stat result of the entry,
  402. potentially linked.
  403. """
  404. self._lstat = lstat
  405. """
  406. *_lstat* (:class:`~os.stat_result`) is the stat result of the direct
  407. entry.
  408. """
  409. self.name = name
  410. """
  411. *name* (:class:`str`) is the base name of the entry.
  412. """
  413. self.path = path
  414. """
  415. *path* (:class:`str`) is the path of the entry.
  416. """
  417. self._stat = stat
  418. """
  419. *_stat* (:class:`~os.stat_result`) is the stat result of the linked
  420. entry.
  421. """
  422. def is_dir(self, follow_links=None):
  423. """
  424. Get whether the entry is a directory.
  425. *follow_links* (:class:`bool` or :data:`None`) is whether to follow
  426. symbolic links. If this is :data:`True`, a symlink to a directory
  427. will result in :data:`True`. Default is :data:`None` for :data:`True`.
  428. Returns whether the entry is a directory (:class:`bool`).
  429. """
  430. if follow_links is None:
  431. follow_links = True
  432. node_stat = self._stat if follow_links else self._lstat
  433. return stat.S_ISDIR(node_stat.st_mode)
  434. def is_file(self, follow_links=None):
  435. """
  436. Get whether the entry is a regular file.
  437. *follow_links* (:class:`bool` or :data:`None`) is whether to follow
  438. symbolic links. If this is :data:`True`, a symlink to a regular file
  439. will result in :data:`True`. Default is :data:`None` for :data:`True`.
  440. Returns whether the entry is a regular file (:class:`bool`).
  441. """
  442. if follow_links is None:
  443. follow_links = True
  444. node_stat = self._stat if follow_links else self._lstat
  445. return stat.S_ISREG(node_stat.st_mode)
  446. def is_symlink(self):
  447. """
  448. Returns whether the entry is a symbolic link (:class:`bool`).
  449. """
  450. return stat.S_ISLNK(self._lstat.st_mode)
  451. def stat(self, follow_links=None):
  452. """
  453. Get the cached stat result for the entry.
  454. *follow_links* (:class:`bool` or :data:`None`) is whether to follow
  455. symbolic links. If this is :data:`True`, the stat result of the
  456. linked file will be returned. Default is :data:`None` for :data:`True`.
  457. Returns that stat result (:class:`~os.stat_result`).
  458. """
  459. if follow_links is None:
  460. follow_links = True
  461. return self._stat if follow_links else self._lstat