# glob.py (3.3 KB)
  1. import os
  2. import re
  3. from .compat.py313 import legacy_end_marker
  4. _default_seps = os.sep + str(os.altsep) * bool(os.altsep)
  5. class Translator:
  6. """
  7. >>> Translator('xyz')
  8. Traceback (most recent call last):
  9. ...
  10. AssertionError: Invalid separators
  11. >>> Translator('')
  12. Traceback (most recent call last):
  13. ...
  14. AssertionError: Invalid separators
  15. """
  16. seps: str
  17. def __init__(self, seps: str = _default_seps):
  18. assert seps and set(seps) <= set(_default_seps), "Invalid separators"
  19. self.seps = seps
  20. def translate(self, pattern):
  21. """
  22. Given a glob pattern, produce a regex that matches it.
  23. """
  24. return self.extend(self.match_dirs(self.translate_core(pattern)))
  25. @legacy_end_marker
  26. def extend(self, pattern):
  27. r"""
  28. Extend regex for pattern-wide concerns.
  29. Apply '(?s:)' to create a non-matching group that
  30. matches newlines (valid on Unix).
  31. Append '\z' to imply fullmatch even when match is used.
  32. """
  33. return rf'(?s:{pattern})\z'
  34. def match_dirs(self, pattern):
  35. """
  36. Ensure that zipfile.Path directory names are matched.
  37. zipfile.Path directory names always end in a slash.
  38. """
  39. return rf'{pattern}[/]?'
  40. def translate_core(self, pattern):
  41. r"""
  42. Given a glob pattern, produce a regex that matches it.
  43. >>> t = Translator()
  44. >>> t.translate_core('*.txt').replace('\\\\', '')
  45. '[^/]*\\.txt'
  46. >>> t.translate_core('a?txt')
  47. 'a[^/]txt'
  48. >>> t.translate_core('**/*').replace('\\\\', '')
  49. '.*/[^/][^/]*'
  50. """
  51. self.restrict_rglob(pattern)
  52. return ''.join(map(self.replace, separate(self.star_not_empty(pattern))))
  53. def replace(self, match):
  54. """
  55. Perform the replacements for a match from :func:`separate`.
  56. """
  57. return match.group('set') or (
  58. re.escape(match.group(0))
  59. .replace('\\*\\*', r'.*')
  60. .replace('\\*', rf'[^{re.escape(self.seps)}]*')
  61. .replace('\\?', r'[^/]')
  62. )
  63. def restrict_rglob(self, pattern):
  64. """
  65. Raise ValueError if ** appears in anything but a full path segment.
  66. >>> Translator().translate('**foo')
  67. Traceback (most recent call last):
  68. ...
  69. ValueError: ** must appear alone in a path segment
  70. """
  71. seps_pattern = rf'[{re.escape(self.seps)}]+'
  72. segments = re.split(seps_pattern, pattern)
  73. if any('**' in segment and segment != '**' for segment in segments):
  74. raise ValueError("** must appear alone in a path segment")
  75. def star_not_empty(self, pattern):
  76. """
  77. Ensure that * will not match an empty segment.
  78. """
  79. def handle_segment(match):
  80. segment = match.group(0)
  81. return '?*' if segment == '*' else segment
  82. not_seps_pattern = rf'[^{re.escape(self.seps)}]+'
  83. return re.sub(not_seps_pattern, handle_segment, pattern)
  84. def separate(pattern):
  85. """
  86. Separate out character sets to avoid translating their contents.
  87. >>> [m.group(0) for m in separate('*.txt')]
  88. ['*.txt']
  89. >>> [m.group(0) for m in separate('a[?]txt')]
  90. ['a', '[?]', 'txt']
  91. """
  92. return re.finditer(r'([^\[]+)|(?P<set>[\[].*?[\]])|([\[][^\]]*$)', pattern)