_glob.py 2.2 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465
  1. # Copyright (c) Microsoft Corporation.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  # https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_expressions#escaping
  # Characters with special meaning in a regular expression; each one is emitted
  # with a leading backslash so that it matches itself literally.
  escaped_chars = {"$", "^", "+", ".", "*", "(", ")", "|", "\\", "?", "{", "}", "[", "]"}


  def glob_to_regex_pattern(glob: str) -> str:
      r"""Translate a glob into the source text of an anchored regular expression.

      Translation rules:

      * ``*`` becomes ``([^/]*)`` - any run of characters other than ``/``.
      * Two or more consecutive ``*`` become ``(.*)`` - any run of characters.
        When the stars are directly followed by ``/`` the slash is consumed too:
        the result is ``((.+/)|)`` if the stars are preceded by ``/`` and
        ``(.*/)`` otherwise.
      * ``{`` and ``}`` become ``(`` and ``)``; a ``,`` inside any open brace
        group becomes ``|``. Groups may be nested.
      * A backslash followed by another character emits that character
        literally (regex-escaped when it is in ``escaped_chars``).
      * Every other character is emitted as-is, regex-escaped when it is in
        ``escaped_chars``. A ``,`` outside of any group is emitted as ``\,``.

      Args:
          glob: The glob pattern to translate.

      Returns:
          The regular-expression source, wrapped in ``^`` ... ``$``. The pattern
          is not compiled here, so unbalanced braces in ``glob`` produce
          unbalanced parentheses in the result.
      """
      tokens = ["^"]
      # Number of "{" groups currently open. A counter (rather than a flag) keeps
      # commas that follow a nested "}" working as alternation separators.
      group_depth = 0
      length = len(glob)
      i = 0
      while i < length:
          c = glob[i]
          if c == "\\" and i + 1 < length:
              # Backslash escape: emit the next character literally and skip it.
              char = glob[i + 1]
              tokens.append("\\" + char if char in escaped_chars else char)
              i += 1
          elif c == "*":
              char_before = glob[i - 1] if i > 0 else None
              # Swallow the whole run of stars; i ends on the last star.
              star_count = 1
              while i + 1 < length and glob[i + 1] == "*":
                  star_count += 1
                  i += 1
              if star_count == 1:
                  tokens.append("([^/]*)")
              elif i + 1 < length and glob[i + 1] == "/":
                  # "/**/" may also match zero directories, hence the empty
                  # alternative when the stars are preceded by a slash.
                  tokens.append("((.+/)|)" if char_before == "/" else "(.*/)")
                  i += 1  # the slash after the stars is part of the token
              else:
                  tokens.append("(.*)")
          elif c == "{":
              group_depth += 1
              tokens.append("(")
          elif c == "}":
              # Clamp at zero so a stray "}" does not corrupt later group tracking.
              group_depth = max(group_depth - 1, 0)
              tokens.append(")")
          elif c == ",":
              tokens.append("|" if group_depth > 0 else "\\,")
          else:
              tokens.append("\\" + c if c in escaped_chars else c)
          i += 1
      tokens.append("$")
      return "".join(tokens)