parsing.py 6.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158
  1. import keyword
  2. import warnings
  3. from typing import List, Optional, Set, Tuple, Union
  4. from einops import EinopsError
  5. _ellipsis: str = "…" # NB: this is a single unicode character, not three dots; ParsedExpression substitutes it for "..." in the input. It is kept as a str (not a list) so isinstance(..., list) checks tell it apart from a composite axis, yet it can still be iterated
  6. class AnonymousAxis:
  7. """Important thing: all instances of this class are not equal to each other"""
  8. def __init__(self, value: str):
  9. self.value = int(value)
  10. if self.value <= 1:
  11. if self.value == 1:
  12. raise EinopsError("No need to create anonymous axis of length 1. Report this as an issue")
  13. else:
  14. raise EinopsError(f"Anonymous axis should have positive length, not {self.value}")
  15. def __repr__(self):
  16. return f"{str(self.value)}-axis"
  17. class ParsedExpression:
  18. """
  19. non-mutable structure that contains information about one side of expression (e.g. 'b c (h w)')
  20. and keeps some information important for downstream
  21. """
  22. def __init__(self, expression: str, *, allow_underscore: bool = False, allow_duplicates: bool = False):
  23. self.has_ellipsis: bool = False
  24. self.has_ellipsis_parenthesized: Optional[bool] = None
  25. self.identifiers: Set[str] = set()
  26. # that's axes like 2, 3, 4 or 5. Axes with size 1 are exceptional and replaced with empty composition
  27. self.has_non_unitary_anonymous_axes: bool = False
  28. # composition keeps structure of composite axes, see how different corner cases are handled in tests
  29. self.composition: List[Union[List[str], str]] = []
  30. if "." in expression:
  31. if "..." not in expression:
  32. raise EinopsError("Expression may contain dots only inside ellipsis (...)")
  33. if str.count(expression, "...") != 1 or str.count(expression, ".") != 3:
  34. raise EinopsError(
  35. "Expression may contain dots only inside ellipsis (...); only one ellipsis for tensor "
  36. )
  37. expression = expression.replace("...", _ellipsis)
  38. self.has_ellipsis = True
  39. bracket_group: Optional[List[str]] = None
  40. def add_axis_name(x):
  41. if x in self.identifiers:
  42. if not (allow_underscore and x == "_") and not allow_duplicates:
  43. raise EinopsError(f'Indexing expression contains duplicate dimension "{x}"')
  44. if x == _ellipsis:
  45. self.identifiers.add(_ellipsis)
  46. if bracket_group is None:
  47. self.composition.append(_ellipsis)
  48. self.has_ellipsis_parenthesized = False
  49. else:
  50. bracket_group.append(_ellipsis)
  51. self.has_ellipsis_parenthesized = True
  52. else:
  53. is_number = str.isdecimal(x)
  54. if is_number and int(x) == 1:
  55. # handling the case of anonymous axis of length 1
  56. if bracket_group is None:
  57. self.composition.append([])
  58. else:
  59. pass # no need to think about 1s inside parenthesis
  60. return
  61. is_axis_name, reason = self.check_axis_name_return_reason(x, allow_underscore=allow_underscore)
  62. if not (is_number or is_axis_name):
  63. raise EinopsError(f"Invalid axis identifier: {x}\n{reason}")
  64. if is_number:
  65. x = AnonymousAxis(x)
  66. self.identifiers.add(x)
  67. if is_number:
  68. self.has_non_unitary_anonymous_axes = True
  69. if bracket_group is None:
  70. self.composition.append([x])
  71. else:
  72. bracket_group.append(x)
  73. current_identifier = None
  74. for char in expression:
  75. if char in "() ":
  76. if current_identifier is not None:
  77. add_axis_name(current_identifier)
  78. current_identifier = None
  79. if char == "(":
  80. if bracket_group is not None:
  81. raise EinopsError("Axis composition is one-level (brackets inside brackets not allowed)")
  82. bracket_group = []
  83. elif char == ")":
  84. if bracket_group is None:
  85. raise EinopsError("Brackets are not balanced")
  86. self.composition.append(bracket_group)
  87. bracket_group = None
  88. elif str.isalnum(char) or char in ["_", _ellipsis]:
  89. if current_identifier is None:
  90. current_identifier = char
  91. else:
  92. current_identifier += char
  93. else:
  94. raise EinopsError(f"Unknown character '{char}'")
  95. if bracket_group is not None:
  96. raise EinopsError(f'Imbalanced parentheses in expression: "{expression}"')
  97. if current_identifier is not None:
  98. add_axis_name(current_identifier)
  99. def flat_axes_order(self) -> List:
  100. result = []
  101. for composed_axis in self.composition:
  102. assert isinstance(composed_axis, list), "does not work with ellipsis"
  103. for axis in composed_axis:
  104. result.append(axis)
  105. return result
  106. def has_composed_axes(self) -> bool:
  107. # this will ignore 1 inside brackets
  108. for axes in self.composition:
  109. if isinstance(axes, list) and len(axes) > 1:
  110. return True
  111. return False
  112. @staticmethod
  113. def check_axis_name_return_reason(name: str, allow_underscore: bool = False) -> Tuple[bool, str]:
  114. if not str.isidentifier(name):
  115. return False, "not a valid python identifier"
  116. elif name[0] == "_" or name[-1] == "_":
  117. if name == "_" and allow_underscore:
  118. return True, ""
  119. return False, "axis name should should not start or end with underscore"
  120. else:
  121. if keyword.iskeyword(name):
  122. warnings.warn(
  123. f"It is discouraged to use axes names that are keywords: {name}",
  124. RuntimeWarning,
  125. stacklevel=2,
  126. )
  127. if name in ["axis"]:
  128. warnings.warn(
  129. "It is discouraged to use 'axis' as an axis name and will raise an error in future",
  130. FutureWarning,
  131. stacklevel=2,
  132. )
  133. return True, ""
  134. @staticmethod
  135. def check_axis_name(name: str) -> bool:
  136. """
  137. Valid axes names are python identifiers except keywords,
  138. and additionally should not start or end with underscore
  139. """
  140. is_valid, _reason = ParsedExpression.check_axis_name_return_reason(name)
  141. return is_valid