| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158 |
- import keyword
- import warnings
- from typing import List, Optional, Set, Tuple, Union
- from einops import EinopsError
# Internal stand-in that replaces "..." inside an expression before it is scanned character by character.
# NB, this is a single unicode symbol. String is used as it is not a list, but can be iterated
_ellipsis: str = "…"
class AnonymousAxis:
    """
    Axis that is described only by its length and has no name.

    Important: no ``__eq__`` is defined, so instances compare by identity and
    two anonymous axes are never equal to each other, even with the same length.
    """

    def __init__(self, value: str):
        """
        Store ``int(value)`` as ``self.value`` and validate it.

        Raises:
            EinopsError: if the length is 1 (callers are expected to handle unit
                axes themselves) or if it is not positive.
        """
        length = int(value)
        self.value = length
        # length 1 gets its own message: reaching here with a unit axis indicates a bug upstream
        if length == 1:
            raise EinopsError("No need to create anonymous axis of length 1. Report this as an issue")
        if length < 1:
            raise EinopsError(f"Anonymous axis should have positive length, not {self.value}")

    def __repr__(self):
        """Return the length followed by ``-axis``, e.g. ``3-axis``."""
        return str(self.value) + "-axis"
class ParsedExpression:
    """
    non-mutable structure that contains information about one side of expression (e.g. 'b c (h w)')
    and keeps some information important for downstream

    Attributes set by ``__init__``:
        has_ellipsis: True when the expression contains an ellipsis.
        has_ellipsis_parenthesized: None when there is no ellipsis, otherwise whether
            the ellipsis was found inside parentheses.
        identifiers: every axis seen; named axes are stored as strings, numeric axes
            (other than 1) as ``AnonymousAxis`` objects, the ellipsis as ``_ellipsis``.
        has_non_unitary_anonymous_axes: True when a numeric axis other than 1 is present.
        composition: one entry per top-level element, in order of appearance. A list holds
            the axes of a (possibly single-axis or empty) group; an ellipsis outside
            parentheses is stored as the bare ``_ellipsis`` string.
    """

    def __init__(self, expression: str, *, allow_underscore: bool = False, allow_duplicates: bool = False):
        """
        Parse ``expression`` character by character and populate the attributes.

        Args:
            expression: one side of a pattern, e.g. ``'b c (h w)'``.
            allow_underscore: accept a lone ``_`` as an axis name, and allow it to repeat.
            allow_duplicates: do not raise when the same identifier appears more than once.

        Raises:
            EinopsError: on dots outside a single ``...``, duplicate identifiers
                (unless allowed), invalid identifiers, unknown characters,
                nested parentheses or unbalanced parentheses.
        """
        self.has_ellipsis: bool = False
        self.has_ellipsis_parenthesized: Optional[bool] = None
        self.identifiers: Set[str] = set()
        # that's axes like 2, 3, 4 or 5. Axes with size 1 are exceptional and replaced with empty composition
        self.has_non_unitary_anonymous_axes: bool = False
        # composition keeps structure of composite axes, see how different corner cases are handled in tests
        self.composition: List[Union[List[str], str]] = []

        if "." in expression:
            if "..." not in expression:
                raise EinopsError("Expression may contain dots only inside ellipsis (...)")
            if expression.count("...") != 1 or expression.count(".") != 3:
                raise EinopsError(
                    "Expression may contain dots only inside ellipsis (...); only one ellipsis for tensor "
                )
            # collapse the three dots into one symbol so the character scan below sees a single token
            expression = expression.replace("...", _ellipsis)
            self.has_ellipsis = True

        # None while scanning outside parentheses, otherwise the list of axes of the open group
        bracket_group: Optional[List[str]] = None

        def add_axis_name(x):
            """Register one complete token (axis name, number or ellipsis) in the current position."""
            if x in self.identifiers:
                if not (allow_underscore and x == "_") and not allow_duplicates:
                    raise EinopsError(f'Indexing expression contains duplicate dimension "{x}"')
            if x == _ellipsis:
                # Set the flag here as well: the ellipsis symbol may have been typed literally
                # instead of as "...", and the flag must agree with identifiers/composition.
                self.has_ellipsis = True
                self.identifiers.add(_ellipsis)
                if bracket_group is None:
                    self.composition.append(_ellipsis)
                    self.has_ellipsis_parenthesized = False
                else:
                    bracket_group.append(_ellipsis)
                    self.has_ellipsis_parenthesized = True
                return

            is_number = str.isdecimal(x)
            if is_number and int(x) == 1:
                # handling the case of anonymous axis of length 1
                if bracket_group is None:
                    self.composition.append([])
                # no need to think about 1s inside parenthesis
                return

            is_axis_name, reason = self.check_axis_name_return_reason(x, allow_underscore=allow_underscore)
            if not (is_number or is_axis_name):
                raise EinopsError(f"Invalid axis identifier: {x}\n{reason}")
            if is_number:
                # numeric axes are wrapped so that equal lengths are still distinct identifiers
                x = AnonymousAxis(x)
                self.has_non_unitary_anonymous_axes = True
            self.identifiers.add(x)
            if bracket_group is None:
                self.composition.append([x])
            else:
                bracket_group.append(x)

        current_identifier = None
        for char in expression:
            if char in "() ":
                # a separator ends the token collected so far
                if current_identifier is not None:
                    add_axis_name(current_identifier)
                current_identifier = None
                if char == "(":
                    if bracket_group is not None:
                        raise EinopsError("Axis composition is one-level (brackets inside brackets not allowed)")
                    bracket_group = []
                elif char == ")":
                    if bracket_group is None:
                        raise EinopsError("Brackets are not balanced")
                    self.composition.append(bracket_group)
                    bracket_group = None
            elif str.isalnum(char) or char in ["_", _ellipsis]:
                if current_identifier is None:
                    current_identifier = char
                else:
                    current_identifier += char
            else:
                raise EinopsError(f"Unknown character '{char}'")

        if bracket_group is not None:
            raise EinopsError(f'Imbalanced parentheses in expression: "{expression}"')
        # flush the trailing token, which has no separator after it
        if current_identifier is not None:
            add_axis_name(current_identifier)

    def flat_axes_order(self) -> List:
        """
        Return all axes in order of appearance, with grouping removed.

        Only valid when every element of ``composition`` is a list, i.e. there is
        no ellipsis outside parentheses; otherwise the assertion fails.
        """
        result = []
        for composed_axis in self.composition:
            assert isinstance(composed_axis, list), "does not work with ellipsis"
            result.extend(composed_axis)
        return result

    def has_composed_axes(self) -> bool:
        """Return True when any group in ``composition`` holds more than one axis."""
        # this will ignore 1 inside brackets
        return any(isinstance(axes, list) and len(axes) > 1 for axes in self.composition)

    @staticmethod
    def check_axis_name_return_reason(name: str, allow_underscore: bool = False) -> Tuple[bool, str]:
        """
        Check whether ``name`` may be used as an axis name.

        Returns:
            ``(True, "")`` when the name is accepted, otherwise ``(False, reason)``.
            A name must be a python identifier that neither starts nor ends with an
            underscore; a lone ``_`` is accepted only when ``allow_underscore`` is set.

        Python keywords and the name ``axis`` are accepted, but a RuntimeWarning /
        FutureWarning respectively is issued for them.
        """
        if not str.isidentifier(name):
            return False, "not a valid python identifier"
        # an identifier is never empty, so indexing the first/last character is safe
        if name[0] == "_" or name[-1] == "_":
            if name == "_" and allow_underscore:
                return True, ""
            return False, "axis name should not start or end with underscore"
        if keyword.iskeyword(name):
            warnings.warn(
                f"It is discouraged to use axes names that are keywords: {name}",
                RuntimeWarning,
                stacklevel=2,
            )
        if name in ["axis"]:
            warnings.warn(
                "It is discouraged to use 'axis' as an axis name and will raise an error in future",
                FutureWarning,
                stacklevel=2,
            )
        return True, ""

    @staticmethod
    def check_axis_name(name: str) -> bool:
        """
        Valid axes names are python identifiers that do not start or end with underscore.
        Keywords are accepted too, but using them triggers a warning.
        """
        is_valid, _reason = ParsedExpression.check_axis_name_return_reason(name)
        return is_valid
|