| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204 |
- from sympy.external import import_module
- from sympy.utilities.decorator import doctest_depends_on
- from re import compile as rcompile
- from sympy.parsing.latex.lark import LarkLaTeXParser, TransformToSymPyExpr, parse_latex_lark # noqa
- from .errors import LaTeXParsingError # noqa
- IGNORE_L = r"\s*[{]*\s*"
- IGNORE_R = r"\s*[}]*\s*"
- NO_LEFT = r"(?<!\\left)"
- BEGIN_AMS_MAT = r"\\begin{matrix}"
- END_AMS_MAT = r"\\end{matrix}"
- BEGIN_ARR = r"\\begin{array}{.*?}"
- END_ARR = r"\\end{array}"
- # begin_delim_regex: end_delim_regex
- MATRIX_DELIMS = {fr"\\left\({IGNORE_L}{BEGIN_AMS_MAT}": fr"{END_AMS_MAT}{IGNORE_R}\\right\)",
- fr"{NO_LEFT}\({IGNORE_L}{BEGIN_AMS_MAT}": fr"{END_AMS_MAT}{IGNORE_R}\)",
- fr"\\left\[{IGNORE_L}{BEGIN_AMS_MAT}": fr"{END_AMS_MAT}{IGNORE_R}\\right\]",
- fr"{NO_LEFT}\[{IGNORE_L}{BEGIN_AMS_MAT}": fr"{END_AMS_MAT}{IGNORE_R}\]",
- fr"\\left\|{IGNORE_L}{BEGIN_AMS_MAT}": fr"{END_AMS_MAT}{IGNORE_R}\\right\|",
- fr"{NO_LEFT}\|{IGNORE_L}{BEGIN_AMS_MAT}": fr"{END_AMS_MAT}{IGNORE_R}\|",
- r"\\begin{pmatrix}": r"\\end{pmatrix}",
- r"\\begin{bmatrix}": r"\\end{bmatrix}",
- r"\\begin{vmatrix}": r"\\end{vmatrix}",
- fr"\\left\({IGNORE_L}{BEGIN_ARR}": fr"{END_ARR}{IGNORE_R}\\right\)",
- fr"{NO_LEFT}\({IGNORE_L}{BEGIN_ARR}": fr"{END_ARR}{IGNORE_R}\)",
- fr"\\left\[{IGNORE_L}{BEGIN_ARR}": fr"{END_ARR}{IGNORE_R}\\right\]",
- fr"{NO_LEFT}\[{IGNORE_L}{BEGIN_ARR}": fr"{END_ARR}{IGNORE_R}\]",
- fr"\\left\|{IGNORE_L}{BEGIN_ARR}": fr"{END_ARR}{IGNORE_R}\\right\|",
- fr"{NO_LEFT}\|{IGNORE_L}{BEGIN_ARR}": fr"{END_ARR}{IGNORE_R}\|"
- }
- MATRIX_DELIMS_INV = {v: k for k, v in MATRIX_DELIMS.items()}
- # begin_delim_regex: ideal_begin_delim_representative
- BEGIN_DELIM_REPR = {fr"\\left\({IGNORE_L}{BEGIN_AMS_MAT}": "\\left(\\begin{matrix}",
- fr"{NO_LEFT}\({IGNORE_L}{BEGIN_AMS_MAT}": "(\\begin{matrix}",
- fr"\\left\[{IGNORE_L}{BEGIN_AMS_MAT}": "\\left[\\begin{matrix}",
- fr"{NO_LEFT}\[{IGNORE_L}{BEGIN_AMS_MAT}": "[\\begin{matrix}",
- fr"\\left\|{IGNORE_L}{BEGIN_AMS_MAT}": "\\left|\\begin{matrix}",
- fr"{NO_LEFT}\|{IGNORE_L}{BEGIN_AMS_MAT}": "|\\begin{matrix}",
- r"\\begin{pmatrix}": "\\begin{pmatrix}",
- r"\\begin{bmatrix}": "\\begin{bmatrix}",
- r"\\begin{vmatrix}": "\\begin{vmatrix}",
- fr"\\left\({IGNORE_L}{BEGIN_ARR}": "\\left(\\begin{array}{COLUMN_SPECIFIERS}",
- fr"{NO_LEFT}\({IGNORE_L}{BEGIN_ARR}": "(\\begin{array}{COLUMN_SPECIFIERS}",
- fr"\\left\[{IGNORE_L}{BEGIN_ARR}": "\\left[\\begin{array}{COLUMN_SPECIFIERS}",
- fr"{NO_LEFT}\[{IGNORE_L}{BEGIN_ARR}": "[\\begin{array}{COLUMN_SPECIFIERS}",
- fr"\\left\|{IGNORE_L}{BEGIN_ARR}": "\\left|\\begin{array}{COLUMN_SPECIFIERS}",
- fr"{NO_LEFT}\|{IGNORE_L}{BEGIN_ARR}": "|\\begin{array}{COLUMN_SPECIFIERS}"
- }
- # end_delim_regex: ideal_end_delim_representative
- END_DELIM_REPR = {fr"{END_AMS_MAT}{IGNORE_R}\\right\)": "\\end{matrix}\\right)",
- fr"{END_AMS_MAT}{IGNORE_R}\)": "\\end{matrix})",
- fr"{END_AMS_MAT}{IGNORE_R}\\right\]": "\\end{matrix}\\right]",
- fr"{END_AMS_MAT}{IGNORE_R}\]": "\\end{matrix}]",
- fr"{END_AMS_MAT}{IGNORE_R}\\right\|": "\\end{matrix}\\right|",
- fr"{END_AMS_MAT}{IGNORE_R}\|": "\\end{matrix}|",
- r"\\end{pmatrix}": "\\end{pmatrix}",
- r"\\end{bmatrix}": "\\end{bmatrix}",
- r"\\end{vmatrix}": "\\end{vmatrix}",
- fr"{END_ARR}{IGNORE_R}\\right\)": "\\end{array}\\right)",
- fr"{END_ARR}{IGNORE_R}\)": "\\end{array})",
- fr"{END_ARR}{IGNORE_R}\\right\]": "\\end{array}\\right]",
- fr"{END_ARR}{IGNORE_R}\]": "\\end{array}]",
- fr"{END_ARR}{IGNORE_R}\\right\|": "\\end{array}\\right|",
- fr"{END_ARR}{IGNORE_R}\|": "\\end{array}|"
- }
- def check_matrix_delimiters(latex_str):
- """Report mismatched, excess, or missing matrix delimiters."""
- spans = []
- for begin_delim in MATRIX_DELIMS:
- end_delim = MATRIX_DELIMS[begin_delim]
- p = rcompile(begin_delim)
- q = rcompile(end_delim)
- spans.extend([(*m.span(), m.group(),
- begin_delim) for m in p.finditer(latex_str)])
- spans.extend([(*m.span(), m.group(),
- end_delim) for m in q.finditer(latex_str)])
- spans.sort(key=(lambda x: x[0]))
- if len(spans) % 2 == 1:
- # Odd number of delimiters; therefore something
- # is wrong. We do not complain yet; let's see if
- # we can pinpoint the actual error.
- spans.append((None, None, None, None))
- spans = [(*x, *y) for (x, y) in zip(spans[::2], spans[1::2])]
- for x in spans:
- # x is supposed to be an 8-tuple of the following form:
- #
- # (begin_delim_span_start, begin_delim_span_end,
- # begin_delim_match, begin_delim_regex,
- # end_delim_span_start, end_delim_span_end,
- # end_delim_match, end_delim_regex)
- sellipsis = "..."
- s = x[0] - 10
- if s < 0:
- s = 0
- sellipsis = ""
- eellipsis = "..."
- e = x[1] + 10
- if e > len(latex_str):
- e = len(latex_str)
- eellipsis = ""
- if x[3] in END_DELIM_REPR:
- err = (f"Extra '{x[2]}' at index {x[0]} or "
- "missing corresponding "
- f"'{BEGIN_DELIM_REPR[MATRIX_DELIMS_INV[x[3]]]}' "
- f"in LaTeX string: {sellipsis}{latex_str[s:e]}"
- f"{eellipsis}")
- raise LaTeXParsingError(err)
- if x[7] is None:
- err = (f"Extra '{x[2]}' at index {x[0]} or "
- "missing corresponding "
- f"'{END_DELIM_REPR[MATRIX_DELIMS[x[3]]]}' "
- f"in LaTeX string: {sellipsis}{latex_str[s:e]}"
- f"{eellipsis}")
- raise LaTeXParsingError(err)
- correct_end_regex = MATRIX_DELIMS[x[3]]
- sellipsis = "..." if x[0] > 0 else ""
- eellipsis = "..." if x[5] < len(latex_str) else ""
- if x[7] != correct_end_regex:
- err = ("Expected "
- f"'{END_DELIM_REPR[correct_end_regex]}' "
- f"to close the '{x[2]}' at index {x[0]} but "
- f"found '{x[6]}' at index {x[4]} of LaTeX "
- f"string instead: {sellipsis}{latex_str[x[0]:x[5]]}"
- f"{eellipsis}")
- raise LaTeXParsingError(err)
- __doctest_requires__ = {('parse_latex',): ['antlr4', 'lark']}
- @doctest_depends_on(modules=('antlr4', 'lark'))
- def parse_latex(s, strict=False, backend="antlr"):
- r"""Converts the input LaTeX string ``s`` to a SymPy ``Expr``.
- Parameters
- ==========
- s : str
- The LaTeX string to parse. In Python source containing LaTeX,
- *raw strings* (denoted with ``r"``, like this one) are preferred,
- as LaTeX makes liberal use of the ``\`` character, which would
- trigger escaping in normal Python strings.
- backend : str, optional
- Currently, there are two backends supported: ANTLR, and Lark.
- The default setting is to use the ANTLR backend, which can be
- changed to Lark if preferred.
- Use ``backend="antlr"`` for the ANTLR-based parser, and
- ``backend="lark"`` for the Lark-based parser.
- The ``backend`` option is case-sensitive, and must be in
- all lowercase.
- strict : bool, optional
- This option is only available with the ANTLR backend.
- If True, raise an exception if the string cannot be parsed as
- valid LaTeX. If False, try to recover gracefully from common
- mistakes.
- Examples
- ========
- >>> from sympy.parsing.latex import parse_latex
- >>> expr = parse_latex(r"\frac {1 + \sqrt {\a}} {\b}")
- >>> expr
- (sqrt(a) + 1)/b
- >>> expr.evalf(4, subs=dict(a=5, b=2))
- 1.618
- >>> func = parse_latex(r"\int_1^\alpha \dfrac{\mathrm{d}t}{t}", backend="lark")
- >>> func.evalf(subs={"alpha": 2})
- 0.693147180559945
- """
- check_matrix_delimiters(s)
- if backend == "antlr":
- _latex = import_module(
- 'sympy.parsing.latex._parse_latex_antlr',
- import_kwargs={'fromlist': ['X']})
- if _latex is not None:
- return _latex.parse_latex(s, strict)
- elif backend == "lark":
- return parse_latex_lark(s)
- else:
- raise NotImplementedError(f"Using the '{backend}' backend in the LaTeX" \
- " parser is not supported.")
|