# __init__.py (8.8 KB)
  1. from sympy.external import import_module
  2. from sympy.utilities.decorator import doctest_depends_on
  3. from re import compile as rcompile
  4. from sympy.parsing.latex.lark import LarkLaTeXParser, TransformToSymPyExpr, parse_latex_lark # noqa
  5. from .errors import LaTeXParsingError # noqa
  6. IGNORE_L = r"\s*[{]*\s*"
  7. IGNORE_R = r"\s*[}]*\s*"
  8. NO_LEFT = r"(?<!\\left)"
  9. BEGIN_AMS_MAT = r"\\begin{matrix}"
  10. END_AMS_MAT = r"\\end{matrix}"
  11. BEGIN_ARR = r"\\begin{array}{.*?}"
  12. END_ARR = r"\\end{array}"
  13. # begin_delim_regex: end_delim_regex
  14. MATRIX_DELIMS = {fr"\\left\({IGNORE_L}{BEGIN_AMS_MAT}": fr"{END_AMS_MAT}{IGNORE_R}\\right\)",
  15. fr"{NO_LEFT}\({IGNORE_L}{BEGIN_AMS_MAT}": fr"{END_AMS_MAT}{IGNORE_R}\)",
  16. fr"\\left\[{IGNORE_L}{BEGIN_AMS_MAT}": fr"{END_AMS_MAT}{IGNORE_R}\\right\]",
  17. fr"{NO_LEFT}\[{IGNORE_L}{BEGIN_AMS_MAT}": fr"{END_AMS_MAT}{IGNORE_R}\]",
  18. fr"\\left\|{IGNORE_L}{BEGIN_AMS_MAT}": fr"{END_AMS_MAT}{IGNORE_R}\\right\|",
  19. fr"{NO_LEFT}\|{IGNORE_L}{BEGIN_AMS_MAT}": fr"{END_AMS_MAT}{IGNORE_R}\|",
  20. r"\\begin{pmatrix}": r"\\end{pmatrix}",
  21. r"\\begin{bmatrix}": r"\\end{bmatrix}",
  22. r"\\begin{vmatrix}": r"\\end{vmatrix}",
  23. fr"\\left\({IGNORE_L}{BEGIN_ARR}": fr"{END_ARR}{IGNORE_R}\\right\)",
  24. fr"{NO_LEFT}\({IGNORE_L}{BEGIN_ARR}": fr"{END_ARR}{IGNORE_R}\)",
  25. fr"\\left\[{IGNORE_L}{BEGIN_ARR}": fr"{END_ARR}{IGNORE_R}\\right\]",
  26. fr"{NO_LEFT}\[{IGNORE_L}{BEGIN_ARR}": fr"{END_ARR}{IGNORE_R}\]",
  27. fr"\\left\|{IGNORE_L}{BEGIN_ARR}": fr"{END_ARR}{IGNORE_R}\\right\|",
  28. fr"{NO_LEFT}\|{IGNORE_L}{BEGIN_ARR}": fr"{END_ARR}{IGNORE_R}\|"
  29. }
  30. MATRIX_DELIMS_INV = {v: k for k, v in MATRIX_DELIMS.items()}
  31. # begin_delim_regex: ideal_begin_delim_representative
  32. BEGIN_DELIM_REPR = {fr"\\left\({IGNORE_L}{BEGIN_AMS_MAT}": "\\left(\\begin{matrix}",
  33. fr"{NO_LEFT}\({IGNORE_L}{BEGIN_AMS_MAT}": "(\\begin{matrix}",
  34. fr"\\left\[{IGNORE_L}{BEGIN_AMS_MAT}": "\\left[\\begin{matrix}",
  35. fr"{NO_LEFT}\[{IGNORE_L}{BEGIN_AMS_MAT}": "[\\begin{matrix}",
  36. fr"\\left\|{IGNORE_L}{BEGIN_AMS_MAT}": "\\left|\\begin{matrix}",
  37. fr"{NO_LEFT}\|{IGNORE_L}{BEGIN_AMS_MAT}": "|\\begin{matrix}",
  38. r"\\begin{pmatrix}": "\\begin{pmatrix}",
  39. r"\\begin{bmatrix}": "\\begin{bmatrix}",
  40. r"\\begin{vmatrix}": "\\begin{vmatrix}",
  41. fr"\\left\({IGNORE_L}{BEGIN_ARR}": "\\left(\\begin{array}{COLUMN_SPECIFIERS}",
  42. fr"{NO_LEFT}\({IGNORE_L}{BEGIN_ARR}": "(\\begin{array}{COLUMN_SPECIFIERS}",
  43. fr"\\left\[{IGNORE_L}{BEGIN_ARR}": "\\left[\\begin{array}{COLUMN_SPECIFIERS}",
  44. fr"{NO_LEFT}\[{IGNORE_L}{BEGIN_ARR}": "[\\begin{array}{COLUMN_SPECIFIERS}",
  45. fr"\\left\|{IGNORE_L}{BEGIN_ARR}": "\\left|\\begin{array}{COLUMN_SPECIFIERS}",
  46. fr"{NO_LEFT}\|{IGNORE_L}{BEGIN_ARR}": "|\\begin{array}{COLUMN_SPECIFIERS}"
  47. }
  48. # end_delim_regex: ideal_end_delim_representative
  49. END_DELIM_REPR = {fr"{END_AMS_MAT}{IGNORE_R}\\right\)": "\\end{matrix}\\right)",
  50. fr"{END_AMS_MAT}{IGNORE_R}\)": "\\end{matrix})",
  51. fr"{END_AMS_MAT}{IGNORE_R}\\right\]": "\\end{matrix}\\right]",
  52. fr"{END_AMS_MAT}{IGNORE_R}\]": "\\end{matrix}]",
  53. fr"{END_AMS_MAT}{IGNORE_R}\\right\|": "\\end{matrix}\\right|",
  54. fr"{END_AMS_MAT}{IGNORE_R}\|": "\\end{matrix}|",
  55. r"\\end{pmatrix}": "\\end{pmatrix}",
  56. r"\\end{bmatrix}": "\\end{bmatrix}",
  57. r"\\end{vmatrix}": "\\end{vmatrix}",
  58. fr"{END_ARR}{IGNORE_R}\\right\)": "\\end{array}\\right)",
  59. fr"{END_ARR}{IGNORE_R}\)": "\\end{array})",
  60. fr"{END_ARR}{IGNORE_R}\\right\]": "\\end{array}\\right]",
  61. fr"{END_ARR}{IGNORE_R}\]": "\\end{array}]",
  62. fr"{END_ARR}{IGNORE_R}\\right\|": "\\end{array}\\right|",
  63. fr"{END_ARR}{IGNORE_R}\|": "\\end{array}|"
  64. }
  65. def check_matrix_delimiters(latex_str):
  66. """Report mismatched, excess, or missing matrix delimiters."""
  67. spans = []
  68. for begin_delim in MATRIX_DELIMS:
  69. end_delim = MATRIX_DELIMS[begin_delim]
  70. p = rcompile(begin_delim)
  71. q = rcompile(end_delim)
  72. spans.extend([(*m.span(), m.group(),
  73. begin_delim) for m in p.finditer(latex_str)])
  74. spans.extend([(*m.span(), m.group(),
  75. end_delim) for m in q.finditer(latex_str)])
  76. spans.sort(key=(lambda x: x[0]))
  77. if len(spans) % 2 == 1:
  78. # Odd number of delimiters; therefore something
  79. # is wrong. We do not complain yet; let's see if
  80. # we can pinpoint the actual error.
  81. spans.append((None, None, None, None))
  82. spans = [(*x, *y) for (x, y) in zip(spans[::2], spans[1::2])]
  83. for x in spans:
  84. # x is supposed to be an 8-tuple of the following form:
  85. #
  86. # (begin_delim_span_start, begin_delim_span_end,
  87. # begin_delim_match, begin_delim_regex,
  88. # end_delim_span_start, end_delim_span_end,
  89. # end_delim_match, end_delim_regex)
  90. sellipsis = "..."
  91. s = x[0] - 10
  92. if s < 0:
  93. s = 0
  94. sellipsis = ""
  95. eellipsis = "..."
  96. e = x[1] + 10
  97. if e > len(latex_str):
  98. e = len(latex_str)
  99. eellipsis = ""
  100. if x[3] in END_DELIM_REPR:
  101. err = (f"Extra '{x[2]}' at index {x[0]} or "
  102. "missing corresponding "
  103. f"'{BEGIN_DELIM_REPR[MATRIX_DELIMS_INV[x[3]]]}' "
  104. f"in LaTeX string: {sellipsis}{latex_str[s:e]}"
  105. f"{eellipsis}")
  106. raise LaTeXParsingError(err)
  107. if x[7] is None:
  108. err = (f"Extra '{x[2]}' at index {x[0]} or "
  109. "missing corresponding "
  110. f"'{END_DELIM_REPR[MATRIX_DELIMS[x[3]]]}' "
  111. f"in LaTeX string: {sellipsis}{latex_str[s:e]}"
  112. f"{eellipsis}")
  113. raise LaTeXParsingError(err)
  114. correct_end_regex = MATRIX_DELIMS[x[3]]
  115. sellipsis = "..." if x[0] > 0 else ""
  116. eellipsis = "..." if x[5] < len(latex_str) else ""
  117. if x[7] != correct_end_regex:
  118. err = ("Expected "
  119. f"'{END_DELIM_REPR[correct_end_regex]}' "
  120. f"to close the '{x[2]}' at index {x[0]} but "
  121. f"found '{x[6]}' at index {x[4]} of LaTeX "
  122. f"string instead: {sellipsis}{latex_str[x[0]:x[5]]}"
  123. f"{eellipsis}")
  124. raise LaTeXParsingError(err)
  125. __doctest_requires__ = {('parse_latex',): ['antlr4', 'lark']}
  126. @doctest_depends_on(modules=('antlr4', 'lark'))
  127. def parse_latex(s, strict=False, backend="antlr"):
  128. r"""Converts the input LaTeX string ``s`` to a SymPy ``Expr``.
  129. Parameters
  130. ==========
  131. s : str
  132. The LaTeX string to parse. In Python source containing LaTeX,
  133. *raw strings* (denoted with ``r"``, like this one) are preferred,
  134. as LaTeX makes liberal use of the ``\`` character, which would
  135. trigger escaping in normal Python strings.
  136. backend : str, optional
  137. Currently, there are two backends supported: ANTLR, and Lark.
  138. The default setting is to use the ANTLR backend, which can be
  139. changed to Lark if preferred.
  140. Use ``backend="antlr"`` for the ANTLR-based parser, and
  141. ``backend="lark"`` for the Lark-based parser.
  142. The ``backend`` option is case-sensitive, and must be in
  143. all lowercase.
  144. strict : bool, optional
  145. This option is only available with the ANTLR backend.
  146. If True, raise an exception if the string cannot be parsed as
  147. valid LaTeX. If False, try to recover gracefully from common
  148. mistakes.
  149. Examples
  150. ========
  151. >>> from sympy.parsing.latex import parse_latex
  152. >>> expr = parse_latex(r"\frac {1 + \sqrt {\a}} {\b}")
  153. >>> expr
  154. (sqrt(a) + 1)/b
  155. >>> expr.evalf(4, subs=dict(a=5, b=2))
  156. 1.618
  157. >>> func = parse_latex(r"\int_1^\alpha \dfrac{\mathrm{d}t}{t}", backend="lark")
  158. >>> func.evalf(subs={"alpha": 2})
  159. 0.693147180559945
  160. """
  161. check_matrix_delimiters(s)
  162. if backend == "antlr":
  163. _latex = import_module(
  164. 'sympy.parsing.latex._parse_latex_antlr',
  165. import_kwargs={'fromlist': ['X']})
  166. if _latex is not None:
  167. return _latex.parse_latex(s, strict)
  168. elif backend == "lark":
  169. return parse_latex_lark(s)
  170. else:
  171. raise NotImplementedError(f"Using the '{backend}' backend in the LaTeX" \
  172. " parser is not supported.")