# latex_parser.py (4.3 KB)
  1. import os
  2. import logging
  3. import re
  4. from pathlib import Path
  5. from sympy.external import import_module
  6. from sympy.parsing.latex.lark.transformer import TransformToSymPyExpr
# Optional dependency handle. Presumably ``import_module`` yields ``None`` when
# lark is not installed -- the rest of this module guards on ``_lark is None``.
_lark = import_module("lark")
  8. class LarkLaTeXParser:
  9. r"""Class for converting input `\mathrm{\LaTeX}` strings into SymPy Expressions.
  10. It holds all the necessary internal data for doing so, and exposes hooks for
  11. customizing its behavior.
  12. Parameters
  13. ==========
  14. print_debug_output : bool, optional
  15. If set to ``True``, prints debug output to the logger. Defaults to ``False``.
  16. transform : bool, optional
  17. If set to ``True``, the class runs the Transformer class on the parse tree
  18. generated by running ``Lark.parse`` on the input string. Defaults to ``True``.
  19. Setting it to ``False`` can help with debugging the `\mathrm{\LaTeX}` grammar.
  20. grammar_file : str, optional
  21. The path to the grammar file that the parser should use. If set to ``None``,
  22. it uses the default grammar, which is in ``grammar/latex.lark``, relative to
  23. the ``sympy/parsing/latex/lark/`` directory.
  24. transformer : str, optional
  25. The name of the Transformer class to use. If set to ``None``, it uses the
  26. default transformer class, which is :py:func:`TransformToSymPyExpr`.
  27. """
  28. def __init__(self, print_debug_output=False, transform=True, grammar_file=None, transformer=None):
  29. grammar_dir_path = os.path.join(os.path.dirname(__file__), "grammar/")
  30. if grammar_file is None:
  31. latex_grammar = Path(os.path.join(grammar_dir_path, "latex.lark")).read_text(encoding="utf-8")
  32. else:
  33. latex_grammar = Path(grammar_file).read_text(encoding="utf-8")
  34. self.parser = _lark.Lark(
  35. latex_grammar,
  36. source_path=grammar_dir_path,
  37. parser="earley",
  38. start="latex_string",
  39. lexer="auto",
  40. ambiguity="explicit",
  41. propagate_positions=False,
  42. maybe_placeholders=False,
  43. keep_all_tokens=True)
  44. self.print_debug_output = print_debug_output
  45. self.transform_expr = transform
  46. if transformer is None:
  47. self.transformer = TransformToSymPyExpr()
  48. else:
  49. self.transformer = transformer()
  50. def doparse(self, s: str):
  51. if self.print_debug_output:
  52. _lark.logger.setLevel(logging.DEBUG)
  53. parse_tree = self.parser.parse(s)
  54. if not self.transform_expr:
  55. # exit early and return the parse tree
  56. _lark.logger.debug("expression = %s", s)
  57. _lark.logger.debug(parse_tree)
  58. _lark.logger.debug(parse_tree.pretty())
  59. return parse_tree
  60. if self.print_debug_output:
  61. # print this stuff before attempting to run the transformer
  62. _lark.logger.debug("expression = %s", s)
  63. # print the `parse_tree` variable
  64. _lark.logger.debug(parse_tree.pretty())
  65. sympy_expression = self.transformer.transform(parse_tree)
  66. if self.print_debug_output:
  67. _lark.logger.debug("SymPy expression = %s", sympy_expression)
  68. return sympy_expression
# Build the shared default parser at import time, but only when lark was
# imported; otherwise ``_lark_latex_parser`` is left undefined.
if _lark is not None:
    _lark_latex_parser = LarkLaTeXParser()
  71. def parse_latex_lark(s: str):
  72. """
  73. Experimental LaTeX parser using Lark.
  74. This function is still under development and its API may change with the
  75. next releases of SymPy.
  76. """
  77. if _lark is None:
  78. raise ImportError("Lark is probably not installed")
  79. return _lark_latex_parser.doparse(s)
  80. def _pretty_print_lark_trees(tree, indent=0, show_expr=True):
  81. if isinstance(tree, _lark.Token):
  82. return tree.value
  83. data = str(tree.data)
  84. is_expr = data.startswith("expression")
  85. if is_expr:
  86. data = re.sub(r"^expression", "E", data)
  87. is_ambig = (data == "_ambig")
  88. if is_ambig:
  89. new_indent = indent + 2
  90. else:
  91. new_indent = indent
  92. output = ""
  93. show_node = not is_expr or show_expr
  94. if show_node:
  95. output += str(data) + "("
  96. if is_ambig:
  97. output += "\n" + "\n".join([" " * new_indent + _pretty_print_lark_trees(i, new_indent, show_expr) for i in tree.children])
  98. else:
  99. output += ",".join([_pretty_print_lark_trees(i, new_indent, show_expr) for i in tree.children])
  100. if show_node:
  101. output += ")"
  102. return output