| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196 |
- ###{standalone
- #
- #
- # Lark Stand-alone Generator Tool
- # ----------------------------------
- # Generates a stand-alone LALR(1) parser
- #
- # Git: https://github.com/erezsh/lark
- # Author: Erez Shinan (erezshin@gmail.com)
- #
- #
- # >>> LICENSE
- #
- # This tool and its generated code use a separate license from Lark,
- # and are subject to the terms of the Mozilla Public License, v. 2.0.
- # If a copy of the MPL was not distributed with this
- # file, You can obtain one at https://mozilla.org/MPL/2.0/.
- #
- # If you wish to purchase a commercial license for this tool and its
- # generated code, you may contact me via email or otherwise.
- #
- # If MPL2 is incompatible with your free or open-source project,
- # contact me and we'll work it out.
- #
- #
- from copy import deepcopy
- from abc import ABC, abstractmethod
- from types import ModuleType
- from typing import (
- TypeVar, Generic, Type, Tuple, List, Dict, Iterator, Collection, Callable, Optional, FrozenSet, Any,
- Union, Iterable, IO, TYPE_CHECKING, overload, Sequence,
- Pattern as REPattern, ClassVar, Set, Mapping
- )
- ###}
- import sys
- import token, tokenize
- import os
- from os import path
- from collections import defaultdict
- from functools import partial
- from argparse import ArgumentParser
- import lark
- from lark.tools import lalr_argparser, build_lalr, make_warnings_comments
- from lark.grammar import Rule
- from lark.lexer import TerminalDef
# Directory holding this file, and its parent directory; the entries of
# EXTRACT_STANDALONE_FILES are joined onto _larkdir by gen_standalone().
_dir = path.dirname(__file__)
_larkdir = path.join(_dir, path.pardir)

# Source files whose 'standalone' sections are emitted, in this order, into
# the generated module.  Order matters: gen_standalone() treats index 0 (this
# file) specially and emits its section verbatim, while the sections of all
# later files are passed through strip_docstrings() first.
EXTRACT_STANDALONE_FILES = [
    'tools/standalone.py',
    'exceptions.py',
    'utils.py',
    'tree.py',
    'visitors.py',
    'grammar.py',
    'lexer.py',
    'common.py',
    'parse_tree_builder.py',
    'parsers/lalr_analysis.py',
    'parsers/lalr_parser_state.py',
    'parsers/lalr_parser.py',
    'parsers/lalr_interactive_parser.py',
    'parser_frontends.py',
    'lark.py',
    'indenter.py',
]
def extract_sections(lines):
    """Collect the text enclosed between section marker lines.

    A line starting with ``###`` immediately followed by ``{`` opens a
    section whose name is the stripped remainder of that line; a line
    starting with ``###`` immediately followed by ``}`` closes the current
    section and stores the lines gathered since the previous close.  Lines
    outside any named section are ignored, and several sections sharing a
    name are concatenated in order of appearance.  Text of a section that is
    never closed is not stored.

    Args:
        lines: iterable of text lines (line endings are kept as given).

    Returns:
        A dict mapping each section name to the joined text of its lines.

    Raises:
        ValueError: a line starts with ``###`` but is followed by neither
            ``{`` nor ``}`` (including a bare ``###`` with nothing after it).
    """
    section = None
    text = []
    sections = defaultdict(list)
    for line in lines:
        if line.startswith('###'):
            # Slice instead of indexing: a bare '###' (e.g. last line of a
            # file without a trailing newline) then yields '' and is reported
            # as a malformed marker rather than escaping as an IndexError.
            marker = line[3:4]
            if marker == '{':
                section = line[4:].strip()
            elif marker == '}':
                sections[section] += text
                section = None
                text = []
            else:
                raise ValueError(line)
        elif section:
            text.append(line)
    return {name: ''.join(chunks) for name, chunks in sections.items()}
def strip_docstrings(line_gen):
    """Return the source with comments and docstrings replaced by placeholders.

    ``line_gen`` is a readline-style callable handed to
    ``tokenize.generate_tokens``.  The token stream is re-assembled into text,
    re-creating the horizontal gaps between tokens on a line.  A string token
    that directly follows an INDENT token (or is the very first token) is
    treated as a docstring and emitted as ``#--``; every comment token is
    emitted as ``##`` followed by a newline.

    Based on code from: https://stackoverflow.com/questions/1769332/script-to-remove-python-comments-docstrings
    """
    pieces = []
    # Starting as if an INDENT was just seen makes a leading string count as
    # a docstring.
    previous_kind = token.INDENT
    prev_end_row, prev_end_col = -1, 0

    for tok in tokenize.generate_tokens(line_gen):
        kind, text, (start_row, start_col), (end_row, end_col), _line = tok

        # A token starting on a later row than the previous one ended on
        # measures its leading gap from column 0.
        if start_row > prev_end_row:
            prev_end_col = 0

        gap = start_col - prev_end_col
        if gap > 0:
            pieces.append(" " * gap)

        if kind == tokenize.COMMENT:
            replacement = "##\n"
        elif kind == token.STRING and previous_kind == token.INDENT:
            replacement = "#--"
        else:
            replacement = text
        pieces.append(replacement)

        previous_kind = kind
        prev_end_row, prev_end_col = end_row, end_col

    return ''.join(pieces)
def gen_standalone(lark_inst, output=None, out=sys.stdout, compress=False):
    """Emit the source of a stand-alone parser module for ``lark_inst``.

    The emitted text consists of a version header, the ``standalone``
    sections of every file in ``EXTRACT_STANDALONE_FILES``, the serialized
    parser data (``DATA`` / ``MEMO``) and a ``Lark_StandAlone`` factory
    function.

    Args:
        lark_inst: object whose ``memo_serialize([TerminalDef, Rule])``
            returns the ``(data, memo)`` pair to embed.
        output: callable invoked once per emitted chunk (and once with no
            arguments for a blank line).  Defaults to ``print`` writing
            to ``out``.
        out: file object used only when ``output`` is None.
        compress: when true, ``DATA`` and ``MEMO`` are emitted as
            base64-encoded, zlib-compressed pickles, followed by statements
            that decode them on import.
    """
    if output is None:
        output = partial(print, file=out)

    import pickle
    import zlib
    import base64

    def compressed_output(obj):
        # Emit obj as the repr of a base64 bytes literal.
        s = pickle.dumps(obj, pickle.HIGHEST_PROTOCOL)
        c = zlib.compress(s)
        output(repr(base64.b64encode(c)))

    def output_decompress(name):
        # Emit the statement that turns the literal bound to `name` back
        # into the original object.
        output('%s = pickle.loads(zlib.decompress(base64.b64decode(%s)))' % (name, name))

    output('# The file was automatically generated by Lark v%s' % lark.__version__)
    output('__version__ = "%s"' % lark.__version__)
    output()

    for i, pyfile in enumerate(EXTRACT_STANDALONE_FILES):
        # Python source is UTF-8 by default; read it as such rather than
        # with the locale's preferred encoding.
        with open(os.path.join(_larkdir, pyfile), encoding='utf-8') as f:
            code = extract_sections(f)['standalone']
        if i:   # if not this file
            code = strip_docstrings(partial(next, iter(code.splitlines(True))))
        output(code)

    data, m = lark_inst.memo_serialize([TerminalDef, Rule])
    output('import pickle, zlib, base64')
    if compress:
        output('DATA = (')
        compressed_output(data)
        output(')')
        output_decompress('DATA')
        output('MEMO = (')
        compressed_output(m)
        output(')')
        output_decompress('MEMO')
    else:
        output('DATA = (')
        output(data)
        output(')')
        output('MEMO = (')
        output(m)
        output(')')

    output('Shift = 0')
    output('Reduce = 1')
    output("def Lark_StandAlone(**kwargs):")
    output(" return Lark._load_from_dict(DATA, MEMO, **kwargs)")
def main():
    """Command-line entry point of the stand-alone generator.

    Calls ``make_warnings_comments()``, parses the command line (the options
    of ``lalr_argparser`` plus ``-c/--compress``), builds the parser with
    ``build_lalr`` and writes the generated module with ``gen_standalone``.
    When invoked without any arguments, prints the help text to stderr and
    exits with status 1.
    """
    make_warnings_comments()
    parser = ArgumentParser(
        # The program name shown in usage/help output; previously the literal
        # text "prog='...'" was passed by mistake.
        prog='python -m lark.tools.standalone',
        description="Lark Stand-alone Generator Tool",
        parents=[lalr_argparser],
        epilog='Look at the Lark documentation for more info on the options',
    )
    parser.add_argument('-c', '--compress', action='store_true', default=False, help="Enable compression")
    if len(sys.argv) == 1:
        parser.print_help(sys.stderr)
        sys.exit(1)
    ns = parser.parse_args()
    try:
        lark_inst, out = build_lalr(ns)
        gen_standalone(lark_inst, out=out, compress=ns.compress)
    finally:
        # Release the output and grammar files even when building or
        # generating fails.
        ns.out.close()
        ns.grammar_file.close()
# Run the generator only when this file is executed as a script,
# not when it is imported.
if __name__ == '__main__':
    main()
|