parser_coverage.py 2.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687
  1. #!/usr/bin/env python
  2. # Copyright (c) 2022 Rocky Bernstein
  3. #
  4. # This program is free software: you can redistribute it and/or modify
  5. # it under the terms of the GNU General Public License as published by
  6. # the Free Software Foundation, either version 3 of the License, or
  7. # (at your option) any later version.
  8. #
  9. # This program is distributed in the hope that it will be useful,
  10. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. # GNU General Public License for more details.
  13. #
  14. # You should have received a copy of the GNU General Public License
  15. # along with this program. If not, see <http://www.gnu.org/licenses/>.
  16. from __future__ import print_function
  17. import os
  18. import pickle
  19. import click
  20. from spark_parser.version import __version__
  21. def sort_profile_info(path, max_count=1000):
  22. profile_info = pickle.load(open(path, "rb"))
  23. # Classify unused rules. Some are "unused" because the have nullable
  24. # nonterminals and those show up as a different rule. Sothe rule
  25. # *is* used just not in the form where a nullable symbol hasn't been
  26. # nulled.
  27. # And in some cases this is intentional. Uncompyle6 creates such grammar
  28. # rules to ensure that positions of certain nonterminals in semantic
  29. # actions appear in the same place as similar grammar rules
  30. unused_rules = set() # just the grammar rules
  31. used_rules = [] # (count, grammar rule)
  32. for rule, count in profile_info.items():
  33. if count == 0:
  34. unused_rules.add(rule)
  35. else:
  36. used_rules.append((count, rule))
  37. for count, rule in used_rules:
  38. if rule.find("\\e_") > -1:
  39. canonic_rule = rule.replace("\\e_", "", 1000)
  40. if canonic_rule in unused_rules:
  41. unused_rules.remove(canonic_rule)
  42. pass
  43. pass
  44. pass
  45. unused_items = [(0, item) for item in sorted(unused_rules)]
  46. used_items = sorted(used_rules, reverse=False)
  47. return [item for item in unused_items + used_items if item[0] <= max_count]
  48. DEFAULT_COVERAGE_FILE = os.environ.get(
  49. "SPARK_PARSER_COVERAGE", "/tmp/spark-grammar.cover"
  50. )
  51. DEFAULT_COUNT = 100
  52. @click.command()
  53. @click.version_option(version=__version__)
  54. @click.option(
  55. "--max-count",
  56. type=int,
  57. default=DEFAULT_COUNT,
  58. help=(
  59. f"limit output to rules having no more than this many hits (default {DEFAULT_COUNT})"
  60. ),
  61. )
  62. @click.argument("path", type=click.Path(), default=DEFAULT_COVERAGE_FILE)
  63. def run(max_count, path: str):
  64. """Print grammar reduce statistics for a series of spark-parser parses"""
  65. for count, rule in sort_profile_info(path, max_count):
  66. print("%d: %s" % (count, rule))
  67. pass
  68. return
# Invoke the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    run()