syntax_helpers.py 4.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152
  1. from lark import Lark, ParseTree, exceptions
  2. from pathlib import Path
  3. from rfc3987_syntax.utils import load_grammar
  4. RFC3987_SYNTAX_PARSER_TYPE: str = "earley"
  5. RFC3987_SYNTAX_GRAMMAR_PATH: Path = Path(__file__).parent / "syntax_rfc3987.lark"
  6. RFC3987_SYNTAX_TERMS: list[str] = [
  7. "iri",
  8. "iri_reference",
  9. "absolute_iri",
  10. "scheme",
  11. "irelative_ref",
  12. "irelative_part"
  13. "ihier_part",
  14. "iauthority",
  15. "iuserinfo",
  16. "ihost",
  17. "ireg_name",
  18. "ipath_abempty",
  19. "isegment",
  20. "isegment_nz",
  21. "isegment_nz_nc",
  22. "ipchar",
  23. "iquery",
  24. "ifragment",
  25. "iunreserved",
  26. "ucschar",
  27. "iprivate",
  28. "sub_delims",
  29. "ip_literal",
  30. "ipvfuture",
  31. "ipv6address",
  32. "h16",
  33. "ls32",
  34. "ipv4address",
  35. "dec_octet",
  36. "digit",
  37. "non_zero",
  38. "unreserved",
  39. "alpha",
  40. "hexdig",
  41. "port",
  42. "pct_encoded",
  43. ]
  44. grammar: str = load_grammar(RFC3987_SYNTAX_GRAMMAR_PATH)
  45. syntax_parser = Lark(grammar, start=["iri", "iri_reference", "absolute_iri"], parser=RFC3987_SYNTAX_PARSER_TYPE)
  46. def parse(term: str, value: str) -> ParseTree:
  47. return syntax_parser.parse(value, start=term)
  48. def is_valid_syntax(term: str, value: str):
  49. try:
  50. parse(term=term, value=value)
  51. return True
  52. except exceptions.LarkError:
  53. return False
  54. def make_syntax_validator(rule_name):
  55. parser = Lark(grammar, start=rule_name, parser=RFC3987_SYNTAX_PARSER_TYPE)
  56. def syntax_validator(text):
  57. try:
  58. parser.parse(text)
  59. return True
  60. except exceptions.LarkError:
  61. return False
  62. return syntax_validator
# Ready-made validators, one per grammar rule name. Each call to
# make_syntax_validator() builds its own Lark parser when this module is
# imported; every resulting callable takes the text to check and returns a bool.
#
# NOTE(review): RFC3987_SYNTAX_TERMS also lists "scheme", "non_zero" and
# "pct_encoded", for which no validator is defined below -- confirm whether
# that is intentional.

# Rules named for whole IRIs, references and their relative/hierarchical parts.
is_valid_syntax_iri = make_syntax_validator("iri")
is_valid_syntax_iri_reference = make_syntax_validator("iri_reference")
is_valid_syntax_absolute_iri = make_syntax_validator("absolute_iri")
is_valid_syntax_irelative_ref = make_syntax_validator("irelative_ref")
is_valid_syntax_irelative_part = make_syntax_validator("irelative_part")
is_valid_syntax_ihier_part = make_syntax_validator("ihier_part")

# Rules named for the authority component and its pieces.
is_valid_syntax_iauthority = make_syntax_validator("iauthority")
is_valid_syntax_iuserinfo = make_syntax_validator("iuserinfo")
is_valid_syntax_ihost = make_syntax_validator("ihost")
is_valid_syntax_ireg_name = make_syntax_validator("ireg_name")

# Rules whose names start with "ipath" / "isegment", plus "ipchar".
is_valid_syntax_ipath = make_syntax_validator("ipath")
is_valid_syntax_ipath_abempty = make_syntax_validator("ipath_abempty")
is_valid_syntax_ipath_absolute = make_syntax_validator("ipath_absolute")
is_valid_syntax_ipath_noscheme = make_syntax_validator("ipath_noscheme")
is_valid_syntax_ipath_rootless = make_syntax_validator("ipath_rootless")
is_valid_syntax_ipath_empty = make_syntax_validator("ipath_empty")
is_valid_syntax_isegment = make_syntax_validator("isegment")
is_valid_syntax_isegment_nz = make_syntax_validator("isegment_nz")
is_valid_syntax_isegment_nz_nc = make_syntax_validator("isegment_nz_nc")
is_valid_syntax_ipchar = make_syntax_validator("ipchar")

# Query and fragment rules.
is_valid_syntax_iquery = make_syntax_validator("iquery")
is_valid_syntax_ifragment = make_syntax_validator("ifragment")

# Character-set rules.
is_valid_syntax_iunreserved = make_syntax_validator("iunreserved")
is_valid_syntax_ucschar = make_syntax_validator("ucschar")
is_valid_syntax_iprivate = make_syntax_validator("iprivate")
is_valid_syntax_sub_delims = make_syntax_validator("sub_delims")

# Rules named for IP literals and address parts.
is_valid_syntax_ip_literal = make_syntax_validator("ip_literal")
is_valid_syntax_ipvfuture = make_syntax_validator("ipvfuture")
is_valid_syntax_ipv6address = make_syntax_validator("ipv6address")
is_valid_syntax_h16 = make_syntax_validator("h16")
is_valid_syntax_ls32 = make_syntax_validator("ls32")
is_valid_syntax_ipv4address = make_syntax_validator("ipv4address")
is_valid_syntax_dec_octet = make_syntax_validator("dec_octet")

# Remaining basic rules.
is_valid_syntax_unreserved = make_syntax_validator("unreserved")
is_valid_syntax_alpha = make_syntax_validator("alpha")
is_valid_syntax_digit = make_syntax_validator("digit")
is_valid_syntax_hexdig = make_syntax_validator("hexdig")
is_valid_syntax_port = make_syntax_validator("port")