pretty.py 4.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148
"""
Format a pretty string of a `SoupSieve` object for easy debugging.

This won't necessarily support all types and such, and definitely
won't support custom outputs.

It is mainly geared towards our types, as the `SelectorList`
object is a beast to look at without some indentation and newlines.
The format and various output types are fairly well known (though they
haven't been tested extensively to make sure we aren't missing corners).

Example:
-------
```
>>> import soupsieve as sv
>>> sv.compile('this > that.class[name=value]').selectors.pretty()
SelectorList(
    selectors=(
        Selector(
            tag=SelectorTag(
                name='that',
                prefix=None),
            ids=(),
            classes=(
                'class',
                ),
            attributes=(
                SelectorAttribute(
                    attribute='name',
                    prefix='',
                    pattern=re.compile(
                        '^value$'),
                    xml_type_pattern=None),
                ),
            nth=(),
            selectors=(),
            relation=SelectorList(
                selectors=(
                    Selector(
                        tag=SelectorTag(
                            name='this',
                            prefix=None),
                        ids=(),
                        classes=(),
                        attributes=(),
                        nth=(),
                        selectors=(),
                        relation=SelectorList(
                            selectors=(),
                            is_not=False,
                            is_html=False),
                        rel_type='>',
                        contains=(),
                        lang=(),
                        flags=0),
                    ),
                is_not=False,
                is_html=False),
            rel_type=None,
            contains=(),
            lang=(),
            flags=0),
        ),
    is_not=False,
    is_html=False)
```
"""
  65. from __future__ import annotations
  66. import re
  67. from typing import Any
  68. RE_CLASS = re.compile(r'(?i)[a-z_][_a-z\d.]+\(')
  69. RE_PARAM = re.compile(r'(?i)[_a-z][_a-z\d]+=')
  70. RE_EMPTY = re.compile(r'\(\)|\[\]|\{\}')
  71. RE_LSTRT = re.compile(r'\[')
  72. RE_DSTRT = re.compile(r'\{')
  73. RE_TSTRT = re.compile(r'\(')
  74. RE_LEND = re.compile(r'\]')
  75. RE_DEND = re.compile(r'\}')
  76. RE_TEND = re.compile(r'\)')
  77. RE_INT = re.compile(r'\d+')
  78. RE_KWORD = re.compile(r'(?i)[_a-z][_a-z\d.]+')
  79. RE_DQSTR = re.compile(r'"(?:\\.|[^"\\])*"')
  80. RE_SQSTR = re.compile(r"'(?:\\.|[^'\\])*'")
  81. RE_SEP = re.compile(r'\s*(,)\s*')
  82. RE_DSEP = re.compile(r'\s*(:)\s*')
  83. RE_PSEP = re.compile(r'\s*(\|)\s*')
  84. TOKENS = {
  85. 'class': RE_CLASS,
  86. 'param': RE_PARAM,
  87. 'empty': RE_EMPTY,
  88. 'lstrt': RE_LSTRT,
  89. 'dstrt': RE_DSTRT,
  90. 'tstrt': RE_TSTRT,
  91. 'lend': RE_LEND,
  92. 'dend': RE_DEND,
  93. 'tend': RE_TEND,
  94. 'sqstr': RE_SQSTR,
  95. 'sep': RE_SEP,
  96. 'dsep': RE_DSEP,
  97. 'psep': RE_PSEP,
  98. 'int': RE_INT,
  99. 'kword': RE_KWORD,
  100. 'dqstr': RE_DQSTR
  101. }
  102. def pretty(obj: Any) -> str: # pragma: no cover
  103. """Make the object output string pretty."""
  104. sel = str(obj)
  105. index = 0
  106. end = len(sel) - 1
  107. indent = 0
  108. output = []
  109. while index <= end:
  110. m = None
  111. for k, v in TOKENS.items():
  112. m = v.match(sel, index)
  113. if m:
  114. name = k
  115. index = m.end(0)
  116. if name in ('class', 'lstrt', 'dstrt', 'tstrt'):
  117. indent += 4
  118. output.append(f'{m.group(0)}\n{" " * indent}')
  119. elif name in ('param', 'int', 'kword', 'sqstr', 'dqstr', 'empty'):
  120. output.append(m.group(0))
  121. elif name in ('lend', 'dend', 'tend'):
  122. indent -= 4
  123. output.append(m.group(0))
  124. elif name in ('sep',):
  125. output.append(f'{m.group(1)}\n{" " * indent}')
  126. elif name in ('dsep',):
  127. output.append(f'{m.group(1)} ')
  128. elif name in ('psep'):
  129. output.append(f' {m.group(1)} ')
  130. break
  131. # We shouldn't hit this, but if we do, store unrecognized character
  132. if m is None: # pragma: no cover
  133. output.append(sel[index])
  134. index += 1
  135. return ''.join(output)