cells.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352
  1. from __future__ import annotations
  2. from functools import lru_cache
  3. from operator import itemgetter
  4. from typing import Callable, NamedTuple, Sequence, Tuple
  5. from rich._unicode_data import load as load_cell_table
  6. CellSpan = Tuple[int, int, int]
  7. _span_get_cell_len = itemgetter(2)
  8. # Ranges of unicode ordinals that produce a 1-cell wide character
  9. # This is non-exhaustive, but covers most common Western characters
  10. _SINGLE_CELL_UNICODE_RANGES: list[tuple[int, int]] = [
  11. (0x20, 0x7E), # Latin (excluding non-printable)
  12. (0xA0, 0xAC),
  13. (0xAE, 0x002FF),
  14. (0x00370, 0x00482), # Greek / Cyrillic
  15. (0x02500, 0x025FC), # Box drawing, box elements, geometric shapes
  16. (0x02800, 0x028FF), # Braille
  17. ]
  18. # A frozen set of characters that are a single cell wide
  19. _SINGLE_CELLS = frozenset(
  20. [
  21. character
  22. for _start, _end in _SINGLE_CELL_UNICODE_RANGES
  23. for character in map(chr, range(_start, _end + 1))
  24. ]
  25. )
  26. # When called with a string this will return True if all
  27. # characters are single-cell, otherwise False
  28. _is_single_cell_widths: Callable[[str], bool] = _SINGLE_CELLS.issuperset
  29. class CellTable(NamedTuple):
  30. """Contains unicode data required to measure the cell widths of glyphs."""
  31. unicode_version: str
  32. widths: Sequence[tuple[int, int, int]]
  33. narrow_to_wide: frozenset[str]
  34. @lru_cache(maxsize=4096)
  35. def get_character_cell_size(character: str, unicode_version: str = "auto") -> int:
  36. """Get the cell size of a character.
  37. Args:
  38. character (str): A single character.
  39. unicode_version: Unicode version, `"auto"` to auto detect, `"latest"` for the latest unicode version.
  40. Returns:
  41. int: Number of cells (0, 1 or 2) occupied by that character.
  42. """
  43. codepoint = ord(character)
  44. if codepoint and codepoint < 32 or 0x07F <= codepoint < 0x0A0:
  45. return 0
  46. table = load_cell_table(unicode_version).widths
  47. last_entry = table[-1]
  48. if codepoint > last_entry[1]:
  49. return 1
  50. lower_bound = 0
  51. upper_bound = len(table) - 1
  52. while lower_bound <= upper_bound:
  53. index = (lower_bound + upper_bound) >> 1
  54. start, end, width = table[index]
  55. if codepoint < start:
  56. upper_bound = index - 1
  57. elif codepoint > end:
  58. lower_bound = index + 1
  59. else:
  60. return width
  61. return 1
  62. @lru_cache(4096)
  63. def cached_cell_len(text: str, unicode_version: str = "auto") -> int:
  64. """Get the number of cells required to display text.
  65. This method always caches, which may use up a lot of memory. It is recommended to use
  66. `cell_len` over this method.
  67. Args:
  68. text (str): Text to display.
  69. unicode_version: Unicode version, `"auto"` to auto detect, `"latest"` for the latest unicode version.
  70. Returns:
  71. int: Get the number of cells required to display text.
  72. """
  73. return _cell_len(text, unicode_version)
  74. def cell_len(text: str, unicode_version: str = "auto") -> int:
  75. """Get the cell length of a string (length as it appears in the terminal).
  76. Args:
  77. text: String to measure.
  78. unicode_version: Unicode version, `"auto"` to auto detect, `"latest"` for the latest unicode version.
  79. Returns:
  80. Length of string in terminal cells.
  81. """
  82. if len(text) < 512:
  83. return cached_cell_len(text, unicode_version)
  84. return _cell_len(text, unicode_version)
  85. def _cell_len(text: str, unicode_version: str) -> int:
  86. """Get the cell length of a string (length as it appears in the terminal).
  87. Args:
  88. text: String to measure.
  89. unicode_version: Unicode version, `"auto"` to auto detect, `"latest"` for the latest unicode version.
  90. Returns:
  91. Length of string in terminal cells.
  92. """
  93. if _is_single_cell_widths(text):
  94. return len(text)
  95. # "\u200d" is zero width joiner
  96. # "\ufe0f" is variation selector 16
  97. if "\u200d" not in text and "\ufe0f" not in text:
  98. # Simplest case with no unicode stuff that changes the size
  99. return sum(
  100. get_character_cell_size(character, unicode_version) for character in text
  101. )
  102. cell_table = load_cell_table(unicode_version)
  103. total_width = 0
  104. last_measured_character: str | None = None
  105. SPECIAL = {"\u200d", "\ufe0f"}
  106. index = 0
  107. character_count = len(text)
  108. while index < character_count:
  109. character = text[index]
  110. if character in SPECIAL:
  111. if character == "\u200d":
  112. index += 1
  113. elif last_measured_character:
  114. total_width += last_measured_character in cell_table.narrow_to_wide
  115. last_measured_character = None
  116. else:
  117. if character_width := get_character_cell_size(character, unicode_version):
  118. last_measured_character = character
  119. total_width += character_width
  120. index += 1
  121. return total_width
  122. def split_graphemes(
  123. text: str, unicode_version: str = "auto"
  124. ) -> "tuple[list[CellSpan], int]":
  125. """Divide text into spans that define a single grapheme, and additionally return the cell length of the whole string.
  126. The returned spans will cover every index in the string, with no gaps. It is possible for some graphemes to have a cell length of zero.
  127. This can occur for nonsense strings like two zero width joiners, or for control codes that don't contribute to the grapheme size.
  128. Args:
  129. text: String to split.
  130. unicode_version: Unicode version, `"auto"` to auto detect, `"latest"` for the latest unicode version.
  131. Returns:
  132. A tuple of a list of *spans* and the cell length of the entire string. A span is a list of tuples
  133. of three values consisting of (<START>, <END>, <CELL LENGTH>), where START and END are string indices,
  134. and CELL LENGTH is the cell length of the single grapheme.
  135. """
  136. cell_table = load_cell_table(unicode_version)
  137. codepoint_count = len(text)
  138. index = 0
  139. last_measured_character: str | None = None
  140. total_width = 0
  141. spans: list[tuple[int, int, int]] = []
  142. SPECIAL = {"\u200d", "\ufe0f"}
  143. while index < codepoint_count:
  144. if (character := text[index]) in SPECIAL:
  145. if not spans:
  146. # ZWJ or variation selector at the beginning of the string doesn't really make sense.
  147. # But handle it, we must.
  148. spans.append((index, index := index + 1, 0))
  149. continue
  150. if character == "\u200d":
  151. # zero width joiner
  152. # The condition handles the case where a ZWJ is at the end of the string, and has nothing to join
  153. index += 2 if index < (codepoint_count - 1) else 1
  154. start, _end, cell_length = spans[-1]
  155. spans[-1] = (start, index, cell_length)
  156. else:
  157. # variation selector 16
  158. index += 1
  159. if last_measured_character:
  160. start, _end, cell_length = spans[-1]
  161. if last_measured_character in cell_table.narrow_to_wide:
  162. last_measured_character = None
  163. cell_length += 1
  164. total_width += 1
  165. spans[-1] = (start, index, cell_length)
  166. else:
  167. # No previous character to change the size of.
  168. # Shouldn't occur in practice.
  169. # But handle it, we must.
  170. start, _end, cell_length = spans[-1]
  171. spans[-1] = (start, index, cell_length)
  172. continue
  173. if character_width := get_character_cell_size(character, unicode_version):
  174. last_measured_character = character
  175. spans.append((index, index := index + 1, character_width))
  176. total_width += character_width
  177. else:
  178. # Character has zero width
  179. if spans:
  180. # zero width characters are associated with the previous character
  181. start, _end, cell_length = spans[-1]
  182. spans[-1] = (start, index := index + 1, cell_length)
  183. else:
  184. # A zero width character with no prior spans
  185. spans.append((index, index := index + 1, 0))
  186. return (spans, total_width)
  187. def _split_text(
  188. text: str, cell_position: int, unicode_version: str = "auto"
  189. ) -> tuple[str, str]:
  190. """Split text by cell position.
  191. If the cell position falls within a double width character, it is converted to two spaces.
  192. Args:
  193. text: Text to split.
  194. cell_position Offset in cells.
  195. unicode_version: Unicode version, `"auto"` to auto detect, `"latest"` for the latest unicode version.
  196. Returns:
  197. Tuple to two split strings.
  198. """
  199. if cell_position <= 0:
  200. return "", text
  201. spans, cell_length = split_graphemes(text, unicode_version)
  202. # Guess initial offset
  203. offset = int((cell_position / cell_length) * len(spans))
  204. left_size = sum(map(_span_get_cell_len, spans[:offset]))
  205. while True:
  206. if left_size == cell_position:
  207. if offset >= len(spans):
  208. return text, ""
  209. split_index = spans[offset][0]
  210. return text[:split_index], text[split_index:]
  211. if left_size < cell_position:
  212. start, end, cell_size = spans[offset]
  213. if left_size + cell_size > cell_position:
  214. return text[:start] + " ", " " + text[end:]
  215. offset += 1
  216. left_size += cell_size
  217. else: # left_size > cell_position
  218. start, end, cell_size = spans[offset - 1]
  219. if left_size - cell_size < cell_position:
  220. return text[:start] + " ", " " + text[end:]
  221. offset -= 1
  222. left_size -= cell_size
  223. def split_text(
  224. text: str, cell_position: int, unicode_version: str = "auto"
  225. ) -> tuple[str, str]:
  226. """Split text by cell position.
  227. If the cell position falls within a double width character, it is converted to two spaces.
  228. Args:
  229. text: Text to split.
  230. cell_position Offset in cells.
  231. unicode_version: Unicode version, `"auto"` to auto detect, `"latest"` for the latest unicode version.
  232. Returns:
  233. Tuple to two split strings.
  234. """
  235. if _is_single_cell_widths(text):
  236. return text[:cell_position], text[cell_position:]
  237. return _split_text(text, cell_position, unicode_version)
  238. def set_cell_size(text: str, total: int, unicode_version: str = "auto") -> str:
  239. """Adjust a string by cropping or padding with spaces such that it fits within the given number of cells.
  240. Args:
  241. text: String to adjust.
  242. total: Desired size in cells.
  243. unicode_version: Unicode version.
  244. Returns:
  245. A string with cell size equal to total.
  246. """
  247. if _is_single_cell_widths(text):
  248. size = len(text)
  249. if size < total:
  250. return text + " " * (total - size)
  251. return text[:total]
  252. if total <= 0:
  253. return ""
  254. cell_size = cell_len(text)
  255. if cell_size == total:
  256. return text
  257. if cell_size < total:
  258. return text + " " * (total - cell_size)
  259. text, _ = _split_text(text, total, unicode_version)
  260. return text
  261. def chop_cells(text: str, width: int, unicode_version: str = "auto") -> list[str]:
  262. """Split text into lines such that each line fits within the available (cell) width.
  263. Args:
  264. text: The text to fold such that it fits in the given width.
  265. width: The width available (number of cells).
  266. Returns:
  267. A list of strings such that each string in the list has cell width
  268. less than or equal to the available width.
  269. """
  270. if _is_single_cell_widths(text):
  271. return [text[index : index + width] for index in range(0, len(text), width)]
  272. spans, _ = split_graphemes(text, unicode_version)
  273. line_size = 0 # Size of line in cells
  274. lines: list[str] = []
  275. line_offset = 0 # Offset (in codepoints) of start of line
  276. for start, end, cell_size in spans:
  277. if line_size + cell_size > width:
  278. lines.append(text[line_offset:start])
  279. line_offset = start
  280. line_size = 0
  281. line_size += cell_size
  282. if line_size:
  283. lines.append(text[line_offset:])
  284. return lines