gpos.py 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439
  1. import logging
  2. import os
  3. from collections import defaultdict, namedtuple
  4. from dataclasses import dataclass
  5. from functools import cached_property, reduce
  6. from itertools import chain
  7. from math import log2
  8. from typing import DefaultDict, Dict, Iterable, List, Sequence, Tuple
  9. from fontTools.config import OPTIONS
  10. from fontTools.misc.intTools import bit_count, bit_indices
  11. from fontTools.ttLib import TTFont
  12. from fontTools.ttLib.tables import otBase, otTables
# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Option object for this module's compression level, looked up in the
# fontTools OPTIONS registry under the "<module name>:COMPRESSION_LEVEL" key.
COMPRESSION_LEVEL = OPTIONS[f"{__name__}:COMPRESSION_LEVEL"]

# Kept because ufo2ft depends on it, to be removed once ufo2ft uses the config instead
# https://github.com/fonttools/fonttools/issues/2592
GPOS_COMPACT_MODE_ENV_KEY = "FONTTOOLS_GPOS_COMPACT_MODE"
# String form of the option's default value; used as the level when the
# (deprecated) environment variable above is not set.
GPOS_COMPACT_MODE_DEFAULT = str(COMPRESSION_LEVEL.default)
  19. def _compression_level_from_env() -> int:
  20. env_level = GPOS_COMPACT_MODE_DEFAULT
  21. if GPOS_COMPACT_MODE_ENV_KEY in os.environ:
  22. import warnings
  23. warnings.warn(
  24. f"'{GPOS_COMPACT_MODE_ENV_KEY}' environment variable is deprecated. "
  25. "Please set the 'fontTools.otlLib.optimize.gpos:COMPRESSION_LEVEL' option "
  26. "in TTFont.cfg.",
  27. DeprecationWarning,
  28. )
  29. env_level = os.environ[GPOS_COMPACT_MODE_ENV_KEY]
  30. if len(env_level) == 1 and env_level in "0123456789":
  31. return int(env_level)
  32. raise ValueError(f"Bad {GPOS_COMPACT_MODE_ENV_KEY}={env_level}")
  33. def compact(font: TTFont, level: int) -> TTFont:
  34. # Ideal plan:
  35. # 1. Find lookups of Lookup Type 2: Pair Adjustment Positioning Subtable
  36. # https://docs.microsoft.com/en-us/typography/opentype/spec/gpos#lookup-type-2-pair-adjustment-positioning-subtable
  37. # 2. Extract glyph-glyph kerning and class-kerning from all present subtables
  38. # 3. Regroup into different subtable arrangements
  39. # 4. Put back into the lookup
  40. #
  41. # Actual implementation:
  42. # 2. Only class kerning is optimized currently
  43. # 3. If the input kerning is already in several subtables, the subtables
  44. # are not grouped together first; instead each subtable is treated
  45. # independently, so currently this step is:
  46. # Split existing subtables into more smaller subtables
  47. gpos = font.get("GPOS")
  48. # If the font does not contain a GPOS table, there is nothing to do.
  49. if gpos is None:
  50. return font
  51. for lookup in gpos.table.LookupList.Lookup:
  52. if lookup.LookupType == 2:
  53. compact_lookup(font, level, lookup)
  54. elif lookup.LookupType == 9 and lookup.SubTable[0].ExtensionLookupType == 2:
  55. compact_ext_lookup(font, level, lookup)
  56. return font
  57. def compact_lookup(font: TTFont, level: int, lookup: otTables.Lookup) -> None:
  58. new_subtables = compact_pair_pos(font, level, lookup.SubTable)
  59. lookup.SubTable = new_subtables
  60. lookup.SubTableCount = len(new_subtables)
  61. def compact_ext_lookup(font: TTFont, level: int, lookup: otTables.Lookup) -> None:
  62. new_subtables = compact_pair_pos(
  63. font, level, [ext_subtable.ExtSubTable for ext_subtable in lookup.SubTable]
  64. )
  65. new_ext_subtables = []
  66. for subtable in new_subtables:
  67. ext_subtable = otTables.ExtensionPos()
  68. ext_subtable.Format = 1
  69. ext_subtable.ExtSubTable = subtable
  70. new_ext_subtables.append(ext_subtable)
  71. lookup.SubTable = new_ext_subtables
  72. lookup.SubTableCount = len(new_ext_subtables)
  73. def compact_pair_pos(
  74. font: TTFont, level: int, subtables: Sequence[otTables.PairPos]
  75. ) -> Sequence[otTables.PairPos]:
  76. new_subtables = []
  77. for subtable in subtables:
  78. if subtable.Format == 1:
  79. # Not doing anything to Format 1 (yet?)
  80. new_subtables.append(subtable)
  81. elif subtable.Format == 2:
  82. new_subtables.extend(compact_class_pairs(font, level, subtable))
  83. return new_subtables
  84. def compact_class_pairs(
  85. font: TTFont, level: int, subtable: otTables.PairPos
  86. ) -> List[otTables.PairPos]:
  87. from fontTools.otlLib.builder import buildPairPosClassesSubtable
  88. subtables = []
  89. classes1: DefaultDict[int, List[str]] = defaultdict(list)
  90. for g in subtable.Coverage.glyphs:
  91. classes1[subtable.ClassDef1.classDefs.get(g, 0)].append(g)
  92. classes2: DefaultDict[int, List[str]] = defaultdict(list)
  93. for g, i in subtable.ClassDef2.classDefs.items():
  94. classes2[i].append(g)
  95. all_pairs = {}
  96. for i, class1 in enumerate(subtable.Class1Record):
  97. for j, class2 in enumerate(class1.Class2Record):
  98. if is_really_zero(class2):
  99. continue
  100. all_pairs[(tuple(sorted(classes1[i])), tuple(sorted(classes2[j])))] = (
  101. getattr(class2, "Value1", None),
  102. getattr(class2, "Value2", None),
  103. )
  104. grouped_pairs = cluster_pairs_by_class2_coverage_custom_cost(font, all_pairs, level)
  105. for pairs in grouped_pairs:
  106. subtables.append(buildPairPosClassesSubtable(pairs, font.getReverseGlyphMap()))
  107. return subtables
  108. def is_really_zero(class2: otTables.Class2Record) -> bool:
  109. v1 = getattr(class2, "Value1", None)
  110. v2 = getattr(class2, "Value2", None)
  111. return (v1 is None or v1.getEffectiveFormat() == 0) and (
  112. v2 is None or v2.getEffectiveFormat() == 0
  113. )
# Kerning data in "pairs" form: each key is a
# (sorted class1 glyph names, sorted class2 glyph names) tuple and each value
# is the (Value1, Value2) tuple of value records for that class pair.
# NOTE(review): compact_class_pairs fills missing records with None, so the
# value entries are effectively optional.
Pairs = Dict[
    Tuple[Tuple[str, ...], Tuple[str, ...]],
    Tuple[otBase.ValueRecord, otBase.ValueRecord],
]
  118. # Adapted from https://github.com/fonttools/fonttools/blob/f64f0b42f2d1163b2d85194e0979def539f5dca3/Lib/fontTools/ttLib/tables/otTables.py#L935-L958
  119. def _getClassRanges(glyphIDs: Iterable[int]):
  120. glyphIDs = sorted(glyphIDs)
  121. last = glyphIDs[0]
  122. ranges = [[last]]
  123. for glyphID in glyphIDs[1:]:
  124. if glyphID != last + 1:
  125. ranges[-1].append(last)
  126. ranges.append([glyphID])
  127. last = glyphID
  128. ranges[-1].append(last)
  129. return ranges, glyphIDs[0], glyphIDs[-1]
  130. # Adapted from https://github.com/fonttools/fonttools/blob/f64f0b42f2d1163b2d85194e0979def539f5dca3/Lib/fontTools/ttLib/tables/otTables.py#L960-L989
  131. def _classDef_bytes(
  132. class_data: List[Tuple[List[Tuple[int, int]], int, int]],
  133. class_ids: List[int],
  134. coverage=False,
  135. ):
  136. if not class_ids:
  137. return 0
  138. first_ranges, min_glyph_id, max_glyph_id = class_data[class_ids[0]]
  139. range_count = len(first_ranges)
  140. for i in class_ids[1:]:
  141. data = class_data[i]
  142. range_count += len(data[0])
  143. min_glyph_id = min(min_glyph_id, data[1])
  144. max_glyph_id = max(max_glyph_id, data[2])
  145. glyphCount = max_glyph_id - min_glyph_id + 1
  146. # https://docs.microsoft.com/en-us/typography/opentype/spec/chapter2#class-definition-table-format-1
  147. format1_bytes = 6 + glyphCount * 2
  148. # https://docs.microsoft.com/en-us/typography/opentype/spec/chapter2#class-definition-table-format-2
  149. format2_bytes = 4 + range_count * 6
  150. return min(format1_bytes, format2_bytes)
# Data shared by every Cluster built while splitting one set of pairs.
# It is filled in by cluster_pairs_by_class2_coverage_custom_cost.
ClusteringContext = namedtuple(
    "ClusteringContext",
    [
        # One int bitmask per class1: bit j is set when the pair
        # (class1, class2 number j) exists.
        "lines",
        # Sorted list of the class1 glyph-name tuples (one per line).
        "all_class1",
        # Per class1: the (ranges, min_glyph_id, max_glyph_id) tuple
        # returned by _getClassRanges.
        "all_class1_data",
        # Same as above, per class2.
        "all_class2_data",
        # Byte size of one Value1 / Value2 record (2 bytes per format bit).
        "valueFormat1_bytes",
        "valueFormat2_bytes",
    ],
)
  162. @dataclass
  163. class Cluster:
  164. ctx: ClusteringContext
  165. indices_bitmask: int
  166. @cached_property
  167. def indices(self):
  168. return bit_indices(self.indices_bitmask)
  169. @cached_property
  170. def column_indices(self):
  171. # Indices of columns that have a 1 in at least 1 line
  172. # => binary OR all the lines
  173. bitmask = reduce(int.__or__, (self.ctx.lines[i] for i in self.indices))
  174. return bit_indices(bitmask)
  175. @property
  176. def width(self):
  177. # Add 1 because Class2=0 cannot be used but needs to be encoded.
  178. return len(self.column_indices) + 1
  179. @cached_property
  180. def cost(self):
  181. return (
  182. # 2 bytes to store the offset to this subtable in the Lookup table above
  183. 2
  184. # Contents of the subtable
  185. # From: https://docs.microsoft.com/en-us/typography/opentype/spec/gpos#pair-adjustment-positioning-format-2-class-pair-adjustment
  186. # uint16 posFormat Format identifier: format = 2
  187. + 2
  188. # Offset16 coverageOffset Offset to Coverage table, from beginning of PairPos subtable.
  189. + 2
  190. + self.coverage_bytes
  191. # uint16 valueFormat1 ValueRecord definition — for the first glyph of the pair (may be zero).
  192. + 2
  193. # uint16 valueFormat2 ValueRecord definition — for the second glyph of the pair (may be zero).
  194. + 2
  195. # Offset16 classDef1Offset Offset to ClassDef table, from beginning of PairPos subtable — for the first glyph of the pair.
  196. + 2
  197. + self.classDef1_bytes
  198. # Offset16 classDef2Offset Offset to ClassDef table, from beginning of PairPos subtable — for the second glyph of the pair.
  199. + 2
  200. + self.classDef2_bytes
  201. # uint16 class1Count Number of classes in classDef1 table — includes Class 0.
  202. + 2
  203. # uint16 class2Count Number of classes in classDef2 table — includes Class 0.
  204. + 2
  205. # Class1Record class1Records[class1Count] Array of Class1 records, ordered by classes in classDef1.
  206. + (self.ctx.valueFormat1_bytes + self.ctx.valueFormat2_bytes)
  207. * len(self.indices)
  208. * self.width
  209. )
  210. @property
  211. def coverage_bytes(self):
  212. format1_bytes = (
  213. # From https://docs.microsoft.com/en-us/typography/opentype/spec/chapter2#coverage-format-1
  214. # uint16 coverageFormat Format identifier — format = 1
  215. # uint16 glyphCount Number of glyphs in the glyph array
  216. 4
  217. # uint16 glyphArray[glyphCount] Array of glyph IDs — in numerical order
  218. + sum(len(self.ctx.all_class1[i]) for i in self.indices) * 2
  219. )
  220. ranges = sorted(
  221. chain.from_iterable(self.ctx.all_class1_data[i][0] for i in self.indices)
  222. )
  223. merged_range_count = 0
  224. last = None
  225. for start, end in ranges:
  226. if last is not None and start != last + 1:
  227. merged_range_count += 1
  228. last = end
  229. format2_bytes = (
  230. # From https://docs.microsoft.com/en-us/typography/opentype/spec/chapter2#coverage-format-2
  231. # uint16 coverageFormat Format identifier — format = 2
  232. # uint16 rangeCount Number of RangeRecords
  233. 4
  234. # RangeRecord rangeRecords[rangeCount] Array of glyph ranges — ordered by startGlyphID.
  235. # uint16 startGlyphID First glyph ID in the range
  236. # uint16 endGlyphID Last glyph ID in the range
  237. # uint16 startCoverageIndex Coverage Index of first glyph ID in range
  238. + merged_range_count * 6
  239. )
  240. return min(format1_bytes, format2_bytes)
  241. @property
  242. def classDef1_bytes(self):
  243. # We can skip encoding one of the Class1 definitions, and use
  244. # Class1=0 to represent it instead, because Class1 is gated by the
  245. # Coverage definition. Use Class1=0 for the highest byte savings.
  246. # Going through all options takes too long, pick the biggest class
  247. # = what happens in otlLib.builder.ClassDefBuilder.classes()
  248. biggest_index = max(self.indices, key=lambda i: len(self.ctx.all_class1[i]))
  249. return _classDef_bytes(
  250. self.ctx.all_class1_data, [i for i in self.indices if i != biggest_index]
  251. )
  252. @property
  253. def classDef2_bytes(self):
  254. # All Class2 need to be encoded because we can't use Class2=0
  255. return _classDef_bytes(self.ctx.all_class2_data, self.column_indices)
  256. def cluster_pairs_by_class2_coverage_custom_cost(
  257. font: TTFont,
  258. pairs: Pairs,
  259. compression: int = 5,
  260. ) -> List[Pairs]:
  261. if not pairs:
  262. # The subtable was actually empty?
  263. return [pairs]
  264. # Sorted for reproducibility/determinism
  265. all_class1 = sorted(set(pair[0] for pair in pairs))
  266. all_class2 = sorted(set(pair[1] for pair in pairs))
  267. # Use Python's big ints for binary vectors representing each line
  268. lines = [
  269. sum(
  270. 1 << i if (class1, class2) in pairs else 0
  271. for i, class2 in enumerate(all_class2)
  272. )
  273. for class1 in all_class1
  274. ]
  275. # Map glyph names to ids and work with ints throughout for ClassDef formats
  276. name_to_id = font.getReverseGlyphMap()
  277. # Each entry in the arrays below is (range_count, min_glyph_id, max_glyph_id)
  278. all_class1_data = [
  279. _getClassRanges(name_to_id[name] for name in cls) for cls in all_class1
  280. ]
  281. all_class2_data = [
  282. _getClassRanges(name_to_id[name] for name in cls) for cls in all_class2
  283. ]
  284. format1 = 0
  285. format2 = 0
  286. for pair, value in pairs.items():
  287. format1 |= value[0].getEffectiveFormat() if value[0] else 0
  288. format2 |= value[1].getEffectiveFormat() if value[1] else 0
  289. valueFormat1_bytes = bit_count(format1) * 2
  290. valueFormat2_bytes = bit_count(format2) * 2
  291. ctx = ClusteringContext(
  292. lines,
  293. all_class1,
  294. all_class1_data,
  295. all_class2_data,
  296. valueFormat1_bytes,
  297. valueFormat2_bytes,
  298. )
  299. cluster_cache: Dict[int, Cluster] = {}
  300. def make_cluster(indices: int) -> Cluster:
  301. cluster = cluster_cache.get(indices, None)
  302. if cluster is not None:
  303. return cluster
  304. cluster = Cluster(ctx, indices)
  305. cluster_cache[indices] = cluster
  306. return cluster
  307. def merge(cluster: Cluster, other: Cluster) -> Cluster:
  308. return make_cluster(cluster.indices_bitmask | other.indices_bitmask)
  309. # Agglomerative clustering by hand, checking the cost gain of the new
  310. # cluster against the previously separate clusters
  311. # Start with 1 cluster per line
  312. # cluster = set of lines = new subtable
  313. clusters = [make_cluster(1 << i) for i in range(len(lines))]
  314. # Cost of 1 cluster with everything
  315. # `(1 << len) - 1` gives a bitmask full of 1's of length `len`
  316. cost_before_splitting = make_cluster((1 << len(lines)) - 1).cost
  317. log.debug(f" len(clusters) = {len(clusters)}")
  318. while len(clusters) > 1:
  319. lowest_cost_change = None
  320. best_cluster_index = None
  321. best_other_index = None
  322. best_merged = None
  323. for i, cluster in enumerate(clusters):
  324. for j, other in enumerate(clusters[i + 1 :]):
  325. merged = merge(cluster, other)
  326. cost_change = merged.cost - cluster.cost - other.cost
  327. if lowest_cost_change is None or cost_change < lowest_cost_change:
  328. lowest_cost_change = cost_change
  329. best_cluster_index = i
  330. best_other_index = i + 1 + j
  331. best_merged = merged
  332. assert lowest_cost_change is not None
  333. assert best_cluster_index is not None
  334. assert best_other_index is not None
  335. assert best_merged is not None
  336. # If the best merge we found is still taking down the file size, then
  337. # there's no question: we must do it, because it's beneficial in both
  338. # ways (lower file size and lower number of subtables). However, if the
  339. # best merge we found is not reducing file size anymore, then we need to
  340. # look at the other stop criteria = the compression factor.
  341. if lowest_cost_change > 0:
  342. # Stop critera: check whether we should keep merging.
  343. # Compute size reduction brought by splitting
  344. cost_after_splitting = sum(c.cost for c in clusters)
  345. # size_reduction so that after = before * (1 - size_reduction)
  346. # E.g. before = 1000, after = 800, 1 - 800/1000 = 0.2
  347. size_reduction = 1 - cost_after_splitting / cost_before_splitting
  348. # Force more merging by taking into account the compression number.
  349. # Target behaviour: compression number = 1 to 9, default 5 like gzip
  350. # - 1 = accept to add 1 subtable to reduce size by 50%
  351. # - 5 = accept to add 5 subtables to reduce size by 50%
  352. # See https://github.com/harfbuzz/packtab/blob/master/Lib/packTab/__init__.py#L690-L691
  353. # Given the size reduction we have achieved so far, compute how many
  354. # new subtables are acceptable.
  355. max_new_subtables = -log2(1 - size_reduction) * compression
  356. log.debug(
  357. f" len(clusters) = {len(clusters):3d} size_reduction={size_reduction:5.2f} max_new_subtables={max_new_subtables}",
  358. )
  359. if compression == 9:
  360. # Override level 9 to mean: create any number of subtables
  361. max_new_subtables = len(clusters)
  362. # If we have managed to take the number of new subtables below the
  363. # threshold, then we can stop.
  364. if len(clusters) <= max_new_subtables + 1:
  365. break
  366. # No reason to stop yet, do the merge and move on to the next.
  367. del clusters[best_other_index]
  368. clusters[best_cluster_index] = best_merged
  369. # All clusters are final; turn bitmasks back into the "Pairs" format
  370. pairs_by_class1: Dict[Tuple[str, ...], Pairs] = defaultdict(dict)
  371. for pair, values in pairs.items():
  372. pairs_by_class1[pair[0]][pair] = values
  373. pairs_groups: List[Pairs] = []
  374. for cluster in clusters:
  375. pairs_group: Pairs = dict()
  376. for i in cluster.indices:
  377. class1 = all_class1[i]
  378. pairs_group.update(pairs_by_class1[class1])
  379. pairs_groups.append(pairs_group)
  380. return pairs_groups