xmlWriter.py 7.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240
  1. """xmlWriter.py -- Simple XML authoring class"""
  2. from __future__ import annotations
  3. from typing import BinaryIO, Callable, TextIO
  4. from fontTools.misc.textTools import byteord, strjoin, tobytes, tostr
  5. import sys
  6. import os
  7. import string
  8. import logging
  9. import itertools
  10. INDENT = " "
  11. TTX_LOG = logging.getLogger("fontTools.ttx")
  12. REPLACEMENT = "?"
  13. ILLEGAL_XML_CHARS = dict.fromkeys(
  14. itertools.chain(
  15. range(0x00, 0x09),
  16. (0x0B, 0x0C),
  17. range(0x0E, 0x20),
  18. range(0xD800, 0xE000),
  19. (0xFFFE, 0xFFFF),
  20. ),
  21. REPLACEMENT,
  22. )
  23. class XMLWriter(object):
  24. def __init__(
  25. self,
  26. fileOrPath: str | os.PathLike[str] | BinaryIO | TextIO,
  27. indentwhite: str = INDENT,
  28. idlefunc: Callable[[], None] | None = None,
  29. encoding: str = "utf_8",
  30. newlinestr: str | bytes = "\n",
  31. ) -> None:
  32. if encoding.lower().replace("-", "").replace("_", "") != "utf8":
  33. raise Exception("Only UTF-8 encoding is supported.")
  34. if fileOrPath == "-":
  35. fileOrPath = sys.stdout
  36. self.filename: str | os.PathLike[str] | None
  37. if not hasattr(fileOrPath, "write"):
  38. if not isinstance(fileOrPath, (str, os.PathLike)):
  39. raise TypeError(
  40. "fileOrPath must be a file path (str or PathLike) if it isn't an object with a `write` method."
  41. )
  42. self.filename = fileOrPath
  43. self.file = open(fileOrPath, "wb")
  44. self._closeStream = True
  45. else:
  46. self.filename = None
  47. # assume writable file object
  48. self.file = fileOrPath
  49. self._closeStream = False
  50. # Figure out if writer expects bytes or unicodes
  51. try:
  52. # The bytes check should be first. See:
  53. # https://github.com/fonttools/fonttools/pull/233
  54. self.file.write(b"")
  55. self.totype = tobytes
  56. except TypeError:
  57. # This better not fail.
  58. self.file.write("")
  59. self.totype = tostr
  60. self.indentwhite = self.totype(indentwhite)
  61. if newlinestr is None:
  62. self.newlinestr = self.totype(os.linesep)
  63. else:
  64. self.newlinestr = self.totype(newlinestr)
  65. self.indentlevel = 0
  66. self.stack = []
  67. self.needindent = 1
  68. self.idlefunc = idlefunc
  69. self.idlecounter = 0
  70. self._writeraw('<?xml version="1.0" encoding="UTF-8"?>')
  71. self.newline()
  72. def __enter__(self):
  73. return self
  74. def __exit__(self, exception_type, exception_value, traceback):
  75. self.close()
  76. def close(self) -> None:
  77. if self._closeStream:
  78. assert not isinstance(self.file, (str, os.PathLike))
  79. self.file.close()
  80. def write(self, string, indent=True):
  81. """Writes text."""
  82. self._writeraw(escape(string), indent=indent)
  83. def writecdata(self, string):
  84. """Writes text in a CDATA section."""
  85. self._writeraw("<![CDATA[" + string + "]]>")
  86. def write8bit(self, data, strip=False):
  87. """Writes a bytes() sequence into the XML, escaping
  88. non-ASCII bytes. When this is read in xmlReader,
  89. the original bytes can be recovered by encoding to
  90. 'latin-1'."""
  91. self._writeraw(escape8bit(data), strip=strip)
  92. def write_noindent(self, string):
  93. """Writes text without indentation."""
  94. self._writeraw(escape(string), indent=False)
  95. def _writeraw(self, data, indent=True, strip=False):
  96. """Writes bytes, possibly indented."""
  97. if indent and self.needindent:
  98. self.file.write(self.indentlevel * self.indentwhite)
  99. self.needindent = 0
  100. s = self.totype(data, encoding="utf_8")
  101. if strip:
  102. s = s.strip()
  103. self.file.write(s)
  104. def newline(self):
  105. self.file.write(self.newlinestr)
  106. self.needindent = 1
  107. idlecounter = self.idlecounter
  108. if not idlecounter % 100 and self.idlefunc is not None:
  109. self.idlefunc()
  110. self.idlecounter = idlecounter + 1
  111. def comment(self, data):
  112. data = escape(data)
  113. lines = data.split("\n")
  114. self._writeraw("<!-- " + lines[0])
  115. for line in lines[1:]:
  116. self.newline()
  117. self._writeraw(" " + line)
  118. self._writeraw(" -->")
  119. def simpletag(self, _TAG_, *args, **kwargs):
  120. attrdata = self.stringifyattrs(*args, **kwargs)
  121. data = "<%s%s/>" % (_TAG_, attrdata)
  122. self._writeraw(data)
  123. def begintag(self, _TAG_, *args, **kwargs):
  124. attrdata = self.stringifyattrs(*args, **kwargs)
  125. data = "<%s%s>" % (_TAG_, attrdata)
  126. self._writeraw(data)
  127. self.stack.append(_TAG_)
  128. self.indent()
  129. def endtag(self, _TAG_):
  130. assert self.stack and self.stack[-1] == _TAG_, "nonmatching endtag"
  131. del self.stack[-1]
  132. self.dedent()
  133. data = "</%s>" % _TAG_
  134. self._writeraw(data)
  135. def dumphex(self, data):
  136. linelength = 16
  137. hexlinelength = linelength * 2
  138. chunksize = 8
  139. for i in range(0, len(data), linelength):
  140. hexline = hexStr(data[i : i + linelength])
  141. line = ""
  142. white = ""
  143. for j in range(0, hexlinelength, chunksize):
  144. line = line + white + hexline[j : j + chunksize]
  145. white = " "
  146. self._writeraw(line)
  147. self.newline()
  148. def indent(self):
  149. self.indentlevel = self.indentlevel + 1
  150. def dedent(self):
  151. assert self.indentlevel > 0
  152. self.indentlevel = self.indentlevel - 1
  153. def stringifyattrs(self, *args, **kwargs):
  154. if kwargs:
  155. assert not args
  156. attributes = sorted(kwargs.items())
  157. elif args:
  158. assert len(args) == 1
  159. attributes = args[0]
  160. else:
  161. return ""
  162. data = ""
  163. for attr, value in attributes:
  164. if not isinstance(value, (bytes, str)):
  165. value = str(value)
  166. data = data + ' %s="%s"' % (attr, escapeattr(value))
  167. return data
  168. def escape(data):
  169. """Escape characters not allowed in `XML 1.0 <https://www.w3.org/TR/xml/#NT-Char>`_."""
  170. data = tostr(data, "utf_8")
  171. data = data.replace("&", "&amp;")
  172. data = data.replace("<", "&lt;")
  173. data = data.replace(">", "&gt;")
  174. data = data.replace("\r", "&#13;")
  175. newData = data.translate(ILLEGAL_XML_CHARS)
  176. if newData != data:
  177. maxLen = 10
  178. preview = repr(data)
  179. if len(data) > maxLen:
  180. preview = repr(data[:maxLen])[1:-1] + "..."
  181. TTX_LOG.warning(
  182. "Illegal XML character(s) found; replacing offending string %r with %r",
  183. preview,
  184. REPLACEMENT,
  185. )
  186. return newData
  187. def escapeattr(data):
  188. data = escape(data)
  189. data = data.replace('"', "&quot;")
  190. return data
  191. def escape8bit(data):
  192. """Input is Unicode string."""
  193. def escapechar(c):
  194. n = ord(c)
  195. if 32 <= n <= 127 and c not in "<&>":
  196. return c
  197. else:
  198. return "&#" + repr(n) + ";"
  199. return strjoin(map(escapechar, data.decode("latin-1")))
  200. def hexStr(s):
  201. h = string.hexdigits
  202. r = ""
  203. for c in s:
  204. i = byteord(c)
  205. r = r + h[(i >> 4) & 0xF] + h[i & 0xF]
  206. return r