filenames.py 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356
  1. from __future__ import annotations
  2. from collections.abc import Iterable
  3. """
  4. Convert user-provided internal UFO names to spec-compliant filenames.
  5. This module implements the algorithm for converting between a "user name" -
  6. something that a user can choose arbitrarily inside a font editor - and a file
  7. name suitable for use in a wide range of operating systems and filesystems.
  8. The `UFO 3 specification <http://unifiedfontobject.org/versions/ufo3/conventions/>`_
  9. provides an example of an algorithm for such conversion, which avoids illegal
  10. characters, reserved file names, ambiguity between upper- and lower-case
  11. characters, and clashes with existing files.
  12. This code was originally copied from
  13. `ufoLib <https://github.com/unified-font-object/ufoLib/blob/8747da7/Lib/ufoLib/filenames.py>`_
  14. by Tal Leming and is copyright (c) 2005-2016, The RoboFab Developers:
  15. - Erik van Blokland
  16. - Tal Leming
  17. - Just van Rossum
  18. """
  19. # Restrictions are taken mostly from
  20. # https://docs.microsoft.com/en-gb/windows/win32/fileio/naming-a-file#naming-conventions.
  21. #
  22. # 1. Integer value zero, sometimes referred to as the ASCII NUL character.
  23. # 2. Characters whose integer representations are in the range 1 to 31,
  24. # inclusive.
  25. # 3. Various characters that (mostly) Windows and POSIX-y filesystems don't
  26. # allow, plus "(" and ")", as per the specification.
  27. illegalCharacters: set[str] = {
  28. "\x00",
  29. "\x01",
  30. "\x02",
  31. "\x03",
  32. "\x04",
  33. "\x05",
  34. "\x06",
  35. "\x07",
  36. "\x08",
  37. "\t",
  38. "\n",
  39. "\x0b",
  40. "\x0c",
  41. "\r",
  42. "\x0e",
  43. "\x0f",
  44. "\x10",
  45. "\x11",
  46. "\x12",
  47. "\x13",
  48. "\x14",
  49. "\x15",
  50. "\x16",
  51. "\x17",
  52. "\x18",
  53. "\x19",
  54. "\x1a",
  55. "\x1b",
  56. "\x1c",
  57. "\x1d",
  58. "\x1e",
  59. "\x1f",
  60. '"',
  61. "*",
  62. "+",
  63. "/",
  64. ":",
  65. "<",
  66. ">",
  67. "?",
  68. "[",
  69. "\\",
  70. "]",
  71. "(",
  72. ")",
  73. "|",
  74. "\x7f",
  75. }
  76. reservedFileNames: set[str] = {
  77. "aux",
  78. "clock$",
  79. "com1",
  80. "com2",
  81. "com3",
  82. "com4",
  83. "com5",
  84. "com6",
  85. "com7",
  86. "com8",
  87. "com9",
  88. "con",
  89. "lpt1",
  90. "lpt2",
  91. "lpt3",
  92. "lpt4",
  93. "lpt5",
  94. "lpt6",
  95. "lpt7",
  96. "lpt8",
  97. "lpt9",
  98. "nul",
  99. "prn",
  100. }
  101. maxFileNameLength: int = 255
  102. class NameTranslationError(Exception):
  103. pass
  104. def userNameToFileName(
  105. userName: str, existing: Iterable[str] = (), prefix: str = "", suffix: str = ""
  106. ) -> str:
  107. """Converts from a user name to a file name.
  108. Takes care to avoid illegal characters, reserved file names, ambiguity between
  109. upper- and lower-case characters, and clashes with existing files.
  110. Args:
  111. userName (str): The input file name.
  112. existing: A case-insensitive list of all existing file names.
  113. prefix: Prefix to be prepended to the file name.
  114. suffix: Suffix to be appended to the file name.
  115. Returns:
  116. A suitable filename.
  117. Raises:
  118. NameTranslationError: If no suitable name could be generated.
  119. Examples::
  120. >>> userNameToFileName("a") == "a"
  121. True
  122. >>> userNameToFileName("A") == "A_"
  123. True
  124. >>> userNameToFileName("AE") == "A_E_"
  125. True
  126. >>> userNameToFileName("Ae") == "A_e"
  127. True
  128. >>> userNameToFileName("ae") == "ae"
  129. True
  130. >>> userNameToFileName("aE") == "aE_"
  131. True
  132. >>> userNameToFileName("a.alt") == "a.alt"
  133. True
  134. >>> userNameToFileName("A.alt") == "A_.alt"
  135. True
  136. >>> userNameToFileName("A.Alt") == "A_.A_lt"
  137. True
  138. >>> userNameToFileName("A.aLt") == "A_.aL_t"
  139. True
  140. >>> userNameToFileName(u"A.alT") == "A_.alT_"
  141. True
  142. >>> userNameToFileName("T_H") == "T__H_"
  143. True
  144. >>> userNameToFileName("T_h") == "T__h"
  145. True
  146. >>> userNameToFileName("t_h") == "t_h"
  147. True
  148. >>> userNameToFileName("F_F_I") == "F__F__I_"
  149. True
  150. >>> userNameToFileName("f_f_i") == "f_f_i"
  151. True
  152. >>> userNameToFileName("Aacute_V.swash") == "A_acute_V_.swash"
  153. True
  154. >>> userNameToFileName(".notdef") == "_notdef"
  155. True
  156. >>> userNameToFileName("con") == "_con"
  157. True
  158. >>> userNameToFileName("CON") == "C_O_N_"
  159. True
  160. >>> userNameToFileName("con.alt") == "_con.alt"
  161. True
  162. >>> userNameToFileName("alt.con") == "alt._con"
  163. True
  164. """
  165. # the incoming name must be a string
  166. if not isinstance(userName, str):
  167. raise ValueError("The value for userName must be a string.")
  168. # establish the prefix and suffix lengths
  169. prefixLength = len(prefix)
  170. suffixLength = len(suffix)
  171. # replace an initial period with an _
  172. # if no prefix is to be added
  173. if not prefix and userName[0] == ".":
  174. userName = "_" + userName[1:]
  175. # filter the user name
  176. filteredUserName = []
  177. for character in userName:
  178. # replace illegal characters with _
  179. if character in illegalCharacters:
  180. character = "_"
  181. # add _ to all non-lower characters
  182. elif character != character.lower():
  183. character += "_"
  184. filteredUserName.append(character)
  185. userName = "".join(filteredUserName)
  186. # clip to 255
  187. sliceLength = maxFileNameLength - prefixLength - suffixLength
  188. userName = userName[:sliceLength]
  189. # test for illegal files names
  190. parts = []
  191. for part in userName.split("."):
  192. if part.lower() in reservedFileNames:
  193. part = "_" + part
  194. parts.append(part)
  195. userName = ".".join(parts)
  196. # test for clash
  197. fullName = prefix + userName + suffix
  198. if fullName.lower() in existing:
  199. fullName = handleClash1(userName, existing, prefix, suffix)
  200. # finished
  201. return fullName
  202. def handleClash1(
  203. userName: str, existing: Iterable[str] = [], prefix: str = "", suffix: str = ""
  204. ) -> str:
  205. """A helper function that resolves collisions with existing names when choosing a filename.
  206. This function attempts to append an unused integer counter to the filename.
  207. Args:
  208. userName (str): The input file name.
  209. existing: A case-insensitive list of all existing file names.
  210. prefix: Prefix to be prepended to the file name.
  211. suffix: Suffix to be appended to the file name.
  212. Returns:
  213. A suitable filename.
  214. >>> prefix = ("0" * 5) + "."
  215. >>> suffix = "." + ("0" * 10)
  216. >>> existing = ["a" * 5]
  217. >>> e = list(existing)
  218. >>> handleClash1(userName="A" * 5, existing=e,
  219. ... prefix=prefix, suffix=suffix) == (
  220. ... '00000.AAAAA000000000000001.0000000000')
  221. True
  222. >>> e = list(existing)
  223. >>> e.append(prefix + "aaaaa" + "1".zfill(15) + suffix)
  224. >>> handleClash1(userName="A" * 5, existing=e,
  225. ... prefix=prefix, suffix=suffix) == (
  226. ... '00000.AAAAA000000000000002.0000000000')
  227. True
  228. >>> e = list(existing)
  229. >>> e.append(prefix + "AAAAA" + "2".zfill(15) + suffix)
  230. >>> handleClash1(userName="A" * 5, existing=e,
  231. ... prefix=prefix, suffix=suffix) == (
  232. ... '00000.AAAAA000000000000001.0000000000')
  233. True
  234. """
  235. # if the prefix length + user name length + suffix length + 15 is at
  236. # or past the maximum length, silce 15 characters off of the user name
  237. prefixLength = len(prefix)
  238. suffixLength = len(suffix)
  239. if prefixLength + len(userName) + suffixLength + 15 > maxFileNameLength:
  240. l = prefixLength + len(userName) + suffixLength + 15
  241. sliceLength = maxFileNameLength - l
  242. userName = userName[:sliceLength]
  243. finalName = None
  244. # try to add numbers to create a unique name
  245. counter = 1
  246. while finalName is None:
  247. name = userName + str(counter).zfill(15)
  248. fullName = prefix + name + suffix
  249. if fullName.lower() not in existing:
  250. finalName = fullName
  251. break
  252. else:
  253. counter += 1
  254. if counter >= 999999999999999:
  255. break
  256. # if there is a clash, go to the next fallback
  257. if finalName is None:
  258. finalName = handleClash2(existing, prefix, suffix)
  259. # finished
  260. return finalName
  261. def handleClash2(
  262. existing: Iterable[str] = [], prefix: str = "", suffix: str = ""
  263. ) -> str:
  264. """A helper function that resolves collisions with existing names when choosing a filename.
  265. This function is a fallback to :func:`handleClash1`. It attempts to append an unused integer counter to the filename.
  266. Args:
  267. userName (str): The input file name.
  268. existing: A case-insensitive list of all existing file names.
  269. prefix: Prefix to be prepended to the file name.
  270. suffix: Suffix to be appended to the file name.
  271. Returns:
  272. A suitable filename.
  273. Raises:
  274. NameTranslationError: If no suitable name could be generated.
  275. Examples::
  276. >>> prefix = ("0" * 5) + "."
  277. >>> suffix = "." + ("0" * 10)
  278. >>> existing = [prefix + str(i) + suffix for i in range(100)]
  279. >>> e = list(existing)
  280. >>> handleClash2(existing=e, prefix=prefix, suffix=suffix) == (
  281. ... '00000.100.0000000000')
  282. True
  283. >>> e = list(existing)
  284. >>> e.remove(prefix + "1" + suffix)
  285. >>> handleClash2(existing=e, prefix=prefix, suffix=suffix) == (
  286. ... '00000.1.0000000000')
  287. True
  288. >>> e = list(existing)
  289. >>> e.remove(prefix + "2" + suffix)
  290. >>> handleClash2(existing=e, prefix=prefix, suffix=suffix) == (
  291. ... '00000.2.0000000000')
  292. True
  293. """
  294. # calculate the longest possible string
  295. maxLength = maxFileNameLength - len(prefix) - len(suffix)
  296. maxValue = int("9" * maxLength)
  297. # try to find a number
  298. finalName = None
  299. counter = 1
  300. while finalName is None:
  301. fullName = prefix + str(counter) + suffix
  302. if fullName.lower() not in existing:
  303. finalName = fullName
  304. break
  305. else:
  306. counter += 1
  307. if counter >= maxValue:
  308. break
  309. # raise an error if nothing has been found
  310. if finalName is None:
  311. raise NameTranslationError("No unique name could be found.")
  312. # finished
  313. return finalName
  314. if __name__ == "__main__":
  315. import doctest
  316. doctest.testmod()