localedata.py 8.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284
  1. """
  2. babel.localedata
  3. ~~~~~~~~~~~~~~~~
  4. Low-level locale data access.
  5. :note: The `Locale` class, which uses this module under the hood, provides a
  6. more convenient interface for accessing the locale data.
  7. :copyright: (c) 2013-2026 by the Babel Team.
  8. :license: BSD, see LICENSE for more details.
  9. """
  10. from __future__ import annotations
  11. import os
  12. import pickle
  13. import re
  14. import sys
  15. import threading
  16. from collections import abc
  17. from collections.abc import Iterator, Mapping, MutableMapping
  18. from functools import lru_cache
  19. from itertools import chain
  20. from typing import Any
# In-memory cache of loaded locale data, keyed by locale name; filled by
# `load` and consulted by `exists` and `normalize_locale`.
_cache: dict[str, Any] = {}
# Lock held by `load` while it reads or fills `_cache`. It is re-entrant
# because `load` calls itself for the parent locale while holding the lock.
_cache_lock = threading.RLock()
# Directory containing the `.dat` locale data files, located next to this module.
_dirname = os.path.join(os.path.dirname(__file__), 'locale-data')
# Matches a bare Windows reserved device name (case-insensitive); used by
# `resolve_locale_filename` to reject such names on win32.
_windows_reserved_name_re = re.compile("^(con|prn|aux|nul|com[0-9]|lpt[0-9])$", re.I)
  25. def normalize_locale(name: str) -> str | None:
  26. """Normalize a locale ID by stripping spaces and apply proper casing.
  27. Returns the normalized locale ID string or `None` if the ID is not
  28. recognized.
  29. """
  30. if not name or not isinstance(name, str):
  31. return None
  32. name = name.strip().lower()
  33. for locale_id in chain.from_iterable([_cache, locale_identifiers()]):
  34. if name == locale_id.lower():
  35. return locale_id
  36. def resolve_locale_filename(name: os.PathLike[str] | str) -> str:
  37. """
  38. Resolve a locale identifier to a `.dat` path on disk.
  39. """
  40. # Clean up any possible relative paths.
  41. name = os.path.basename(name)
  42. # Ensure we're not left with one of the Windows reserved names.
  43. if sys.platform == "win32" and _windows_reserved_name_re.match(os.path.splitext(name)[0]):
  44. raise ValueError(f"Name {name} is invalid on Windows")
  45. # Build the path.
  46. return os.path.join(_dirname, f"{name}.dat")
  47. def exists(name: str) -> bool:
  48. """Check whether locale data is available for the given locale.
  49. Returns `True` if it exists, `False` otherwise.
  50. :param name: the locale identifier string
  51. """
  52. if not name or not isinstance(name, str):
  53. return False
  54. if name in _cache:
  55. return True
  56. file_found = os.path.exists(resolve_locale_filename(name))
  57. return True if file_found else bool(normalize_locale(name))
  58. @lru_cache(maxsize=None)
  59. def locale_identifiers() -> list[str]:
  60. """Return a list of all locale identifiers for which locale data is
  61. available.
  62. This data is cached after the first invocation.
  63. You can clear the cache by calling `locale_identifiers.cache_clear()`.
  64. .. versionadded:: 0.8.1
  65. :return: a list of locale identifiers (strings)
  66. """
  67. return [
  68. stem
  69. for stem, extension in (
  70. os.path.splitext(filename) for filename in os.listdir(_dirname)
  71. )
  72. if extension == '.dat' and stem != 'root'
  73. ]
  74. def _is_non_likely_script(name: str) -> bool:
  75. """Return whether the locale is of the form ``lang_Script``,
  76. and the script is not the likely script for the language.
  77. This implements the behavior of the ``nonlikelyScript`` value of the
  78. ``localRules`` attribute for parent locales added in CLDR 45.
  79. """
  80. from babel.core import get_global, parse_locale
  81. try:
  82. lang, territory, script, variant, *rest = parse_locale(name)
  83. except ValueError:
  84. return False
  85. if lang and script and not territory and not variant and not rest:
  86. likely_subtag = get_global('likely_subtags').get(lang)
  87. _, _, likely_script, *_ = parse_locale(likely_subtag)
  88. return script != likely_script
  89. return False
  90. def load(name: os.PathLike[str] | str, merge_inherited: bool = True) -> dict[str, Any]:
  91. """Load the locale data for the given locale.
  92. The locale data is a dictionary that contains much of the data defined by
  93. the Common Locale Data Repository (CLDR). This data is stored as a
  94. collection of pickle files inside the ``babel`` package.
  95. >>> d = load('en_US')
  96. >>> d['languages']['sv']
  97. 'Swedish'
  98. Note that the results are cached, and subsequent requests for the same
  99. locale return the same dictionary:
  100. >>> d1 = load('en_US')
  101. >>> d2 = load('en_US')
  102. >>> d1 is d2
  103. True
  104. :param name: the locale identifier string (or "root")
  105. :param merge_inherited: whether the inherited data should be merged into
  106. the data of the requested locale
  107. :raise `IOError`: if no locale data file is found for the given locale
  108. identifier, or one of the locales it inherits from
  109. """
  110. name = os.path.basename(name)
  111. _cache_lock.acquire()
  112. try:
  113. data = _cache.get(name)
  114. if not data:
  115. # Load inherited data
  116. if name == 'root' or not merge_inherited:
  117. data = {}
  118. else:
  119. from babel.core import get_global
  120. parent = get_global('parent_exceptions').get(name)
  121. if not parent:
  122. if _is_non_likely_script(name):
  123. parent = 'root'
  124. else:
  125. parts = name.split('_')
  126. parent = "root" if len(parts) == 1 else "_".join(parts[:-1])
  127. data = load(parent).copy()
  128. filename = resolve_locale_filename(name)
  129. with open(filename, 'rb') as fileobj:
  130. if name != 'root' and merge_inherited:
  131. merge(data, pickle.load(fileobj))
  132. else:
  133. data = pickle.load(fileobj)
  134. _cache[name] = data
  135. return data
  136. finally:
  137. _cache_lock.release()
  138. def merge(dict1: MutableMapping[Any, Any], dict2: Mapping[Any, Any]) -> None:
  139. """Merge the data from `dict2` into the `dict1` dictionary, making copies
  140. of nested dictionaries.
  141. >>> d = {1: 'foo', 3: 'baz'}
  142. >>> merge(d, {1: 'Foo', 2: 'Bar'})
  143. >>> sorted(d.items())
  144. [(1, 'Foo'), (2, 'Bar'), (3, 'baz')]
  145. :param dict1: the dictionary to merge into
  146. :param dict2: the dictionary containing the data that should be merged
  147. """
  148. for key, val2 in dict2.items():
  149. if val2 is not None:
  150. val1 = dict1.get(key)
  151. if isinstance(val2, dict):
  152. if val1 is None:
  153. val1 = {}
  154. if isinstance(val1, Alias):
  155. val1 = (val1, val2)
  156. elif isinstance(val1, tuple):
  157. alias, others = val1
  158. others = others.copy()
  159. merge(others, val2)
  160. val1 = (alias, others)
  161. else:
  162. val1 = val1.copy()
  163. merge(val1, val2)
  164. else:
  165. val1 = val2
  166. dict1[key] = val1
  167. class Alias:
  168. """Representation of an alias in the locale data.
  169. An alias is a value that refers to some other part of the locale data,
  170. as specified by the `keys`.
  171. """
  172. def __init__(self, keys: tuple[str, ...]) -> None:
  173. self.keys = tuple(keys)
  174. def __repr__(self) -> str:
  175. return f"<{type(self).__name__} {self.keys!r}>"
  176. def resolve(self, data: Mapping[str | int | None, Any]) -> Mapping[str | int | None, Any]:
  177. """Resolve the alias based on the given data.
  178. This is done recursively, so if one alias resolves to a second alias,
  179. that second alias will also be resolved.
  180. :param data: the locale data
  181. :type data: `dict`
  182. """
  183. base = data
  184. for key in self.keys:
  185. data = data[key]
  186. if isinstance(data, Alias):
  187. data = data.resolve(base)
  188. elif isinstance(data, tuple):
  189. alias, others = data
  190. data = alias.resolve(base)
  191. return data
  192. class LocaleDataDict(abc.MutableMapping):
  193. """Dictionary wrapper that automatically resolves aliases to the actual
  194. values.
  195. """
  196. def __init__(
  197. self,
  198. data: MutableMapping[str | int | None, Any],
  199. base: Mapping[str | int | None, Any] | None = None,
  200. ):
  201. self._data = data
  202. if base is None:
  203. base = data
  204. self.base = base
  205. def __len__(self) -> int:
  206. return len(self._data)
  207. def __iter__(self) -> Iterator[str | int | None]:
  208. return iter(self._data)
  209. def __getitem__(self, key: str | int | None) -> Any:
  210. orig = val = self._data[key]
  211. if isinstance(val, Alias): # resolve an alias
  212. val = val.resolve(self.base)
  213. if isinstance(val, tuple): # Merge a partial dict with an alias
  214. alias, others = val
  215. val = alias.resolve(self.base).copy()
  216. merge(val, others)
  217. if isinstance(val, dict): # Return a nested alias-resolving dict
  218. val = LocaleDataDict(val, base=self.base)
  219. if val is not orig:
  220. self._data[key] = val
  221. return val
  222. def __setitem__(self, key: str | int | None, value: Any) -> None:
  223. self._data[key] = value
  224. def __delitem__(self, key: str | int | None) -> None:
  225. del self._data[key]
  226. def copy(self) -> LocaleDataDict:
  227. return LocaleDataDict(self._data.copy(), base=self.base)