translation_utils.py 22 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755
  1. # Copyright (c) Jupyter Development Team.
  2. # Distributed under the terms of the Modified BSD License.
  3. """
  4. Localization utilities to find available language packs and packages with
  5. localization data.
  6. """
  7. from __future__ import annotations
  8. import gettext
  9. import importlib
  10. import json
  11. import locale
  12. import os
  13. import re
  14. import sys
  15. import traceback
  16. from functools import lru_cache
  17. from re import Pattern
  18. from typing import Any
  19. import babel
  20. from packaging.version import parse as parse_version
  21. # See compatibility note on `group` keyword in https://docs.python.org/3/library/importlib.metadata.html#entry-points
  22. if sys.version_info < (3, 10): # pragma: no cover
  23. from importlib_metadata import entry_points
  24. else: # pragma: no cover
  25. from importlib.metadata import entry_points
# Entry points
# Entry-point group names queried through `entry_points(group=...)` to discover
# installed language packs and packages that ship their own locale data.
JUPYTERLAB_LANGUAGEPACK_ENTRY = "jupyterlab.languagepack"
JUPYTERLAB_LOCALE_ENTRY = "jupyterlab.locale"

# Constants
DEFAULT_LOCALE = "en"
# Language code reported by `locale.getlocale()`; falls back to DEFAULT_LOCALE
# when the interpreter reports none.
SYS_LOCALE = locale.getlocale()[0] or DEFAULT_LOCALE
# Directory names joined onto a package root to reach its message catalogs.
LOCALE_DIR = "locale"
LC_MESSAGES_DIR = "LC_MESSAGES"
# Translation domain used by `translator.translate_schema` when a schema does
# not name its own domain.
DEFAULT_DOMAIN = "jupyterlab"
L10N_SCHEMA_NAME = "@jupyterlab/translation-extension:plugin"
# True on interpreters where `TranslationBundle` must fall back from the
# context-aware gettext calls to the plain ones.
PY37_OR_LOWER = sys.version_info[:2] <= (3, 7)

# Pseudo language locale for in-context translation
PSEUDO_LANGUAGE = "ach_UG"

# Translation contexts applied to schema strings.
_default_schema_context = "schema"
_default_settings_context = "settings"
# Key of the schema section that may carry a custom "domain" and extra
# "selectors" (read by `_prepare_schema_patterns` and `translate_schema`).
_lab_i18n_config = "jupyter.lab.internationalization"

# mapping of schema translatable string selectors to translation context
# Each key is a regular-expression fragment; it is later wrapped as
# "^/" + key + "$" and matched against the "/"-joined path of a schema entry.
DEFAULT_SCHEMA_SELECTORS = {
    "properties/.*/title": _default_settings_context,
    "properties/.*/description": _default_settings_context,
    "definitions/.*/properties/.*/title": _default_settings_context,
    "definitions/.*/properties/.*/description": _default_settings_context,
    "title": _default_schema_context,
    "description": _default_schema_context,
    # JupyterLab-specific
    r"jupyter\.lab\.setting-icon-label": _default_settings_context,
    r"jupyter\.lab\.menus/.*/label": "menu",
    r"jupyter\.lab\.toolbars/.*/label": "toolbar",
}
  55. @lru_cache
  56. def _get_default_schema_selectors() -> dict[Pattern, str]:
  57. return {
  58. re.compile("^/" + pattern + "$"): context
  59. for pattern, context in DEFAULT_SCHEMA_SELECTORS.items()
  60. }
  61. def _prepare_schema_patterns(schema: dict) -> dict[Pattern, str]:
  62. return {
  63. **_get_default_schema_selectors(),
  64. **{
  65. re.compile("^/" + selector + "$"): _default_schema_context
  66. for selector in schema.get(_lab_i18n_config, {}).get("selectors", [])
  67. },
  68. }
  69. # --- Private process helpers
  70. # ----------------------------------------------------------------------------
  71. def _get_installed_language_pack_locales() -> tuple[dict[str, Any], str]:
  72. """
  73. Get available installed language pack locales.
  74. Returns
  75. -------
  76. tuple
  77. A tuple, where the first item is the result and the second item any
  78. error messages.
  79. """
  80. data = {}
  81. messages = []
  82. for entry_point in entry_points(group=JUPYTERLAB_LANGUAGEPACK_ENTRY):
  83. try:
  84. data[entry_point.name] = os.path.dirname(entry_point.load().__file__)
  85. except Exception: # pragma: no cover
  86. messages.append(traceback.format_exc())
  87. message = "\n".join(messages)
  88. return data, message
  89. def _get_installed_package_locales() -> tuple[dict[str, Any], str]:
  90. """
  91. Get available installed packages containing locale information.
  92. Returns
  93. -------
  94. tuple
  95. A tuple, where the first item is the result and the second item any
  96. error messages. The value for the key points to the root location
  97. the package.
  98. """
  99. data = {}
  100. messages = []
  101. for entry_point in entry_points(group=JUPYTERLAB_LOCALE_ENTRY):
  102. try:
  103. data[entry_point.name] = os.path.dirname(entry_point.load().__file__)
  104. except Exception:
  105. messages.append(traceback.format_exc())
  106. message = "\n".join(messages)
  107. return data, message
  108. # --- Helpers
  109. # ----------------------------------------------------------------------------
  110. def is_valid_locale(locale_: str) -> bool:
  111. """
  112. Check if a `locale_` value is valid.
  113. Parameters
  114. ----------
  115. locale_: str
  116. Language locale code.
  117. Notes
  118. -----
  119. A valid locale is in the form language (See ISO-639 standard) and an
  120. optional territory (See ISO-3166 standard).
  121. Examples of valid locales:
  122. - English: DEFAULT_LOCALE
  123. - Australian English: "en_AU"
  124. - Portuguese: "pt"
  125. - Brazilian Portuguese: "pt_BR"
  126. Examples of invalid locales:
  127. - Australian Spanish: "es_AU"
  128. - Brazilian German: "de_BR"
  129. """
  130. # Add exception for Norwegian
  131. if locale_ in {
  132. "no_NO",
  133. }:
  134. return True
  135. valid = False
  136. try:
  137. babel.Locale.parse(locale_)
  138. valid = True
  139. except (babel.core.UnknownLocaleError, ValueError):
  140. # Expected error if the locale is unknown
  141. pass
  142. return valid
  143. def get_display_name(locale_: str, display_locale: str = DEFAULT_LOCALE) -> str:
  144. """
  145. Return the language name to use with a `display_locale` for a given language locale.
  146. Parameters
  147. ----------
  148. locale_: str
  149. The language name to use.
  150. display_locale: str, optional
  151. The language to display the `locale_`.
  152. Returns
  153. -------
  154. str
  155. Localized `locale_` and capitalized language name using `display_locale` as language.
  156. """
  157. locale_ = locale_ if is_valid_locale(locale_) else DEFAULT_LOCALE
  158. display_locale = display_locale if is_valid_locale(display_locale) else DEFAULT_LOCALE
  159. try:
  160. loc = babel.Locale.parse(locale_)
  161. display_name = loc.get_display_name(display_locale)
  162. except babel.UnknownLocaleError:
  163. display_name = display_locale
  164. if display_name:
  165. display_name = display_name[0].upper() + display_name[1:]
  166. return display_name # type:ignore[return-value]
  167. def merge_locale_data(
  168. language_pack_locale_data: dict[str, Any], package_locale_data: dict[str, Any]
  169. ) -> dict[str, Any]:
  170. """
  171. Merge language pack data with locale data bundled in packages.
  172. Parameters
  173. ----------
  174. language_pack_locale_data: dict
  175. The dictionary with language pack locale data.
  176. package_locale_data: dict
  177. The dictionary with package locale data.
  178. Returns
  179. -------
  180. dict
  181. Merged locale data.
  182. """
  183. result = language_pack_locale_data
  184. package_lp_metadata = language_pack_locale_data.get("", {})
  185. package_lp_version = package_lp_metadata.get("version", None)
  186. package_lp_domain = package_lp_metadata.get("domain", None)
  187. package_metadata = package_locale_data.get("", {})
  188. package_version = package_metadata.get("version", None)
  189. package_domain = package_metadata.get("domain", "None")
  190. if package_lp_version and package_version and package_domain == package_lp_domain:
  191. package_version = parse_version(package_version)
  192. package_lp_version = parse_version(package_lp_version)
  193. if package_version > package_lp_version:
  194. # If package version is more recent, then update keys of the language pack
  195. result = language_pack_locale_data.copy()
  196. result.update(package_locale_data)
  197. return result
  198. def get_installed_packages_locale(locale_: str) -> tuple[dict, str]:
  199. """
  200. Get all jupyterlab extensions installed that contain locale data.
  201. Returns
  202. -------
  203. tuple
  204. A tuple in the form `(locale_data_dict, message)`,
  205. where the `locale_data_dict` is an ordered list
  206. of available language packs:
  207. >>> {"package-name": locale_data, ...}
  208. Examples
  209. --------
  210. - `entry_points={"jupyterlab.locale": "package-name = package_module"}`
  211. - `entry_points={"jupyterlab.locale": "jupyterlab-git = jupyterlab_git"}`
  212. """
  213. found_package_locales, message = _get_installed_package_locales()
  214. packages_locale_data = {}
  215. messages = message.split("\n")
  216. if not message:
  217. for package_name, package_root_path in found_package_locales.items():
  218. locales = {}
  219. try:
  220. locale_path = os.path.join(package_root_path, LOCALE_DIR)
  221. # Handle letter casing
  222. locales = {
  223. loc.lower(): loc
  224. for loc in os.listdir(locale_path)
  225. if os.path.isdir(os.path.join(locale_path, loc))
  226. }
  227. except Exception:
  228. messages.append(traceback.format_exc())
  229. if locale_.lower() in locales:
  230. locale_json_path = os.path.join(
  231. locale_path,
  232. locales[locale_.lower()],
  233. LC_MESSAGES_DIR,
  234. f"{package_name}.json",
  235. )
  236. if os.path.isfile(locale_json_path):
  237. try:
  238. with open(locale_json_path, encoding="utf-8") as fh:
  239. packages_locale_data[package_name] = json.load(fh)
  240. except Exception:
  241. messages.append(traceback.format_exc())
  242. return packages_locale_data, "\n".join(messages)
  243. # --- API
  244. # ----------------------------------------------------------------------------
  245. def get_language_packs(display_locale: str = DEFAULT_LOCALE) -> tuple[dict, str]:
  246. """
  247. Return the available language packs installed in the system.
  248. The returned information contains the languages displayed in the current
  249. locale.
  250. Parameters
  251. ----------
  252. display_locale: str, optional
  253. Default is DEFAULT_LOCALE.
  254. Returns
  255. -------
  256. tuple
  257. A tuple in the form `(locale_data_dict, message)`.
  258. """
  259. found_locales, message = _get_installed_language_pack_locales()
  260. locales = {}
  261. messages = message.split("\n")
  262. if not message:
  263. invalid_locales = []
  264. valid_locales = []
  265. messages = []
  266. for locale_ in found_locales:
  267. if is_valid_locale(locale_):
  268. valid_locales.append(locale_)
  269. else:
  270. invalid_locales.append(locale_)
  271. display_locale_ = display_locale if display_locale in valid_locales else DEFAULT_LOCALE
  272. locales = {
  273. DEFAULT_LOCALE: {
  274. "displayName": (
  275. get_display_name(DEFAULT_LOCALE, display_locale_)
  276. if display_locale != PSEUDO_LANGUAGE
  277. else "Default"
  278. ),
  279. "nativeName": get_display_name(DEFAULT_LOCALE, DEFAULT_LOCALE),
  280. }
  281. }
  282. for locale_ in valid_locales:
  283. locales[locale_] = {
  284. "displayName": get_display_name(locale_, display_locale_),
  285. "nativeName": get_display_name(locale_, locale_),
  286. }
  287. if invalid_locales:
  288. if PSEUDO_LANGUAGE in invalid_locales:
  289. invalid_locales.remove(PSEUDO_LANGUAGE)
  290. locales[PSEUDO_LANGUAGE] = {
  291. "displayName": "Pseudo-language",
  292. # Trick to ensure the proper language is selected in the language menu
  293. "nativeName": (
  294. "to translate the UI"
  295. if display_locale != PSEUDO_LANGUAGE
  296. else "Pseudo-language"
  297. ),
  298. }
  299. # Check again as the pseudo-language was maybe the only invalid locale
  300. if invalid_locales:
  301. messages.append(f"The following locales are invalid: {invalid_locales}!")
  302. return locales, "\n".join(messages)
  303. def get_language_pack(locale_: str) -> tuple:
  304. """
  305. Get a language pack for a given `locale_` and update with any installed
  306. package locales.
  307. Returns
  308. -------
  309. tuple
  310. A tuple in the form `(locale_data_dict, message)`.
  311. Notes
  312. -----
  313. We call `_get_installed_language_pack_locales` via a subprocess to
  314. guarantee the results represent the most up-to-date entry point
  315. information, which seems to be defined on interpreter startup.
  316. """
  317. found_locales, message = _get_installed_language_pack_locales()
  318. found_packages_locales, message = get_installed_packages_locale(locale_)
  319. locale_data = {}
  320. messages = message.split("\n")
  321. if (
  322. not message
  323. and (locale_ == PSEUDO_LANGUAGE or is_valid_locale(locale_))
  324. and locale_ in found_locales
  325. ):
  326. path = found_locales[locale_]
  327. for root, __, files in os.walk(path, topdown=False):
  328. for name in files:
  329. if name.endswith(".json"):
  330. pkg_name = name.replace(".json", "")
  331. json_path = os.path.join(root, name)
  332. try:
  333. with open(json_path, encoding="utf-8") as fh:
  334. merged_data = json.load(fh)
  335. except Exception:
  336. messages.append(traceback.format_exc())
  337. # Load packages with locale data and merge them
  338. if pkg_name in found_packages_locales:
  339. pkg_data = found_packages_locales[pkg_name]
  340. merged_data = merge_locale_data(merged_data, pkg_data)
  341. locale_data[pkg_name] = merged_data
  342. # Check if package locales exist that do not exists in language pack
  343. for pkg_name, data in found_packages_locales.items():
  344. if pkg_name not in locale_data:
  345. locale_data[pkg_name] = data
  346. return locale_data, "\n".join(messages)
  347. # --- Translators
  348. # ----------------------------------------------------------------------------
  349. class TranslationBundle:
  350. """
  351. Translation bundle providing gettext translation functionality.
  352. """
  353. def __init__(self, domain: str, locale_: str):
  354. """Initialize the bundle."""
  355. self._domain = domain
  356. self._locale = locale_
  357. self._translator = gettext.NullTranslations()
  358. self.update_locale(locale_)
  359. def update_locale(self, locale_: str) -> None:
  360. """
  361. Update the locale.
  362. Parameters
  363. ----------
  364. locale_: str
  365. The language name to use.
  366. """
  367. # TODO: Need to handle packages that provide their own .mo files
  368. self._locale = locale_
  369. localedir = None
  370. if locale_ != DEFAULT_LOCALE:
  371. language_pack_module = f"jupyterlab_language_pack_{locale_}"
  372. try:
  373. mod = importlib.import_module(language_pack_module)
  374. assert mod.__file__ is not None
  375. localedir = os.path.join(os.path.dirname(mod.__file__), LOCALE_DIR)
  376. except Exception: # noqa: S110
  377. # no-op
  378. pass
  379. self._translator = gettext.translation(
  380. self._domain, localedir=localedir, languages=(self._locale,), fallback=True
  381. )
  382. def gettext(self, msgid: str) -> str:
  383. """
  384. Translate a singular string.
  385. Parameters
  386. ----------
  387. msgid: str
  388. The singular string to translate.
  389. Returns
  390. -------
  391. str
  392. The translated string.
  393. """
  394. return self._translator.gettext(msgid)
  395. def ngettext(self, msgid: str, msgid_plural: str, n: int) -> str:
  396. """
  397. Translate a singular string with pluralization.
  398. Parameters
  399. ----------
  400. msgid: str
  401. The singular string to translate.
  402. msgid_plural: str
  403. The plural string to translate.
  404. n: int
  405. The number for pluralization.
  406. Returns
  407. -------
  408. str
  409. The translated string.
  410. """
  411. return self._translator.ngettext(msgid, msgid_plural, n)
  412. def pgettext(self, msgctxt: str, msgid: str) -> str:
  413. """
  414. Translate a singular string with context.
  415. Parameters
  416. ----------
  417. msgctxt: str
  418. The message context.
  419. msgid: str
  420. The singular string to translate.
  421. Returns
  422. -------
  423. str
  424. The translated string.
  425. """
  426. # Python 3.7 or lower does not offer translations based on context.
  427. # On these versions `pgettext` falls back to `gettext`
  428. if PY37_OR_LOWER:
  429. translation = self._translator.gettext(msgid)
  430. else:
  431. translation = self._translator.pgettext(msgctxt, msgid)
  432. return translation
  433. def npgettext(self, msgctxt: str, msgid: str, msgid_plural: str, n: int) -> str:
  434. """
  435. Translate a singular string with context and pluralization.
  436. Parameters
  437. ----------
  438. msgctxt: str
  439. The message context.
  440. msgid: str
  441. The singular string to translate.
  442. msgid_plural: str
  443. The plural string to translate.
  444. n: int
  445. The number for pluralization.
  446. Returns
  447. -------
  448. str
  449. The translated string.
  450. """
  451. # Python 3.7 or lower does not offer translations based on context.
  452. # On these versions `npgettext` falls back to `ngettext`
  453. if PY37_OR_LOWER:
  454. translation = self._translator.ngettext(msgid, msgid_plural, n)
  455. else:
  456. translation = self._translator.npgettext(msgctxt, msgid, msgid_plural, n)
  457. return translation
  458. # Shorthands
  459. def __(self, msgid: str) -> str:
  460. """
  461. Shorthand for gettext.
  462. Parameters
  463. ----------
  464. msgid: str
  465. The singular string to translate.
  466. Returns
  467. -------
  468. str
  469. The translated string.
  470. """
  471. return self.gettext(msgid)
  472. def _n(self, msgid: str, msgid_plural: str, n: int) -> str:
  473. """
  474. Shorthand for ngettext.
  475. Parameters
  476. ----------
  477. msgid: str
  478. The singular string to translate.
  479. msgid_plural: str
  480. The plural string to translate.
  481. n: int
  482. The number for pluralization.
  483. Returns
  484. -------
  485. str
  486. The translated string.
  487. """
  488. return self.ngettext(msgid, msgid_plural, n)
  489. def _p(self, msgctxt: str, msgid: str) -> str:
  490. """
  491. Shorthand for pgettext.
  492. Parameters
  493. ----------
  494. msgctxt: str
  495. The message context.
  496. msgid: str
  497. The singular string to translate.
  498. Returns
  499. -------
  500. str
  501. The translated string.
  502. """
  503. return self.pgettext(msgctxt, msgid)
  504. def _np(self, msgctxt: str, msgid: str, msgid_plural: str, n: int) -> str:
  505. """
  506. Shorthand for npgettext.
  507. Parameters
  508. ----------
  509. msgctxt: str
  510. The message context.
  511. msgid: str
  512. The singular string to translate.
  513. msgid_plural: str
  514. The plural string to translate.
  515. n: int
  516. The number for pluralization.
  517. Returns
  518. -------
  519. str
  520. The translated string.
  521. """
  522. return self.npgettext(msgctxt, msgid, msgid_plural, n)
  523. class translator:
  524. """
  525. Translations manager.
  526. """
  527. _TRANSLATORS: dict[str, TranslationBundle] = {}
  528. _LOCALE = SYS_LOCALE
  529. @staticmethod
  530. def normalize_domain(domain: str) -> str:
  531. """Normalize a domain name.
  532. Parameters
  533. ----------
  534. domain: str
  535. Domain to normalize
  536. Returns
  537. -------
  538. str
  539. Normalized domain
  540. """
  541. return domain.replace("-", "_")
  542. @classmethod
  543. def set_locale(cls, locale_: str) -> None:
  544. """
  545. Set locale for the translation bundles based on the settings.
  546. Parameters
  547. ----------
  548. locale_: str
  549. The language name to use.
  550. """
  551. if locale_ == cls._LOCALE:
  552. # Nothing to do bail early
  553. return
  554. if is_valid_locale(locale_):
  555. cls._LOCALE = locale_
  556. for _, bundle in cls._TRANSLATORS.items():
  557. bundle.update_locale(locale_)
  558. @classmethod
  559. def load(cls, domain: str) -> TranslationBundle:
  560. """
  561. Load translation domain.
  562. The domain is usually the normalized ``package_name``.
  563. Parameters
  564. ----------
  565. domain: str
  566. The translations domain. The normalized python package name.
  567. Returns
  568. -------
  569. Translator
  570. A translator instance bound to the domain.
  571. """
  572. norm_domain = translator.normalize_domain(domain)
  573. if norm_domain in cls._TRANSLATORS:
  574. trans = cls._TRANSLATORS[norm_domain]
  575. else:
  576. trans = TranslationBundle(norm_domain, cls._LOCALE)
  577. cls._TRANSLATORS[norm_domain] = trans
  578. return trans
  579. @staticmethod
  580. def _translate_schema_strings(
  581. translations: Any,
  582. schema: dict,
  583. prefix: str = "",
  584. to_translate: dict[Pattern, str] | None = None,
  585. ) -> None:
  586. """Translate a schema in-place."""
  587. if to_translate is None:
  588. to_translate = _prepare_schema_patterns(schema)
  589. for key, value in schema.items():
  590. path = prefix + "/" + key
  591. if isinstance(value, str):
  592. matched = False
  593. for pattern, context in to_translate.items(): # noqa: B007
  594. if pattern.fullmatch(path):
  595. matched = True
  596. break
  597. if matched:
  598. schema[key] = translations.pgettext(context, value)
  599. elif isinstance(value, dict):
  600. translator._translate_schema_strings(
  601. translations,
  602. value,
  603. prefix=path,
  604. to_translate=to_translate,
  605. )
  606. elif isinstance(value, list):
  607. for i, element in enumerate(value):
  608. if not isinstance(element, dict):
  609. continue
  610. translator._translate_schema_strings(
  611. translations,
  612. element,
  613. prefix=path + "[" + str(i) + "]",
  614. to_translate=to_translate,
  615. )
  616. @staticmethod
  617. def translate_schema(schema: dict) -> dict:
  618. """Translate a schema.
  619. Parameters
  620. ----------
  621. schema: dict
  622. The schema to be translated
  623. Returns
  624. -------
  625. Dict
  626. The translated schema
  627. """
  628. if translator._LOCALE == DEFAULT_LOCALE:
  629. return schema
  630. translations = translator.load(
  631. schema.get(_lab_i18n_config, {}).get("domain", DEFAULT_DOMAIN)
  632. )
  633. new_schema = schema.copy()
  634. translator._translate_schema_strings(translations, new_schema)
  635. return new_schema