# checkers.py
"""
    babel.messages.checkers
    ~~~~~~~~~~~~~~~~~~~~~~~

    Various routines that help with validation of translations.

    :since: version 0.9

    :copyright: (c) 2013-2026 by the Babel Team.
    :license: BSD, see LICENSE for more details.
"""
from __future__ import annotations

from collections.abc import Callable

from babel.messages.catalog import PYTHON_FORMAT, Catalog, Message, TranslationError
  12. #: list of format chars that are compatible to each other
  13. _string_format_compatibilities = [
  14. {'i', 'd', 'u'},
  15. {'x', 'X'},
  16. {'f', 'F', 'g', 'G'},
  17. ]
  18. def num_plurals(catalog: Catalog | None, message: Message) -> None:
  19. """Verify the number of plurals in the translation."""
  20. if not message.pluralizable:
  21. if not isinstance(message.string, str):
  22. raise TranslationError("Found plural forms for non-pluralizable message")
  23. return
  24. # skip further tests if no catalog is provided.
  25. elif catalog is None:
  26. return
  27. msgstrs = message.string
  28. if not isinstance(msgstrs, (list, tuple)):
  29. msgstrs = (msgstrs,)
  30. if len(msgstrs) != catalog.num_plurals:
  31. raise TranslationError(
  32. f"Wrong number of plural forms (expected {catalog.num_plurals})",
  33. )
  34. def python_format(catalog: Catalog | None, message: Message) -> None:
  35. """Verify the format string placeholders in the translation."""
  36. if 'python-format' not in message.flags:
  37. return
  38. msgids = message.id
  39. if not isinstance(msgids, (list, tuple)):
  40. msgids = (msgids,)
  41. msgstrs = message.string
  42. if not isinstance(msgstrs, (list, tuple)):
  43. msgstrs = (msgstrs,)
  44. if msgstrs[0]:
  45. _validate_format(msgids[0], msgstrs[0])
  46. if message.pluralizable:
  47. for msgstr in msgstrs[1:]:
  48. if msgstr:
  49. _validate_format(msgids[1], msgstr)
  50. def _validate_format(format: str, alternative: str) -> None:
  51. """Test format string `alternative` against `format`. `format` can be the
  52. msgid of a message and `alternative` one of the `msgstr`\\s. The two
  53. arguments are not interchangeable as `alternative` may contain less
  54. placeholders if `format` uses named placeholders.
  55. If the string formatting of `alternative` is compatible to `format` the
  56. function returns `None`, otherwise a `TranslationError` is raised.
  57. Examples for compatible format strings:
  58. >>> _validate_format('Hello %s!', 'Hallo %s!')
  59. >>> _validate_format('Hello %i!', 'Hallo %d!')
  60. Example for an incompatible format strings:
  61. >>> _validate_format('Hello %(name)s!', 'Hallo %s!')
  62. Traceback (most recent call last):
  63. ...
  64. TranslationError: the format strings are of different kinds
  65. This function is used by the `python_format` checker.
  66. :param format: The original format string
  67. :param alternative: The alternative format string that should be checked
  68. against format
  69. :raises TranslationError: on formatting errors
  70. """
  71. def _parse(string: str) -> list[tuple[str, str]]:
  72. result: list[tuple[str, str]] = []
  73. for match in PYTHON_FORMAT.finditer(string):
  74. name, format, typechar = match.groups()
  75. if typechar == '%' and name is None:
  76. continue
  77. result.append((name, str(typechar)))
  78. return result
  79. def _compatible(a: str, b: str) -> bool:
  80. if a == b:
  81. return True
  82. for set in _string_format_compatibilities:
  83. if a in set and b in set:
  84. return True
  85. return False
  86. def _check_positional(results: list[tuple[str, str]]) -> bool:
  87. positional = None
  88. for name, _char in results:
  89. if positional is None:
  90. positional = name is None
  91. else:
  92. if (name is None) != positional:
  93. raise TranslationError(
  94. 'format string mixes positional and named placeholders',
  95. )
  96. return bool(positional)
  97. a = _parse(format)
  98. b = _parse(alternative)
  99. if not a:
  100. return
  101. # now check if both strings are positional or named
  102. a_positional = _check_positional(a)
  103. b_positional = _check_positional(b)
  104. if a_positional and not b_positional and not b:
  105. raise TranslationError('placeholders are incompatible')
  106. elif a_positional != b_positional:
  107. raise TranslationError('the format strings are of different kinds')
  108. # if we are operating on positional strings both must have the
  109. # same number of format chars and those must be compatible
  110. if a_positional:
  111. if len(a) != len(b):
  112. raise TranslationError('positional format placeholders are unbalanced')
  113. for idx, ((_, first), (_, second)) in enumerate(zip(a, b)):
  114. if not _compatible(first, second):
  115. raise TranslationError(
  116. f'incompatible format for placeholder {idx + 1:d}: '
  117. f'{first!r} and {second!r} are not compatible',
  118. )
  119. # otherwise the second string must not have names the first one
  120. # doesn't have and the types of those included must be compatible
  121. else:
  122. type_map = dict(a)
  123. for name, typechar in b:
  124. if name not in type_map:
  125. raise TranslationError(f'unknown named placeholder {name!r}')
  126. elif not _compatible(typechar, type_map[name]):
  127. raise TranslationError(
  128. f'incompatible format for placeholder {name!r}: '
  129. f'{typechar!r} and {type_map[name]!r} are not compatible',
  130. )
  131. def _find_checkers() -> list[Callable[[Catalog | None, Message], object]]:
  132. from babel.messages._compat import find_entrypoints
  133. checkers: list[Callable[[Catalog | None, Message], object]] = []
  134. checkers.extend(load() for (name, load) in find_entrypoints('babel.checkers'))
  135. if len(checkers) == 0:
  136. # if entrypoints are not available or no usable egg-info was found
  137. # (see #230), just resort to hard-coded checkers
  138. return [num_plurals, python_format]
  139. return checkers
  140. checkers: list[Callable[[Catalog | None, Message], object]] = _find_checkers()