catalog.py 38 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055
  1. """
  2. babel.messages.catalog
  3. ~~~~~~~~~~~~~~~~~~~~~~
  4. Data structures for message catalogs.
  5. :copyright: (c) 2013-2026 by the Babel Team.
  6. :license: BSD, see LICENSE for more details.
  7. """
  8. from __future__ import annotations
  9. import datetime
  10. import re
  11. from collections.abc import Iterable, Iterator
  12. from copy import copy
  13. from difflib import SequenceMatcher
  14. from email import message_from_string
  15. from heapq import nlargest
  16. from string import Formatter
  17. from typing import TYPE_CHECKING
  18. from babel import __version__ as VERSION
  19. from babel.core import Locale, UnknownLocaleError
  20. from babel.dates import format_datetime
  21. from babel.messages.plurals import get_plural
  22. from babel.util import LOCALTZ, _cmp
if TYPE_CHECKING:
    # Only evaluated by static type checkers, never at runtime.
    from typing_extensions import TypeAlias

    # A message ID is either a single string or a sequence of strings
    # (e.g. ``(singular, plural)`` for pluralizable messages).
    _MessageID: TypeAlias = str | tuple[str, ...] | list[str]

# Public names of this module.
__all__ = [
    'DEFAULT_HEADER',
    'PYTHON_FORMAT',
    'Catalog',
    'Message',
    'TranslationError',
]
  33. def get_close_matches(word, possibilities, n=3, cutoff=0.6):
  34. """A modified version of ``difflib.get_close_matches``.
  35. It just passes ``autojunk=False`` to the ``SequenceMatcher``, to work
  36. around https://github.com/python/cpython/issues/90825.
  37. """
  38. if not n > 0: # pragma: no cover
  39. raise ValueError(f"n must be > 0: {n!r}")
  40. if not 0.0 <= cutoff <= 1.0: # pragma: no cover
  41. raise ValueError(f"cutoff must be in [0.0, 1.0]: {cutoff!r}")
  42. result = []
  43. s = SequenceMatcher(autojunk=False) # only line changed from difflib.py
  44. s.set_seq2(word)
  45. for x in possibilities:
  46. s.set_seq1(x)
  47. if (
  48. s.real_quick_ratio() >= cutoff
  49. and s.quick_ratio() >= cutoff
  50. and s.ratio() >= cutoff
  51. ):
  52. result.append((s.ratio(), x))
  53. # Move the best scorers to head of list
  54. result = nlargest(n, result)
  55. # Strip scores for the best n matches
  56. return [x for score, x in result]
  57. PYTHON_FORMAT = re.compile(
  58. r'''
  59. \%
  60. (?:\(([\w]*)\))?
  61. (
  62. [-#0\ +]?(?:\*|[\d]+)?
  63. (?:\.(?:\*|[\d]+))?
  64. [hlL]?
  65. )
  66. ([diouxXeEfFgGcrs%])
  67. ''',
  68. re.VERBOSE,
  69. )
  70. def _has_python_brace_format(string: str) -> bool:
  71. if "{" not in string:
  72. return False
  73. fmt = Formatter()
  74. try:
  75. # `fmt.parse` returns 3-or-4-tuples of the form
  76. # `(literal_text, field_name, format_spec, conversion)`;
  77. # if `field_name` is set, this smells like brace format
  78. field_name_seen = False
  79. for t in fmt.parse(string):
  80. if t[1] is not None:
  81. field_name_seen = True
  82. # We cannot break here, as we need to consume the whole string
  83. # to ensure that it is a valid format string.
  84. except ValueError:
  85. return False
  86. return field_name_seen
  87. def _parse_datetime_header(value: str) -> datetime.datetime:
  88. match = re.match(r'^(?P<datetime>.*?)(?P<tzoffset>[+-]\d{4})?$', value)
  89. dt = datetime.datetime.strptime(match.group('datetime'), '%Y-%m-%d %H:%M')
  90. # Separate the offset into a sign component, hours, and # minutes
  91. tzoffset = match.group('tzoffset')
  92. if tzoffset is not None:
  93. plus_minus_s, rest = tzoffset[0], tzoffset[1:]
  94. hours_offset_s, mins_offset_s = rest[:2], rest[2:]
  95. # Make them all integers
  96. plus_minus = int(f"{plus_minus_s}1")
  97. hours_offset = int(hours_offset_s)
  98. mins_offset = int(mins_offset_s)
  99. # Calculate net offset
  100. net_mins_offset = hours_offset * 60
  101. net_mins_offset += mins_offset
  102. net_mins_offset *= plus_minus
  103. # Create an offset object
  104. tzoffset = datetime.timezone(
  105. offset=datetime.timedelta(minutes=net_mins_offset),
  106. name=f'Etc/GMT{net_mins_offset:+d}',
  107. )
  108. # Store the offset in a datetime object
  109. dt = dt.replace(tzinfo=tzoffset)
  110. return dt
  111. class Message:
  112. """Representation of a single message in a catalog."""
  113. def __init__(
  114. self,
  115. id: _MessageID,
  116. string: _MessageID | None = '',
  117. locations: Iterable[tuple[str, int]] = (),
  118. flags: Iterable[str] = (),
  119. auto_comments: Iterable[str] = (),
  120. user_comments: Iterable[str] = (),
  121. previous_id: _MessageID = (),
  122. lineno: int | None = None,
  123. context: str | None = None,
  124. ) -> None:
  125. """Create the message object.
  126. :param id: the message ID, or a ``(singular, plural)`` tuple for
  127. pluralizable messages
  128. :param string: the translated message string, or a
  129. ``(singular, plural)`` tuple for pluralizable messages
  130. :param locations: a sequence of ``(filename, lineno)`` tuples
  131. :param flags: a set or sequence of flags
  132. :param auto_comments: a sequence of automatic comments for the message
  133. :param user_comments: a sequence of user comments for the message
  134. :param previous_id: the previous message ID, or a ``(singular, plural)``
  135. tuple for pluralizable messages
  136. :param lineno: the line number on which the msgid line was found in the
  137. PO file, if any
  138. :param context: the message context
  139. """
  140. self.id = id
  141. if not string and self.pluralizable:
  142. string = ('', '')
  143. self.string = string
  144. self.locations = list(dict.fromkeys(locations)) if locations else []
  145. self.flags = set(flags)
  146. if id and self.python_format:
  147. self.flags.add('python-format')
  148. else:
  149. self.flags.discard('python-format')
  150. if id and self.python_brace_format:
  151. self.flags.add('python-brace-format')
  152. else:
  153. self.flags.discard('python-brace-format')
  154. self.auto_comments = list(dict.fromkeys(auto_comments)) if auto_comments else []
  155. self.user_comments = list(dict.fromkeys(user_comments)) if user_comments else []
  156. if previous_id:
  157. if isinstance(previous_id, str):
  158. self.previous_id = [previous_id]
  159. else:
  160. self.previous_id = list(previous_id)
  161. else:
  162. self.previous_id = []
  163. self.lineno = lineno
  164. self.context = context
  165. def __repr__(self) -> str:
  166. return f"<{type(self).__name__} {self.id!r} (flags: {list(self.flags)!r})>"
  167. def __cmp__(self, other: object) -> int:
  168. """Compare Messages, taking into account plural ids"""
  169. def values_to_compare(obj):
  170. if isinstance(obj, Message) and obj.pluralizable:
  171. return obj.id[0], obj.context or ''
  172. return obj.id, obj.context or ''
  173. return _cmp(values_to_compare(self), values_to_compare(other))
  174. def __gt__(self, other: object) -> bool:
  175. return self.__cmp__(other) > 0
  176. def __lt__(self, other: object) -> bool:
  177. return self.__cmp__(other) < 0
  178. def __ge__(self, other: object) -> bool:
  179. return self.__cmp__(other) >= 0
  180. def __le__(self, other: object) -> bool:
  181. return self.__cmp__(other) <= 0
  182. def __eq__(self, other: object) -> bool:
  183. return self.__cmp__(other) == 0
  184. def __ne__(self, other: object) -> bool:
  185. return self.__cmp__(other) != 0
  186. def is_identical(self, other: Message) -> bool:
  187. """Checks whether messages are identical, taking into account all
  188. properties.
  189. """
  190. assert isinstance(other, Message)
  191. return self.__dict__ == other.__dict__
  192. def clone(self) -> Message:
  193. return Message(
  194. id=copy(self.id),
  195. string=copy(self.string),
  196. locations=copy(self.locations),
  197. flags=copy(self.flags),
  198. auto_comments=copy(self.auto_comments),
  199. user_comments=copy(self.user_comments),
  200. previous_id=copy(self.previous_id),
  201. lineno=self.lineno, # immutable (str/None)
  202. context=self.context, # immutable (str/None)
  203. )
  204. def check(self, catalog: Catalog | None = None) -> list[TranslationError]:
  205. """Run various validation checks on the message. Some validations
  206. are only performed if the catalog is provided. This method returns
  207. a sequence of `TranslationError` objects.
  208. :rtype: ``iterator``
  209. :param catalog: A catalog instance that is passed to the checkers
  210. :see: `Catalog.check` for a way to perform checks for all messages
  211. in a catalog.
  212. """
  213. from babel.messages.checkers import checkers
  214. errors: list[TranslationError] = []
  215. for checker in checkers:
  216. try:
  217. checker(catalog, self)
  218. except TranslationError as e:
  219. errors.append(e)
  220. return errors
  221. @property
  222. def fuzzy(self) -> bool:
  223. """Whether the translation is fuzzy.
  224. >>> Message('foo').fuzzy
  225. False
  226. >>> msg = Message('foo', 'foo', flags=['fuzzy'])
  227. >>> msg.fuzzy
  228. True
  229. >>> msg
  230. <Message 'foo' (flags: ['fuzzy'])>
  231. :type: `bool`"""
  232. return 'fuzzy' in self.flags
  233. @property
  234. def pluralizable(self) -> bool:
  235. """Whether the message is plurizable.
  236. >>> Message('foo').pluralizable
  237. False
  238. >>> Message(('foo', 'bar')).pluralizable
  239. True
  240. :type: `bool`"""
  241. return isinstance(self.id, (list, tuple))
  242. @property
  243. def python_format(self) -> bool:
  244. """Whether the message contains Python-style parameters.
  245. >>> Message('foo %(name)s bar').python_format
  246. True
  247. >>> Message(('foo %(name)s', 'foo %(name)s')).python_format
  248. True
  249. :type: `bool`"""
  250. ids = self.id
  251. if isinstance(ids, (list, tuple)):
  252. for id in ids: # Explicit loop for performance reasons.
  253. if PYTHON_FORMAT.search(id):
  254. return True
  255. return False
  256. return bool(PYTHON_FORMAT.search(ids))
  257. @property
  258. def python_brace_format(self) -> bool:
  259. """Whether the message contains Python f-string parameters.
  260. >>> Message('Hello, {name}!').python_brace_format
  261. True
  262. >>> Message(('One apple', '{count} apples')).python_brace_format
  263. True
  264. :type: `bool`"""
  265. ids = self.id
  266. if isinstance(ids, (list, tuple)):
  267. for id in ids: # Explicit loop for performance reasons.
  268. if _has_python_brace_format(id):
  269. return True
  270. return False
  271. return _has_python_brace_format(ids)
  272. class TranslationError(Exception):
  273. """Exception thrown by translation checkers when invalid message
  274. translations are encountered."""
  275. DEFAULT_HEADER = """\
  276. # Translations template for PROJECT.
  277. # Copyright (C) YEAR ORGANIZATION
  278. # This file is distributed under the same license as the PROJECT project.
  279. # FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
  280. #"""
  281. def parse_separated_header(value: str) -> dict[str, str]:
  282. # Adapted from https://peps.python.org/pep-0594/#cgi
  283. from email.message import Message
  284. m = Message()
  285. m['content-type'] = value
  286. return dict(m.get_params())
  287. def _force_text(s: str | bytes, encoding: str = 'utf-8', errors: str = 'strict') -> str:
  288. if isinstance(s, str):
  289. return s
  290. if isinstance(s, bytes):
  291. return s.decode(encoding, errors)
  292. return str(s)
  293. class Catalog:
  294. """Representation of a message catalog."""
  295. def __init__(
  296. self,
  297. locale: Locale | str | None = None,
  298. domain: str | None = None,
  299. header_comment: str | None = DEFAULT_HEADER,
  300. project: str | None = None,
  301. version: str | None = None,
  302. copyright_holder: str | None = None,
  303. msgid_bugs_address: str | None = None,
  304. creation_date: datetime.datetime | str | None = None,
  305. revision_date: datetime.datetime | datetime.time | float | str | None = None,
  306. last_translator: str | None = None,
  307. language_team: str | None = None,
  308. charset: str | None = None,
  309. fuzzy: bool = True,
  310. ) -> None:
  311. """Initialize the catalog object.
  312. :param locale: the locale identifier or `Locale` object, or `None`
  313. if the catalog is not bound to a locale (which basically
  314. means it's a template)
  315. :param domain: the message domain
  316. :param header_comment: the header comment as string, or `None` for the
  317. default header
  318. :param project: the project's name
  319. :param version: the project's version
  320. :param copyright_holder: the copyright holder of the catalog
  321. :param msgid_bugs_address: the email address or URL to submit bug
  322. reports to
  323. :param creation_date: the date the catalog was created
  324. :param revision_date: the date the catalog was revised
  325. :param last_translator: the name and email of the last translator
  326. :param language_team: the name and email of the language team
  327. :param charset: the encoding to use in the output (defaults to utf-8)
  328. :param fuzzy: the fuzzy bit on the catalog header
  329. """
  330. self.domain = domain
  331. self.locale = locale
  332. self._header_comment = header_comment
  333. self._messages: dict[str | tuple[str, str], Message] = {}
  334. self.project = project or 'PROJECT'
  335. self.version = version or 'VERSION'
  336. self.copyright_holder = copyright_holder or 'ORGANIZATION'
  337. self.msgid_bugs_address = msgid_bugs_address or 'EMAIL@ADDRESS'
  338. self.last_translator = last_translator or 'FULL NAME <EMAIL@ADDRESS>'
  339. """Name and email address of the last translator."""
  340. self.language_team = language_team or 'LANGUAGE <LL@li.org>'
  341. """Name and email address of the language team."""
  342. self.charset = charset or 'utf-8'
  343. if creation_date is None:
  344. creation_date = datetime.datetime.now(LOCALTZ)
  345. elif isinstance(creation_date, datetime.datetime) and not creation_date.tzinfo:
  346. creation_date = creation_date.replace(tzinfo=LOCALTZ)
  347. self.creation_date = creation_date
  348. if revision_date is None:
  349. revision_date = 'YEAR-MO-DA HO:MI+ZONE'
  350. elif isinstance(revision_date, datetime.datetime) and not revision_date.tzinfo:
  351. revision_date = revision_date.replace(tzinfo=LOCALTZ)
  352. self.revision_date = revision_date
  353. self.fuzzy = fuzzy
  354. # Dictionary of obsolete messages
  355. self.obsolete: dict[str | tuple[str, str], Message] = {}
  356. self._num_plurals = None
  357. self._plural_expr = None
  358. def _set_locale(self, locale: Locale | str | None) -> None:
  359. if locale is None:
  360. self._locale_identifier = None
  361. self._locale = None
  362. return
  363. if isinstance(locale, Locale):
  364. self._locale_identifier = str(locale)
  365. self._locale = locale
  366. return
  367. if isinstance(locale, str):
  368. self._locale_identifier = str(locale)
  369. try:
  370. self._locale = Locale.parse(locale)
  371. except UnknownLocaleError:
  372. self._locale = None
  373. return
  374. raise TypeError(
  375. f"`locale` must be a Locale, a locale identifier string, or None; got {locale!r}",
  376. )
  377. def _get_locale(self) -> Locale | None:
  378. return self._locale
  379. def _get_locale_identifier(self) -> str | None:
  380. return self._locale_identifier
  381. locale = property(_get_locale, _set_locale)
  382. locale_identifier = property(_get_locale_identifier)
  383. def _get_header_comment(self) -> str:
  384. comment = self._header_comment
  385. year = datetime.datetime.now(LOCALTZ).strftime('%Y')
  386. if hasattr(self.revision_date, 'strftime'):
  387. year = self.revision_date.strftime('%Y')
  388. comment = (
  389. comment.replace('PROJECT', self.project)
  390. .replace('VERSION', self.version)
  391. .replace('YEAR', year)
  392. .replace('ORGANIZATION', self.copyright_holder)
  393. )
  394. locale_name = self.locale.english_name if self.locale else self.locale_identifier
  395. if locale_name:
  396. comment = comment.replace("Translations template", f"{locale_name} translations")
  397. return comment
  398. def _set_header_comment(self, string: str | None) -> None:
  399. self._header_comment = string
  400. header_comment = property(
  401. _get_header_comment,
  402. _set_header_comment,
  403. doc="""\
  404. The header comment for the catalog.
  405. >>> catalog = Catalog(project='Foobar', version='1.0',
  406. ... copyright_holder='Foo Company')
  407. >>> print(catalog.header_comment) #doctest: +ELLIPSIS
  408. # Translations template for Foobar.
  409. # Copyright (C) ... Foo Company
  410. # This file is distributed under the same license as the Foobar project.
  411. # FIRST AUTHOR <EMAIL@ADDRESS>, ....
  412. #
  413. The header can also be set from a string. Any known upper-case variables
  414. will be replaced when the header is retrieved again:
  415. >>> catalog = Catalog(project='Foobar', version='1.0',
  416. ... copyright_holder='Foo Company')
  417. >>> catalog.header_comment = '''\\
  418. ... # The POT for my really cool PROJECT project.
  419. ... # Copyright (C) 1990-2003 ORGANIZATION
  420. ... # This file is distributed under the same license as the PROJECT
  421. ... # project.
  422. ... #'''
  423. >>> print(catalog.header_comment)
  424. # The POT for my really cool Foobar project.
  425. # Copyright (C) 1990-2003 Foo Company
  426. # This file is distributed under the same license as the Foobar
  427. # project.
  428. #
  429. :type: `unicode`
  430. """,
  431. )
  432. def _get_mime_headers(self) -> list[tuple[str, str]]:
  433. if isinstance(self.revision_date, (datetime.datetime, datetime.time, int, float)):
  434. revision_date = format_datetime(
  435. self.revision_date,
  436. 'yyyy-MM-dd HH:mmZ',
  437. locale='en',
  438. )
  439. else:
  440. revision_date = self.revision_date
  441. language_team = self.language_team
  442. if self.locale_identifier and 'LANGUAGE' in language_team:
  443. language_team = language_team.replace('LANGUAGE', str(self.locale_identifier))
  444. headers: list[tuple[str, str]] = [
  445. ("Project-Id-Version", f"{self.project} {self.version}"),
  446. ('Report-Msgid-Bugs-To', self.msgid_bugs_address),
  447. ('POT-Creation-Date', format_datetime(self.creation_date, 'yyyy-MM-dd HH:mmZ', locale='en')),
  448. ('PO-Revision-Date', revision_date),
  449. ('Last-Translator', self.last_translator),
  450. ] # fmt: skip
  451. if self.locale_identifier:
  452. headers.append(('Language', str(self.locale_identifier)))
  453. headers.append(('Language-Team', language_team))
  454. if self.locale is not None:
  455. headers.append(('Plural-Forms', self.plural_forms))
  456. headers += [
  457. ('MIME-Version', '1.0'),
  458. ("Content-Type", f"text/plain; charset={self.charset}"),
  459. ('Content-Transfer-Encoding', '8bit'),
  460. ("Generated-By", f"Babel {VERSION}\n"),
  461. ]
  462. return headers
  463. def _set_mime_headers(self, headers: Iterable[tuple[str, str]]) -> None:
  464. for name, value in headers:
  465. name = _force_text(name.lower(), encoding=self.charset)
  466. value = _force_text(value, encoding=self.charset)
  467. if name == 'project-id-version':
  468. parts = value.split(' ')
  469. self.project = ' '.join(parts[:-1])
  470. self.version = parts[-1]
  471. elif name == 'report-msgid-bugs-to':
  472. self.msgid_bugs_address = value
  473. elif name == 'last-translator':
  474. self.last_translator = value
  475. elif name == 'language':
  476. value = value.replace('-', '_')
  477. # The `or None` makes sure that the locale is set to None
  478. # if the header's value is an empty string, which is what
  479. # some tools generate (instead of eliding the empty Language
  480. # header altogether).
  481. self._set_locale(value or None)
  482. elif name == 'language-team':
  483. self.language_team = value
  484. elif name == 'content-type':
  485. params = parse_separated_header(value)
  486. if 'charset' in params:
  487. self.charset = params['charset'].lower()
  488. elif name == 'plural-forms':
  489. params = parse_separated_header(f" ;{value}")
  490. self._num_plurals = int(params.get('nplurals', 2))
  491. self._plural_expr = params.get('plural', '(n != 1)')
  492. elif name == 'pot-creation-date':
  493. self.creation_date = _parse_datetime_header(value)
  494. elif name == 'po-revision-date':
  495. # Keep the value if it's not the default one
  496. if 'YEAR' not in value:
  497. self.revision_date = _parse_datetime_header(value)
  498. mime_headers = property(
  499. _get_mime_headers,
  500. _set_mime_headers,
  501. doc="""\
  502. The MIME headers of the catalog, used for the special ``msgid ""`` entry.
  503. The behavior of this property changes slightly depending on whether a locale
  504. is set or not, the latter indicating that the catalog is actually a template
  505. for actual translations.
  506. Here's an example of the output for such a catalog template:
  507. >>> from babel.dates import UTC
  508. >>> from datetime import datetime
  509. >>> created = datetime(1990, 4, 1, 15, 30, tzinfo=UTC)
  510. >>> catalog = Catalog(project='Foobar', version='1.0',
  511. ... creation_date=created)
  512. >>> for name, value in catalog.mime_headers:
  513. ... print('%s: %s' % (name, value))
  514. Project-Id-Version: Foobar 1.0
  515. Report-Msgid-Bugs-To: EMAIL@ADDRESS
  516. POT-Creation-Date: 1990-04-01 15:30+0000
  517. PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE
  518. Last-Translator: FULL NAME <EMAIL@ADDRESS>
  519. Language-Team: LANGUAGE <LL@li.org>
  520. MIME-Version: 1.0
  521. Content-Type: text/plain; charset=utf-8
  522. Content-Transfer-Encoding: 8bit
  523. Generated-By: Babel ...
  524. And here's an example of the output when the locale is set:
  525. >>> revised = datetime(1990, 8, 3, 12, 0, tzinfo=UTC)
  526. >>> catalog = Catalog(locale='de_DE', project='Foobar', version='1.0',
  527. ... creation_date=created, revision_date=revised,
  528. ... last_translator='John Doe <jd@example.com>',
  529. ... language_team='de_DE <de@example.com>')
  530. >>> for name, value in catalog.mime_headers:
  531. ... print('%s: %s' % (name, value))
  532. Project-Id-Version: Foobar 1.0
  533. Report-Msgid-Bugs-To: EMAIL@ADDRESS
  534. POT-Creation-Date: 1990-04-01 15:30+0000
  535. PO-Revision-Date: 1990-08-03 12:00+0000
  536. Last-Translator: John Doe <jd@example.com>
  537. Language: de_DE
  538. Language-Team: de_DE <de@example.com>
  539. Plural-Forms: nplurals=2; plural=(n != 1);
  540. MIME-Version: 1.0
  541. Content-Type: text/plain; charset=utf-8
  542. Content-Transfer-Encoding: 8bit
  543. Generated-By: Babel ...
  544. :type: `list`
  545. """,
  546. )
  547. @property
  548. def num_plurals(self) -> int:
  549. """The number of plurals used by the catalog or locale.
  550. >>> Catalog(locale='en').num_plurals
  551. 2
  552. >>> Catalog(locale='ga').num_plurals
  553. 5
  554. :type: `int`"""
  555. if self._num_plurals is None:
  556. num = 2
  557. if self.locale:
  558. num = get_plural(self.locale)[0]
  559. self._num_plurals = num
  560. return self._num_plurals
  561. @property
  562. def plural_expr(self) -> str:
  563. """The plural expression used by the catalog or locale.
  564. >>> Catalog(locale='en').plural_expr
  565. '(n != 1)'
  566. >>> Catalog(locale='ga').plural_expr
  567. '(n==1 ? 0 : n==2 ? 1 : n>=3 && n<=6 ? 2 : n>=7 && n<=10 ? 3 : 4)'
  568. >>> Catalog(locale='ding').plural_expr # unknown locale
  569. '(n != 1)'
  570. :type: `str`"""
  571. if self._plural_expr is None:
  572. expr = '(n != 1)'
  573. if self.locale:
  574. expr = get_plural(self.locale)[1]
  575. self._plural_expr = expr
  576. return self._plural_expr
  577. @property
  578. def plural_forms(self) -> str:
  579. """Return the plural forms declaration for the locale.
  580. >>> Catalog(locale='en').plural_forms
  581. 'nplurals=2; plural=(n != 1);'
  582. >>> Catalog(locale='pt_BR').plural_forms
  583. 'nplurals=2; plural=(n > 1);'
  584. :type: `str`"""
  585. return f"nplurals={self.num_plurals}; plural={self.plural_expr};"
  586. def __contains__(self, id: _MessageID) -> bool:
  587. """Return whether the catalog has a message with the specified ID."""
  588. return self._key_for(id) in self._messages
  589. def __len__(self) -> int:
  590. """The number of messages in the catalog.
  591. This does not include the special ``msgid ""`` entry."""
  592. return len(self._messages)
  593. def __iter__(self) -> Iterator[Message]:
  594. """Iterates through all the entries in the catalog, in the order they
  595. were added, yielding a `Message` object for every entry.
  596. :rtype: ``iterator``"""
  597. buf = []
  598. for name, value in self.mime_headers:
  599. buf.append(f"{name}: {value}")
  600. flags = set()
  601. if self.fuzzy:
  602. flags |= {'fuzzy'}
  603. yield Message('', '\n'.join(buf), flags=flags)
  604. for key in self._messages:
  605. yield self._messages[key]
  606. def __repr__(self) -> str:
  607. locale = ''
  608. if self.locale:
  609. locale = f" {self.locale}"
  610. return f"<{type(self).__name__} {self.domain!r}{locale}>"
  611. def __delitem__(self, id: _MessageID) -> None:
  612. """Delete the message with the specified ID."""
  613. self.delete(id)
  614. def __getitem__(self, id: _MessageID) -> Message:
  615. """Return the message with the specified ID.
  616. :param id: the message ID
  617. """
  618. return self.get(id)
  619. def __setitem__(self, id: _MessageID, message: Message) -> None:
  620. """Add or update the message with the specified ID.
  621. >>> catalog = Catalog()
  622. >>> catalog['foo'] = Message('foo')
  623. >>> catalog['foo']
  624. <Message 'foo' (flags: [])>
  625. If a message with that ID is already in the catalog, it is updated
  626. to include the locations and flags of the new message.
  627. >>> catalog = Catalog()
  628. >>> catalog['foo'] = Message('foo', locations=[('main.py', 1)])
  629. >>> catalog['foo'].locations
  630. [('main.py', 1)]
  631. >>> catalog['foo'] = Message('foo', locations=[('utils.py', 5)])
  632. >>> catalog['foo'].locations
  633. [('main.py', 1), ('utils.py', 5)]
  634. :param id: the message ID
  635. :param message: the `Message` object
  636. """
  637. assert isinstance(message, Message), 'expected a Message object'
  638. key = self._key_for(id, message.context)
  639. current = self._messages.get(key)
  640. if current:
  641. if message.pluralizable and not current.pluralizable:
  642. # The new message adds pluralization
  643. current.id = message.id
  644. current.string = message.string
  645. current.locations = list(dict.fromkeys([*current.locations, *message.locations]))
  646. current.auto_comments = list(dict.fromkeys([*current.auto_comments, *message.auto_comments])) # fmt:skip
  647. current.user_comments = list(dict.fromkeys([*current.user_comments, *message.user_comments])) # fmt:skip
  648. current.flags |= message.flags
  649. elif id == '':
  650. # special treatment for the header message
  651. self.mime_headers = message_from_string(message.string).items()
  652. self.header_comment = "\n".join(f"# {c}".rstrip() for c in message.user_comments)
  653. self.fuzzy = message.fuzzy
  654. else:
  655. if isinstance(id, (list, tuple)):
  656. assert isinstance(message.string, (list, tuple)), (
  657. f"Expected sequence but got {type(message.string)}"
  658. )
  659. self._messages[key] = message
  def add(
      self,
      id: _MessageID,
      string: _MessageID | None = None,
      locations: Iterable[tuple[str, int]] = (),
      flags: Iterable[str] = (),
      auto_comments: Iterable[str] = (),
      user_comments: Iterable[str] = (),
      previous_id: _MessageID = (),
      lineno: int | None = None,
      context: str | None = None,
  ) -> Message:
      """Build a `Message` from the arguments and store it in the catalog.

      >>> catalog = Catalog()
      >>> catalog.add('foo')
      <Message ...>
      >>> catalog['foo']
      <Message 'foo' (flags: [])>

      The message is stored through item assignment on the catalog
      (``__setitem__``), using ``id`` as the subscript, and is then returned.

      :param id: the message ID, or a ``(singular, plural)`` tuple for
                 pluralizable messages
      :param string: the translated message string, or a
                     ``(singular, plural)`` tuple for pluralizable messages
      :param locations: a sequence of ``(filename, lineno)`` tuples
      :param flags: a set or sequence of flags
      :param auto_comments: a sequence of automatic comments
      :param user_comments: a sequence of user comments
      :param previous_id: the previous message ID, or a ``(singular, plural)``
                          tuple for pluralizable messages
      :param lineno: the line number on which the msgid line was found in the
                     PO file, if any
      :param context: the message context
      :return: the newly constructed `Message`
      """
      # Materialise the locations first so the message owns its own list.
      location_list = list(locations)
      entry = Message(
          id,
          string,
          location_list,
          flags,
          auto_comments,
          user_comments,
          previous_id,
          lineno=lineno,
          context=context,
      )
      self[id] = entry
      return entry
  def check(self) -> Iterable[tuple[Message, list[TranslationError]]]:
      """Validate the translations of every message in the catalog.

      Each stored message has its ``check`` method called with this catalog
      as the ``catalog`` argument.  For every message whose check returns a
      non-empty result, a ``(message, errors)`` tuple is yielded, where
      ``message`` is the `Message` object and ``errors`` is what its check
      returned (a sequence of `TranslationError` objects).

      :rtype: ``generator`` of ``(message, errors)``
      """
      for entry in self._messages.values():
          problems = entry.check(catalog=self)
          if not problems:
              # Nothing to report for this message.
              continue
          yield entry, problems
  def get(self, id: _MessageID, context: str | None = None) -> Message | None:
      """Look up a message by ID and context.

      :param id: the message ID
      :param context: the message context, or ``None`` for no context
      :return: the stored message, or ``None`` if no message is stored under
               the key derived from ``id`` and ``context``
      """
      lookup_key = self._key_for(id, context)
      return self._messages.get(lookup_key)
  def delete(self, id: _MessageID, context: str | None = None) -> None:
      """Remove the message stored for the given ID and context, if any.

      Nothing happens when no message is stored under the resulting key.

      :param id: the message ID
      :param context: the message context, or ``None`` for no context
      """
      # pop() with a default silently ignores keys that are not present.
      self._messages.pop(self._key_for(id, context), None)
  def update(
      self,
      template: Catalog,
      no_fuzzy_matching: bool = False,
      update_header_comment: bool = False,
      keep_user_comments: bool = True,
      update_creation_date: bool = True,
  ) -> None:
      """Update the catalog based on the given template catalog.

      >>> from babel.messages import Catalog
      >>> template = Catalog()
      >>> template.add('green', locations=[('main.py', 99)])
      <Message ...>
      >>> template.add('blue', locations=[('main.py', 100)])
      <Message ...>
      >>> template.add(('salad', 'salads'), locations=[('util.py', 42)])
      <Message ...>
      >>> catalog = Catalog(locale='de_DE')
      >>> catalog.add('blue', 'blau', locations=[('main.py', 98)])
      <Message ...>
      >>> catalog.add('head', 'Kopf', locations=[('util.py', 33)])
      <Message ...>
      >>> catalog.add(('salad', 'salads'), ('Salat', 'Salate'),
      ...             locations=[('util.py', 38)])
      <Message ...>

      >>> catalog.update(template)
      >>> len(catalog)
      3

      >>> msg1 = catalog['green']
      >>> msg1.string
      >>> msg1.locations
      [('main.py', 99)]

      >>> msg2 = catalog['blue']
      >>> msg2.string
      'blau'
      >>> msg2.locations
      [('main.py', 100)]

      >>> msg3 = catalog['salad']
      >>> msg3.string
      ('Salat', 'Salate')
      >>> msg3.locations
      [('util.py', 42)]

      Messages that are in the catalog but not in the template are removed
      from the main collection, but can still be accessed via the `obsolete`
      member:

      >>> 'head' in catalog
      False
      >>> list(catalog.obsolete.values())
      [<Message 'head' (flags: [])>]

      :param template: the reference catalog, usually read from a POT file
      :param no_fuzzy_matching: if true, fuzzy matching of message IDs is
                                disabled; fuzzy matching is performed when
                                this is false (the default)
      :param update_header_comment: whether to copy the header comment from the template
      :param keep_user_comments: whether to keep user comments from the old catalog
      :param update_creation_date: whether to copy the creation date from the template
      """
      # `messages` is the pre-update mapping and stays untouched; `remaining`
      # is a copy from which exactly-matched entries are popped, so whatever
      # is left at the end was not matched exactly.  The catalog itself is
      # emptied and repopulated below through item assignment.
      messages = self._messages
      remaining = messages.copy()
      self._messages = {}

      # Prepare for fuzzy matching
      # Maps the normalised msgid (see `_to_fuzzy_match_key`) of each old
      # message that has a truthy key and a truthy string to a
      # ``(context-free key, context)`` pair.
      fuzzy_candidates = {}
      if not no_fuzzy_matching:
          for msgid in messages:
              if msgid and messages[msgid].string:
                  key = self._key_for(msgid)
                  ctxt = messages[msgid].context
                  fuzzy_candidates[self._to_fuzzy_match_key(key)] = (key, ctxt)
      # Old keys that were used as the source of a fuzzy match.
      fuzzy_matches = set()

      def _merge(
          message: Message,
          oldkey: tuple[str, str] | str,
          newkey: tuple[str, str] | str,
      ) -> None:
          # Merge the old entry stored under `oldkey` into a clone of the
          # template `message` and store the result in this catalog.
          message = message.clone()
          fuzzy = False
          if oldkey != newkey:
              # Fuzzy match: the old entry is only read (not popped from
              # `remaining`) and its ID is recorded as the previous ID.
              fuzzy = True
              fuzzy_matches.add(oldkey)
              oldmsg = messages.get(oldkey)
              assert oldmsg is not None
              if isinstance(oldmsg.id, str):
                  message.previous_id = [oldmsg.id]
              else:
                  message.previous_id = list(oldmsg.id)
          else:
              # Exact match: consume the old entry so it is not reported as
              # obsolete afterwards.
              oldmsg = remaining.pop(oldkey, None)
              assert oldmsg is not None
          message.string = oldmsg.string

          if keep_user_comments and oldmsg.user_comments:
              # dict.fromkeys drops duplicate comments while keeping order.
              message.user_comments = list(dict.fromkeys(oldmsg.user_comments))

          # Reconcile the shape of the carried-over translation with the
          # (possibly changed) singular/plural shape of the template ID; any
          # mismatch marks the message fuzzy.
          if isinstance(message.id, (list, tuple)):
              if not isinstance(message.string, (list, tuple)):
                  fuzzy = True
                  # Keep the old string as the first form and pad with
                  # empty strings up to the number of ID forms.
                  message.string = tuple(
                      [message.string] + ([''] * (len(message.id) - 1)),
                  )
              elif len(message.string) != self.num_plurals:
                  fuzzy = True
                  # NOTE(review): message.string was assigned from
                  # oldmsg.string above, so this slice keeps every element;
                  # only the tuple conversion and the fuzzy flag take
                  # effect -- confirm this is the intent.
                  message.string = tuple(message.string[: len(oldmsg.string)])
          elif isinstance(message.string, (list, tuple)):
              fuzzy = True
              message.string = message.string[0]
          message.flags |= oldmsg.flags
          if fuzzy:
              message.flags |= {'fuzzy'}
          self[message.id] = message

      for message in template:
          # Template entries with a falsy ID are skipped entirely.
          if message.id:
              key = self._key_for(message.id, message.context)
              if key in messages:
                  _merge(message, key, key)
              else:
                  if not no_fuzzy_matching:
                      # do some fuzzy matching with difflib
                      matches = get_close_matches(
                          self._to_fuzzy_match_key(key),
                          fuzzy_candidates.keys(),
                          1,
                      )
                      if matches:
                          modified_key = matches[0]
                          newkey, newctxt = fuzzy_candidates[modified_key]
                          # Rebuild the context-qualified key of the old
                          # message when it had a context.
                          if newctxt is not None:
                              newkey = newkey, newctxt
                          _merge(message, newkey, key)
                          # Merged via fuzzy match; skip the plain add below.
                          continue

                  # No exact or fuzzy match: store the template message.
                  self[message.id] = message

      # Old entries that were neither matched exactly (still in `remaining`)
      # nor used for a fuzzy match are kept in `obsolete`.
      for msgid in remaining:
          if no_fuzzy_matching or msgid not in fuzzy_matches:
              self.obsolete[msgid] = remaining[msgid]

      if update_header_comment:
          # Allow the updated catalog's header to be rewritten based on the
          # template's header
          self.header_comment = template.header_comment

      # Make updated catalog's POT-Creation-Date equal to the template
      # used to update the catalog
      if update_creation_date:
          self.creation_date = template.creation_date
  869. def _to_fuzzy_match_key(self, key: tuple[str, str] | str) -> str:
  870. """Converts a message key to a string suitable for fuzzy matching."""
  871. if isinstance(key, tuple):
  872. matchkey = key[0] # just the msgid, no context
  873. else:
  874. matchkey = key
  875. return matchkey.lower().strip()
  876. def _key_for(
  877. self,
  878. id: _MessageID,
  879. context: str | None = None,
  880. ) -> tuple[str, str] | str:
  881. """The key for a message is just the singular ID even for pluralizable
  882. messages, but is a ``(msgid, msgctxt)`` tuple for context-specific
  883. messages.
  884. """
  885. key = id
  886. if isinstance(key, (list, tuple)):
  887. key = id[0]
  888. if context is not None:
  889. key = (key, context)
  890. return key
  def is_identical(self, other: Catalog) -> bool:
      """Check whether two catalogs hold identical messages and headers.

      The catalogs are identical when both store messages under exactly the
      same keys, every pair of messages stored under the same key satisfies
      the message's own ``is_identical``, and the MIME headers of both
      catalogs compare equal as dictionaries.

      :param other: the catalog to compare against
      :return: ``True`` if all messages and the MIME headers match,
               ``False`` otherwise
      """
      assert isinstance(other, Catalog)
      own_messages = self._messages
      other_messages = other._messages
      for key in own_messages.keys() | other_messages.keys():
          # Look the stored keys up directly.  Routing them through ``get()``
          # would pass them through ``_key_for`` a second time, which treats
          # a ``(msgid, msgctxt)`` key as a ``(singular, plural)`` ID and
          # drops the context, so context-specific messages would be missed
          # or compared against the wrong, context-free message.
          message_1 = own_messages.get(key)
          message_2 = other_messages.get(key)
          if message_1 is None or message_2 is None or not message_1.is_identical(message_2):
              return False
      return dict(self.mime_headers) == dict(other.mime_headers)