core.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415
  1. """Core functionality shared by all JSON loggers"""
  2. ### IMPORTS
  3. ### ============================================================================
  4. ## Future
  5. from __future__ import annotations
  6. ## Standard Library
  7. from datetime import datetime, timezone
  8. import logging
  9. import re
  10. import sys
  11. from typing import TypeAlias, Any
  12. from collections.abc import Container, Sequence
  13. ## Installed
  14. ## Application
  15. ### CONSTANTS
  16. ### ============================================================================
  17. RESERVED_ATTRS: list[str] = [
  18. "args",
  19. "asctime",
  20. "created",
  21. "exc_info",
  22. "exc_text",
  23. "filename",
  24. "funcName",
  25. "levelname",
  26. "levelno",
  27. "lineno",
  28. "module",
  29. "msecs",
  30. "message",
  31. "msg",
  32. "name",
  33. "pathname",
  34. "process",
  35. "processName",
  36. "relativeCreated",
  37. "stack_info",
  38. "thread",
  39. "threadName",
  40. ]
  41. """Default reserved attributes.
  42. These come from the [default attributes of `LogRecord` objects](http://docs.python.org/library/logging.html#logrecord-attributes).
  43. Note:
  44. Although considered a constant, this list is dependent on the Python version due to
  45. different `LogRecord` objects having different attributes in different Python versions.
  46. *Changed in 3.0*: `RESERVED_ATTRS` is now `list[str]` instead of `tuple[str, ...]`.
  47. """
  48. if sys.version_info >= (3, 12):
  49. # taskName added in python 3.12
  50. RESERVED_ATTRS.append("taskName")
  51. RESERVED_ATTRS.sort()
  52. STYLE_STRING_TEMPLATE_REGEX = re.compile(r"\$\{(.+?)\}", re.IGNORECASE) # $ style
  53. STYLE_STRING_FORMAT_REGEX = re.compile(r"\{(.+?)\}", re.IGNORECASE) # { style
  54. STYLE_PERCENT_REGEX = re.compile(r"%\((.+?)\)", re.IGNORECASE) # % style
  55. ## Type Aliases
  56. ## -----------------------------------------------------------------------------
  57. LogData: TypeAlias = dict[str, Any]
  58. """Type alias
  59. *Changed in 4.0*: renamed from `LogRecord` to `LogData`
  60. """
  61. ### FUNCTIONS
  62. ### ============================================================================
  63. def merge_record_extra(
  64. record: logging.LogRecord,
  65. target: dict[Any, Any],
  66. reserved: Container[str],
  67. rename_fields: dict[str, str] | None = None,
  68. ) -> dict[Any, Any]:
  69. """
  70. Merges extra attributes from LogRecord object into target dictionary
  71. Args:
  72. record: logging.LogRecord
  73. target: dict to update
  74. reserved: dict or list with reserved keys to skip
  75. rename_fields: an optional dict, used to rename field names in the output.
  76. e.g. Rename `levelname` to `log.level`: `{'levelname': 'log.level'}`
  77. *Changed in 3.1*: `reserved` is now `Container[str]`.
  78. """
  79. if rename_fields is None:
  80. rename_fields = {}
  81. for key, value in record.__dict__.items():
  82. # this allows to have numeric keys
  83. if key not in reserved and not (hasattr(key, "startswith") and key.startswith("_")):
  84. target[rename_fields.get(key, key)] = value
  85. return target
  86. ### CLASSES
  87. ### ============================================================================
  88. class BaseJsonFormatter(logging.Formatter):
  89. """Base class for all formatters
  90. Must not be used directly.
  91. *New in 3.1*
  92. *Changed in 3.2*: `defaults` argument is no longer ignored.
  93. *Added in 3.3*: `exc_info_as_array` and `stack_info_as_array` options are added.
  94. """
  95. _style: logging.PercentStyle | str # type: ignore[assignment]
  96. ## Parent Methods
  97. ## -------------------------------------------------------------------------
  98. # pylint: disable=too-many-arguments,super-init-not-called
  99. def __init__(
  100. self,
  101. fmt: str | Sequence[str] | None = None,
  102. datefmt: str | None = None,
  103. style: str = "%",
  104. validate: bool = True,
  105. *,
  106. prefix: str = "",
  107. rename_fields: dict[str, str] | None = None,
  108. rename_fields_keep_missing: bool = False,
  109. static_fields: dict[str, Any] | None = None,
  110. reserved_attrs: Sequence[str] | None = None,
  111. timestamp: bool | str = False,
  112. defaults: dict[str, Any] | None = None,
  113. exc_info_as_array: bool = False,
  114. stack_info_as_array: bool = False,
  115. ) -> None:
  116. """
  117. Args:
  118. fmt: String format or `Sequence` of field names of fields to log.
  119. datefmt: format to use when formatting `asctime` field
  120. style: how to extract log fields from `fmt`. Ignored if `fmt` is a `Sequence[str]`.
  121. validate: validate `fmt` against style, if implementing a custom `style` you
  122. must set this to `False`. Ignored if `fmt` is a `Sequence[str]`.
  123. defaults: a dictionary containing default fields that are added before all other fields and
  124. may be overridden. The supplied fields are still subject to `rename_fields`.
  125. prefix: an optional string prefix added at the beginning of
  126. the formatted string
  127. rename_fields: an optional dict, used to rename field names in the output.
  128. Rename `message` to `@message`: `{'message': '@message'}`
  129. rename_fields_keep_missing: When renaming fields, include missing fields in the output.
  130. static_fields: an optional dict, used to add fields with static values to all logs
  131. reserved_attrs: an optional list of fields that will be skipped when
  132. outputting json log record. Defaults to [all log record attributes][pythonjsonlogger.core.RESERVED_ATTRS].
  133. timestamp: an optional string/boolean field to add a timestamp when
  134. outputting the json log record. If string is passed, timestamp will be added
  135. to log record using string as key. If True boolean is passed, timestamp key
  136. will be "timestamp". Defaults to False/off.
  137. exc_info_as_array: break the exc_info into a list of lines based on line breaks.
  138. stack_info_as_array: break the stack_info into a list of lines based on line breaks.
  139. *Changed in 3.1*:
  140. - you can now use custom values for style by setting validate to `False`.
  141. The value is stored in `self._style` as a string. The `parse` method will need to be
  142. overridden in order to support the new style.
  143. - Renaming fields now preserves the order that fields were added in and avoids adding
  144. missing fields. The original behaviour, missing fields have a value of `None`, is still
  145. available by setting `rename_fields_keep_missing` to `True`.
  146. *Added in 4.0*:
  147. - `fmt` now supports comma seperated lists (`style=","`). Note that this style is specific
  148. to `python-json-logger` and thus care should be taken to not to pass this format to other
  149. logging Formatter implementations.
  150. - `fmt` now supports sequences of strings (e.g. lists and tuples) of field names.
  151. """
  152. ## logging.Formatter compatibility
  153. ## ---------------------------------------------------------------------
  154. # Note: validate added in python 3.8, defaults added in 3.10
  155. if fmt is None or isinstance(fmt, str):
  156. if style in logging._STYLES:
  157. _style = logging._STYLES[style][0](fmt) # type: ignore[operator]
  158. if validate:
  159. _style.validate()
  160. self._style = _style
  161. self._fmt = _style._fmt
  162. elif style == "," or not validate:
  163. self._style = style
  164. self._fmt = fmt
  165. # TODO: Validate comma format
  166. else:
  167. raise ValueError("Style must be one of: '%{$,'")
  168. self._required_fields = self.parse()
  169. # Note: we do this check second as string is still a Sequence[str]
  170. elif isinstance(fmt, Sequence):
  171. self._style = "__sequence__"
  172. self._fmt = str(fmt)
  173. self._required_fields = list(fmt)
  174. self.datefmt = datefmt
  175. ## JSON Logging specific
  176. ## ---------------------------------------------------------------------
  177. self.prefix = prefix
  178. # We recreate the dict in rename_fields and static_fields to support internal/external
  179. # references which require getting the item to do the conversion.
  180. # For more details see: https://github.com/nhairs/python-json-logger/pull/45
  181. self.rename_fields = (
  182. {key: rename_fields[key] for key in rename_fields} if rename_fields is not None else {}
  183. )
  184. self.static_fields = (
  185. {key: static_fields[key] for key in static_fields} if static_fields is not None else {}
  186. )
  187. self.rename_fields_keep_missing = rename_fields_keep_missing
  188. self.reserved_attrs = set(reserved_attrs if reserved_attrs is not None else RESERVED_ATTRS)
  189. self.timestamp = timestamp
  190. self._skip_fields = set(self._required_fields)
  191. self._skip_fields.update(self.reserved_attrs)
  192. self.defaults = defaults if defaults is not None else {}
  193. self.exc_info_as_array = exc_info_as_array
  194. self.stack_info_as_array = stack_info_as_array
  195. return
  196. def format(self, record: logging.LogRecord) -> str:
  197. """Formats a log record and serializes to json
  198. Args:
  199. record: the record to format
  200. """
  201. message_dict: dict[str, Any] = {}
  202. # TODO: logging.LogRecord.msg and logging.LogRecord.message in typeshed
  203. # are always type of str. We shouldn't need to override that.
  204. if isinstance(record.msg, dict):
  205. message_dict = record.msg
  206. record.message = ""
  207. else:
  208. record.message = record.getMessage()
  209. # only format time if needed
  210. if "asctime" in self._required_fields:
  211. record.asctime = self.formatTime(record, self.datefmt)
  212. # Display formatted exception, but allow overriding it in the
  213. # user-supplied dict.
  214. if record.exc_info and not message_dict.get("exc_info"):
  215. message_dict["exc_info"] = self.formatException(record.exc_info)
  216. if not message_dict.get("exc_info") and record.exc_text:
  217. message_dict["exc_info"] = record.exc_text
  218. # Display formatted record of stack frames
  219. # default format is a string returned from :func:`traceback.print_stack`
  220. if record.stack_info and not message_dict.get("stack_info"):
  221. message_dict["stack_info"] = self.formatStack(record.stack_info)
  222. log_data: LogData = {}
  223. self.add_fields(log_data, record, message_dict)
  224. log_data = self.process_log_record(log_data)
  225. return self.serialize_log_record(log_data)
  226. ## JSON Formatter Specific Methods
  227. ## -------------------------------------------------------------------------
  228. def parse(self) -> list[str]:
  229. """Parses format string looking for substitutions
  230. This method is responsible for returning a list of fields (as strings)
  231. to include in all log messages.
  232. You can support custom styles by overriding this method.
  233. Returns:
  234. list of fields to be extracted and serialized
  235. """
  236. if self._fmt is None:
  237. return []
  238. if isinstance(self._style, str):
  239. if self._style == "__sequence__":
  240. raise RuntimeError("Must not call parse when fmt is a sequence of strings")
  241. if self._style == ",":
  242. return [field.strip() for field in self._fmt.split(",") if field.strip()]
  243. raise ValueError(f"Style {self._style!r} is not supported")
  244. if isinstance(self._style, logging.StringTemplateStyle):
  245. formatter_style_pattern = STYLE_STRING_TEMPLATE_REGEX
  246. elif isinstance(self._style, logging.StrFormatStyle):
  247. formatter_style_pattern = STYLE_STRING_FORMAT_REGEX
  248. elif isinstance(self._style, logging.PercentStyle):
  249. # PercentStyle is parent class of StringTemplateStyle and StrFormatStyle
  250. # so it must be checked last.
  251. formatter_style_pattern = STYLE_PERCENT_REGEX
  252. else:
  253. raise ValueError(f"Style {self._style!r} is not supported")
  254. return formatter_style_pattern.findall(self._fmt)
  255. def serialize_log_record(self, log_data: LogData) -> str:
  256. """Returns the final representation of the data to be logged
  257. Args:
  258. log_data: the data
  259. *Changed in 4.0*: `log_record` renamed to `log_data`
  260. """
  261. return self.prefix + self.jsonify_log_record(log_data)
  262. def add_fields(
  263. self,
  264. log_data: dict[str, Any],
  265. record: logging.LogRecord,
  266. message_dict: dict[str, Any],
  267. ) -> None:
  268. """Extract fields from a LogRecord for logging
  269. This method can be overridden to implement custom logic for adding fields.
  270. Args:
  271. log_data: data that will be logged
  272. record: the record to extract data from
  273. message_dict: dictionary that was logged instead of a message. e.g
  274. `logger.info({"is_this_message_dict": True})`
  275. *Changed in 4.0*: `log_record` renamed to `log_data`
  276. """
  277. for field in self.defaults:
  278. log_data[self._get_rename(field)] = self.defaults[field]
  279. for field in self._required_fields:
  280. log_data[self._get_rename(field)] = record.__dict__.get(field)
  281. for data_dict in [self.static_fields, message_dict]:
  282. for key, value in data_dict.items():
  283. log_data[self._get_rename(key)] = value
  284. merge_record_extra(
  285. record,
  286. log_data,
  287. reserved=self._skip_fields,
  288. rename_fields=self.rename_fields,
  289. )
  290. if self.timestamp:
  291. key = self.timestamp if isinstance(self.timestamp, str) else "timestamp"
  292. log_data[self._get_rename(key)] = datetime.fromtimestamp(
  293. record.created, tz=timezone.utc
  294. )
  295. if self.rename_fields_keep_missing:
  296. for field in self.rename_fields.values():
  297. if field not in log_data:
  298. log_data[field] = None
  299. return
  300. def _get_rename(self, key: str) -> str:
  301. return self.rename_fields.get(key, key)
  302. # Child Methods
  303. # ..........................................................................
  304. def jsonify_log_record(self, log_data: LogData) -> str:
  305. """Convert the log data into a JSON string.
  306. Child classes MUST override this method.
  307. Args:
  308. log_data: the data to serialize
  309. *Changed in 4.0*: `log_record` renamed to `log_data`
  310. """
  311. raise NotImplementedError()
  312. def process_log_record(self, log_data: LogData) -> LogData:
  313. """Custom processing of the data to be logged.
  314. Child classes can override this method to alter the log record before it
  315. is serialized.
  316. Args:
  317. log_data: incoming data
  318. *Changed in 4.0*: `log_record` renamed to `log_data`
  319. """
  320. return log_data
  321. def formatException(self, ei) -> str | list[str]: # type: ignore[override]
  322. """Format and return the specified exception information.
  323. If exc_info_as_array is set to True, This method returns an array of strings.
  324. """
  325. exception_info_str = super().formatException(ei)
  326. return exception_info_str.splitlines() if self.exc_info_as_array else exception_info_str
  327. def formatStack(self, stack_info) -> str | list[str]: # type: ignore[override]
  328. """Format and return the specified stack information.
  329. If stack_info_as_array is set to True, This method returns an array of strings.
  330. """
  331. stack_info_str = super().formatStack(stack_info)
  332. return stack_info_str.splitlines() if self.stack_info_as_array else stack_info_str