| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415 |
- """Core functionality shared by all JSON loggers"""
- ### IMPORTS
- ### ============================================================================
- ## Future
- from __future__ import annotations
- ## Standard Library
- from datetime import datetime, timezone
- import logging
- import re
- import sys
- from typing import TypeAlias, Any
- from collections.abc import Container, Sequence
- ## Installed
- ## Application
- ### CONSTANTS
- ### ============================================================================
# Attribute names that `logging.LogRecord` populates itself. `BaseJsonFormatter`
# uses this list as the default for `reserved_attrs`, i.e. the record attributes
# that are not copied into the output as "extra" fields.
RESERVED_ATTRS: list[str] = [
    "args",
    "asctime",
    "created",
    "exc_info",
    "exc_text",
    "filename",
    "funcName",
    "levelname",
    "levelno",
    "lineno",
    "module",
    "msecs",
    "message",
    "msg",
    "name",
    "pathname",
    "process",
    "processName",
    "relativeCreated",
    "stack_info",
    "thread",
    "threadName",
]
"""Default reserved attributes.
These come from the [default attributes of `LogRecord` objects](http://docs.python.org/library/logging.html#logrecord-attributes).
Note:
Although considered a constant, this list is dependent on the Python version due to
different `LogRecord` objects having different attributes in different Python versions.
*Changed in 3.0*: `RESERVED_ATTRS` is now `list[str]` instead of `tuple[str, ...]`.
"""
if sys.version_info >= (3, 12):
    # taskName added in python 3.12
    RESERVED_ATTRS.append("taskName")
    # Re-sort after appending; note the literal above is not in sorted order
    # ("msecs" precedes "message").
    # NOTE(review): indentation was lost in this copy of the file; `sort()` is
    # assumed to belong to the version branch - confirm against the original.
    RESERVED_ATTRS.sort()
# One pattern per built-in `logging` format style. Each captures (non-greedily)
# the text between the delimiters of a single placeholder; `BaseJsonFormatter.parse`
# runs `findall` with the matching pattern to obtain the list of field names.
STYLE_STRING_TEMPLATE_REGEX = re.compile(r"\$\{(.+?)\}", re.IGNORECASE)  # $ style: ${field}
STYLE_STRING_FORMAT_REGEX = re.compile(r"\{(.+?)\}", re.IGNORECASE)  # { style: {field}
STYLE_PERCENT_REGEX = re.compile(r"%\((.+?)\)", re.IGNORECASE)  # % style: %(field)
- ## Type Aliases
- ## -----------------------------------------------------------------------------
# Dictionary of output field name -> value. It is built by `add_fields`, may be
# altered by `process_log_record`, and is finally passed to `jsonify_log_record`.
LogData: TypeAlias = dict[str, Any]
"""Type alias
*Changed in 4.0*: renamed from `LogRecord` to `LogData`
"""
- ### FUNCTIONS
- ### ============================================================================
- def merge_record_extra(
- record: logging.LogRecord,
- target: dict[Any, Any],
- reserved: Container[str],
- rename_fields: dict[str, str] | None = None,
- ) -> dict[Any, Any]:
- """
- Merges extra attributes from LogRecord object into target dictionary
- Args:
- record: logging.LogRecord
- target: dict to update
- reserved: dict or list with reserved keys to skip
- rename_fields: an optional dict, used to rename field names in the output.
- e.g. Rename `levelname` to `log.level`: `{'levelname': 'log.level'}`
- *Changed in 3.1*: `reserved` is now `Container[str]`.
- """
- if rename_fields is None:
- rename_fields = {}
- for key, value in record.__dict__.items():
- # this allows to have numeric keys
- if key not in reserved and not (hasattr(key, "startswith") and key.startswith("_")):
- target[rename_fields.get(key, key)] = value
- return target
- ### CLASSES
- ### ============================================================================
- class BaseJsonFormatter(logging.Formatter):
- """Base class for all formatters
- Must not be used directly.
- *New in 3.1*
- *Changed in 3.2*: `defaults` argument is no longer ignored.
- *Added in 3.3*: `exc_info_as_array` and `stack_info_as_array` options are added.
- """
- _style: logging.PercentStyle | str # type: ignore[assignment]
- ## Parent Methods
- ## -------------------------------------------------------------------------
- # pylint: disable=too-many-arguments,super-init-not-called
- def __init__(
- self,
- fmt: str | Sequence[str] | None = None,
- datefmt: str | None = None,
- style: str = "%",
- validate: bool = True,
- *,
- prefix: str = "",
- rename_fields: dict[str, str] | None = None,
- rename_fields_keep_missing: bool = False,
- static_fields: dict[str, Any] | None = None,
- reserved_attrs: Sequence[str] | None = None,
- timestamp: bool | str = False,
- defaults: dict[str, Any] | None = None,
- exc_info_as_array: bool = False,
- stack_info_as_array: bool = False,
- ) -> None:
- """
- Args:
- fmt: String format or `Sequence` of field names of fields to log.
- datefmt: format to use when formatting `asctime` field
- style: how to extract log fields from `fmt`. Ignored if `fmt` is a `Sequence[str]`.
- validate: validate `fmt` against style, if implementing a custom `style` you
- must set this to `False`. Ignored if `fmt` is a `Sequence[str]`.
- defaults: a dictionary containing default fields that are added before all other fields and
- may be overridden. The supplied fields are still subject to `rename_fields`.
- prefix: an optional string prefix added at the beginning of
- the formatted string
- rename_fields: an optional dict, used to rename field names in the output.
- Rename `message` to `@message`: `{'message': '@message'}`
- rename_fields_keep_missing: When renaming fields, include missing fields in the output.
- static_fields: an optional dict, used to add fields with static values to all logs
- reserved_attrs: an optional list of fields that will be skipped when
- outputting json log record. Defaults to [all log record attributes][pythonjsonlogger.core.RESERVED_ATTRS].
- timestamp: an optional string/boolean field to add a timestamp when
- outputting the json log record. If string is passed, timestamp will be added
- to log record using string as key. If True boolean is passed, timestamp key
- will be "timestamp". Defaults to False/off.
- exc_info_as_array: break the exc_info into a list of lines based on line breaks.
- stack_info_as_array: break the stack_info into a list of lines based on line breaks.
- *Changed in 3.1*:
- - you can now use custom values for style by setting validate to `False`.
- The value is stored in `self._style` as a string. The `parse` method will need to be
- overridden in order to support the new style.
- - Renaming fields now preserves the order that fields were added in and avoids adding
- missing fields. The original behaviour, missing fields have a value of `None`, is still
- available by setting `rename_fields_keep_missing` to `True`.
- *Added in 4.0*:
- - `fmt` now supports comma seperated lists (`style=","`). Note that this style is specific
- to `python-json-logger` and thus care should be taken to not to pass this format to other
- logging Formatter implementations.
- - `fmt` now supports sequences of strings (e.g. lists and tuples) of field names.
- """
- ## logging.Formatter compatibility
- ## ---------------------------------------------------------------------
- # Note: validate added in python 3.8, defaults added in 3.10
- if fmt is None or isinstance(fmt, str):
- if style in logging._STYLES:
- _style = logging._STYLES[style][0](fmt) # type: ignore[operator]
- if validate:
- _style.validate()
- self._style = _style
- self._fmt = _style._fmt
- elif style == "," or not validate:
- self._style = style
- self._fmt = fmt
- # TODO: Validate comma format
- else:
- raise ValueError("Style must be one of: '%{$,'")
- self._required_fields = self.parse()
- # Note: we do this check second as string is still a Sequence[str]
- elif isinstance(fmt, Sequence):
- self._style = "__sequence__"
- self._fmt = str(fmt)
- self._required_fields = list(fmt)
- self.datefmt = datefmt
- ## JSON Logging specific
- ## ---------------------------------------------------------------------
- self.prefix = prefix
- # We recreate the dict in rename_fields and static_fields to support internal/external
- # references which require getting the item to do the conversion.
- # For more details see: https://github.com/nhairs/python-json-logger/pull/45
- self.rename_fields = (
- {key: rename_fields[key] for key in rename_fields} if rename_fields is not None else {}
- )
- self.static_fields = (
- {key: static_fields[key] for key in static_fields} if static_fields is not None else {}
- )
- self.rename_fields_keep_missing = rename_fields_keep_missing
- self.reserved_attrs = set(reserved_attrs if reserved_attrs is not None else RESERVED_ATTRS)
- self.timestamp = timestamp
- self._skip_fields = set(self._required_fields)
- self._skip_fields.update(self.reserved_attrs)
- self.defaults = defaults if defaults is not None else {}
- self.exc_info_as_array = exc_info_as_array
- self.stack_info_as_array = stack_info_as_array
- return
- def format(self, record: logging.LogRecord) -> str:
- """Formats a log record and serializes to json
- Args:
- record: the record to format
- """
- message_dict: dict[str, Any] = {}
- # TODO: logging.LogRecord.msg and logging.LogRecord.message in typeshed
- # are always type of str. We shouldn't need to override that.
- if isinstance(record.msg, dict):
- message_dict = record.msg
- record.message = ""
- else:
- record.message = record.getMessage()
- # only format time if needed
- if "asctime" in self._required_fields:
- record.asctime = self.formatTime(record, self.datefmt)
- # Display formatted exception, but allow overriding it in the
- # user-supplied dict.
- if record.exc_info and not message_dict.get("exc_info"):
- message_dict["exc_info"] = self.formatException(record.exc_info)
- if not message_dict.get("exc_info") and record.exc_text:
- message_dict["exc_info"] = record.exc_text
- # Display formatted record of stack frames
- # default format is a string returned from :func:`traceback.print_stack`
- if record.stack_info and not message_dict.get("stack_info"):
- message_dict["stack_info"] = self.formatStack(record.stack_info)
- log_data: LogData = {}
- self.add_fields(log_data, record, message_dict)
- log_data = self.process_log_record(log_data)
- return self.serialize_log_record(log_data)
- ## JSON Formatter Specific Methods
- ## -------------------------------------------------------------------------
- def parse(self) -> list[str]:
- """Parses format string looking for substitutions
- This method is responsible for returning a list of fields (as strings)
- to include in all log messages.
- You can support custom styles by overriding this method.
- Returns:
- list of fields to be extracted and serialized
- """
- if self._fmt is None:
- return []
- if isinstance(self._style, str):
- if self._style == "__sequence__":
- raise RuntimeError("Must not call parse when fmt is a sequence of strings")
- if self._style == ",":
- return [field.strip() for field in self._fmt.split(",") if field.strip()]
- raise ValueError(f"Style {self._style!r} is not supported")
- if isinstance(self._style, logging.StringTemplateStyle):
- formatter_style_pattern = STYLE_STRING_TEMPLATE_REGEX
- elif isinstance(self._style, logging.StrFormatStyle):
- formatter_style_pattern = STYLE_STRING_FORMAT_REGEX
- elif isinstance(self._style, logging.PercentStyle):
- # PercentStyle is parent class of StringTemplateStyle and StrFormatStyle
- # so it must be checked last.
- formatter_style_pattern = STYLE_PERCENT_REGEX
- else:
- raise ValueError(f"Style {self._style!r} is not supported")
- return formatter_style_pattern.findall(self._fmt)
- def serialize_log_record(self, log_data: LogData) -> str:
- """Returns the final representation of the data to be logged
- Args:
- log_data: the data
- *Changed in 4.0*: `log_record` renamed to `log_data`
- """
- return self.prefix + self.jsonify_log_record(log_data)
- def add_fields(
- self,
- log_data: dict[str, Any],
- record: logging.LogRecord,
- message_dict: dict[str, Any],
- ) -> None:
- """Extract fields from a LogRecord for logging
- This method can be overridden to implement custom logic for adding fields.
- Args:
- log_data: data that will be logged
- record: the record to extract data from
- message_dict: dictionary that was logged instead of a message. e.g
- `logger.info({"is_this_message_dict": True})`
- *Changed in 4.0*: `log_record` renamed to `log_data`
- """
- for field in self.defaults:
- log_data[self._get_rename(field)] = self.defaults[field]
- for field in self._required_fields:
- log_data[self._get_rename(field)] = record.__dict__.get(field)
- for data_dict in [self.static_fields, message_dict]:
- for key, value in data_dict.items():
- log_data[self._get_rename(key)] = value
- merge_record_extra(
- record,
- log_data,
- reserved=self._skip_fields,
- rename_fields=self.rename_fields,
- )
- if self.timestamp:
- key = self.timestamp if isinstance(self.timestamp, str) else "timestamp"
- log_data[self._get_rename(key)] = datetime.fromtimestamp(
- record.created, tz=timezone.utc
- )
- if self.rename_fields_keep_missing:
- for field in self.rename_fields.values():
- if field not in log_data:
- log_data[field] = None
- return
- def _get_rename(self, key: str) -> str:
- return self.rename_fields.get(key, key)
- # Child Methods
- # ..........................................................................
- def jsonify_log_record(self, log_data: LogData) -> str:
- """Convert the log data into a JSON string.
- Child classes MUST override this method.
- Args:
- log_data: the data to serialize
- *Changed in 4.0*: `log_record` renamed to `log_data`
- """
- raise NotImplementedError()
- def process_log_record(self, log_data: LogData) -> LogData:
- """Custom processing of the data to be logged.
- Child classes can override this method to alter the log record before it
- is serialized.
- Args:
- log_data: incoming data
- *Changed in 4.0*: `log_record` renamed to `log_data`
- """
- return log_data
- def formatException(self, ei) -> str | list[str]: # type: ignore[override]
- """Format and return the specified exception information.
- If exc_info_as_array is set to True, This method returns an array of strings.
- """
- exception_info_str = super().formatException(ei)
- return exception_info_str.splitlines() if self.exc_info_as_array else exception_info_str
- def formatStack(self, stack_info) -> str | list[str]: # type: ignore[override]
- """Format and return the specified stack information.
- If stack_info_as_array is set to True, This method returns an array of strings.
- """
- stack_info_str = super().formatStack(stack_info)
- return stack_info_str.splitlines() if self.stack_info_as_array else stack_info_str
|