serializer.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390
  1. import sys
  2. import math
  3. from collections.abc import Mapping, Sequence, Set
  4. from datetime import datetime
  5. from sentry_sdk.utils import (
  6. AnnotatedValue,
  7. capture_internal_exception,
  8. disable_capture_event,
  9. format_timestamp,
  10. safe_repr,
  11. strip_string,
  12. )
  13. from typing import TYPE_CHECKING
  if TYPE_CHECKING:
      # Everything in this branch exists for static type checkers only; the
      # rest of the module refers to these names through string annotations.
      from types import TracebackType
      from typing import (
          Any,
          Callable,
          ContextManager,
          Dict,
          List,
          Optional,
          Type,
          Union,
      )

      from sentry_sdk._types import NotImplementedType

      # A span, as a plain string-keyed dict.
      Span = Dict[str, Any]

      # Signature of the callables kept in `global_repr_processors`: called
      # with the object and a dict of hints, returns either a string or
      # `NotImplemented`.
      ReprProcessor = Callable[[Any, Dict[str, Any]], Union[NotImplementedType, str]]

      # One step of the path into the structure being serialized: a mapping
      # key (str) or a sequence index (int).
      Segment = Union[str, int]
  # bytes-like objects are not strings in Python 3, but the serializer still
  # knows how to handle them, so they are grouped with `str` here.
  serializable_str_types = (str, bytes, bytearray, memoryview)

  # Upper bound, in bytes, for the JSON-serialized payload of an event that the
  # SDK considers safe to send without the server rejecting it for its size.
  # This does not mirror any value defined server-side.
  #
  # It may be overwritten to send larger events, e.g. to a custom server. Events
  # can end up slightly larger than this because of attached metadata, so keep
  # the number conservative when changing it.
  MAX_EVENT_BYTES = 1_000_000

  # Depth and breadth limits for databags; anything beyond them is trimmed.
  # Request bodies are exempt when max_request_body_size is "always".
  MAX_DATABAG_DEPTH = 5
  MAX_DATABAG_BREADTH = 10

  # Placeholder emitted in place of an object that is already being serialized.
  CYCLE_MARKER = "<cyclic>"
  # Processors consulted, in registration order, by the serializer before it
  # applies its built-in handling to a value.
  global_repr_processors: "List[ReprProcessor]" = []


  def add_global_repr_processor(processor: "ReprProcessor") -> None:
      """
      Register `processor` at the end of `global_repr_processors`.

      :param processor: Callable taking the object and a dict of hints. It
          returns `NotImplemented` to decline, anything else is used as the
          serialized value.
      """
      global_repr_processors.append(processor)
  # Types whose instances the serializer walks element by element (string-like
  # types are excluded separately by the serializer itself).
  sequence_types: "List[type]" = [Sequence, Set]


  def add_repr_sequence_type(ty: type) -> None:
      """
      Register `ty` as an additional type to be serialized as a list.

      :param ty: The type to append to `sequence_types`.
      """
      sequence_types.append(ty)
  class Memo:
      """
      Tracks the objects currently being visited so that cycles can be detected.

      Usage::

          with memo.memoize(obj) as seen:
              if seen:
                  ...  # obj is already being visited further up the stack

      The same instance is returned by every `memoize()` call and acts as its
      own context manager, so `with` blocks are expected to be properly nested.
      """

      __slots__ = ("_ids", "_objs", "_owned")

      def __init__(self) -> None:
          # id(obj) -> obj for every object currently being visited. Holding the
          # object itself keeps it alive, so its id cannot be reused meanwhile.
          self._ids: "Dict[int, Any]" = {}
          # Stack of objects handed to memoize(), innermost last.
          self._objs: "List[Any]" = []
          # Parallel stack: True when the matching __enter__ registered the
          # object in _ids, False when it found it already registered.
          self._owned: "List[bool]" = []

      def memoize(self, obj: "Any") -> "ContextManager[bool]":
          """
          Push `obj` and return the context manager to enter for it.

          :returns: `self`; entering it yields `True` if `obj` is already being
              visited (a cycle) and `False` otherwise.
          """
          self._objs.append(obj)
          return self

      def __enter__(self) -> bool:
          obj = self._objs[-1]
          if id(obj) in self._ids:
              # An outer frame owns the registration; remember not to undo it.
              self._owned.append(False)
              return True

          self._ids[id(obj)] = obj
          self._owned.append(True)
          return False

      def __exit__(
          self,
          ty: "Optional[Type[BaseException]]",
          value: "Optional[BaseException]",
          tb: "Optional[TracebackType]",
      ) -> None:
          obj = self._objs.pop()
          # Only the frame that registered the object may unregister it.
          # Unregistering on a mere cycle hit would make the next reference to
          # the same object look new while it is still being visited.
          if self._owned.pop():
              self._ids.pop(id(obj), None)
  def serialize(event: "Dict[str, Any]", **kwargs: "Any") -> "Dict[str, Any]":
      """
      Turn an event dict into a JSON-friendly dict.

      Used for the final event and also ahead of time for the local variables
      of the frames of a stacktrace (see `is_vars`).

      The input is walked recursively. While walking, the serializer:

      * trims "databags" (vars, request bodies, breadcrumbs, extra) to
        MAX_DATABAG_DEPTH levels and MAX_DATABAG_BREADTH entries per container,
      * falls back to safe_repr() for values it has no other handling for,
      * replaces an object that is reached again while it is still being
        serialized with CYCLE_MARKER,
      * records trimming and AnnotatedValue metadata and, unless `is_vars` is
        set, attaches it to the returned dict under the `_meta` key.

      :param max_request_body_size: If set to "always", request bodies are never trimmed.
      :param max_value_length: Maximum length strings are stripped to; passed on to strip_string().
      :param is_vars: Set when serializing frame variables early. Values that would otherwise be emitted as-is are repr()'d instead so that their type stays visible (e.g. str vs. bytes).
      :param custom_repr: Optional repr function tried before safe_repr(). If it returns a falsy value or raises, safe_repr() is used.

      Any remaining keyword arguments are forwarded to the serialization of the
      root node.
      """
      memo = Memo()
      # Segments leading from the root to the node currently being serialized.
      path: "List[Segment]" = []
      # meta_stack[i] is the metadata node for path[:i]; filled lazily.
      meta_stack: "List[Dict[str, Any]]" = []

      keep_request_bodies: bool = kwargs.pop("max_request_body_size", None) == "always"
      max_value_length: "Optional[int]" = kwargs.pop("max_value_length", None)
      is_vars = kwargs.pop("is_vars", False)
      custom_repr: "Callable[..., Optional[str]]" = kwargs.pop("custom_repr", None)

      def _safe_repr_wrapper(value: "Any") -> str:
          """Return custom_repr(value) if it yields something truthy, else safe_repr(value)."""
          try:
              custom = custom_repr(value) if custom_repr is not None else None
              return custom or safe_repr(value)
          except Exception:
              return safe_repr(value)

      def _annotate(**meta: "Any") -> None:
          """Merge `meta` into the metadata node of the current path."""
          # Create the metadata nodes for every path prefix that has none yet.
          while len(meta_stack) <= len(path):
              try:
                  segment = path[len(meta_stack) - 1]
                  node = meta_stack[-1].setdefault(str(segment), {})
              except IndexError:
                  # No parent node yet: this is the root node.
                  node = {}

              meta_stack.append(node)

          meta_stack[-1].setdefault("", {}).update(meta)

      def _is_databag() -> "Optional[bool]":
          """
          A databag is any value that we need to trim.
          True for stuff like vars, request bodies, breadcrumbs and extra.

          :returns: `True` for "yes", `False` for "no", `None` for "maybe soon".
          """
          if is_vars:
              return True

          try:
              request_body = _is_request_body()
              if request_body is not False:
                  # Either a definite yes or "path too short to tell".
                  return request_body

              head = path[0]
              if head == "breadcrumbs" and path[1] == "values":
                  # Evaluated only for its IndexError: the path has to reach
                  # into an individual breadcrumb.
                  path[2]
                  return True

              return head == "extra"
          except IndexError:
              return None

      def _is_span_attribute() -> "Optional[bool]":
          """True below spans.<index>.data, None while the path is too short to tell."""
          try:
              return path[0] == "spans" and path[2] == "data"
          except IndexError:
              return None

      def _is_request_body() -> "Optional[bool]":
          """True below request.data, None while the path is too short to tell."""
          try:
              return path[0] == "request" and path[1] == "data"
          except IndexError:
              return None

      def _serialize_node(
          obj: "Any",
          is_databag: "Optional[bool]" = None,
          is_request_body: "Optional[bool]" = None,
          should_repr_strings: "Optional[bool]" = None,
          segment: "Optional[Segment]" = None,
          remaining_breadth: "Optional[Union[int, float]]" = None,
          remaining_depth: "Optional[Union[int, float]]" = None,
      ) -> "Any":
          """
          Serialize `obj`, taking care of path bookkeeping, cycle detection and
          error containment around _serialize_node_impl().
          """
          has_segment = segment is not None
          if has_segment:
              path.append(segment)

          try:
              with memo.memoize(obj) as already_visiting:
                  if already_visiting:
                      return CYCLE_MARKER

                  return _serialize_node_impl(
                      obj,
                      is_databag=is_databag,
                      is_request_body=is_request_body,
                      should_repr_strings=should_repr_strings,
                      remaining_depth=remaining_depth,
                      remaining_breadth=remaining_breadth,
                  )
          except BaseException:
              # A failure on one node must not abort the whole serialization.
              capture_internal_exception(sys.exc_info())

              if not is_databag:
                  return None

              return "<failed to serialize, use init(debug=True) to see error logs>"
          finally:
              if has_segment:
                  path.pop()
                  # Drop the metadata nodes that belonged to the subtree just left.
                  del meta_stack[len(path) + 1 :]

      def _flatten_annotated(obj: "Any") -> "Any":
          """Unwrap an AnnotatedValue, recording its metadata at the current path."""
          if not isinstance(obj, AnnotatedValue):
              return obj

          _annotate(**obj.metadata)
          return obj.value

      def _serialize_node_impl(
          obj: "Any",
          is_databag: "Optional[bool]",
          is_request_body: "Optional[bool]",
          should_repr_strings: "Optional[bool]",
          remaining_depth: "Optional[Union[float, int]]",
          remaining_breadth: "Optional[Union[float, int]]",
      ) -> "Any":
          """Serialize a single node; containers recurse through _serialize_node()."""
          if isinstance(obj, AnnotatedValue):
              should_repr_strings = False
          elif should_repr_strings is None:
              should_repr_strings = is_vars

          if is_databag is None:
              is_databag = _is_databag()

          if is_request_body is None:
              is_request_body = _is_request_body()

          if is_databag:
              if is_request_body and keep_request_bodies:
                  # Request bodies are exempt from trimming.
                  remaining_depth = float("inf")
                  remaining_breadth = float("inf")
              else:
                  if remaining_depth is None:
                      remaining_depth = MAX_DATABAG_DEPTH
                  if remaining_breadth is None:
                      remaining_breadth = MAX_DATABAG_BREADTH

          obj = _flatten_annotated(obj)

          if remaining_depth is not None and remaining_depth <= 0:
              _annotate(rem=[["!limit", "x"]])
              if not is_databag:
                  return None

              clipped = strip_string(_safe_repr_wrapper(obj), max_length=max_value_length)
              return _flatten_annotated(clipped)

          if global_repr_processors and (is_databag or _is_span_attribute()):
              hints = {"memo": memo, "remaining_depth": remaining_depth}
              for processor in global_repr_processors:
                  processed = processor(obj, hints)
                  if processed is not NotImplemented:
                      return _flatten_annotated(processed)

          sentry_repr = getattr(type(obj), "__sentry_repr__", None)

          if obj is None or isinstance(obj, (bool, int, float)):
              # inf and nan are always repr()'d rather than emitted as floats.
              if should_repr_strings or (
                  isinstance(obj, float) and not math.isfinite(obj)
              ):
                  return _safe_repr_wrapper(obj)
              return obj

          if callable(sentry_repr):
              return sentry_repr(obj)

          if isinstance(obj, datetime):
              if should_repr_strings:
                  return _safe_repr_wrapper(obj)
              return str(format_timestamp(obj))

          if isinstance(obj, Mapping):
              # Work on a snapshot so that code run during serialization
              # cannot mutate the mapping while it is being iterated.
              snapshot = dict(obj.items())
              child_depth = remaining_depth - 1 if remaining_depth is not None else None
              serialized_items: "Dict[str, Any]" = {}

              for index, (key, value) in enumerate(snapshot.items()):
                  if remaining_breadth is not None and index >= remaining_breadth:
                      _annotate(len=len(snapshot))
                      break

                  key_str = str(key)
                  serialized_items[key_str] = _serialize_node(
                      value,
                      segment=key_str,
                      should_repr_strings=should_repr_strings,
                      is_databag=is_databag,
                      is_request_body=is_request_body,
                      remaining_depth=child_depth,
                      remaining_breadth=remaining_breadth,
                  )

              return serialized_items

          if not isinstance(obj, serializable_str_types) and isinstance(
              obj, tuple(sequence_types)
          ):
              child_depth = remaining_depth - 1 if remaining_depth is not None else None
              serialized_elements = []

              for index, element in enumerate(obj):
                  if remaining_breadth is not None and index >= remaining_breadth:
                      _annotate(len=len(obj))
                      break

                  serialized_elements.append(
                      _serialize_node(
                          element,
                          segment=index,
                          should_repr_strings=should_repr_strings,
                          is_databag=is_databag,
                          is_request_body=is_request_body,
                          remaining_depth=child_depth,
                          remaining_breadth=remaining_breadth,
                      )
                  )

              return serialized_elements

          # Everything else ends up as a string.
          if should_repr_strings:
              obj = _safe_repr_wrapper(obj)
          else:
              if isinstance(obj, (bytes, bytearray)):
                  obj = obj.decode("utf-8", "replace")

              if not isinstance(obj, str):
                  obj = _safe_repr_wrapper(obj)

          # Span descriptions are returned without being length-stripped.
          if len(path) == 3 and path[0] == "spans" and path[-1] == "description":
              return obj

          return _flatten_annotated(strip_string(obj, max_length=max_value_length))

      #
      # Start of serialize() function
      #
      disable_capture_event.set(True)
      try:
          serialized_event = _serialize_node(event, **kwargs)
          if not is_vars and meta_stack and isinstance(serialized_event, dict):
              serialized_event["_meta"] = meta_stack[0]

          return serialized_event
      finally:
          disable_capture_event.set(False)