_wsgi_common.py 7.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262
  1. from contextlib import contextmanager
  2. import json
  3. from copy import deepcopy
  4. import sentry_sdk
  5. from sentry_sdk._types import SENSITIVE_DATA_SUBSTITUTE
  6. from sentry_sdk.scope import should_send_default_pii
  7. from sentry_sdk.utils import AnnotatedValue, logger
  8. try:
  9. from django.http.request import RawPostDataException
  10. except ImportError:
  11. RawPostDataException = None
  12. from typing import TYPE_CHECKING
  13. if TYPE_CHECKING:
  14. from typing import Any
  15. from typing import Dict
  16. from typing import Iterator
  17. from typing import Mapping
  18. from typing import MutableMapping
  19. from typing import Optional
  20. from typing import Union
  21. from sentry_sdk._types import Event, HttpStatusCodeRange
  22. SENSITIVE_ENV_KEYS = (
  23. "REMOTE_ADDR",
  24. "HTTP_X_FORWARDED_FOR",
  25. "HTTP_SET_COOKIE",
  26. "HTTP_COOKIE",
  27. "HTTP_AUTHORIZATION",
  28. "HTTP_X_API_KEY",
  29. "HTTP_X_FORWARDED_FOR",
  30. "HTTP_X_REAL_IP",
  31. )
  32. SENSITIVE_HEADERS = tuple(
  33. x[len("HTTP_") :] for x in SENSITIVE_ENV_KEYS if x.startswith("HTTP_")
  34. )
  35. DEFAULT_HTTP_METHODS_TO_CAPTURE = (
  36. "CONNECT",
  37. "DELETE",
  38. "GET",
  39. # "HEAD", # do not capture HEAD requests by default
  40. # "OPTIONS", # do not capture OPTIONS requests by default
  41. "PATCH",
  42. "POST",
  43. "PUT",
  44. "TRACE",
  45. )
  46. # This noop context manager can be replaced with "from contextlib import nullcontext" when we drop Python 3.6 support
  47. @contextmanager
  48. def nullcontext() -> "Iterator[None]":
  49. yield
  50. def request_body_within_bounds(
  51. client: "Optional[sentry_sdk.client.BaseClient]", content_length: int
  52. ) -> bool:
  53. if client is None:
  54. return False
  55. bodies = client.options["max_request_body_size"]
  56. return not (
  57. bodies == "never"
  58. or (bodies == "small" and content_length > 10**3)
  59. or (bodies == "medium" and content_length > 10**4)
  60. )
  61. class RequestExtractor:
  62. """
  63. Base class for request extraction.
  64. """
  65. # It does not make sense to make this class an ABC because it is not used
  66. # for typing, only so that child classes can inherit common methods from
  67. # it. Only some child classes implement all methods that raise
  68. # NotImplementedError in this class.
  69. def __init__(self, request: "Any") -> None:
  70. self.request = request
  71. def extract_into_event(self, event: "Event") -> None:
  72. client = sentry_sdk.get_client()
  73. if not client.is_active():
  74. return
  75. data: "Optional[Union[AnnotatedValue, Dict[str, Any]]]" = None
  76. content_length = self.content_length()
  77. request_info = event.get("request", {})
  78. if should_send_default_pii():
  79. request_info["cookies"] = dict(self.cookies())
  80. if not request_body_within_bounds(client, content_length):
  81. data = AnnotatedValue.removed_because_over_size_limit()
  82. else:
  83. # First read the raw body data
  84. # It is important to read this first because if it is Django
  85. # it will cache the body and then we can read the cached version
  86. # again in parsed_body() (or json() or wherever).
  87. raw_data = None
  88. try:
  89. raw_data = self.raw_data()
  90. except (RawPostDataException, ValueError):
  91. # If DjangoRestFramework is used it already read the body for us
  92. # so reading it here will fail. We can ignore this.
  93. pass
  94. parsed_body = self.parsed_body()
  95. if parsed_body is not None:
  96. data = parsed_body
  97. elif raw_data:
  98. data = AnnotatedValue.removed_because_raw_data()
  99. else:
  100. data = None
  101. if data is not None:
  102. request_info["data"] = data
  103. event["request"] = deepcopy(request_info)
  104. def content_length(self) -> int:
  105. try:
  106. return int(self.env().get("CONTENT_LENGTH", 0))
  107. except ValueError:
  108. return 0
  109. def cookies(self) -> "MutableMapping[str, Any]":
  110. raise NotImplementedError()
  111. def raw_data(self) -> "Optional[Union[str, bytes]]":
  112. raise NotImplementedError()
  113. def form(self) -> "Optional[Dict[str, Any]]":
  114. raise NotImplementedError()
  115. def parsed_body(self) -> "Optional[Dict[str, Any]]":
  116. try:
  117. form = self.form()
  118. except Exception:
  119. form = None
  120. try:
  121. files = self.files()
  122. except Exception:
  123. files = None
  124. if form or files:
  125. data = {}
  126. if form:
  127. data = dict(form.items())
  128. if files:
  129. for key in files.keys():
  130. data[key] = AnnotatedValue.removed_because_raw_data()
  131. return data
  132. return self.json()
  133. def is_json(self) -> bool:
  134. return _is_json_content_type(self.env().get("CONTENT_TYPE"))
  135. def json(self) -> "Optional[Any]":
  136. try:
  137. if not self.is_json():
  138. return None
  139. try:
  140. raw_data = self.raw_data()
  141. except (RawPostDataException, ValueError):
  142. # The body might have already been read, in which case this will
  143. # fail
  144. raw_data = None
  145. if raw_data is None:
  146. return None
  147. if isinstance(raw_data, str):
  148. return json.loads(raw_data)
  149. else:
  150. return json.loads(raw_data.decode("utf-8"))
  151. except ValueError:
  152. pass
  153. return None
  154. def files(self) -> "Optional[Dict[str, Any]]":
  155. raise NotImplementedError()
  156. def size_of_file(self, file: "Any") -> int:
  157. raise NotImplementedError()
  158. def env(self) -> "Dict[str, Any]":
  159. raise NotImplementedError()
  160. def _is_json_content_type(ct: "Optional[str]") -> bool:
  161. mt = (ct or "").split(";", 1)[0]
  162. return (
  163. mt == "application/json"
  164. or (mt.startswith("application/"))
  165. and mt.endswith("+json")
  166. )
  167. def _filter_headers(
  168. headers: "Mapping[str, str]",
  169. use_annotated_value: bool = True,
  170. ) -> "Mapping[str, Union[AnnotatedValue, str]]":
  171. if should_send_default_pii():
  172. return headers
  173. substitute: "Union[AnnotatedValue, str]"
  174. if use_annotated_value:
  175. substitute = AnnotatedValue.removed_because_over_size_limit()
  176. else:
  177. substitute = SENSITIVE_DATA_SUBSTITUTE
  178. return {
  179. k: (v if k.upper().replace("-", "_") not in SENSITIVE_HEADERS else substitute)
  180. for k, v in headers.items()
  181. }
  182. def _in_http_status_code_range(
  183. code: object, code_ranges: "list[HttpStatusCodeRange]"
  184. ) -> bool:
  185. for target in code_ranges:
  186. if isinstance(target, int):
  187. if code == target:
  188. return True
  189. continue
  190. try:
  191. if code in target:
  192. return True
  193. except TypeError:
  194. logger.warning(
  195. "failed_request_status_codes has to be a list of integers or containers"
  196. )
  197. return False
  198. class HttpCodeRangeContainer:
  199. """
  200. Wrapper to make it possible to use list[HttpStatusCodeRange] as a Container[int].
  201. Used for backwards compatibility with the old `failed_request_status_codes` option.
  202. """
  203. def __init__(self, code_ranges: "list[HttpStatusCodeRange]") -> None:
  204. self._code_ranges = code_ranges
  205. def __contains__(self, item: object) -> bool:
  206. return _in_http_status_code_range(item, self._code_ranges)