wsgi.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328
  1. import sys
  2. from functools import partial
  3. from typing import TYPE_CHECKING
  4. import sentry_sdk
  5. from sentry_sdk._werkzeug import _get_headers, get_host
  6. from sentry_sdk.api import continue_trace
  7. from sentry_sdk.consts import OP
  8. from sentry_sdk.integrations._wsgi_common import (
  9. DEFAULT_HTTP_METHODS_TO_CAPTURE,
  10. _filter_headers,
  11. nullcontext,
  12. )
  13. from sentry_sdk.scope import should_send_default_pii, use_isolation_scope
  14. from sentry_sdk.sessions import track_session
  15. from sentry_sdk.tracing import Transaction, TransactionSource
  16. from sentry_sdk.utils import (
  17. ContextVar,
  18. capture_internal_exceptions,
  19. event_from_exception,
  20. reraise,
  21. )
  if TYPE_CHECKING:
      # Everything in this branch exists for static type checking only; the
      # annotations elsewhere in the module refer to these names as strings.
      from typing import (
          Any,
          Callable,
          Dict,
          Iterator,
          Optional,
          Protocol,
          Tuple,
          TypeVar,
      )

      from sentry_sdk._types import Event, EventProcessor
      from sentry_sdk.utils import ExcInfo

      WsgiResponseIter = TypeVar("WsgiResponseIter")
      WsgiResponseHeaders = TypeVar("WsgiResponseHeaders")
      WsgiExcInfo = TypeVar("WsgiExcInfo")

      class StartResponse(Protocol):
          """Call signature of a WSGI ``start_response`` callable."""

          def __call__(
              self,
              status: str,
              response_headers: "WsgiResponseHeaders",
              exc_info: "Optional[WsgiExcInfo]" = None,
          ) -> "WsgiResponseIter":  # type: ignore
              ...


  # Flag set to True by SentryWsgiMiddleware.__call__ while it handles a
  # request and reset to False afterwards; read with a default of False.
  _wsgi_middleware_applied = ContextVar("sentry_wsgi_middleware_applied")
  def wsgi_decoding_dance(s: str, charset: str = "utf-8", errors: str = "replace") -> str:
      """Re-interpret a latin1-decoded string using ``charset``.

      ``s`` is encoded back to bytes as latin1 and those bytes are decoded
      with ``charset``; ``errors`` is the decode error handler.
      """
      raw_bytes = s.encode("latin1")
      return raw_bytes.decode(charset, errors)
  def get_request_url(
      environ: "Dict[str, str]", use_x_forwarded_for: bool = False
  ) -> str:
      """Build the absolute request URL (no query string) from a WSGI environ.

      The path is ``SCRIPT_NAME`` (trailing slashes removed) joined to
      ``PATH_INFO`` (leading slashes removed). The scheme comes from
      ``wsgi.url_scheme``; when ``use_x_forwarded_for`` is true, the
      ``HTTP_X_FORWARDED_PROTO`` entry takes precedence if present. The host
      is resolved by ``get_host`` with the same flag.
      """
      mount_point = environ.get("SCRIPT_NAME", "").rstrip("/")
      remainder = environ.get("PATH_INFO", "").lstrip("/")
      raw_path = f"{mount_point}/{remainder}"

      scheme = environ.get("wsgi.url_scheme")
      if use_x_forwarded_for:
          scheme = environ.get("HTTP_X_FORWARDED_PROTO", scheme)

      # Host is resolved before the path is decoded, as in the tuple-based
      # formatting this replaces.
      host = get_host(environ, use_x_forwarded_for)
      decoded_path = wsgi_decoding_dance(raw_path).lstrip("/")
      return f"{scheme}://{host}/{decoded_path}"
  class SentryWsgiMiddleware:
      """WSGI middleware that instruments the wrapped application for Sentry.

      Each request is run inside a fresh isolation scope with a request-mode
      session. For HTTP methods listed in ``http_methods_to_capture`` a
      transaction is started, and any exception escaping the application is
      captured before being re-raised.
      """

      __slots__ = (
          "app",
          "use_x_forwarded_for",
          "span_origin",
          "http_methods_to_capture",
      )

      def __init__(
          self,
          app: "Callable[[Dict[str, str], Callable[..., Any]], Any]",
          use_x_forwarded_for: bool = False,
          span_origin: str = "manual",
          http_methods_to_capture: "Tuple[str, ...]" = DEFAULT_HTTP_METHODS_TO_CAPTURE,
      ) -> None:
          """Store the wrapped WSGI ``app`` and the instrumentation settings."""
          self.app = app
          self.use_x_forwarded_for = use_x_forwarded_for
          self.span_origin = span_origin
          self.http_methods_to_capture = http_methods_to_capture

      def __call__(
          self, environ: "Dict[str, str]", start_response: "Callable[..., Any]"
      ) -> "Any":
          """Handle one WSGI request and return the (possibly wrapped) response."""
          # The flag is already set when this call is nested inside another
          # instrumented call: just delegate to the app.
          if _wsgi_middleware_applied.get(False):
              return self.app(environ, start_response)

          _wsgi_middleware_applied.set(True)
          try:
              with sentry_sdk.isolation_scope() as scope, track_session(
                  scope, session_mode="request"
              ):
                  with capture_internal_exceptions():
                      scope.clear_breadcrumbs()
                      scope._name = "wsgi"
                      processor = _make_wsgi_event_processor(
                          environ, self.use_x_forwarded_for
                      )
                      scope.add_event_processor(processor)

                  http_method = environ.get("REQUEST_METHOD", "").upper()

                  transaction = None
                  if http_method in self.http_methods_to_capture:
                      transaction = continue_trace(
                          environ,
                          op=OP.HTTP_SERVER,
                          name="generic WSGI request",
                          source=TransactionSource.ROUTE,
                          origin=self.span_origin,
                      )

                  # Without a transaction the app runs under a no-op context.
                  if transaction is None:
                      transaction_cm = nullcontext()
                  else:
                      transaction_cm = sentry_sdk.start_transaction(
                          transaction,
                          custom_sampling_context={"wsgi_environ": environ},
                      )

                  # The app gets a start_response that also records the HTTP
                  # status on the transaction (if any).
                  wrapped_start_response = partial(
                      _sentry_start_response, start_response, transaction
                  )

                  with transaction_cm:
                      try:
                          response = self.app(environ, wrapped_start_response)
                      except BaseException:
                          reraise(*_capture_exception())
          finally:
              _wsgi_middleware_applied.set(False)

          # uWSGI decides whether to use its "offload" mechanism for file
          # responses through a pointer equality check on the response object
          # (see https://github.com/unbit/uwsgi/blob/8d116f7ea2b098c11ce54d0b3a561c54dcd11929/plugins/python/wsgi_subhandler.c#L278).
          # Returning a _ScopedResponse wrapper would make that check always
          # fail, because it tests that the files are exactly the same.
          #
          # So when wsgi.file_wrapper is available in the environ (in which
          # case it would have been used to build a file response) and the
          # response is file-like (it has `fileno`), the original response
          # object is handed back untouched. Even with offloading disabled,
          # uWSGI applies optimizations to file-like objects that should not
          # be interfered with.
          file_wrapper_available = environ.get("wsgi.file_wrapper")
          if file_wrapper_available and getattr(response, "fileno", None) is not None:
              return response

          return _ScopedResponse(scope, response)
  def _sentry_start_response(
      old_start_response: "StartResponse",
      transaction: "Optional[Transaction]",
      status: str,
      response_headers: "WsgiResponseHeaders",
      exc_info: "Optional[WsgiExcInfo]" = None,
  ) -> "WsgiResponseIter":  # type: ignore[type-var]
      """Record the HTTP status on ``transaction`` and call the real start_response.

      The numeric code is the part of ``status`` before the first space.
      Parsing it and updating the transaction happen under
      ``capture_internal_exceptions``; ``old_start_response`` is then called
      and its result returned.
      """
      with capture_internal_exceptions():
          code_text = status.split(" ", 1)[0]
          status_code = int(code_text)
          if transaction is not None:
              transaction.set_http_status(status_code)

      if exc_info is not None:
          return old_start_response(status, response_headers, exc_info)

      # The Django Rest Framework WSGI test client, and likely other
      # (incorrect) implementations, cannot cope with an exc_info argument.
      # Only pass a third argument when there is actually something to pass.
      return old_start_response(status, response_headers)
  def _get_environ(environ: "Dict[str, str]") -> "Iterator[Tuple[str, str]]":
      """
      Yield ``(key, value)`` pairs for the environ entries we explicitly
      capture: SERVER_NAME and SERVER_PORT, plus REMOTE_ADDR when sending
      default PII is enabled. Keys missing from ``environ`` are skipped.
      """
      wanted = ["SERVER_NAME", "SERVER_PORT"]
      if should_send_default_pii():
          # Makes debugging of proxy setups easier; the proxy headers
          # themselves are in the request headers.
          wanted.append("REMOTE_ADDR")

      yield from ((name, environ[name]) for name in wanted if name in environ)
  def get_client_ip(environ: "Dict[str, str]") -> "Optional[Any]":
      """
      Infer the user IP address from various headers. This cannot be used in
      security sensitive situations since the value may be forged from a client,
      but it's good enough for the event payload.

      Lookup order:

      1. the first comma-separated entry of ``HTTP_X_FORWARDED_FOR``, stripped
         of surrounding whitespace;
      2. ``HTTP_X_REAL_IP``;
      3. ``REMOTE_ADDR``.

      A header that is present but empty (or, for X-Forwarded-For, whose first
      entry is blank) is treated as absent, so the lookup falls through to the
      next source instead of returning an empty string.

      :param environ: the WSGI environ of the request.
      :return: the inferred address, or ``None`` if no source provides one.
      """
      forwarded_for = environ.get("HTTP_X_FORWARDED_FOR")
      if forwarded_for:
          # str.split always yields at least one element, so [0] is safe.
          first_hop = forwarded_for.split(",")[0].strip()
          if first_hop:
              return first_hop

      real_ip = environ.get("HTTP_X_REAL_IP")
      if real_ip:
          return real_ip

      return environ.get("REMOTE_ADDR")
  def _capture_exception() -> "ExcInfo":
      """
      Send the exception currently being handled to Sentry.

      Returns the ``sys.exc_info()`` tuple so that the caller can re-raise it
      afterwards. A ``SystemExit`` whose code is ``0`` or ``None`` is not
      reported.
      """
      exc_info = sys.exc_info()
      exc_value = exc_info[1]

      # A clean SystemExit is the only uncaught exception that is expected
      # behavior, so it is returned without being captured.
      if isinstance(exc_value, SystemExit) and exc_value.code in (0, None):
          return exc_info

      event, hint = event_from_exception(
          exc_info,
          client_options=sentry_sdk.get_client().options,
          mechanism={"type": "wsgi", "handled": False},
      )
      sentry_sdk.capture_event(event, hint=hint)
      return exc_info
  class _ScopedResponse:
      """
      Uses a separate scope for each response chunk.

      This makes WSGI apps more tolerant of:

      - WSGI servers streaming responses from a different thread (or from
        different threads) than the one that called start_response
      - close() not being called
      - WSGI servers streaming responses interleaved from the same thread
      """

      __slots__ = ("_response", "_scope")

      def __init__(
          self, scope: "sentry_sdk.scope.Scope", response: "Iterator[bytes]"
      ) -> None:
          """Remember the wrapped ``response`` and the ``scope`` to activate."""
          self._scope = scope
          self._response = response

      def __iter__(self) -> "Iterator[bytes]":
          """Yield the wrapped response's chunks, fetching each one under the scope.

          The scope is active only while the next chunk is being produced, not
          while the generator is suspended at ``yield``. Exceptions raised by
          the wrapped iterator are captured and re-raised.
          """
          chunks = iter(self._response)

          while True:
              with use_isolation_scope(self._scope):
                  try:
                      piece = next(chunks)
                  except StopIteration:
                      # Wrapped iterator exhausted: finish this generator.
                      return
                  except BaseException:
                      reraise(*_capture_exception())

              yield piece

      def close(self) -> None:
          """Close the wrapped response under the scope.

          An ``AttributeError`` (e.g. the response has no ``close``) is ignored;
          any other exception is captured and re-raised.
          """
          with use_isolation_scope(self._scope):
              try:
                  self._response.close()  # type: ignore
              except AttributeError:
                  pass
              except BaseException:
                  reraise(*_capture_exception())
  def _make_wsgi_event_processor(
      environ: "Dict[str, str]", use_x_forwarded_for: bool
  ) -> "EventProcessor":
      """Return an event processor that attaches this request's data to events.

      All values are read from ``environ`` immediately; the returned processor
      closes over those values only, not over ``environ`` itself.
      """
      # Extracting and parsing the request data this eagerly is a bit
      # unfortunate, but there are good reasons for it.
      #
      # The scope might never get torn down properly. In that case we would
      # keep an unnecessary strong reference to every object in the environ
      # (some of which may take a lot of memory) when only a few of them are
      # of interest.
      #
      # Keeping the environ around for longer than the request lifecycle is
      # also not necessarily something uWSGI can deal with:
      # https://github.com/unbit/uwsgi/issues/1950
      ip_address = get_client_ip(environ)
      url = get_request_url(environ, use_x_forwarded_for)
      raw_query = environ.get("QUERY_STRING")
      http_method = environ.get("REQUEST_METHOD")
      captured_env = dict(_get_environ(environ))
      captured_headers = _filter_headers(dict(_get_headers(environ)))

      def event_processor(event: "Event", hint: "Dict[str, Any]") -> "Event":
          with capture_internal_exceptions():
              # Fields are filled one at a time so that a failure halfway
              # through still leaves some data on the event.
              request_section = event.setdefault("request", {})

              if should_send_default_pii():
                  user_section = event.setdefault("user", {})
                  if ip_address:
                      user_section.setdefault("ip_address", ip_address)

              request_section["url"] = url
              request_section["query_string"] = raw_query
              request_section["method"] = http_method
              request_section["env"] = captured_env
              request_section["headers"] = captured_headers

          return event

      return event_processor
  275. return event_processor