aws_lambda.py 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489
  1. import functools
  2. import json
  3. import re
  4. import sys
  5. from copy import deepcopy
  6. from datetime import datetime, timedelta, timezone
  7. from os import environ
  8. import sentry_sdk
  9. from sentry_sdk.api import continue_trace
  10. from sentry_sdk.consts import OP
  11. from sentry_sdk.scope import should_send_default_pii
  12. from sentry_sdk.tracing import TransactionSource
  13. from sentry_sdk.utils import (
  14. AnnotatedValue,
  15. capture_internal_exceptions,
  16. ensure_integration_enabled,
  17. event_from_exception,
  18. logger,
  19. TimeoutThread,
  20. reraise,
  21. )
  22. from sentry_sdk.integrations import Integration
  23. from sentry_sdk.integrations._wsgi_common import _filter_headers
  24. from typing import TYPE_CHECKING
  25. if TYPE_CHECKING:
  26. from typing import Any
  27. from typing import TypeVar
  28. from typing import Callable
  29. from typing import Optional
  30. from sentry_sdk._types import EventProcessor, Event, Hint
  31. F = TypeVar("F", bound=Callable[..., Any])
# Constants

# Milliseconds. Compared against (and subtracted from) the remaining
# invocation time reported by the Lambda context to decide whether, and how
# long, the timeout-warning thread should wait.
TIMEOUT_WARNING_BUFFER = 1500  # Buffer time required to send timeout warning to Sentry
# Divisor that converts a millisecond value into seconds.
MILLIS_TO_SECONDS = 1000.0
  35. def _wrap_init_error(init_error: "F") -> "F":
  36. @ensure_integration_enabled(AwsLambdaIntegration, init_error)
  37. def sentry_init_error(*args: "Any", **kwargs: "Any") -> "Any":
  38. client = sentry_sdk.get_client()
  39. with capture_internal_exceptions():
  40. sentry_sdk.get_isolation_scope().clear_breadcrumbs()
  41. exc_info = sys.exc_info()
  42. if exc_info and all(exc_info):
  43. sentry_event, hint = event_from_exception(
  44. exc_info,
  45. client_options=client.options,
  46. mechanism={"type": "aws_lambda", "handled": False},
  47. )
  48. sentry_sdk.capture_event(sentry_event, hint=hint)
  49. else:
  50. # Fall back to AWS lambdas JSON representation of the error
  51. error_info = args[1]
  52. if isinstance(error_info, str):
  53. error_info = json.loads(error_info)
  54. sentry_event = _event_from_error_json(error_info)
  55. sentry_sdk.capture_event(sentry_event)
  56. return init_error(*args, **kwargs)
  57. return sentry_init_error # type: ignore
def _wrap_handler(handler: "F") -> "F":
    """
    Wrap a Lambda handler so that each invocation is instrumented.

    The returned function calls ``handler`` unchanged when
    ``AwsLambdaIntegration`` is not registered on the current client.
    Otherwise it runs the handler inside ``sentry_sdk.isolation_scope()`` and
    a transaction continued from the incoming headers, captures any
    ``Exception`` raised by the handler as an unhandled ``aws_lambda`` event
    and re-raises it, and stops the optional ``TimeoutThread`` afterwards.
    """

    @functools.wraps(handler)
    def sentry_handler(
        aws_event: "Any", aws_context: "Any", *args: "Any", **kwargs: "Any"
    ) -> "Any":
        # Per https://docs.aws.amazon.com/lambda/latest/dg/python-handler.html,
        # `event` here is *likely* a dictionary, but also might be a number of
        # other types (str, int, float, None).
        #
        # In some cases, it is a list (if the user is batch-invoking their
        # function, for example), in which case we'll use the first entry as a
        # representative from which to try pulling request data. (Presumably it
        # will be the same for all events in the list, since they're all hitting
        # the lambda in the same request.)

        client = sentry_sdk.get_client()
        integration = client.get_integration(AwsLambdaIntegration)

        # Integration not active: behave exactly like the unwrapped handler.
        if integration is None:
            return handler(aws_event, aws_context, *args, **kwargs)

        if isinstance(aws_event, list) and len(aws_event) >= 1:
            request_data = aws_event[0]
            batch_size = len(aws_event)
        else:
            request_data = aws_event
            batch_size = 1

        if not isinstance(request_data, dict):
            # If we're not dealing with a dictionary, we won't be able to get
            # headers, path, http method, etc in any case, so it's fine that
            # this is empty
            request_data = {}

        # Remaining time (milliseconds) as reported by the context at the
        # start of the invocation; used below as the configured timeout.
        configured_time = aws_context.get_remaining_time_in_millis()

        with sentry_sdk.isolation_scope() as scope:
            timeout_thread = None
            # Scope setup is best-effort: failures in here are captured
            # internally and do not prevent the handler from running.
            with capture_internal_exceptions():
                scope.clear_breadcrumbs()
                scope.add_event_processor(
                    _make_request_event_processor(
                        request_data, aws_context, configured_time
                    )
                )
                # The region is taken from the fourth ":"-separated field of
                # the invoked function ARN.
                scope.set_tag(
                    "aws_region", aws_context.invoked_function_arn.split(":")[3]
                )
                if batch_size > 1:
                    scope.set_tag("batch_request", True)
                    scope.set_tag("batch_size", batch_size)

                # Starting the Timeout thread only if the configured time is greater than Timeout warning
                # buffer and timeout_warning parameter is set True.
                if (
                    integration.timeout_warning
                    and configured_time > TIMEOUT_WARNING_BUFFER
                ):
                    # Seconds to wait: remaining time minus the buffer.
                    waiting_time = (
                        configured_time - TIMEOUT_WARNING_BUFFER
                    ) / MILLIS_TO_SECONDS

                    timeout_thread = TimeoutThread(
                        waiting_time,
                        configured_time / MILLIS_TO_SECONDS,
                        isolation_scope=scope,
                        current_scope=sentry_sdk.get_current_scope(),
                    )

                    # Starting the thread to raise timeout warning exception
                    timeout_thread.start()

            headers = request_data.get("headers", {})
            # Some AWS Services (ie. EventBridge) set headers as a list
            # or None, so we must ensure it is a dict
            if not isinstance(headers, dict):
                headers = {}

            transaction = continue_trace(
                headers,
                op=OP.FUNCTION_AWS,
                name=aws_context.function_name,
                source=TransactionSource.COMPONENT,
                origin=AwsLambdaIntegration.origin,
            )
            with sentry_sdk.start_transaction(
                transaction,
                custom_sampling_context={
                    "aws_event": aws_event,
                    "aws_context": aws_context,
                },
            ):
                try:
                    return handler(aws_event, aws_context, *args, **kwargs)
                except Exception:
                    # Report the failure as unhandled, then let it propagate
                    # to the Lambda runtime unchanged.
                    exc_info = sys.exc_info()
                    sentry_event, hint = event_from_exception(
                        exc_info,
                        client_options=client.options,
                        mechanism={"type": "aws_lambda", "handled": False},
                    )
                    sentry_sdk.capture_event(sentry_event, hint=hint)
                    reraise(*exc_info)
                finally:
                    # Runs on success and failure alike, so the timeout
                    # thread is always stopped once the handler has finished.
                    if timeout_thread:
                        timeout_thread.stop()

    return sentry_handler  # type: ignore
  154. def _drain_queue() -> None:
  155. with capture_internal_exceptions():
  156. client = sentry_sdk.get_client()
  157. integration = client.get_integration(AwsLambdaIntegration)
  158. if integration is not None:
  159. # Flush out the event queue before AWS kills the
  160. # process.
  161. client.flush()
class AwsLambdaIntegration(Integration):
    """
    Integration that patches the AWS Lambda bootstrap module.

    ``setup_once`` replaces the bootstrap's request-handling entry points so
    that every handler passes through ``_wrap_handler``, and wraps the
    functions that post results so that ``_drain_queue`` runs before them.
    """

    # Name under which the integration is identified.
    identifier = "aws_lambda"
    # Origin value passed to `continue_trace` for transactions created by
    # this integration.
    origin = f"auto.function.{identifier}"

    def __init__(self, timeout_warning: bool = False) -> None:
        """
        Arguments:
            timeout_warning {bool} -- when true, `_wrap_handler` starts a
                `TimeoutThread` for invocations whose remaining time exceeds
                `TIMEOUT_WARNING_BUFFER`.
        """
        self.timeout_warning = timeout_warning

    @staticmethod
    def setup_once() -> None:
        """
        Locate the Lambda bootstrap module and install the Sentry patches.

        Logs a warning and returns without patching when the bootstrap module
        cannot be found or does not expose `handle_event_request`.
        """
        lambda_bootstrap = get_lambda_bootstrap()
        if not lambda_bootstrap:
            logger.warning(
                "Not running in AWS Lambda environment, "
                "AwsLambdaIntegration disabled (could not find bootstrap module)"
            )
            return

        if not hasattr(lambda_bootstrap, "handle_event_request"):
            logger.warning(
                "Not running in AWS Lambda environment, "
                "AwsLambdaIntegration disabled (could not find handle_event_request)"
            )
            return

        # The presence of `handle_http_request` is used to detect the older
        # bootstrap layout.
        pre_37 = hasattr(lambda_bootstrap, "handle_http_request")  # Python 3.6

        if pre_37:
            # Keep a reference to the original before it is replaced below.
            old_handle_event_request = lambda_bootstrap.handle_event_request

            def sentry_handle_event_request(
                request_handler: "Any", *args: "Any", **kwargs: "Any"
            ) -> "Any":
                request_handler = _wrap_handler(request_handler)
                return old_handle_event_request(request_handler, *args, **kwargs)

            lambda_bootstrap.handle_event_request = sentry_handle_event_request

            old_handle_http_request = lambda_bootstrap.handle_http_request

            def sentry_handle_http_request(
                request_handler: "Any", *args: "Any", **kwargs: "Any"
            ) -> "Any":
                request_handler = _wrap_handler(request_handler)
                return old_handle_http_request(request_handler, *args, **kwargs)

            lambda_bootstrap.handle_http_request = sentry_handle_http_request

            # Patch to_json to drain the queue. This should work even when the
            # SDK is initialized inside of the handler

            old_to_json = lambda_bootstrap.to_json

            def sentry_to_json(*args: "Any", **kwargs: "Any") -> "Any":
                _drain_queue()
                return old_to_json(*args, **kwargs)

            lambda_bootstrap.to_json = sentry_to_json
        else:
            # Newer layout: init errors are reported through the runtime
            # client, so wrap its `post_init_error` as well.
            lambda_bootstrap.LambdaRuntimeClient.post_init_error = _wrap_init_error(
                lambda_bootstrap.LambdaRuntimeClient.post_init_error
            )

            old_handle_event_request = lambda_bootstrap.handle_event_request

            # Here the request handler is the second positional argument,
            # after the runtime client.
            def sentry_handle_event_request(  # type: ignore
                lambda_runtime_client, request_handler, *args, **kwargs
            ):
                request_handler = _wrap_handler(request_handler)
                return old_handle_event_request(
                    lambda_runtime_client, request_handler, *args, **kwargs
                )

            lambda_bootstrap.handle_event_request = sentry_handle_event_request

            # Patch the runtime client to drain the queue. This should work
            # even when the SDK is initialized inside of the handler

            def _wrap_post_function(f: "F") -> "F":
                # Returns a function that drains the queue and then calls `f`
                # with the same arguments.
                def inner(*args: "Any", **kwargs: "Any") -> "Any":
                    _drain_queue()
                    return f(*args, **kwargs)

                return inner  # type: ignore

            lambda_bootstrap.LambdaRuntimeClient.post_invocation_result = (
                _wrap_post_function(
                    lambda_bootstrap.LambdaRuntimeClient.post_invocation_result
                )
            )
            lambda_bootstrap.LambdaRuntimeClient.post_invocation_error = (
                _wrap_post_function(
                    lambda_bootstrap.LambdaRuntimeClient.post_invocation_error
                )
            )
  235. def get_lambda_bootstrap() -> "Optional[Any]":
  236. # Python 3.7: If the bootstrap module is *already imported*, it is the
  237. # one we actually want to use (no idea what's in __main__)
  238. #
  239. # Python 3.8: bootstrap is also importable, but will be the same file
  240. # as __main__ imported under a different name:
  241. #
  242. # sys.modules['__main__'].__file__ == sys.modules['bootstrap'].__file__
  243. # sys.modules['__main__'] is not sys.modules['bootstrap']
  244. #
  245. # Python 3.9: bootstrap is in __main__.awslambdaricmain
  246. #
  247. # On container builds using the `aws-lambda-python-runtime-interface-client`
  248. # (awslamdaric) module, bootstrap is located in sys.modules['__main__'].bootstrap
  249. #
  250. # Such a setup would then make all monkeypatches useless.
  251. if "bootstrap" in sys.modules:
  252. return sys.modules["bootstrap"]
  253. elif "__main__" in sys.modules:
  254. module = sys.modules["__main__"]
  255. # python3.9 runtime
  256. if hasattr(module, "awslambdaricmain") and hasattr(
  257. module.awslambdaricmain, "bootstrap"
  258. ):
  259. return module.awslambdaricmain.bootstrap
  260. elif hasattr(module, "bootstrap"):
  261. # awslambdaric python module in container builds
  262. return module.bootstrap
  263. # python3.8 runtime
  264. return module
  265. else:
  266. return None
  267. def _make_request_event_processor(
  268. aws_event: "Any", aws_context: "Any", configured_timeout: "Any"
  269. ) -> "EventProcessor":
  270. start_time = datetime.now(timezone.utc)
  271. def event_processor(
  272. sentry_event: "Event", hint: "Hint", start_time: "datetime" = start_time
  273. ) -> "Optional[Event]":
  274. remaining_time_in_milis = aws_context.get_remaining_time_in_millis()
  275. exec_duration = configured_timeout - remaining_time_in_milis
  276. extra = sentry_event.setdefault("extra", {})
  277. extra["lambda"] = {
  278. "function_name": aws_context.function_name,
  279. "function_version": aws_context.function_version,
  280. "invoked_function_arn": aws_context.invoked_function_arn,
  281. "aws_request_id": aws_context.aws_request_id,
  282. "execution_duration_in_millis": exec_duration,
  283. "remaining_time_in_millis": remaining_time_in_milis,
  284. }
  285. extra["cloudwatch logs"] = {
  286. "url": _get_cloudwatch_logs_url(aws_context, start_time),
  287. "log_group": aws_context.log_group_name,
  288. "log_stream": aws_context.log_stream_name,
  289. }
  290. request = sentry_event.get("request", {})
  291. if "httpMethod" in aws_event:
  292. request["method"] = aws_event["httpMethod"]
  293. request["url"] = _get_url(aws_event, aws_context)
  294. if "queryStringParameters" in aws_event:
  295. request["query_string"] = aws_event["queryStringParameters"]
  296. if "headers" in aws_event:
  297. request["headers"] = _filter_headers(aws_event["headers"])
  298. if should_send_default_pii():
  299. user_info = sentry_event.setdefault("user", {})
  300. identity = aws_event.get("identity")
  301. if identity is None:
  302. identity = {}
  303. id = identity.get("userArn")
  304. if id is not None:
  305. user_info.setdefault("id", id)
  306. ip = identity.get("sourceIp")
  307. if ip is not None:
  308. user_info.setdefault("ip_address", ip)
  309. if "body" in aws_event:
  310. request["data"] = aws_event.get("body", "")
  311. else:
  312. if aws_event.get("body", None):
  313. # Unfortunately couldn't find a way to get structured body from AWS
  314. # event. Meaning every body is unstructured to us.
  315. request["data"] = AnnotatedValue.removed_because_raw_data()
  316. sentry_event["request"] = deepcopy(request)
  317. return sentry_event
  318. return event_processor
  319. def _get_url(aws_event: "Any", aws_context: "Any") -> str:
  320. path = aws_event.get("path", None)
  321. headers = aws_event.get("headers")
  322. if headers is None:
  323. headers = {}
  324. host = headers.get("Host", None)
  325. proto = headers.get("X-Forwarded-Proto", None)
  326. if proto and host and path:
  327. return "{}://{}{}".format(proto, host, path)
  328. return "awslambda:///{}".format(aws_context.function_name)
  329. def _get_cloudwatch_logs_url(aws_context: "Any", start_time: "datetime") -> str:
  330. """
  331. Generates a CloudWatchLogs console URL based on the context object
  332. Arguments:
  333. aws_context {Any} -- context from lambda handler
  334. Returns:
  335. str -- AWS Console URL to logs.
  336. """
  337. formatstring = "%Y-%m-%dT%H:%M:%SZ"
  338. region = environ.get("AWS_REGION", "")
  339. url = (
  340. "https://console.{domain}/cloudwatch/home?region={region}"
  341. "#logEventViewer:group={log_group};stream={log_stream}"
  342. ";start={start_time};end={end_time}"
  343. ).format(
  344. domain="amazonaws.cn" if region.startswith("cn-") else "aws.amazon.com",
  345. region=region,
  346. log_group=aws_context.log_group_name,
  347. log_stream=aws_context.log_stream_name,
  348. start_time=(start_time - timedelta(seconds=1)).strftime(formatstring),
  349. end_time=(datetime.now(timezone.utc) + timedelta(seconds=2)).strftime(
  350. formatstring
  351. ),
  352. )
  353. return url
  354. def _parse_formatted_traceback(formatted_tb: "list[str]") -> "list[dict[str, Any]]":
  355. frames = []
  356. for frame in formatted_tb:
  357. match = re.match(r'File "(.+)", line (\d+), in (.+)', frame.strip())
  358. if match:
  359. file_name, line_number, func_name = match.groups()
  360. line_number = int(line_number)
  361. frames.append(
  362. {
  363. "filename": file_name,
  364. "function": func_name,
  365. "lineno": line_number,
  366. "vars": None,
  367. "pre_context": None,
  368. "context_line": None,
  369. "post_context": None,
  370. }
  371. )
  372. return frames
  373. def _event_from_error_json(error_json: "dict[str, Any]") -> "Event":
  374. """
  375. Converts the error JSON from AWS Lambda into a Sentry error event.
  376. This is not a full fletched event, but better than nothing.
  377. This is an example of where AWS creates the error JSON:
  378. https://github.com/aws/aws-lambda-python-runtime-interface-client/blob/2.2.1/awslambdaric/bootstrap.py#L479
  379. """
  380. event: "Event" = {
  381. "level": "error",
  382. "exception": {
  383. "values": [
  384. {
  385. "type": error_json.get("errorType"),
  386. "value": error_json.get("errorMessage"),
  387. "stacktrace": {
  388. "frames": _parse_formatted_traceback(
  389. error_json.get("stackTrace", [])
  390. ),
  391. },
  392. "mechanism": {
  393. "type": "aws_lambda",
  394. "handled": False,
  395. },
  396. }
  397. ],
  398. },
  399. }
  400. return event