# litellm.py (12 KB)
  1. import copy
  2. from typing import TYPE_CHECKING
  3. import sentry_sdk
  4. from sentry_sdk import consts
  5. from sentry_sdk.ai.monitoring import record_token_usage
  6. from sentry_sdk.ai.utils import (
  7. get_start_span_function,
  8. set_data_normalized,
  9. truncate_and_annotate_messages,
  10. transform_openai_content_part,
  11. truncate_and_annotate_embedding_inputs,
  12. )
  13. from sentry_sdk.consts import SPANDATA
  14. from sentry_sdk.integrations import DidNotEnable, Integration
  15. from sentry_sdk.scope import should_send_default_pii
  16. from sentry_sdk.utils import event_from_exception
  17. if TYPE_CHECKING:
  18. from typing import Any, Dict, List
  19. from datetime import datetime
  20. try:
  21. import litellm # type: ignore[import-not-found]
  22. from litellm import input_callback, success_callback, failure_callback
  23. except ImportError:
  24. raise DidNotEnable("LiteLLM not installed")
  25. def _get_metadata_dict(kwargs: "Dict[str, Any]") -> "Dict[str, Any]":
  26. """Get the metadata dictionary from the kwargs."""
  27. litellm_params = kwargs.setdefault("litellm_params", {})
  28. # we need this weird little dance, as metadata might be set but may be None initially
  29. metadata = litellm_params.get("metadata")
  30. if metadata is None:
  31. metadata = {}
  32. litellm_params["metadata"] = metadata
  33. return metadata
  34. def _convert_message_parts(messages: "List[Dict[str, Any]]") -> "List[Dict[str, Any]]":
  35. """
  36. Convert the message parts from OpenAI format to the `gen_ai.request.messages` format
  37. using the OpenAI-specific transformer (LiteLLM uses OpenAI's message format).
  38. Deep copies messages to avoid mutating original kwargs.
  39. """
  40. # Deep copy to avoid mutating original messages from kwargs
  41. messages = copy.deepcopy(messages)
  42. for message in messages:
  43. if not isinstance(message, dict):
  44. continue
  45. content = message.get("content")
  46. if isinstance(content, (list, tuple)):
  47. transformed = []
  48. for item in content:
  49. if isinstance(item, dict):
  50. result = transform_openai_content_part(item)
  51. # If transformation succeeded, use the result; otherwise keep original
  52. transformed.append(result if result is not None else item)
  53. else:
  54. transformed.append(item)
  55. message["content"] = transformed
  56. return messages
  57. def _input_callback(kwargs: "Dict[str, Any]") -> None:
  58. """Handle the start of a request."""
  59. integration = sentry_sdk.get_client().get_integration(LiteLLMIntegration)
  60. if integration is None:
  61. return
  62. # Get key parameters
  63. full_model = kwargs.get("model", "")
  64. try:
  65. model, provider, _, _ = litellm.get_llm_provider(full_model)
  66. except Exception:
  67. model = full_model
  68. provider = "unknown"
  69. call_type = kwargs.get("call_type", None)
  70. if call_type == "embedding" or call_type == "aembedding":
  71. operation = "embeddings"
  72. else:
  73. operation = "chat"
  74. # Start a new span/transaction
  75. span = get_start_span_function()(
  76. op=(
  77. consts.OP.GEN_AI_CHAT
  78. if operation == "chat"
  79. else consts.OP.GEN_AI_EMBEDDINGS
  80. ),
  81. name=f"{operation} {model}",
  82. origin=LiteLLMIntegration.origin,
  83. )
  84. span.__enter__()
  85. # Store span for later
  86. _get_metadata_dict(kwargs)["_sentry_span"] = span
  87. # Set basic data
  88. set_data_normalized(span, SPANDATA.GEN_AI_SYSTEM, provider)
  89. set_data_normalized(span, SPANDATA.GEN_AI_OPERATION_NAME, operation)
  90. # Record input/messages if allowed
  91. if should_send_default_pii() and integration.include_prompts:
  92. if operation == "embeddings":
  93. # For embeddings, look for the 'input' parameter
  94. embedding_input = kwargs.get("input")
  95. if embedding_input:
  96. scope = sentry_sdk.get_current_scope()
  97. # Normalize to list format
  98. input_list = (
  99. embedding_input
  100. if isinstance(embedding_input, list)
  101. else [embedding_input]
  102. )
  103. messages_data = truncate_and_annotate_embedding_inputs(
  104. input_list, span, scope
  105. )
  106. if messages_data is not None:
  107. set_data_normalized(
  108. span,
  109. SPANDATA.GEN_AI_EMBEDDINGS_INPUT,
  110. messages_data,
  111. unpack=False,
  112. )
  113. else:
  114. # For chat, look for the 'messages' parameter
  115. messages = kwargs.get("messages", [])
  116. if messages:
  117. scope = sentry_sdk.get_current_scope()
  118. messages = _convert_message_parts(messages)
  119. messages_data = truncate_and_annotate_messages(messages, span, scope)
  120. if messages_data is not None:
  121. set_data_normalized(
  122. span,
  123. SPANDATA.GEN_AI_REQUEST_MESSAGES,
  124. messages_data,
  125. unpack=False,
  126. )
  127. # Record other parameters
  128. params = {
  129. "model": SPANDATA.GEN_AI_REQUEST_MODEL,
  130. "stream": SPANDATA.GEN_AI_RESPONSE_STREAMING,
  131. "max_tokens": SPANDATA.GEN_AI_REQUEST_MAX_TOKENS,
  132. "presence_penalty": SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY,
  133. "frequency_penalty": SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY,
  134. "temperature": SPANDATA.GEN_AI_REQUEST_TEMPERATURE,
  135. "top_p": SPANDATA.GEN_AI_REQUEST_TOP_P,
  136. }
  137. for key, attribute in params.items():
  138. value = kwargs.get(key)
  139. if value is not None:
  140. set_data_normalized(span, attribute, value)
  141. async def _async_input_callback(kwargs: "Dict[str, Any]") -> None:
  142. return _input_callback(kwargs)
  143. def _success_callback(
  144. kwargs: "Dict[str, Any]",
  145. completion_response: "Any",
  146. start_time: "datetime",
  147. end_time: "datetime",
  148. ) -> None:
  149. """Handle successful completion."""
  150. metadata = _get_metadata_dict(kwargs)
  151. span = metadata.get("_sentry_span")
  152. if span is None:
  153. return
  154. integration = sentry_sdk.get_client().get_integration(LiteLLMIntegration)
  155. if integration is None:
  156. return
  157. try:
  158. # Record model information
  159. if hasattr(completion_response, "model"):
  160. set_data_normalized(
  161. span, SPANDATA.GEN_AI_RESPONSE_MODEL, completion_response.model
  162. )
  163. # Record response content if allowed
  164. if should_send_default_pii() and integration.include_prompts:
  165. if hasattr(completion_response, "choices"):
  166. response_messages = []
  167. for choice in completion_response.choices:
  168. if hasattr(choice, "message"):
  169. if hasattr(choice.message, "model_dump"):
  170. response_messages.append(choice.message.model_dump())
  171. elif hasattr(choice.message, "dict"):
  172. response_messages.append(choice.message.dict())
  173. else:
  174. # Fallback for basic message objects
  175. msg = {}
  176. if hasattr(choice.message, "role"):
  177. msg["role"] = choice.message.role
  178. if hasattr(choice.message, "content"):
  179. msg["content"] = choice.message.content
  180. if hasattr(choice.message, "tool_calls"):
  181. msg["tool_calls"] = choice.message.tool_calls
  182. response_messages.append(msg)
  183. if response_messages:
  184. set_data_normalized(
  185. span, SPANDATA.GEN_AI_RESPONSE_TEXT, response_messages
  186. )
  187. # Record token usage
  188. if hasattr(completion_response, "usage"):
  189. usage = completion_response.usage
  190. record_token_usage(
  191. span,
  192. input_tokens=getattr(usage, "prompt_tokens", None),
  193. output_tokens=getattr(usage, "completion_tokens", None),
  194. total_tokens=getattr(usage, "total_tokens", None),
  195. )
  196. finally:
  197. is_streaming = kwargs.get("stream")
  198. # Callback is fired multiple times when streaming a response.
  199. # Streaming flag checked at https://github.com/BerriAI/litellm/blob/33c3f13443eaf990ac8c6e3da78bddbc2b7d0e7a/litellm/litellm_core_utils/litellm_logging.py#L1603
  200. if (
  201. is_streaming is not True
  202. or "complete_streaming_response" in kwargs
  203. or "async_complete_streaming_response" in kwargs
  204. ):
  205. span = metadata.pop("_sentry_span", None)
  206. if span is not None:
  207. span.__exit__(None, None, None)
  208. async def _async_success_callback(
  209. kwargs: "Dict[str, Any]",
  210. completion_response: "Any",
  211. start_time: "datetime",
  212. end_time: "datetime",
  213. ) -> None:
  214. return _success_callback(
  215. kwargs,
  216. completion_response,
  217. start_time,
  218. end_time,
  219. )
  220. def _failure_callback(
  221. kwargs: "Dict[str, Any]",
  222. exception: Exception,
  223. start_time: "datetime",
  224. end_time: "datetime",
  225. ) -> None:
  226. """Handle request failure."""
  227. span = _get_metadata_dict(kwargs).get("_sentry_span")
  228. if span is None:
  229. return
  230. try:
  231. # Capture the exception
  232. event, hint = event_from_exception(
  233. exception,
  234. client_options=sentry_sdk.get_client().options,
  235. mechanism={"type": "litellm", "handled": False},
  236. )
  237. sentry_sdk.capture_event(event, hint=hint)
  238. finally:
  239. # Always finish the span and clean up
  240. span.__exit__(type(exception), exception, None)
  241. class LiteLLMIntegration(Integration):
  242. """
  243. LiteLLM integration for Sentry.
  244. This integration automatically captures LiteLLM API calls and sends them to Sentry
  245. for monitoring and error tracking. It supports all 100+ LLM providers that LiteLLM
  246. supports, including OpenAI, Anthropic, Google, Cohere, and many others.
  247. Features:
  248. - Automatic exception capture for all LiteLLM calls
  249. - Token usage tracking across all providers
  250. - Provider detection and attribution
  251. - Input/output message capture (configurable)
  252. - Streaming response support
  253. - Cost tracking integration
  254. Usage:
  255. ```python
  256. import litellm
  257. import sentry_sdk
  258. # Initialize Sentry with the LiteLLM integration
  259. sentry_sdk.init(
  260. dsn="your-dsn",
  261. send_default_pii=True
  262. integrations=[
  263. sentry_sdk.integrations.LiteLLMIntegration(
  264. include_prompts=True # Set to False to exclude message content
  265. )
  266. ]
  267. )
  268. # All LiteLLM calls will now be monitored
  269. response = litellm.completion(
  270. model="gpt-3.5-turbo",
  271. messages=[{"role": "user", "content": "Hello!"}]
  272. )
  273. ```
  274. Configuration:
  275. - include_prompts (bool): Whether to include prompts and responses in spans.
  276. Defaults to True. Set to False to exclude potentially sensitive data.
  277. """
  278. identifier = "litellm"
  279. origin = f"auto.ai.{identifier}"
  280. def __init__(self: "LiteLLMIntegration", include_prompts: bool = True) -> None:
  281. self.include_prompts = include_prompts
  282. @staticmethod
  283. def setup_once() -> None:
  284. """Set up LiteLLM callbacks for monitoring."""
  285. litellm.input_callback = input_callback or []
  286. if _input_callback not in litellm.input_callback:
  287. litellm.input_callback.append(_input_callback)
  288. if _async_input_callback not in litellm.input_callback:
  289. litellm.input_callback.append(_async_input_callback)
  290. litellm.success_callback = success_callback or []
  291. if _success_callback not in litellm.success_callback:
  292. litellm.success_callback.append(_success_callback)
  293. if _async_success_callback not in litellm.success_callback:
  294. litellm.success_callback.append(_async_success_callback)
  295. litellm.failure_callback = failure_callback or []
  296. if _failure_callback not in litellm.failure_callback:
  297. litellm.failure_callback.append(_failure_callback)