utils.py 34 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001
  1. import copy
  2. import json
  3. import inspect
  4. from functools import wraps
  5. from .consts import ORIGIN, TOOL_ATTRIBUTES_MAP, GEN_AI_SYSTEM
  6. from sentry_sdk._types import BLOB_DATA_SUBSTITUTE
  7. from typing import (
  8. TYPE_CHECKING,
  9. Iterable,
  10. Any,
  11. Callable,
  12. List,
  13. Optional,
  14. Union,
  15. TypedDict,
  16. Dict,
  17. )
  18. import sentry_sdk
  19. from sentry_sdk.ai.utils import (
  20. set_data_normalized,
  21. truncate_and_annotate_messages,
  22. normalize_message_roles,
  23. transform_google_content_part,
  24. get_modality_from_mime_type,
  25. )
  26. from sentry_sdk.consts import OP, SPANDATA
  27. from sentry_sdk.scope import should_send_default_pii
  28. from sentry_sdk.utils import (
  29. capture_internal_exceptions,
  30. event_from_exception,
  31. safe_serialize,
  32. )
  33. from google.genai.types import GenerateContentConfig, Part, Content
  34. from itertools import chain
  35. if TYPE_CHECKING:
  36. from sentry_sdk.tracing import Span
  37. from sentry_sdk._types import TextPart
  38. from google.genai.types import (
  39. GenerateContentResponse,
  40. ContentListUnion,
  41. ContentUnionDict,
  42. Tool,
  43. Model,
  44. EmbedContentResponse,
  45. ContentUnion,
  46. )
  47. class UsageData(TypedDict):
  48. """Structure for token usage data."""
  49. input_tokens: int
  50. input_tokens_cached: int
  51. output_tokens: int
  52. output_tokens_reasoning: int
  53. total_tokens: int
  54. def extract_usage_data(
  55. response: "Union[GenerateContentResponse, dict[str, Any]]",
  56. ) -> "UsageData":
  57. """Extract usage data from response into a structured format.
  58. Args:
  59. response: The GenerateContentResponse object or dictionary containing usage metadata
  60. Returns:
  61. UsageData: Dictionary with input_tokens, input_tokens_cached,
  62. output_tokens, and output_tokens_reasoning fields
  63. """
  64. usage_data = UsageData(
  65. input_tokens=0,
  66. input_tokens_cached=0,
  67. output_tokens=0,
  68. output_tokens_reasoning=0,
  69. total_tokens=0,
  70. )
  71. # Handle dictionary response (from streaming)
  72. if isinstance(response, dict):
  73. usage = response.get("usage_metadata", {})
  74. if not usage:
  75. return usage_data
  76. prompt_tokens = usage.get("prompt_token_count", 0) or 0
  77. tool_use_prompt_tokens = usage.get("tool_use_prompt_token_count", 0) or 0
  78. usage_data["input_tokens"] = prompt_tokens + tool_use_prompt_tokens
  79. cached_tokens = usage.get("cached_content_token_count", 0) or 0
  80. usage_data["input_tokens_cached"] = cached_tokens
  81. reasoning_tokens = usage.get("thoughts_token_count", 0) or 0
  82. usage_data["output_tokens_reasoning"] = reasoning_tokens
  83. candidates_tokens = usage.get("candidates_token_count", 0) or 0
  84. # python-genai reports output and reasoning tokens separately
  85. # reasoning should be sub-category of output tokens
  86. usage_data["output_tokens"] = candidates_tokens + reasoning_tokens
  87. total_tokens = usage.get("total_token_count", 0) or 0
  88. usage_data["total_tokens"] = total_tokens
  89. return usage_data
  90. if not hasattr(response, "usage_metadata"):
  91. return usage_data
  92. usage = response.usage_metadata
  93. # Input tokens include both prompt and tool use prompt tokens
  94. prompt_tokens = getattr(usage, "prompt_token_count", 0) or 0
  95. tool_use_prompt_tokens = getattr(usage, "tool_use_prompt_token_count", 0) or 0
  96. usage_data["input_tokens"] = prompt_tokens + tool_use_prompt_tokens
  97. # Cached input tokens
  98. cached_tokens = getattr(usage, "cached_content_token_count", 0) or 0
  99. usage_data["input_tokens_cached"] = cached_tokens
  100. # Reasoning tokens
  101. reasoning_tokens = getattr(usage, "thoughts_token_count", 0) or 0
  102. usage_data["output_tokens_reasoning"] = reasoning_tokens
  103. # output_tokens = candidates_tokens + reasoning_tokens
  104. # google-genai reports output and reasoning tokens separately
  105. candidates_tokens = getattr(usage, "candidates_token_count", 0) or 0
  106. usage_data["output_tokens"] = candidates_tokens + reasoning_tokens
  107. total_tokens = getattr(usage, "total_token_count", 0) or 0
  108. usage_data["total_tokens"] = total_tokens
  109. return usage_data
  110. def _capture_exception(exc: "Any") -> None:
  111. """Capture exception with Google GenAI mechanism."""
  112. event, hint = event_from_exception(
  113. exc,
  114. client_options=sentry_sdk.get_client().options,
  115. mechanism={"type": "google_genai", "handled": False},
  116. )
  117. sentry_sdk.capture_event(event, hint=hint)
  118. def get_model_name(model: "Union[str, Model]") -> str:
  119. """Extract model name from model parameter."""
  120. if isinstance(model, str):
  121. return model
  122. # Handle case where model might be an object with a name attribute
  123. if hasattr(model, "name"):
  124. return str(model.name)
  125. return str(model)
  126. def extract_contents_messages(contents: "ContentListUnion") -> "List[Dict[str, Any]]":
  127. """Extract messages from contents parameter which can have various formats.
  128. Returns a list of message dictionaries in the format:
  129. - System: {"role": "system", "content": "string"}
  130. - User/Assistant: {"role": "user"|"assistant", "content": [{"text": "...", "type": "text"}, ...]}
  131. """
  132. if contents is None:
  133. return []
  134. messages = []
  135. # Handle string case
  136. if isinstance(contents, str):
  137. return [{"role": "user", "content": contents}]
  138. # Handle list case - process each item (non-recursive, flatten at top level)
  139. if isinstance(contents, list):
  140. for item in contents:
  141. item_messages = extract_contents_messages(item)
  142. messages.extend(item_messages)
  143. return messages
  144. # Handle dictionary case (ContentDict)
  145. if isinstance(contents, dict):
  146. role = contents.get("role", "user")
  147. parts = contents.get("parts")
  148. if parts:
  149. content_parts = []
  150. tool_messages = []
  151. for part in parts:
  152. part_result = _extract_part_content(part)
  153. if part_result is None:
  154. continue
  155. if isinstance(part_result, dict) and part_result.get("role") == "tool":
  156. # Tool message - add separately
  157. tool_messages.append(part_result)
  158. else:
  159. # Regular content part
  160. content_parts.append(part_result)
  161. # Add main message if we have content parts
  162. if content_parts:
  163. # Normalize role: "model" -> "assistant"
  164. normalized_role = "assistant" if role == "model" else role or "user"
  165. messages.append({"role": normalized_role, "content": content_parts})
  166. # Add tool messages
  167. messages.extend(tool_messages)
  168. elif "text" in contents:
  169. # Simple text in dict
  170. messages.append(
  171. {
  172. "role": role or "user",
  173. "content": [{"text": contents["text"], "type": "text"}],
  174. }
  175. )
  176. return messages
  177. # Handle Content object
  178. if hasattr(contents, "parts") and contents.parts:
  179. role = getattr(contents, "role", None) or "user"
  180. content_parts = []
  181. tool_messages = []
  182. for part in contents.parts:
  183. part_result = _extract_part_content(part)
  184. if part_result is None:
  185. continue
  186. if isinstance(part_result, dict) and part_result.get("role") == "tool":
  187. tool_messages.append(part_result)
  188. else:
  189. content_parts.append(part_result)
  190. if content_parts:
  191. normalized_role = "assistant" if role == "model" else role
  192. messages.append({"role": normalized_role, "content": content_parts})
  193. messages.extend(tool_messages)
  194. return messages
  195. # Handle Part object directly
  196. part_result = _extract_part_content(contents)
  197. if part_result:
  198. if isinstance(part_result, dict) and part_result.get("role") == "tool":
  199. return [part_result]
  200. else:
  201. return [{"role": "user", "content": [part_result]}]
  202. # Handle PIL.Image.Image
  203. try:
  204. from PIL import Image as PILImage # type: ignore[import-not-found]
  205. if isinstance(contents, PILImage.Image):
  206. blob_part = _extract_pil_image(contents)
  207. if blob_part:
  208. return [{"role": "user", "content": [blob_part]}]
  209. except ImportError:
  210. pass
  211. # Handle File object
  212. if hasattr(contents, "uri") and hasattr(contents, "mime_type"):
  213. # File object
  214. file_uri = getattr(contents, "uri", None)
  215. mime_type = getattr(contents, "mime_type", None)
  216. # Process if we have file_uri, even if mime_type is missing
  217. if file_uri is not None:
  218. # Default to empty string if mime_type is None
  219. if mime_type is None:
  220. mime_type = ""
  221. blob_part = {
  222. "type": "uri",
  223. "modality": get_modality_from_mime_type(mime_type),
  224. "mime_type": mime_type,
  225. "uri": file_uri,
  226. }
  227. return [{"role": "user", "content": [blob_part]}]
  228. # Handle direct text attribute
  229. if hasattr(contents, "text") and contents.text:
  230. return [
  231. {"role": "user", "content": [{"text": str(contents.text), "type": "text"}]}
  232. ]
  233. return []
  234. def _extract_part_content(part: "Any") -> "Optional[dict[str, Any]]":
  235. """Extract content from a Part object or dict.
  236. Returns:
  237. - dict for content part (text/blob) or tool message
  238. - None if part should be skipped
  239. """
  240. if part is None:
  241. return None
  242. # Handle dict Part
  243. if isinstance(part, dict):
  244. # Check for function_response first (tool message)
  245. if "function_response" in part:
  246. return _extract_tool_message_from_part(part)
  247. if part.get("text"):
  248. return {"text": part["text"], "type": "text"}
  249. # Try using Google-specific transform for dict formats (inline_data, file_data)
  250. result = transform_google_content_part(part)
  251. if result is not None:
  252. # For inline_data with bytes data, substitute the content
  253. if "inline_data" in part:
  254. inline_data = part["inline_data"]
  255. if isinstance(inline_data, dict) and isinstance(
  256. inline_data.get("data"), bytes
  257. ):
  258. result["content"] = BLOB_DATA_SUBSTITUTE
  259. return result
  260. return None
  261. # Handle Part object
  262. # Check for function_response (tool message)
  263. if hasattr(part, "function_response") and part.function_response:
  264. return _extract_tool_message_from_part(part)
  265. # Handle text
  266. if hasattr(part, "text") and part.text:
  267. return {"text": part.text, "type": "text"}
  268. # Handle file_data
  269. if hasattr(part, "file_data") and part.file_data:
  270. file_data = part.file_data
  271. file_uri = getattr(file_data, "file_uri", None)
  272. mime_type = getattr(file_data, "mime_type", None)
  273. # Process if we have file_uri, even if mime_type is missing (consistent with dict handling)
  274. if file_uri is not None:
  275. # Default to empty string if mime_type is None (consistent with transform_google_content_part)
  276. if mime_type is None:
  277. mime_type = ""
  278. return {
  279. "type": "uri",
  280. "modality": get_modality_from_mime_type(mime_type),
  281. "mime_type": mime_type,
  282. "uri": file_uri,
  283. }
  284. # Handle inline_data
  285. if hasattr(part, "inline_data") and part.inline_data:
  286. inline_data = part.inline_data
  287. data = getattr(inline_data, "data", None)
  288. mime_type = getattr(inline_data, "mime_type", None)
  289. # Process if we have data, even if mime_type is missing/empty (consistent with dict handling)
  290. if data is not None:
  291. # Default to empty string if mime_type is None (consistent with transform_google_content_part)
  292. if mime_type is None:
  293. mime_type = ""
  294. # Handle both bytes (binary data) and str (base64-encoded data)
  295. if isinstance(data, bytes):
  296. content = BLOB_DATA_SUBSTITUTE
  297. else:
  298. # For non-bytes data (e.g., base64 strings), use as-is
  299. content = data
  300. return {
  301. "type": "blob",
  302. "modality": get_modality_from_mime_type(mime_type),
  303. "mime_type": mime_type,
  304. "content": content,
  305. }
  306. return None
  307. def _extract_tool_message_from_part(part: "Any") -> "Optional[dict[str, Any]]":
  308. """Extract tool message from a Part with function_response.
  309. Returns:
  310. {"role": "tool", "content": {"toolCallId": "...", "toolName": "...", "output": "..."}}
  311. or None if not a valid tool message
  312. """
  313. function_response = None
  314. if isinstance(part, dict):
  315. function_response = part.get("function_response")
  316. elif hasattr(part, "function_response"):
  317. function_response = part.function_response
  318. if not function_response:
  319. return None
  320. # Extract fields from function_response
  321. tool_call_id = None
  322. tool_name = None
  323. output = None
  324. if isinstance(function_response, dict):
  325. tool_call_id = function_response.get("id")
  326. tool_name = function_response.get("name")
  327. response_dict = function_response.get("response", {})
  328. # Prefer "output" key if present, otherwise use entire response
  329. output = response_dict.get("output", response_dict)
  330. else:
  331. # FunctionResponse object
  332. tool_call_id = getattr(function_response, "id", None)
  333. tool_name = getattr(function_response, "name", None)
  334. response_obj = getattr(function_response, "response", None)
  335. if response_obj is None:
  336. response_obj = {}
  337. if isinstance(response_obj, dict):
  338. output = response_obj.get("output", response_obj)
  339. else:
  340. output = response_obj
  341. if not tool_name:
  342. return None
  343. return {
  344. "role": "tool",
  345. "content": {
  346. "toolCallId": str(tool_call_id) if tool_call_id else None,
  347. "toolName": str(tool_name),
  348. "output": safe_serialize(output) if output is not None else None,
  349. },
  350. }
  351. def _extract_pil_image(image: "Any") -> "Optional[dict[str, Any]]":
  352. """Extract blob part from PIL.Image.Image."""
  353. try:
  354. from PIL import Image as PILImage
  355. if not isinstance(image, PILImage.Image):
  356. return None
  357. # Get format, default to JPEG
  358. format_str = image.format or "JPEG"
  359. suffix = format_str.lower()
  360. mime_type = f"image/{suffix}"
  361. return {
  362. "type": "blob",
  363. "modality": get_modality_from_mime_type(mime_type),
  364. "mime_type": mime_type,
  365. "content": BLOB_DATA_SUBSTITUTE,
  366. }
  367. except Exception:
  368. return None
  369. def extract_contents_text(contents: "ContentListUnion") -> "Optional[str]":
  370. """Extract text from contents parameter which can have various formats.
  371. This is a compatibility function that extracts text from messages.
  372. For new code, use extract_contents_messages instead.
  373. """
  374. messages = extract_contents_messages(contents)
  375. if not messages:
  376. return None
  377. texts = []
  378. for message in messages:
  379. content = message.get("content")
  380. if isinstance(content, str):
  381. texts.append(content)
  382. elif isinstance(content, list):
  383. for part in content:
  384. if isinstance(part, dict) and part.get("type") == "text":
  385. texts.append(part.get("text", ""))
  386. return " ".join(texts) if texts else None
  387. def _format_tools_for_span(
  388. tools: "Iterable[Tool | Callable[..., Any]]",
  389. ) -> "Optional[List[dict[str, Any]]]":
  390. """Format tools parameter for span data."""
  391. formatted_tools = []
  392. for tool in tools:
  393. if callable(tool):
  394. # Handle callable functions passed directly
  395. formatted_tools.append(
  396. {
  397. "name": getattr(tool, "__name__", "unknown"),
  398. "description": getattr(tool, "__doc__", None),
  399. }
  400. )
  401. elif (
  402. hasattr(tool, "function_declarations")
  403. and tool.function_declarations is not None
  404. ):
  405. # Tool object with function declarations
  406. for func_decl in tool.function_declarations:
  407. formatted_tools.append(
  408. {
  409. "name": getattr(func_decl, "name", None),
  410. "description": getattr(func_decl, "description", None),
  411. }
  412. )
  413. else:
  414. # Check for predefined tool attributes - each of these tools
  415. # is an attribute of the tool object, by default set to None
  416. for attr_name, description in TOOL_ATTRIBUTES_MAP.items():
  417. if getattr(tool, attr_name, None):
  418. formatted_tools.append(
  419. {
  420. "name": attr_name,
  421. "description": description,
  422. }
  423. )
  424. break
  425. return formatted_tools if formatted_tools else None
  426. def extract_tool_calls(
  427. response: "GenerateContentResponse",
  428. ) -> "Optional[List[dict[str, Any]]]":
  429. """Extract tool/function calls from response candidates and automatic function calling history."""
  430. tool_calls = []
  431. # Extract from candidates, sometimes tool calls are nested under the content.parts object
  432. if getattr(response, "candidates", []):
  433. for candidate in response.candidates:
  434. if not hasattr(candidate, "content") or not getattr(
  435. candidate.content, "parts", []
  436. ):
  437. continue
  438. for part in candidate.content.parts:
  439. if getattr(part, "function_call", None):
  440. function_call = part.function_call
  441. tool_call = {
  442. "name": getattr(function_call, "name", None),
  443. "type": "function_call",
  444. }
  445. # Extract arguments if available
  446. if getattr(function_call, "args", None):
  447. tool_call["arguments"] = safe_serialize(function_call.args)
  448. tool_calls.append(tool_call)
  449. # Extract from automatic_function_calling_history
  450. # This is the history of tool calls made by the model
  451. if getattr(response, "automatic_function_calling_history", None):
  452. for content in response.automatic_function_calling_history:
  453. if not getattr(content, "parts", None):
  454. continue
  455. for part in getattr(content, "parts", []):
  456. if getattr(part, "function_call", None):
  457. function_call = part.function_call
  458. tool_call = {
  459. "name": getattr(function_call, "name", None),
  460. "type": "function_call",
  461. }
  462. # Extract arguments if available
  463. if hasattr(function_call, "args"):
  464. tool_call["arguments"] = safe_serialize(function_call.args)
  465. tool_calls.append(tool_call)
  466. return tool_calls if tool_calls else None
  467. def _capture_tool_input(
  468. args: "tuple[Any, ...]", kwargs: "dict[str, Any]", tool: "Tool"
  469. ) -> "dict[str, Any]":
  470. """Capture tool input from args and kwargs."""
  471. tool_input = kwargs.copy() if kwargs else {}
  472. # If we have positional args, try to map them to the function signature
  473. if args:
  474. try:
  475. sig = inspect.signature(tool)
  476. param_names = list(sig.parameters.keys())
  477. for i, arg in enumerate(args):
  478. if i < len(param_names):
  479. tool_input[param_names[i]] = arg
  480. except Exception:
  481. # Fallback if we can't get the signature
  482. tool_input["args"] = args
  483. return tool_input
  484. def _create_tool_span(tool_name: str, tool_doc: "Optional[str]") -> "Span":
  485. """Create a span for tool execution."""
  486. span = sentry_sdk.start_span(
  487. op=OP.GEN_AI_EXECUTE_TOOL,
  488. name=f"execute_tool {tool_name}",
  489. origin=ORIGIN,
  490. )
  491. span.set_data(SPANDATA.GEN_AI_TOOL_NAME, tool_name)
  492. if tool_doc:
  493. span.set_data(SPANDATA.GEN_AI_TOOL_DESCRIPTION, tool_doc)
  494. return span
  495. def wrapped_tool(tool: "Tool | Callable[..., Any]") -> "Tool | Callable[..., Any]":
  496. """Wrap a tool to emit execute_tool spans when called."""
  497. if not callable(tool):
  498. # Not a callable function, return as-is (predefined tools)
  499. return tool
  500. tool_name = getattr(tool, "__name__", "unknown")
  501. tool_doc = tool.__doc__
  502. if inspect.iscoroutinefunction(tool):
  503. # Async function
  504. @wraps(tool)
  505. async def async_wrapped(*args: "Any", **kwargs: "Any") -> "Any":
  506. with _create_tool_span(tool_name, tool_doc) as span:
  507. # Capture tool input
  508. tool_input = _capture_tool_input(args, kwargs, tool)
  509. with capture_internal_exceptions():
  510. span.set_data(
  511. SPANDATA.GEN_AI_TOOL_INPUT, safe_serialize(tool_input)
  512. )
  513. try:
  514. result = await tool(*args, **kwargs)
  515. # Capture tool output
  516. with capture_internal_exceptions():
  517. span.set_data(
  518. SPANDATA.GEN_AI_TOOL_OUTPUT, safe_serialize(result)
  519. )
  520. return result
  521. except Exception as exc:
  522. _capture_exception(exc)
  523. raise
  524. return async_wrapped
  525. else:
  526. # Sync function
  527. @wraps(tool)
  528. def sync_wrapped(*args: "Any", **kwargs: "Any") -> "Any":
  529. with _create_tool_span(tool_name, tool_doc) as span:
  530. # Capture tool input
  531. tool_input = _capture_tool_input(args, kwargs, tool)
  532. with capture_internal_exceptions():
  533. span.set_data(
  534. SPANDATA.GEN_AI_TOOL_INPUT, safe_serialize(tool_input)
  535. )
  536. try:
  537. result = tool(*args, **kwargs)
  538. # Capture tool output
  539. with capture_internal_exceptions():
  540. span.set_data(
  541. SPANDATA.GEN_AI_TOOL_OUTPUT, safe_serialize(result)
  542. )
  543. return result
  544. except Exception as exc:
  545. _capture_exception(exc)
  546. raise
  547. return sync_wrapped
  548. def wrapped_config_with_tools(
  549. config: "GenerateContentConfig",
  550. ) -> "GenerateContentConfig":
  551. """Wrap tools in config to emit execute_tool spans. Tools are sometimes passed directly as
  552. callable functions as a part of the config object."""
  553. if not config or not getattr(config, "tools", None):
  554. return config
  555. result = copy.copy(config)
  556. result.tools = [wrapped_tool(tool) for tool in config.tools]
  557. return result
  558. def _extract_response_text(
  559. response: "GenerateContentResponse",
  560. ) -> "Optional[List[str]]":
  561. """Extract text from response candidates."""
  562. if not response or not getattr(response, "candidates", []):
  563. return None
  564. texts = []
  565. for candidate in response.candidates:
  566. if not hasattr(candidate, "content") or not hasattr(candidate.content, "parts"):
  567. continue
  568. if candidate.content is None or candidate.content.parts is None:
  569. continue
  570. for part in candidate.content.parts:
  571. if getattr(part, "text", None):
  572. texts.append(part.text)
  573. return texts if texts else None
  574. def extract_finish_reasons(
  575. response: "GenerateContentResponse",
  576. ) -> "Optional[List[str]]":
  577. """Extract finish reasons from response candidates."""
  578. if not response or not getattr(response, "candidates", []):
  579. return None
  580. finish_reasons = []
  581. for candidate in response.candidates:
  582. if getattr(candidate, "finish_reason", None):
  583. # Convert enum value to string if necessary
  584. reason = str(candidate.finish_reason)
  585. # Remove enum prefix if present (e.g., "FinishReason.STOP" -> "STOP")
  586. if "." in reason:
  587. reason = reason.split(".")[-1]
  588. finish_reasons.append(reason)
  589. return finish_reasons if finish_reasons else None
  590. def _transform_system_instruction_one_level(
  591. system_instructions: "Union[ContentUnionDict, ContentUnion]",
  592. can_be_content: bool,
  593. ) -> "list[TextPart]":
  594. text_parts: "list[TextPart]" = []
  595. if isinstance(system_instructions, str):
  596. return [{"type": "text", "content": system_instructions}]
  597. if isinstance(system_instructions, Part) and system_instructions.text:
  598. return [{"type": "text", "content": system_instructions.text}]
  599. if can_be_content and isinstance(system_instructions, Content):
  600. if isinstance(system_instructions.parts, list):
  601. for part in system_instructions.parts:
  602. if isinstance(part.text, str):
  603. text_parts.append({"type": "text", "content": part.text})
  604. return text_parts
  605. if isinstance(system_instructions, dict) and system_instructions.get("text"):
  606. return [{"type": "text", "content": system_instructions["text"]}]
  607. elif can_be_content and isinstance(system_instructions, dict):
  608. parts = system_instructions.get("parts", [])
  609. for part in parts:
  610. if isinstance(part, Part) and isinstance(part.text, str):
  611. text_parts.append({"type": "text", "content": part.text})
  612. elif isinstance(part, dict) and isinstance(part.get("text"), str):
  613. text_parts.append({"type": "text", "content": part["text"]})
  614. return text_parts
  615. return text_parts
  616. def _transform_system_instructions(
  617. system_instructions: "Union[ContentUnionDict, ContentUnion]",
  618. ) -> "list[TextPart]":
  619. text_parts: "list[TextPart]" = []
  620. if isinstance(system_instructions, list):
  621. text_parts = list(
  622. chain.from_iterable(
  623. _transform_system_instruction_one_level(
  624. instructions, can_be_content=False
  625. )
  626. for instructions in system_instructions
  627. )
  628. )
  629. return text_parts
  630. return _transform_system_instruction_one_level(
  631. system_instructions, can_be_content=True
  632. )
  633. def set_span_data_for_request(
  634. span: "Span",
  635. integration: "Any",
  636. model: str,
  637. contents: "ContentListUnion",
  638. kwargs: "dict[str, Any]",
  639. ) -> None:
  640. """Set span data for the request."""
  641. span.set_data(SPANDATA.GEN_AI_SYSTEM, GEN_AI_SYSTEM)
  642. span.set_data(SPANDATA.GEN_AI_REQUEST_MODEL, model)
  643. if kwargs.get("stream", False):
  644. span.set_data(SPANDATA.GEN_AI_RESPONSE_STREAMING, True)
  645. config: "Optional[GenerateContentConfig]" = kwargs.get("config")
  646. # Set input messages/prompts if PII is allowed
  647. if should_send_default_pii() and integration.include_prompts:
  648. messages = []
  649. # Add system instruction if present
  650. system_instructions = None
  651. if config and hasattr(config, "system_instruction"):
  652. system_instructions = config.system_instruction
  653. elif isinstance(config, dict) and "system_instruction" in config:
  654. system_instructions = config.get("system_instruction")
  655. if system_instructions is not None:
  656. span.set_data(
  657. SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS,
  658. json.dumps(_transform_system_instructions(system_instructions)),
  659. )
  660. # Extract messages from contents
  661. contents_messages = extract_contents_messages(contents)
  662. messages.extend(contents_messages)
  663. if messages:
  664. normalized_messages = normalize_message_roles(messages)
  665. scope = sentry_sdk.get_current_scope()
  666. messages_data = truncate_and_annotate_messages(
  667. normalized_messages, span, scope
  668. )
  669. if messages_data is not None:
  670. set_data_normalized(
  671. span,
  672. SPANDATA.GEN_AI_REQUEST_MESSAGES,
  673. messages_data,
  674. unpack=False,
  675. )
  676. # Extract parameters directly from config (not nested under generation_config)
  677. for param, span_key in [
  678. ("temperature", SPANDATA.GEN_AI_REQUEST_TEMPERATURE),
  679. ("top_p", SPANDATA.GEN_AI_REQUEST_TOP_P),
  680. ("top_k", SPANDATA.GEN_AI_REQUEST_TOP_K),
  681. ("max_output_tokens", SPANDATA.GEN_AI_REQUEST_MAX_TOKENS),
  682. ("presence_penalty", SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY),
  683. ("frequency_penalty", SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY),
  684. ("seed", SPANDATA.GEN_AI_REQUEST_SEED),
  685. ]:
  686. if hasattr(config, param):
  687. value = getattr(config, param)
  688. if value is not None:
  689. span.set_data(span_key, value)
  690. # Set tools if available
  691. if config is not None and hasattr(config, "tools"):
  692. tools = config.tools
  693. if tools:
  694. formatted_tools = _format_tools_for_span(tools)
  695. if formatted_tools:
  696. set_data_normalized(
  697. span,
  698. SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS,
  699. formatted_tools,
  700. unpack=False,
  701. )
  702. def set_span_data_for_response(
  703. span: "Span", integration: "Any", response: "GenerateContentResponse"
  704. ) -> None:
  705. """Set span data for the response."""
  706. if not response:
  707. return
  708. if should_send_default_pii() and integration.include_prompts:
  709. response_texts = _extract_response_text(response)
  710. if response_texts:
  711. # Format as JSON string array as per documentation
  712. span.set_data(SPANDATA.GEN_AI_RESPONSE_TEXT, safe_serialize(response_texts))
  713. tool_calls = extract_tool_calls(response)
  714. if tool_calls:
  715. # Tool calls should be JSON serialized
  716. span.set_data(SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS, safe_serialize(tool_calls))
  717. finish_reasons = extract_finish_reasons(response)
  718. if finish_reasons:
  719. set_data_normalized(
  720. span, SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS, finish_reasons
  721. )
  722. if getattr(response, "response_id", None):
  723. span.set_data(SPANDATA.GEN_AI_RESPONSE_ID, response.response_id)
  724. if getattr(response, "model_version", None):
  725. span.set_data(SPANDATA.GEN_AI_RESPONSE_MODEL, response.model_version)
  726. usage_data = extract_usage_data(response)
  727. if usage_data["input_tokens"]:
  728. span.set_data(SPANDATA.GEN_AI_USAGE_INPUT_TOKENS, usage_data["input_tokens"])
  729. if usage_data["input_tokens_cached"]:
  730. span.set_data(
  731. SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED,
  732. usage_data["input_tokens_cached"],
  733. )
  734. if usage_data["output_tokens"]:
  735. span.set_data(SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS, usage_data["output_tokens"])
  736. if usage_data["output_tokens_reasoning"]:
  737. span.set_data(
  738. SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS_REASONING,
  739. usage_data["output_tokens_reasoning"],
  740. )
  741. if usage_data["total_tokens"]:
  742. span.set_data(SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS, usage_data["total_tokens"])
  743. def prepare_generate_content_args(
  744. args: "tuple[Any, ...]", kwargs: "dict[str, Any]"
  745. ) -> "tuple[Any, Any, str]":
  746. """Extract and prepare common arguments for generate_content methods."""
  747. model = args[0] if args else kwargs.get("model", "unknown")
  748. contents = args[1] if len(args) > 1 else kwargs.get("contents")
  749. model_name = get_model_name(model)
  750. config = kwargs.get("config")
  751. wrapped_config = wrapped_config_with_tools(config)
  752. if wrapped_config is not config:
  753. kwargs["config"] = wrapped_config
  754. return model, contents, model_name
  755. def prepare_embed_content_args(
  756. args: "tuple[Any, ...]", kwargs: "dict[str, Any]"
  757. ) -> "tuple[str, Any]":
  758. """Extract and prepare common arguments for embed_content methods.
  759. Returns:
  760. tuple: (model_name, contents)
  761. """
  762. model = kwargs.get("model", "unknown")
  763. contents = kwargs.get("contents")
  764. model_name = get_model_name(model)
  765. return model_name, contents
  766. def set_span_data_for_embed_request(
  767. span: "Span", integration: "Any", contents: "Any", kwargs: "dict[str, Any]"
  768. ) -> None:
  769. """Set span data for embedding request."""
  770. # Include input contents if PII is allowed
  771. if should_send_default_pii() and integration.include_prompts:
  772. if contents:
  773. # For embeddings, contents is typically a list of strings/texts
  774. input_texts = []
  775. # Handle various content formats
  776. if isinstance(contents, str):
  777. input_texts = [contents]
  778. elif isinstance(contents, list):
  779. for item in contents:
  780. text = extract_contents_text(item)
  781. if text:
  782. input_texts.append(text)
  783. else:
  784. text = extract_contents_text(contents)
  785. if text:
  786. input_texts = [text]
  787. if input_texts:
  788. set_data_normalized(
  789. span,
  790. SPANDATA.GEN_AI_EMBEDDINGS_INPUT,
  791. input_texts,
  792. unpack=False,
  793. )
  794. def set_span_data_for_embed_response(
  795. span: "Span", integration: "Any", response: "EmbedContentResponse"
  796. ) -> None:
  797. """Set span data for embedding response."""
  798. if not response:
  799. return
  800. # Extract token counts from embeddings statistics (Vertex AI only)
  801. # Each embedding has its own statistics with token_count
  802. if hasattr(response, "embeddings") and response.embeddings:
  803. total_tokens = 0
  804. for embedding in response.embeddings:
  805. if hasattr(embedding, "statistics") and embedding.statistics:
  806. token_count = getattr(embedding.statistics, "token_count", None)
  807. if token_count is not None:
  808. total_tokens += int(token_count)
  809. # Set token count if we found any
  810. if total_tokens > 0:
  811. span.set_data(SPANDATA.GEN_AI_USAGE_INPUT_TOKENS, total_tokens)