From 1f32952d0066a9dc1ff1482cef48c3cbe0acb663 Mon Sep 17 00:00:00 2001 From: Fabian Schindler Date: Wed, 17 Dec 2025 10:45:45 +0100 Subject: [PATCH 01/10] fix(ai): redact message parts content of type blob --- sentry_sdk/ai/utils.py | 51 +++++++++++++++++ tests/test_ai_monitoring.py | 106 +++++++++++++++++++++++++++++++++++- 2 files changed, 156 insertions(+), 1 deletion(-) diff --git a/sentry_sdk/ai/utils.py b/sentry_sdk/ai/utils.py index 1d2b4483c9..73155b0305 100644 --- a/sentry_sdk/ai/utils.py +++ b/sentry_sdk/ai/utils.py @@ -5,6 +5,8 @@ from sys import getsizeof from typing import TYPE_CHECKING +from sentry_sdk._types import SENSITIVE_DATA_SUBSTITUTE + if TYPE_CHECKING: from typing import Any, Callable, Dict, List, Optional, Tuple @@ -141,6 +143,53 @@ def _find_truncation_index(messages: "List[Dict[str, Any]]", max_bytes: int) -> return 0 +def redact_blob_message_parts(messages): + # type: (List[Dict[str, Any]]) -> Tuple[List[Dict[str, Any]], int] + """ + Redact blob message parts from the messages, by removing the "content" key. + e.g: + { + "role": "user", + "content": [ + { + "text": "How many ponies do you see in the image?", + "type": "text" + }, + { + "type": "blob", + "modality": "image", + "mime_type": "image/jpeg", + "content": "data:image/jpeg;base64,..." + } + ] + } + becomes: + { + "role": "user", + "content": [ + { + "text": "How many ponies do you see in the image?", + "type": "text" + }, + { + "type": "blob", + "modality": "image", + "mime_type": "image/jpeg", + "content": "[Filtered]" + } + ] + } + """ + + for message in messages: + content = message.get("content") + if isinstance(content, list): + for item in content: + if item.get("type") == "blob": + item["content"] = SENSITIVE_DATA_SUBSTITUTE + return messages + + def truncate_messages_by_size( messages: "List[Dict[str, Any]]", max_bytes: int = MAX_GEN_AI_MESSAGE_BYTES, @@ -186,6 +235,8 @@ def truncate_and_annotate_messages( if not messages: return None + messages = redact_blob_message_parts(messages) + truncated_messages, removed_count = truncate_messages_by_size(messages, max_bytes) if removed_count > 0: scope._gen_ai_original_message_count[span.span_id] = len(messages) diff --git a/tests/test_ai_monitoring.py b/tests/test_ai_monitoring.py index 8d3d4ba204..e9f3712cd3 100644 --- a/tests/test_ai_monitoring.py +++ b/tests/test_ai_monitoring.py @@ -4,7 +4,7 @@ import pytest import sentry_sdk -from sentry_sdk._types import AnnotatedValue +from sentry_sdk._types import AnnotatedValue, SENSITIVE_DATA_SUBSTITUTE from sentry_sdk.ai.monitoring import ai_track from sentry_sdk.ai.utils import ( MAX_GEN_AI_MESSAGE_BYTES, @@ -13,6 +13,7 @@ truncate_and_annotate_messages, truncate_messages_by_size, _find_truncation_index, + redact_blob_message_parts, ) from sentry_sdk.serializer import serialize from sentry_sdk.utils import safe_serialize @@ -542,3 +543,106 @@ def __init__(self): assert isinstance(messages_value, AnnotatedValue) assert messages_value.metadata["len"] == stored_original_length assert len(messages_value.value) == len(truncated_messages) + + +class TestRedactBlobMessageParts: + def test_redacts_single_blob_content(self): + """Test that blob content is redacted in a message with single blob part""" + messages = [ + { + "role": "user", + "content": [ + { + "text": "How many ponies do you see in the image?", + "type": "text", + }, + { + "type": "blob", + "modality": "image", + "mime_type": "image/jpeg", + "content": "data:image/jpeg;base64,/9j/4AAQSkZJRg==", + }, + ], + } + ] + + result = redact_blob_message_parts(messages) + + assert result == messages # Returns the same list + assert ( + messages[0]["content"][0]["text"] + == "How many ponies do you see in the image?" + ) + assert messages[0]["content"][0]["type"] == "text" + assert messages[0]["content"][1]["type"] == "blob" + assert messages[0]["content"][1]["modality"] == "image" + assert messages[0]["content"][1]["mime_type"] == "image/jpeg" + assert messages[0]["content"][1]["content"] == SENSITIVE_DATA_SUBSTITUTE + + def test_redacts_multiple_blob_parts(self): + """Test that multiple blob parts in a single message are all redacted""" + messages = [ + { + "role": "user", + "content": [ + {"text": "Compare these images", "type": "text"}, + { + "type": "blob", + "modality": "image", + "mime_type": "image/jpeg", + "content": "data:image/jpeg;base64,first_image", + }, + { + "type": "blob", + "modality": "image", + "mime_type": "image/png", + "content": "data:image/png;base64,second_image", + }, + ], + } + ] + + result = redact_blob_message_parts(messages) + + assert result == messages + assert messages[0]["content"][0]["text"] == "Compare these images" + assert messages[0]["content"][1]["content"] == SENSITIVE_DATA_SUBSTITUTE + assert messages[0]["content"][2]["content"] == SENSITIVE_DATA_SUBSTITUTE + + def test_redacts_blobs_in_multiple_messages(self): + """Test that blob parts are redacted across multiple messages""" + messages = [ + { + "role": "user", + "content": [ + {"text": "First message", "type": "text"}, + { + "type": "blob", + "modality": "image", + "content": "data:image/jpeg;base64,first", + }, + ], + }, + { + "role": "assistant", + "content": "I see the image.", + }, + { + "role": "user", + "content": [ + {"text": "Second message", "type": "text"}, + { + "type": "blob", + "modality": "image", + "content": "data:image/jpeg;base64,second", + }, + ], + }, + ] + + result = redact_blob_message_parts(messages) + + assert result == messages + assert messages[0]["content"][1]["content"] == SENSITIVE_DATA_SUBSTITUTE + assert messages[1]["content"] == "I see the image." # Unchanged + assert messages[2]["content"][1]["content"] == SENSITIVE_DATA_SUBSTITUTE From 795bcea241f7777e646a4da14c870a3049bdbe90 Mon Sep 17 00:00:00 2001 From: Fabian Schindler Date: Wed, 17 Dec 2025 11:05:04 +0100 Subject: [PATCH 02/10] fix(ai): skip non dict messages --- sentry_sdk/ai/utils.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sentry_sdk/ai/utils.py b/sentry_sdk/ai/utils.py index 73155b0305..ae507e898b 100644 --- a/sentry_sdk/ai/utils.py +++ b/sentry_sdk/ai/utils.py @@ -182,6 +182,9 @@ def redact_blob_message_parts(messages): """ for message in messages: + if not isinstance(message, dict): + continue + content = message.get("content") if isinstance(content, list): for item in content: From a623e137d26e982c0d85258256c0ba013f9ecb24 Mon Sep 17 00:00:00 2001 From: Fabian Schindler Date: Wed, 17 Dec 2025 11:21:43 +0100 Subject: [PATCH 03/10] fix(ai): typing --- sentry_sdk/ai/utils.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sentry_sdk/ai/utils.py b/sentry_sdk/ai/utils.py index ae507e898b..1b61c7a113 100644 --- a/sentry_sdk/ai/utils.py +++ b/sentry_sdk/ai/utils.py @@ -143,8 +143,9 @@ def _find_truncation_index(messages: "List[Dict[str, Any]]", max_bytes: int) -> return 0 -def redact_blob_message_parts(messages): - # type: (List[Dict[str, Any]]) -> Tuple[List[Dict[str, Any]], int] +def redact_blob_message_parts( + messages: "List[Dict[str, Any]]", +) -> "List[Dict[str, Any]]": """ Redact blob message parts from the messages, by removing the "content" key. e.g: From 3d3ce5bbdca43f14194edbbbee11d3b6dcd6d8a3 Mon Sep 17 00:00:00 2001 From: Fabian Schindler Date: Wed, 17 Dec 2025 11:37:12 +0100 Subject: [PATCH 04/10] fix(ai): content items may not be dicts --- sentry_sdk/ai/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sentry_sdk/ai/utils.py b/sentry_sdk/ai/utils.py index 1b61c7a113..78a64ab737 100644 --- a/sentry_sdk/ai/utils.py +++ b/sentry_sdk/ai/utils.py @@ -189,7 +189,7 @@ def redact_blob_message_parts( content = message.get("content") if isinstance(content, list): for item in content: - if item.get("type") == "blob": + if isinstance(item, dict) and item.get("type") == "blob": item["content"] = SENSITIVE_DATA_SUBSTITUTE return messages From 269400865a76fb7848a98fe75ba3d19841ffba85 Mon Sep 17 00:00:00 2001 From: Fabian Schindler Date: Mon, 5 Jan 2026 16:13:42 +0100 Subject: [PATCH 05/10] fix(integrations): Anthropic: add content transformation for images and documents --- sentry_sdk/integrations/anthropic.py | 139 +++- .../integrations/anthropic/test_anthropic.py | 704 ++++++++++++++++++ 2 files changed, 840 insertions(+), 3 deletions(-) diff --git a/sentry_sdk/integrations/anthropic.py b/sentry_sdk/integrations/anthropic.py index 5257e3bf60..41f1c387a7 100644 --- a/sentry_sdk/integrations/anthropic.py +++ b/sentry_sdk/integrations/anthropic.py @@ -120,6 +120,104 @@ def _collect_ai_data( return model, input_tokens, output_tokens, content_blocks +def _transform_content_block(content_block: "dict[str, Any]") -> "dict[str, Any]": + """ + Transform an Anthropic content block to a Sentry-compatible format. + + Handles binary data (images, documents) by converting them to the standardized format: + - base64 encoded data -> type: "blob" + - URL references -> type: "uri" + - file_id references -> type: "file" + """ + block_type = content_block.get("type") + + # Handle image blocks + if block_type == "image": + source = content_block.get("source", {}) + source_type = source.get("type") + media_type = source.get("media_type", "") + + if source_type == "base64": + return { + "type": "blob", + "modality": "image", + "mime_type": media_type, + "content": source.get("data", ""), + } + elif source_type == "url": + return { + "type": "uri", + "modality": "image", + "mime_type": media_type, + "uri": source.get("url", ""), + } + elif source_type == "file": + return { + "type": "file", + "modality": "image", + "mime_type": media_type, + "file_id": source.get("file_id", ""), + } + + # Handle document blocks (PDFs, etc.) + elif block_type == "document": + source = content_block.get("source", {}) + source_type = source.get("type") + media_type = source.get("media_type", "") + + if source_type == "base64": + return { + "type": "blob", + "modality": "document", + "mime_type": media_type, + "content": source.get("data", ""), + } + elif source_type == "url": + return { + "type": "uri", + "modality": "document", + "mime_type": media_type, + "uri": source.get("url", ""), + } + elif source_type == "file": + return { + "type": "file", + "modality": "document", + "mime_type": media_type, + "file_id": source.get("file_id", ""), + } + elif source_type == "text": + # Plain text documents - keep as is but mark the type + return { + "type": "text", + "text": source.get("data", ""), + } + + # For text blocks and other types, return as-is + return content_block + + +def _transform_message_content( + content: "Any", +) -> "Any": + """ + Transform message content, handling both string content and list of content blocks. + """ + if isinstance(content, str): + return content + + if isinstance(content, (list, tuple)): + transformed = [] + for block in content: + if isinstance(block, dict): + transformed.append(_transform_content_block(block)) + else: + transformed.append(block) + return transformed + + return content + + def _set_input_data( span: "Span", kwargs: "dict[str, Any]", integration: "AnthropicIntegration" ) -> None: @@ -164,19 +262,54 @@ def _set_input_data( and "content" in message and isinstance(message["content"], (list, tuple)) ): + has_tool_result = False + transformed_content = [] for item in message["content"]: - if item.get("type") == "tool_result": + if isinstance(item, dict) and item.get("type") == "tool_result": + has_tool_result = True normalized_messages.append( { "role": GEN_AI_ALLOWED_MESSAGE_ROLES.TOOL, "content": { # type: ignore[dict-item] "tool_use_id": item.get("tool_use_id"), - "output": item.get("content"), + "output": _transform_message_content( + item.get("content") + ), }, } ) + else: + # Transform content blocks (images, documents, etc.) + transformed_content.append( + _transform_content_block(item) + if isinstance(item, dict) + else item + ) + + # If there are non-tool-result items, add them as a message + if transformed_content and not has_tool_result: + normalized_messages.append( + { + "role": message.get("role"), + "content": transformed_content, + } + ) + elif transformed_content and has_tool_result: + # Mixed content: tool results + other content + normalized_messages.append( + { + "role": message.get("role"), + "content": transformed_content, + } + ) else: - normalized_messages.append(message) + # Transform content for non-list messages or assistant messages + transformed_message = message.copy() + if "content" in transformed_message: + transformed_message["content"] = _transform_message_content( + transformed_message["content"] + ) + normalized_messages.append(transformed_message) role_normalized_messages = normalize_message_roles(normalized_messages) scope = sentry_sdk.get_current_scope() diff --git a/tests/integrations/anthropic/test_anthropic.py b/tests/integrations/anthropic/test_anthropic.py index 2204505d47..11ebebf666 100644 --- a/tests/integrations/anthropic/test_anthropic.py +++ b/tests/integrations/anthropic/test_anthropic.py @@ -47,6 +47,8 @@ async def __call__(self, *args, **kwargs): AnthropicIntegration, _set_output_data, _collect_ai_data, + _transform_content_block, + _transform_message_content, ) from sentry_sdk.utils import package_version @@ -1446,3 +1448,705 @@ def test_system_prompt_with_complex_structure(sentry_init, capture_events): assert stored_messages[0]["content"][1]["text"] == "Be concise and clear." assert stored_messages[1]["role"] == "user" assert stored_messages[1]["content"] == "Hello" + + +# Tests for _transform_content_block helper function + + +def test_transform_content_block_base64_image(): + """Test that base64 encoded images are transformed to blob format.""" + content_block = { + "type": "image", + "source": { + "type": "base64", + "media_type": "image/jpeg", + "data": "base64encodeddata...", + }, + } + + result = _transform_content_block(content_block) + + assert result == { + "type": "blob", + "modality": "image", + "mime_type": "image/jpeg", + "content": "base64encodeddata...", + } + + +def test_transform_content_block_url_image(): + """Test that URL-referenced images are transformed to uri format.""" + content_block = { + "type": "image", + "source": { + "type": "url", + "url": "https://example.com/image.jpg", + }, + } + + result = _transform_content_block(content_block) + + assert result == { + "type": "uri", + "modality": "image", + "mime_type": "", + "uri": "https://example.com/image.jpg", + } + + +def test_transform_content_block_file_image(): + """Test that file_id-referenced images are transformed to file format.""" + content_block = { + "type": "image", + "source": { + "type": "file", + "file_id": "file_abc123", + }, + } + + result = _transform_content_block(content_block) + + assert result == { + "type": "file", + "modality": "image", + "mime_type": "", + "file_id": "file_abc123", + } + + +def test_transform_content_block_base64_document(): + """Test that base64 encoded PDFs are transformed to blob format.""" + content_block = { + "type": "document", + "source": { + "type": "base64", + "media_type": "application/pdf", + "data": "base64encodedpdfdata...", + }, + } + + result = _transform_content_block(content_block) + + assert result == { + "type": "blob", + "modality": "document", + "mime_type": "application/pdf", + "content": "base64encodedpdfdata...", + } + + +def test_transform_content_block_url_document(): + """Test that URL-referenced documents are transformed to uri format.""" + content_block = { + "type": "document", + "source": { + "type": "url", + "url": "https://example.com/document.pdf", + }, + } + + result = _transform_content_block(content_block) + + assert result == { + "type": "uri", + "modality": "document", + "mime_type": "", + "uri": "https://example.com/document.pdf", + } + + +def test_transform_content_block_file_document(): + """Test that file_id-referenced documents are transformed to file format.""" + content_block = { + "type": "document", + "source": { + "type": "file", + "file_id": "file_doc456", + "media_type": "application/pdf", + }, + } + + result = _transform_content_block(content_block) + + assert result == { + "type": "file", + "modality": "document", + "mime_type": "application/pdf", + "file_id": "file_doc456", + } + + +def test_transform_content_block_text_document(): + """Test that plain text documents are transformed correctly.""" + content_block = { + "type": "document", + "source": { + "type": "text", + "media_type": "text/plain", + "data": "This is plain text content.", + }, + } + + result = _transform_content_block(content_block) + + assert result == { + "type": "text", + "text": "This is plain text content.", + } + + +def test_transform_content_block_text_block(): + """Test that regular text blocks are returned as-is.""" + content_block = { + "type": "text", + "text": "Hello, world!", + } + + result = _transform_content_block(content_block) + + assert result == content_block + + +def test_transform_message_content_string(): + """Test that string content is returned as-is.""" + result = _transform_message_content("Hello, world!") + assert result == "Hello, world!" + + +def test_transform_message_content_list(): + """Test that list content is transformed correctly.""" + content = [ + {"type": "text", "text": "Hello!"}, + { + "type": "image", + "source": { + "type": "base64", + "media_type": "image/png", + "data": "base64data...", + }, + }, + ] + + result = _transform_message_content(content) + + assert len(result) == 2 + assert result[0] == {"type": "text", "text": "Hello!"} + assert result[1] == { + "type": "blob", + "modality": "image", + "mime_type": "image/png", + "content": "base64data...", + } + + +# Integration tests for binary data in messages + + +def test_message_with_base64_image(sentry_init, capture_events): + """Test that messages with base64 images are properly captured.""" + sentry_init( + integrations=[AnthropicIntegration(include_prompts=True)], + traces_sample_rate=1.0, + send_default_pii=True, + ) + events = capture_events() + client = Anthropic(api_key="z") + client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) + + messages = [ + { + "role": "user", + "content": [ + {"type": "text", "text": "What's in this image?"}, + { + "type": "image", + "source": { + "type": "base64", + "media_type": "image/jpeg", + "data": "base64encodeddatahere...", + }, + }, + ], + } + ] + + with start_transaction(name="anthropic"): + client.messages.create(max_tokens=1024, messages=messages, model="model") + + assert len(events) == 1 + (event,) = events + (span,) = event["spans"] + + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] + stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + + assert len(stored_messages) == 1 + assert stored_messages[0]["role"] == "user" + content = stored_messages[0]["content"] + assert len(content) == 2 + assert content[0] == {"type": "text", "text": "What's in this image?"} + assert content[1] == { + "type": "blob", + "modality": "image", + "mime_type": "image/jpeg", + "content": "base64encodeddatahere...", + } + + +def test_message_with_url_image(sentry_init, capture_events): + """Test that messages with URL-referenced images are properly captured.""" + sentry_init( + integrations=[AnthropicIntegration(include_prompts=True)], + traces_sample_rate=1.0, + send_default_pii=True, + ) + events = capture_events() + client = Anthropic(api_key="z") + client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) + + messages = [ + { + "role": "user", + "content": [ + {"type": "text", "text": "Describe this image."}, + { + "type": "image", + "source": { + "type": "url", + "url": "https://example.com/photo.png", + }, + }, + ], + } + ] + + with start_transaction(name="anthropic"): + client.messages.create(max_tokens=1024, messages=messages, model="model") + + assert len(events) == 1 + (event,) = events + (span,) = event["spans"] + + stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + content = stored_messages[0]["content"] + assert content[1] == { + "type": "uri", + "modality": "image", + "mime_type": "", + "uri": "https://example.com/photo.png", + } + + +def test_message_with_file_image(sentry_init, capture_events): + """Test that messages with file_id-referenced images are properly captured.""" + sentry_init( + integrations=[AnthropicIntegration(include_prompts=True)], + traces_sample_rate=1.0, + send_default_pii=True, + ) + events = capture_events() + client = Anthropic(api_key="z") + client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) + + messages = [ + { + "role": "user", + "content": [ + {"type": "text", "text": "What do you see?"}, + { + "type": "image", + "source": { + "type": "file", + "file_id": "file_img_12345", + "media_type": "image/webp", + }, + }, + ], + } + ] + + with start_transaction(name="anthropic"): + client.messages.create(max_tokens=1024, messages=messages, model="model") + + assert len(events) == 1 + (event,) = events + (span,) = event["spans"] + + stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + content = stored_messages[0]["content"] + assert content[1] == { + "type": "file", + "modality": "image", + "mime_type": "image/webp", + "file_id": "file_img_12345", + } + + +def test_message_with_base64_pdf(sentry_init, capture_events): + """Test that messages with base64-encoded PDF documents are properly captured.""" + sentry_init( + integrations=[AnthropicIntegration(include_prompts=True)], + traces_sample_rate=1.0, + send_default_pii=True, + ) + events = capture_events() + client = Anthropic(api_key="z") + client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) + + messages = [ + { + "role": "user", + "content": [ + {"type": "text", "text": "Summarize this document."}, + { + "type": "document", + "source": { + "type": "base64", + "media_type": "application/pdf", + "data": "JVBERi0xLjQKJeLj...base64pdfdata", + }, + }, + ], + } + ] + + with start_transaction(name="anthropic"): + client.messages.create(max_tokens=1024, messages=messages, model="model") + + assert len(events) == 1 + (event,) = events + (span,) = event["spans"] + + stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + content = stored_messages[0]["content"] + assert content[1] == { + "type": "blob", + "modality": "document", + "mime_type": "application/pdf", + "content": "JVBERi0xLjQKJeLj...base64pdfdata", + } + + +def test_message_with_url_pdf(sentry_init, capture_events): + """Test that messages with URL-referenced PDF documents are properly captured.""" + sentry_init( + integrations=[AnthropicIntegration(include_prompts=True)], + traces_sample_rate=1.0, + send_default_pii=True, + ) + events = capture_events() + client = Anthropic(api_key="z") + client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) + + messages = [ + { + "role": "user", + "content": [ + {"type": "text", "text": "What is in this PDF?"}, + { + "type": "document", + "source": { + "type": "url", + "url": "https://example.com/report.pdf", + }, + }, + ], + } + ] + + with start_transaction(name="anthropic"): + client.messages.create(max_tokens=1024, messages=messages, model="model") + + assert len(events) == 1 + (event,) = events + (span,) = event["spans"] + + stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + content = stored_messages[0]["content"] + assert content[1] == { + "type": "uri", + "modality": "document", + "mime_type": "", + "uri": "https://example.com/report.pdf", + } + + +def test_message_with_file_document(sentry_init, capture_events): + """Test that messages with file_id-referenced documents are properly captured.""" + sentry_init( + integrations=[AnthropicIntegration(include_prompts=True)], + traces_sample_rate=1.0, + send_default_pii=True, + ) + events = capture_events() + client = Anthropic(api_key="z") + client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) + + messages = [ + { + "role": "user", + "content": [ + {"type": "text", "text": "Analyze this document."}, + { + "type": "document", + "source": { + "type": "file", + "file_id": "file_doc_67890", + "media_type": "application/pdf", + }, + }, + ], + } + ] + + with start_transaction(name="anthropic"): + client.messages.create(max_tokens=1024, messages=messages, model="model") + + assert len(events) == 1 + (event,) = events + (span,) = event["spans"] + + stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + content = stored_messages[0]["content"] + assert content[1] == { + "type": "file", + "modality": "document", + "mime_type": "application/pdf", + "file_id": "file_doc_67890", + } + + +def test_message_with_mixed_content(sentry_init, capture_events): + """Test that messages with mixed content (text, images, documents) are properly captured.""" + sentry_init( + integrations=[AnthropicIntegration(include_prompts=True)], + traces_sample_rate=1.0, + send_default_pii=True, + ) + events = capture_events() + client = Anthropic(api_key="z") + client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) + + messages = [ + { + "role": "user", + "content": [ + {"type": "text", "text": "Compare this image with the document."}, + { + "type": "image", + "source": { + "type": "base64", + "media_type": "image/png", + "data": "iVBORw0KGgo...base64imagedata", + }, + }, + { + "type": "image", + "source": { + "type": "url", + "url": "https://example.com/comparison.jpg", + }, + }, + { + "type": "document", + "source": { + "type": "base64", + "media_type": "application/pdf", + "data": "JVBERi0xLjQK...base64pdfdata", + }, + }, + {"type": "text", "text": "Please provide a detailed analysis."}, + ], + } + ] + + with start_transaction(name="anthropic"): + client.messages.create(max_tokens=1024, messages=messages, model="model") + + assert len(events) == 1 + (event,) = events + (span,) = event["spans"] + + stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + content = stored_messages[0]["content"] + + assert len(content) == 5 + assert content[0] == { + "type": "text", + "text": "Compare this image with the document.", + } + assert content[1] == { + "type": "blob", + "modality": "image", + "mime_type": "image/png", + "content": "iVBORw0KGgo...base64imagedata", + } + assert content[2] == { + "type": "uri", + "modality": "image", + "mime_type": "", + "uri": "https://example.com/comparison.jpg", + } + assert content[3] == { + "type": "blob", + "modality": "document", + "mime_type": "application/pdf", + "content": "JVBERi0xLjQK...base64pdfdata", + } + assert content[4] == { + "type": "text", + "text": "Please provide a detailed analysis.", + } + + +def test_message_with_multiple_images_different_formats(sentry_init, capture_events): + """Test that messages with multiple images of different source types are handled.""" + sentry_init( + integrations=[AnthropicIntegration(include_prompts=True)], + traces_sample_rate=1.0, + send_default_pii=True, + ) + events = capture_events() + client = Anthropic(api_key="z") + client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) + + messages = [ + { + "role": "user", + "content": [ + { + "type": "image", + "source": { + "type": "base64", + "media_type": "image/jpeg", + "data": "base64data1...", + }, + }, + { + "type": "image", + "source": { + "type": "url", + "url": "https://example.com/img2.gif", + }, + }, + { + "type": "image", + "source": { + "type": "file", + "file_id": "file_img_789", + "media_type": "image/webp", + }, + }, + {"type": "text", "text": "Compare these three images."}, + ], + } + ] + + with start_transaction(name="anthropic"): + client.messages.create(max_tokens=1024, messages=messages, model="model") + + assert len(events) == 1 + (event,) = events + (span,) = event["spans"] + + stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + content = stored_messages[0]["content"] + + assert len(content) == 4 + assert content[0] == { + "type": "blob", + "modality": "image", + "mime_type": "image/jpeg", + "content": "base64data1...", + } + assert content[1] == { + "type": "uri", + "modality": "image", + "mime_type": "", + "uri": "https://example.com/img2.gif", + } + assert content[2] == { + "type": "file", + "modality": "image", + "mime_type": "image/webp", + "file_id": "file_img_789", + } + assert content[3] == {"type": "text", "text": "Compare these three images."} + + +def test_binary_content_not_stored_when_pii_disabled(sentry_init, capture_events): + """Test that binary content is not stored when send_default_pii is False.""" + sentry_init( + integrations=[AnthropicIntegration(include_prompts=True)], + traces_sample_rate=1.0, + send_default_pii=False, + ) + events = capture_events() + client = Anthropic(api_key="z") + client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) + + messages = [ + { + "role": "user", + "content": [ + {"type": "text", "text": "What's in this image?"}, + { + "type": "image", + "source": { + "type": "base64", + "media_type": "image/jpeg", + "data": "base64encodeddatahere...", + }, + }, + ], + } + ] + + with start_transaction(name="anthropic"): + client.messages.create(max_tokens=1024, messages=messages, model="model") + + assert len(events) == 1 + (event,) = events + (span,) = event["spans"] + + # Messages should not be stored + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] + + +def test_binary_content_not_stored_when_prompts_disabled(sentry_init, capture_events): + """Test that binary content is not stored when include_prompts is False.""" + sentry_init( + integrations=[AnthropicIntegration(include_prompts=False)], + traces_sample_rate=1.0, + send_default_pii=True, + ) + events = capture_events() + client = Anthropic(api_key="z") + client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) + + messages = [ + { + "role": "user", + "content": [ + {"type": "text", "text": "What's in this image?"}, + { + "type": "image", + "source": { + "type": "base64", + "media_type": "image/jpeg", + "data": "base64encodeddatahere...", + }, + }, + ], + } + ] + + with start_transaction(name="anthropic"): + client.messages.create(max_tokens=1024, messages=messages, model="model") + + assert len(events) == 1 + (event,) = events + (span,) = event["spans"] + + # Messages should not be stored + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] From 210c4f1dd3e4e59d336e8c0056ebe5f5ffa983b8 Mon Sep 17 00:00:00 2001 From: Fabian Schindler Date: Thu, 8 Jan 2026 14:51:26 +0100 Subject: [PATCH 06/10] fix(integrations): streamline content transformation and enhance redaction for tool results --- sentry_sdk/integrations/anthropic.py | 40 +++++-------------- .../integrations/anthropic/test_anthropic.py | 10 ++--- 2 files changed, 16 insertions(+), 34 deletions(-) diff --git a/sentry_sdk/integrations/anthropic.py b/sentry_sdk/integrations/anthropic.py index 41f1c387a7..6c17bc169e 100644 --- a/sentry_sdk/integrations/anthropic.py +++ b/sentry_sdk/integrations/anthropic.py @@ -262,40 +262,22 @@ def _set_input_data( and "content" in message and isinstance(message["content"], (list, tuple)) ): - has_tool_result = False transformed_content = [] for item in message["content"]: + # Skip tool_result items - they can contain images/documents + # with nested structures that are difficult to redact properly if isinstance(item, dict) and item.get("type") == "tool_result": - has_tool_result = True - normalized_messages.append( - { - "role": GEN_AI_ALLOWED_MESSAGE_ROLES.TOOL, - "content": { # type: ignore[dict-item] - "tool_use_id": item.get("tool_use_id"), - "output": _transform_message_content( - item.get("content") - ), - }, - } - ) - else: - # Transform content blocks (images, documents, etc.) - transformed_content.append( - _transform_content_block(item) - if isinstance(item, dict) - else item - ) + continue - # If there are non-tool-result items, add them as a message - if transformed_content and not has_tool_result: - normalized_messages.append( - { - "role": message.get("role"), - "content": transformed_content, - } + # Transform content blocks (images, documents, etc.) + transformed_content.append( + _transform_content_block(item) + if isinstance(item, dict) + else item ) - elif transformed_content and has_tool_result: - # Mixed content: tool results + other content + + # If there are non-tool-result items, add them as a message + if transformed_content: normalized_messages.append( { "role": message.get("role"), diff --git a/tests/integrations/anthropic/test_anthropic.py b/tests/integrations/anthropic/test_anthropic.py index 11ebebf666..838323b4bd 100644 --- a/tests/integrations/anthropic/test_anthropic.py +++ b/tests/integrations/anthropic/test_anthropic.py @@ -1689,7 +1689,7 @@ def test_message_with_base64_image(sentry_init, capture_events): "type": "blob", "modality": "image", "mime_type": "image/jpeg", - "content": "base64encodeddatahere...", + "content": "[Filtered]", } @@ -1823,7 +1823,7 @@ def test_message_with_base64_pdf(sentry_init, capture_events): "type": "blob", "modality": "document", "mime_type": "application/pdf", - "content": "JVBERi0xLjQKJeLj...base64pdfdata", + "content": "[Filtered]", } @@ -1979,7 +1979,7 @@ def test_message_with_mixed_content(sentry_init, capture_events): "type": "blob", "modality": "image", "mime_type": "image/png", - "content": "iVBORw0KGgo...base64imagedata", + "content": "[Filtered]", } assert content[2] == { "type": "uri", @@ -1991,7 +1991,7 @@ def test_message_with_mixed_content(sentry_init, capture_events): "type": "blob", "modality": "document", "mime_type": "application/pdf", - "content": "JVBERi0xLjQK...base64pdfdata", + "content": "[Filtered]", } assert content[4] == { "type": "text", @@ -2057,7 +2057,7 @@ def test_message_with_multiple_images_different_formats(sentry_init, capture_eve "type": "blob", "modality": "image", "mime_type": "image/jpeg", - "content": "base64data1...", + "content": "[Filtered]", } assert content[1] == { "type": "uri", From febcaa95f42c2e08c17d26f9de82bbf8b3438a13 Mon Sep 17 00:00:00 2001 From: Fabian Schindler Date: Tue, 13 Jan 2026 14:02:04 +0100 Subject: [PATCH 07/10] test: fix test failure --- tests/integrations/anthropic/test_anthropic.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/tests/integrations/anthropic/test_anthropic.py b/tests/integrations/anthropic/test_anthropic.py index 838323b4bd..004167a764 100644 --- a/tests/integrations/anthropic/test_anthropic.py +++ b/tests/integrations/anthropic/test_anthropic.py @@ -42,6 +42,7 @@ async def __call__(self, *args, **kwargs): from anthropic.types.content_block import ContentBlock as TextBlock from sentry_sdk import start_transaction, start_span +from sentry_sdk._types import BLOB_DATA_SUBSTITUTE from sentry_sdk.consts import OP, SPANDATA from sentry_sdk.integrations.anthropic import ( AnthropicIntegration, @@ -1689,7 +1690,7 @@ def test_message_with_base64_image(sentry_init, capture_events): "type": "blob", "modality": "image", "mime_type": "image/jpeg", - "content": "[Filtered]", + "content": BLOB_DATA_SUBSTITUTE, } @@ -1823,7 +1824,7 @@ def test_message_with_base64_pdf(sentry_init, capture_events): "type": "blob", "modality": "document", "mime_type": "application/pdf", - "content": "[Filtered]", + "content": BLOB_DATA_SUBSTITUTE, } @@ -1979,7 +1980,7 @@ def test_message_with_mixed_content(sentry_init, capture_events): "type": "blob", "modality": "image", "mime_type": "image/png", - "content": "[Filtered]", + "content": BLOB_DATA_SUBSTITUTE, } assert content[2] == { "type": "uri", @@ -1991,7 +1992,7 @@ def test_message_with_mixed_content(sentry_init, capture_events): "type": "blob", "modality": "document", "mime_type": "application/pdf", - "content": "[Filtered]", + "content": BLOB_DATA_SUBSTITUTE, } assert content[4] == { "type": "text", @@ -2057,7 +2058,7 @@ def test_message_with_multiple_images_different_formats(sentry_init, capture_eve "type": "blob", "modality": "image", "mime_type": "image/jpeg", - "content": "[Filtered]", + "content": BLOB_DATA_SUBSTITUTE, } assert content[1] == { "type": "uri", From cd97a489a88b4a68f50b785b59b260f3fdcc94e0 Mon Sep 17 00:00:00 2001 From: Fabian Schindler Date: Tue, 13 Jan 2026 14:19:31 +0100 Subject: [PATCH 08/10] fix: review comment --- sentry_sdk/integrations/anthropic.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sentry_sdk/integrations/anthropic.py b/sentry_sdk/integrations/anthropic.py index 6c17bc169e..56e1bcba85 100644 --- a/sentry_sdk/integrations/anthropic.py +++ b/sentry_sdk/integrations/anthropic.py @@ -133,7 +133,7 @@ def _transform_content_block(content_block: "dict[str, Any]") -> "dict[str, Any] # Handle image blocks if block_type == "image": - source = content_block.get("source", {}) + source = content_block.get("source") or {} source_type = source.get("type") media_type = source.get("media_type", "") @@ -161,7 +161,7 @@ def _transform_content_block(content_block: "dict[str, Any]") -> "dict[str, Any] # Handle document blocks (PDFs, etc.) elif block_type == "document": - source = content_block.get("source", {}) + source = content_block.get("source") or {} source_type = source.get("type") media_type = source.get("media_type", "") From bd781654c11ef4f1892ad8891296da92e250bb60 Mon Sep 17 00:00:00 2001 From: Fabian Schindler Date: Thu, 15 Jan 2026 14:01:42 +0100 Subject: [PATCH 09/10] feat(ai): Add shared content transformation functions for multimodal AI messages Add transform_content_part() and transform_message_content() functions to standardize content part handling across all AI integrations. These functions transform various SDK-specific formats (OpenAI, Anthropic, Google, LangChain) into a unified format: - blob: base64-encoded binary data - uri: URL references (including file URIs) - file: file ID references Also adds get_modality_from_mime_type() helper to infer content modality (image/audio/video/document) from MIME types. --- sentry_sdk/ai/utils.py | 237 ++++++++++++++++++ tests/test_ai_monitoring.py | 484 ++++++++++++++++++++++++++++++++++++ 2 files changed, 721 insertions(+) diff --git a/sentry_sdk/ai/utils.py b/sentry_sdk/ai/utils.py index 71f7544a1c..b7b3b790d2 100644 --- a/sentry_sdk/ai/utils.py +++ b/sentry_sdk/ai/utils.py @@ -72,6 +72,243 @@ def parse_data_uri(url: str) -> "Tuple[str, str]": return mime_type, content +def get_modality_from_mime_type(mime_type: str) -> str: + """ + Infer the content modality from a MIME type string. + + Args: + mime_type: A MIME type string (e.g., "image/jpeg", "audio/mp3") + + Returns: + One of: "image", "audio", "video", or "document" + Defaults to "image" for unknown or empty MIME types. + + Examples: + "image/jpeg" -> "image" + "audio/mp3" -> "audio" + "video/mp4" -> "video" + "application/pdf" -> "document" + "text/plain" -> "document" + """ + if not mime_type: + return "image" # Default fallback + + mime_lower = mime_type.lower() + if mime_lower.startswith("image/"): + return "image" + elif mime_lower.startswith("audio/"): + return "audio" + elif mime_lower.startswith("video/"): + return "video" + elif mime_lower.startswith("application/") or mime_lower.startswith("text/"): + return "document" + else: + return "image" # Default fallback for unknown types + + +def transform_content_part( + content_part: "Dict[str, Any]", +) -> "Optional[Dict[str, Any]]": + """ + Transform a content part from various AI SDK formats to Sentry's standardized format. + + Supported input formats: + - OpenAI/LiteLLM: {"type": "image_url", "image_url": {"url": "..."}} + - Anthropic: {"type": "image|document", "source": {"type": "base64|url|file", ...}} + - Google: {"inline_data": {...}} or {"file_data": {...}} + - Generic: {"type": "image|audio|video|file", "base64|url|file_id": "...", "mime_type": "..."} + + Output format (one of): + - {"type": "blob", "modality": "...", "mime_type": "...", "content": "..."} + - {"type": "uri", "modality": "...", "mime_type": "...", "uri": "..."} + - {"type": "file", "modality": "...", "mime_type": "...", "file_id": "..."} + + Args: + content_part: A dictionary representing a content part from an AI SDK + + Returns: + A transformed dictionary in standardized format, or None if the format + is unrecognized or transformation fails. + """ + if not isinstance(content_part, dict): + return None + + block_type = content_part.get("type") + + # Handle OpenAI/LiteLLM image_url format + # {"type": "image_url", "image_url": {"url": "..."}} or {"type": "image_url", "image_url": "..."} + if block_type == "image_url": + image_url_data = content_part.get("image_url") + if isinstance(image_url_data, str): + url = image_url_data + elif isinstance(image_url_data, dict): + url = image_url_data.get("url", "") + else: + return None + + if not url: + return None + + # Check if it's a data URI (base64 encoded) + if url.startswith("data:"): + try: + mime_type, content = parse_data_uri(url) + return { + "type": "blob", + "modality": get_modality_from_mime_type(mime_type), + "mime_type": mime_type, + "content": content, + } + except ValueError: + # If parsing fails, return as URI + return { + "type": "uri", + "modality": "image", + "mime_type": "", + "uri": url, + } + else: + # Regular URL + return { + "type": "uri", + "modality": "image", + "mime_type": "", + "uri": url, + } + + # Handle Anthropic format with source dict + # {"type": "image|document", "source": {"type": "base64|url|file", "media_type": "...", "data|url|file_id": "..."}} + if block_type in ("image", "document") and "source" in content_part: + source = content_part.get("source") + if not isinstance(source, dict): + return None + + source_type = source.get("type") + media_type = source.get("media_type", "") + modality = ( + "document" + if block_type == "document" + else get_modality_from_mime_type(media_type) + ) + + if source_type == "base64": + return { + "type": "blob", + "modality": modality, + "mime_type": media_type, + "content": source.get("data", ""), + } + elif source_type == "url": + return { + "type": "uri", + "modality": modality, + "mime_type": media_type, + "uri": source.get("url", ""), + } + elif source_type == "file": + return { + "type": "file", + "modality": modality, + "mime_type": media_type, + "file_id": source.get("file_id", ""), + } + return None + + # Handle Google inline_data format + # {"inline_data": {"mime_type": "...", "data": "..."}} + if "inline_data" in content_part: + inline_data = content_part.get("inline_data") + if isinstance(inline_data, dict): + mime_type = inline_data.get("mime_type", "") + return { + "type": "blob", + "modality": get_modality_from_mime_type(mime_type), + "mime_type": mime_type, + "content": inline_data.get("data", ""), + } + return None + + # Handle Google file_data format + # {"file_data": {"mime_type": "...", "file_uri": "..."}} + if "file_data" in content_part: + file_data = content_part.get("file_data") + if isinstance(file_data, dict): + mime_type = file_data.get("mime_type", "") + return { + "type": "uri", + "modality": get_modality_from_mime_type(mime_type), + "mime_type": mime_type, + "uri": file_data.get("file_uri", ""), + } + return None + + # Handle generic format with direct fields (LangChain style) + # {"type": "image|audio|video|file", "base64|url|file_id": "...", "mime_type": "..."} + if block_type in ("image", "audio", "video", "file"): + mime_type = content_part.get("mime_type", "") + modality = block_type if block_type != "file" else "document" + + # Check for base64 encoded content + if "base64" in content_part: + return { + "type": "blob", + "modality": modality, + "mime_type": mime_type, + "content": content_part.get("base64", ""), + } + # Check for URL reference + elif "url" in content_part: + return { + "type": "uri", + "modality": modality, + "mime_type": mime_type, + "uri": content_part.get("url", ""), + } + # Check for file_id reference + elif "file_id" in content_part: + return { + "type": "file", + "modality": modality, + "mime_type": mime_type, + "file_id": content_part.get("file_id", ""), + } + + # Unrecognized format + return None + + +def transform_message_content(content: "Any") -> "Any": + """ + Transform message content, handling both string content and list of content blocks. + + For list content, each item is transformed using transform_content_part(). + Items that cannot be transformed (return None) are kept as-is. + + Args: + content: Message content - can be a string, list of content blocks, or other + + Returns: + - String content: returned as-is + - List content: list with each transformable item converted to standardized format + - Other: returned as-is + """ + if isinstance(content, str): + return content + + if isinstance(content, (list, tuple)): + transformed = [] + for item in content: + if isinstance(item, dict): + result = transform_content_part(item) + # If transformation succeeded, use the result; otherwise keep original + transformed.append(result if result is not None else item) + else: + transformed.append(item) + return transformed + + return content + + def _normalize_data(data: "Any", unpack: bool = True) -> "Any": # convert pydantic data (e.g. OpenAI v1+) to json compatible format if hasattr(data, "model_dump"): diff --git a/tests/test_ai_monitoring.py b/tests/test_ai_monitoring.py index 1ff354f473..209d24e502 100644 --- a/tests/test_ai_monitoring.py +++ b/tests/test_ai_monitoring.py @@ -19,6 +19,9 @@ _find_truncation_index, parse_data_uri, redact_blob_message_parts, + get_modality_from_mime_type, + transform_content_part, + transform_message_content, ) from sentry_sdk.serializer import serialize from sentry_sdk.utils import safe_serialize @@ -842,3 +845,484 @@ def test_handles_uri_without_data_prefix(self): assert mime_type == "image/jpeg" assert content == "/9j/4AAQ" + + +class TestGetModalityFromMimeType: + def test_image_mime_types(self): + """Test that image MIME types return 'image' modality""" + assert get_modality_from_mime_type("image/jpeg") == "image" + assert get_modality_from_mime_type("image/png") == "image" + assert get_modality_from_mime_type("image/gif") == "image" + assert get_modality_from_mime_type("image/webp") == "image" + assert get_modality_from_mime_type("IMAGE/JPEG") == "image" # case insensitive + + def test_audio_mime_types(self): + """Test that audio MIME types return 'audio' modality""" + assert get_modality_from_mime_type("audio/mp3") == "audio" + assert get_modality_from_mime_type("audio/wav") == "audio" + assert get_modality_from_mime_type("audio/ogg") == "audio" + assert get_modality_from_mime_type("AUDIO/MP3") == "audio" # case insensitive + + def test_video_mime_types(self): + """Test that video MIME types return 'video' modality""" + assert get_modality_from_mime_type("video/mp4") == "video" + assert get_modality_from_mime_type("video/webm") == "video" + assert get_modality_from_mime_type("video/quicktime") == "video" + assert get_modality_from_mime_type("VIDEO/MP4") == "video" # case insensitive + + def test_document_mime_types(self): + """Test that application and text MIME types return 'document' modality""" + assert get_modality_from_mime_type("application/pdf") == "document" + assert get_modality_from_mime_type("application/json") == "document" + assert get_modality_from_mime_type("text/plain") == "document" + assert get_modality_from_mime_type("text/html") == "document" + + def test_empty_mime_type_returns_image(self): + """Test that empty MIME type defaults to 'image'""" + assert get_modality_from_mime_type("") == "image" + + def test_none_mime_type_returns_image(self): + """Test that None-like values default to 'image'""" + assert get_modality_from_mime_type(None) == "image" + + def test_unknown_mime_type_returns_image(self): + """Test that unknown MIME types default to 'image'""" + assert get_modality_from_mime_type("unknown/type") == "image" + assert get_modality_from_mime_type("custom/format") == "image" + + +class TestTransformContentPart: + # OpenAI/LiteLLM format tests + def test_openai_image_url_with_data_uri(self): + """Test transforming OpenAI image_url with base64 data URI""" + content_part = { + "type": "image_url", + "image_url": {"url": "data:image/jpeg;base64,/9j/4AAQSkZJRg=="}, + } + result = transform_content_part(content_part) + + assert result == { + "type": "blob", + "modality": "image", + "mime_type": "image/jpeg", + "content": "/9j/4AAQSkZJRg==", + } + + def test_openai_image_url_with_regular_url(self): + """Test transforming OpenAI image_url with regular URL""" + content_part = { + "type": "image_url", + "image_url": {"url": "https://example.com/image.jpg"}, + } + result = transform_content_part(content_part) + + assert result == { + "type": "uri", + "modality": "image", + "mime_type": "", + "uri": "https://example.com/image.jpg", + } + + def test_openai_image_url_string_format(self): + """Test transforming OpenAI image_url where image_url is a string""" + content_part = { + "type": "image_url", + "image_url": "https://example.com/image.jpg", + } + result = transform_content_part(content_part) + + assert result == { + "type": "uri", + "modality": "image", + "mime_type": "", + "uri": "https://example.com/image.jpg", + } + + def test_openai_image_url_invalid_data_uri(self): + """Test transforming OpenAI image_url with invalid data URI falls back to URI""" + content_part = { + "type": "image_url", + "image_url": {"url": "data:image/jpeg;base64"}, # Missing comma + } + result = transform_content_part(content_part) + + assert result == { + "type": "uri", + "modality": "image", + "mime_type": "", + "uri": "data:image/jpeg;base64", + } + + # Anthropic format tests + def test_anthropic_image_base64(self): + """Test transforming Anthropic image with base64 source""" + content_part = { + "type": "image", + "source": { + "type": "base64", + "media_type": "image/png", + "data": "iVBORw0KGgo=", + }, + } + result = transform_content_part(content_part) + + assert result == { + "type": "blob", + "modality": "image", + "mime_type": "image/png", + "content": "iVBORw0KGgo=", + } + + def test_anthropic_image_url(self): + """Test transforming Anthropic image with URL source""" + content_part = { + "type": "image", + "source": { + "type": "url", + "media_type": "image/jpeg", + "url": "https://example.com/image.jpg", + }, + } + result = transform_content_part(content_part) + + assert result == { + "type": "uri", + "modality": "image", + "mime_type": "image/jpeg", + "uri": "https://example.com/image.jpg", + } + + def test_anthropic_image_file(self): + """Test transforming Anthropic image with file source""" + content_part = { + "type": "image", + "source": { + "type": "file", + "media_type": "image/jpeg", + "file_id": "file_123", + }, + } + result = transform_content_part(content_part) + + assert result == { + "type": "file", + "modality": "image", + "mime_type": "image/jpeg", + "file_id": "file_123", + } + + def test_anthropic_document_base64(self): + """Test transforming Anthropic document with base64 source""" + content_part = { + "type": "document", + "source": { + "type": "base64", + "media_type": "application/pdf", + "data": "JVBERi0xLjQ=", + }, + } + result = transform_content_part(content_part) + + assert result == { + "type": "blob", + "modality": "document", + "mime_type": "application/pdf", + "content": "JVBERi0xLjQ=", + } + + def test_anthropic_document_url(self): + """Test transforming Anthropic document with URL source""" + content_part = { + "type": "document", + "source": { + "type": "url", + "media_type": "application/pdf", + "url": "https://example.com/doc.pdf", + }, + } + result = transform_content_part(content_part) + + assert result == { + "type": "uri", + "modality": "document", + "mime_type": "application/pdf", + "uri": "https://example.com/doc.pdf", + } + + # Google format tests + def test_google_inline_data(self): + """Test transforming Google inline_data format""" + content_part = { + "inline_data": { + "mime_type": "image/jpeg", + "data": "/9j/4AAQSkZJRg==", + } + } + result = transform_content_part(content_part) + + assert result == { + "type": "blob", + "modality": "image", + "mime_type": "image/jpeg", + "content": "/9j/4AAQSkZJRg==", + } + + def test_google_file_data(self): + """Test transforming Google file_data format""" + content_part = { + "file_data": { + "mime_type": "video/mp4", + "file_uri": "gs://bucket/video.mp4", + } + } + result = transform_content_part(content_part) + + assert result == { + "type": "uri", + "modality": "video", + "mime_type": "video/mp4", + "uri": "gs://bucket/video.mp4", + } + + def test_google_inline_data_audio(self): + """Test transforming Google inline_data with audio""" + content_part = { + "inline_data": { + "mime_type": "audio/wav", + "data": "UklGRiQA", + } + } + result = transform_content_part(content_part) + + assert result == { + "type": "blob", + "modality": "audio", + "mime_type": "audio/wav", + "content": "UklGRiQA", + } + + # Generic format tests (LangChain style) + def test_generic_image_base64(self): + """Test transforming generic format with base64""" + content_part = { + "type": "image", + "base64": "/9j/4AAQSkZJRg==", + "mime_type": "image/jpeg", + } + result = transform_content_part(content_part) + + assert result == { + "type": "blob", + "modality": "image", + "mime_type": "image/jpeg", + "content": "/9j/4AAQSkZJRg==", + } + + def test_generic_audio_url(self): + """Test transforming generic format with URL""" + content_part = { + "type": "audio", + "url": "https://example.com/audio.mp3", + "mime_type": "audio/mp3", + } + result = transform_content_part(content_part) + + assert result == { + "type": "uri", + "modality": "audio", + "mime_type": "audio/mp3", + "uri": "https://example.com/audio.mp3", + } + + def test_generic_file_with_file_id(self): + """Test transforming generic format with file_id""" + content_part = { + "type": "file", + "file_id": "file_456", + "mime_type": "application/pdf", + } + result = transform_content_part(content_part) + + assert result == { + "type": "file", + "modality": "document", + "mime_type": "application/pdf", + "file_id": "file_456", + } + + def test_generic_video_base64(self): + """Test transforming generic video format""" + content_part = { + "type": "video", + "base64": "AAAA", + "mime_type": "video/mp4", + } + result = transform_content_part(content_part) + + assert result == { + "type": "blob", + "modality": "video", + "mime_type": "video/mp4", + "content": "AAAA", + } + + # Edge cases and error handling + def test_text_block_returns_none(self): + """Test that text blocks return None (not transformed)""" + content_part = {"type": "text", "text": "Hello world"} + result = transform_content_part(content_part) + + assert result is None + + def test_non_dict_returns_none(self): + """Test that non-dict input returns None""" + assert transform_content_part("string") is None + assert transform_content_part(123) is None + assert transform_content_part(None) is None + assert transform_content_part([1, 2, 3]) is None + + def test_empty_dict_returns_none(self): + """Test that empty dict returns None""" + assert transform_content_part({}) is None + + def test_unknown_type_returns_none(self): + """Test that unknown type returns None""" + content_part = {"type": "unknown", "data": "something"} + assert transform_content_part(content_part) is None + + def test_openai_image_url_empty_url_returns_none(self): + """Test that image_url with empty URL returns None""" + content_part = {"type": "image_url", "image_url": {"url": ""}} + assert transform_content_part(content_part) is None + + def test_anthropic_invalid_source_returns_none(self): + """Test that Anthropic format with invalid source returns None""" + content_part = {"type": "image", "source": "not_a_dict"} + assert transform_content_part(content_part) is None + + def test_anthropic_unknown_source_type_returns_none(self): + """Test that Anthropic format with unknown source type returns None""" + content_part = { + "type": "image", + "source": {"type": "unknown", "data": "something"}, + } + assert transform_content_part(content_part) is None + + def test_google_inline_data_not_dict_returns_none(self): + """Test that Google inline_data with non-dict value returns None""" + content_part = {"inline_data": "not_a_dict"} + assert transform_content_part(content_part) is None + + def test_google_file_data_not_dict_returns_none(self): + """Test that Google file_data with non-dict value returns None""" + content_part = {"file_data": "not_a_dict"} + assert transform_content_part(content_part) is None + + +class TestTransformMessageContent: + def test_string_content_returned_as_is(self): + """Test that string content is returned unchanged""" + content = "Hello, world!" + result = transform_message_content(content) + + assert result == "Hello, world!" + + def test_list_with_transformable_items(self): + """Test transforming a list with transformable content parts""" + content = [ + {"type": "text", "text": "What's in this image?"}, + { + "type": "image_url", + "image_url": {"url": "data:image/jpeg;base64,/9j/4AAQ"}, + }, + ] + result = transform_message_content(content) + + assert len(result) == 2 + # Text block should be unchanged (transform returns None, so original kept) + assert result[0] == {"type": "text", "text": "What's in this image?"} + # Image should be transformed + assert result[1] == { + "type": "blob", + "modality": "image", + "mime_type": "image/jpeg", + "content": "/9j/4AAQ", + } + + def test_list_with_non_dict_items(self): + """Test that non-dict items in list are kept as-is""" + content = ["text string", 123, {"type": "text", "text": "hi"}] + result = transform_message_content(content) + + assert result == ["text string", 123, {"type": "text", "text": "hi"}] + + def test_tuple_content(self): + """Test that tuple content is also handled""" + content = ( + {"type": "text", "text": "Hello"}, + { + "type": "image_url", + "image_url": {"url": "https://example.com/img.jpg"}, + }, + ) + result = transform_message_content(content) + + assert len(result) == 2 + assert result[0] == {"type": "text", "text": "Hello"} + assert result[1] == { + "type": "uri", + "modality": "image", + "mime_type": "", + "uri": "https://example.com/img.jpg", + } + + def test_other_types_returned_as_is(self): + """Test that other types are returned unchanged""" + assert transform_message_content(123) == 123 + assert transform_message_content(None) is None + assert transform_message_content({"key": "value"}) == {"key": "value"} + + def test_mixed_content_types(self): + """Test transforming mixed content with multiple formats""" + content = [ + {"type": "text", "text": "Look at these:"}, + { + "type": "image_url", + "image_url": {"url": "data:image/png;base64,iVBORw0"}, + }, + { + "type": "image", + "source": { + "type": "base64", + "media_type": "image/jpeg", + "data": "/9j/4AAQ", + }, + }, + {"inline_data": {"mime_type": "audio/wav", "data": "UklGRiQA"}}, + ] + result = transform_message_content(content) + + assert len(result) == 4 + assert result[0] == {"type": "text", "text": "Look at these:"} + assert result[1] == { + "type": "blob", + "modality": "image", + "mime_type": "image/png", + "content": "iVBORw0", + } + assert result[2] == { + "type": "blob", + "modality": "image", + "mime_type": "image/jpeg", + "content": "/9j/4AAQ", + } + assert result[3] == { + "type": "blob", + "modality": "audio", + "mime_type": "audio/wav", + "content": "UklGRiQA", + } + + def test_empty_list(self): + """Test that empty list is returned as empty list""" + assert transform_message_content([]) == [] From 67c7da6b49fc9eeffe876e71008f854473266a69 Mon Sep 17 00:00:00 2001 From: Fabian Schindler Date: Thu, 15 Jan 2026 14:07:49 +0100 Subject: [PATCH 10/10] refactor(anthropic): Use shared transform_content_part from ai/utils Replace local _transform_content_block and _transform_message_content functions with the shared transform_content_part function. Keep Anthropic-specific handling for text-type documents via wrapper function. --- sentry_sdk/integrations/anthropic.py | 115 ++++-------------- .../integrations/anthropic/test_anthropic.py | 55 +++++---- 2 files changed, 51 insertions(+), 119 deletions(-) diff --git a/sentry_sdk/integrations/anthropic.py b/sentry_sdk/integrations/anthropic.py index 730178ee86..1552b495aa 100644 --- a/sentry_sdk/integrations/anthropic.py +++ b/sentry_sdk/integrations/anthropic.py @@ -11,6 +11,7 @@ normalize_message_roles, truncate_and_annotate_messages, get_start_span_function, + transform_content_part, ) from sentry_sdk.consts import OP, SPANDATA, SPANSTATUS from sentry_sdk.integrations import _check_minimum_version, DidNotEnable, Integration @@ -122,102 +123,25 @@ def _collect_ai_data( return model, input_tokens, output_tokens, content_blocks -def _transform_content_block(content_block: "dict[str, Any]") -> "dict[str, Any]": +def _transform_anthropic_content_block( + content_block: "dict[str, Any]", +) -> "dict[str, Any]": """ - Transform an Anthropic content block to a Sentry-compatible format. - - Handles binary data (images, documents) by converting them to the standardized format: - - base64 encoded data -> type: "blob" - - URL references -> type: "uri" - - file_id references -> type: "file" + Transform an Anthropic content block using the shared transform_content_part function, + with special handling for Anthropic's text-type documents. """ - block_type = content_block.get("type") - - # Handle image blocks - if block_type == "image": - source = content_block.get("source") or {} - source_type = source.get("type") - media_type = source.get("media_type", "") - - if source_type == "base64": - return { - "type": "blob", - "modality": "image", - "mime_type": media_type, - "content": source.get("data", ""), - } - elif source_type == "url": - return { - "type": "uri", - "modality": "image", - "mime_type": media_type, - "uri": source.get("url", ""), - } - elif source_type == "file": - return { - "type": "file", - "modality": "image", - "mime_type": media_type, - "file_id": source.get("file_id", ""), - } - - # Handle document blocks (PDFs, etc.) - elif block_type == "document": - source = content_block.get("source") or {} - source_type = source.get("type") - media_type = source.get("media_type", "") - - if source_type == "base64": - return { - "type": "blob", - "modality": "document", - "mime_type": media_type, - "content": source.get("data", ""), - } - elif source_type == "url": - return { - "type": "uri", - "modality": "document", - "mime_type": media_type, - "uri": source.get("url", ""), - } - elif source_type == "file": - return { - "type": "file", - "modality": "document", - "mime_type": media_type, - "file_id": source.get("file_id", ""), - } - elif source_type == "text": - # Plain text documents - keep as is but mark the type + # Handle Anthropic's text-type documents specially (not covered by shared function) + if content_block.get("type") == "document": + source = content_block.get("source") + if isinstance(source, dict) and source.get("type") == "text": return { "type": "text", "text": source.get("data", ""), } - # For text blocks and other types, return as-is - return content_block - - -def _transform_message_content( - content: "Any", -) -> "Any": - """ - Transform message content, handling both string content and list of content blocks. - """ - if isinstance(content, str): - return content - - if isinstance(content, (list, tuple)): - transformed = [] - for block in content: - if isinstance(block, dict): - transformed.append(_transform_content_block(block)) - else: - transformed.append(block) - return transformed - - return content + # Use shared transformation for standard formats + result = transform_content_part(content_block) + return result if result is not None else content_block def _set_input_data( @@ -273,7 +197,7 @@ def _set_input_data( # Transform content blocks (images, documents, etc.) transformed_content.append( - _transform_content_block(item) + _transform_anthropic_content_block(item) if isinstance(item, dict) else item ) @@ -290,9 +214,14 @@ def _set_input_data( # Transform content for non-list messages or assistant messages transformed_message = message.copy() if "content" in transformed_message: - transformed_message["content"] = _transform_message_content( - transformed_message["content"] - ) + content = transformed_message["content"] + if isinstance(content, (list, tuple)): + transformed_message["content"] = [ + _transform_anthropic_content_block(item) + if isinstance(item, dict) + else item + for item in content + ] normalized_messages.append(transformed_message) role_normalized_messages = normalize_message_roles(normalized_messages) diff --git a/tests/integrations/anthropic/test_anthropic.py b/tests/integrations/anthropic/test_anthropic.py index 004167a764..a8b2feba37 100644 --- a/tests/integrations/anthropic/test_anthropic.py +++ b/tests/integrations/anthropic/test_anthropic.py @@ -48,9 +48,9 @@ async def __call__(self, *args, **kwargs): AnthropicIntegration, _set_output_data, _collect_ai_data, - _transform_content_block, - _transform_message_content, + _transform_anthropic_content_block, ) +from sentry_sdk.ai.utils import transform_content_part, transform_message_content from sentry_sdk.utils import package_version @@ -1451,10 +1451,10 @@ def test_system_prompt_with_complex_structure(sentry_init, capture_events): assert stored_messages[1]["content"] == "Hello" -# Tests for _transform_content_block helper function +# Tests for transform_content_part (shared) and _transform_anthropic_content_block helper functions -def test_transform_content_block_base64_image(): +def test_transform_content_part_anthropic_base64_image(): """Test that base64 encoded images are transformed to blob format.""" content_block = { "type": "image", @@ -1465,7 +1465,7 @@ def test_transform_content_block_base64_image(): }, } - result = _transform_content_block(content_block) + result = transform_content_part(content_block) assert result == { "type": "blob", @@ -1475,7 +1475,7 @@ def test_transform_content_block_base64_image(): } -def test_transform_content_block_url_image(): +def test_transform_content_part_anthropic_url_image(): """Test that URL-referenced images are transformed to uri format.""" content_block = { "type": "image", @@ -1485,7 +1485,7 @@ def test_transform_content_block_url_image(): }, } - result = _transform_content_block(content_block) + result = transform_content_part(content_block) assert result == { "type": "uri", @@ -1495,7 +1495,7 @@ def test_transform_content_block_url_image(): } -def test_transform_content_block_file_image(): +def test_transform_content_part_anthropic_file_image(): """Test that file_id-referenced images are transformed to file format.""" content_block = { "type": "image", @@ -1505,7 +1505,7 @@ def test_transform_content_block_file_image(): }, } - result = _transform_content_block(content_block) + result = transform_content_part(content_block) assert result == { "type": "file", @@ -1515,7 +1515,7 @@ def test_transform_content_block_file_image(): } -def test_transform_content_block_base64_document(): +def test_transform_content_part_anthropic_base64_document(): """Test that base64 encoded PDFs are transformed to blob format.""" content_block = { "type": "document", @@ -1526,7 +1526,7 @@ def test_transform_content_block_base64_document(): }, } - result = _transform_content_block(content_block) + result = transform_content_part(content_block) assert result == { "type": "blob", @@ -1536,7 +1536,7 @@ def test_transform_content_block_base64_document(): } -def test_transform_content_block_url_document(): +def test_transform_content_part_anthropic_url_document(): """Test that URL-referenced documents are transformed to uri format.""" content_block = { "type": "document", @@ -1546,7 +1546,7 @@ def test_transform_content_block_url_document(): }, } - result = _transform_content_block(content_block) + result = transform_content_part(content_block) assert result == { "type": "uri", @@ -1556,7 +1556,7 @@ def test_transform_content_block_url_document(): } -def test_transform_content_block_file_document(): +def test_transform_content_part_anthropic_file_document(): """Test that file_id-referenced documents are transformed to file format.""" content_block = { "type": "document", @@ -1567,7 +1567,7 @@ def test_transform_content_block_file_document(): }, } - result = _transform_content_block(content_block) + result = transform_content_part(content_block) assert result == { "type": "file", @@ -1577,8 +1577,8 @@ def test_transform_content_block_file_document(): } -def test_transform_content_block_text_document(): - """Test that plain text documents are transformed correctly.""" +def test_transform_anthropic_content_block_text_document(): + """Test that plain text documents are transformed correctly (Anthropic-specific).""" content_block = { "type": "document", "source": { @@ -1588,7 +1588,8 @@ def test_transform_content_block_text_document(): }, } - result = _transform_content_block(content_block) + # Use Anthropic-specific helper for text-type documents + result = _transform_anthropic_content_block(content_block) assert result == { "type": "text", @@ -1596,26 +1597,27 @@ def test_transform_content_block_text_document(): } -def test_transform_content_block_text_block(): - """Test that regular text blocks are returned as-is.""" +def test_transform_content_part_text_block(): + """Test that regular text blocks return None (not transformed).""" content_block = { "type": "text", "text": "Hello, world!", } - result = _transform_content_block(content_block) + # Shared transform_content_part returns None for text blocks + result = transform_content_part(content_block) - assert result == content_block + assert result is None def test_transform_message_content_string(): """Test that string content is returned as-is.""" - result = _transform_message_content("Hello, world!") + result = transform_message_content("Hello, world!") assert result == "Hello, world!" -def test_transform_message_content_list(): - """Test that list content is transformed correctly.""" +def test_transform_message_content_list_anthropic(): + """Test that list content with Anthropic format is transformed correctly.""" content = [ {"type": "text", "text": "Hello!"}, { @@ -1628,9 +1630,10 @@ def test_transform_message_content_list(): }, ] - result = _transform_message_content(content) + result = transform_message_content(content) assert len(result) == 2 + # Text block stays as-is (transform returns None, keeps original) assert result[0] == {"type": "text", "text": "Hello!"} assert result[1] == { "type": "blob",