From 1f32952d0066a9dc1ff1482cef48c3cbe0acb663 Mon Sep 17 00:00:00 2001
From: Fabian Schindler <fabian.schindler@sentry.io>
Date: Wed, 17 Dec 2025 10:45:45 +0100
Subject: [PATCH 01/14] fix(ai): redact message parts content of type blob

---
 sentry_sdk/ai/utils.py      |  51 +++++++++++++++++
 tests/test_ai_monitoring.py | 106 +++++++++++++++++++++++++++++++++++-
 2 files changed, 156 insertions(+), 1 deletion(-)

diff --git a/sentry_sdk/ai/utils.py b/sentry_sdk/ai/utils.py
index 1d2b4483c9..73155b0305 100644
--- a/sentry_sdk/ai/utils.py
+++ b/sentry_sdk/ai/utils.py
@@ -5,6 +5,8 @@
 from sys import getsizeof
 from typing import TYPE_CHECKING
 
+from sentry_sdk._types import SENSITIVE_DATA_SUBSTITUTE
+
 if TYPE_CHECKING:
     from typing import Any, Callable, Dict, List, Optional, Tuple
 
@@ -141,6 +143,53 @@ def _find_truncation_index(messages: "List[Dict[str, Any]]", max_bytes: int) ->
     return 0
 
 
+def redact_blob_message_parts(messages):
+    # type: (List[Dict[str, Any]]) -> Tuple[List[Dict[str, Any]], int]
+    """
+    Redact blob message parts from the messages, by removing the "content" key.
+    e.g:
+    {
+        "role": "user",
+        "content": [
+            {
+                "text": "How many ponies do you see in the image?",
+                "type": "text"
+            },
+            {
+                "type": "blob",
+                "modality": "image",
+                "mime_type": "image/jpeg",
+                "content": "data:image/jpeg;base64,..."
+            }
+        ]
+    }
+    becomes:
+    {
+        "role": "user",
+        "content": [
+            {
+                "text": "How many ponies do you see in the image?",
+                "type": "text"
+            },
+            {
+                "type": "blob",
+                "modality": "image",
+                "mime_type": "image/jpeg",
+                "content": "[Filtered]"
+            }
+        ]
+    }
+    """
+
+    for message in messages:
+        content = message.get("content")
+        if isinstance(content, list):
+            for item in content:
+                if item.get("type") == "blob":
+                    item["content"] = SENSITIVE_DATA_SUBSTITUTE
+    return messages
+
+
 def truncate_messages_by_size(
     messages: "List[Dict[str, Any]]",
     max_bytes: int = MAX_GEN_AI_MESSAGE_BYTES,
@@ -186,6 +235,8 @@ def truncate_and_annotate_messages(
     if not messages:
         return None
 
+    messages = redact_blob_message_parts(messages)
+
     truncated_messages, removed_count = truncate_messages_by_size(messages, max_bytes)
     if removed_count > 0:
         scope._gen_ai_original_message_count[span.span_id] = len(messages)
diff --git a/tests/test_ai_monitoring.py b/tests/test_ai_monitoring.py
index 8d3d4ba204..e9f3712cd3 100644
--- a/tests/test_ai_monitoring.py
+++ b/tests/test_ai_monitoring.py
@@ -4,7 +4,7 @@
 import pytest
 
 import sentry_sdk
-from sentry_sdk._types import AnnotatedValue
+from sentry_sdk._types import AnnotatedValue, SENSITIVE_DATA_SUBSTITUTE
 from sentry_sdk.ai.monitoring import ai_track
 from sentry_sdk.ai.utils import (
     MAX_GEN_AI_MESSAGE_BYTES,
@@ -13,6 +13,7 @@
     truncate_and_annotate_messages,
     truncate_messages_by_size,
     _find_truncation_index,
+    redact_blob_message_parts,
 )
 from sentry_sdk.serializer import serialize
 from sentry_sdk.utils import safe_serialize
@@ -542,3 +543,106 @@ def __init__(self):
         assert isinstance(messages_value, AnnotatedValue)
         assert messages_value.metadata["len"] == stored_original_length
         assert len(messages_value.value) == len(truncated_messages)
+
+
+class TestRedactBlobMessageParts:
+    def test_redacts_single_blob_content(self):
+        """Test that blob content is redacted in a message with single blob part"""
+        messages = [
+            {
+                "role": "user",
+                "content": [
+                    {
+                        "text": "How many ponies do you see in the image?",
+                        "type": "text",
+                    },
+                    {
+                        "type": "blob",
+                        "modality": "image",
+                        "mime_type": "image/jpeg",
+                        "content": "data:image/jpeg;base64,/9j/4AAQSkZJRg==",
+                    },
+                ],
+            }
+        ]
+
+        result = redact_blob_message_parts(messages)
+
+        assert result == messages  # Returns the same list
+        assert (
+            messages[0]["content"][0]["text"]
+            == "How many ponies do you see in the image?"
+        )
+        assert messages[0]["content"][0]["type"] == "text"
+        assert messages[0]["content"][1]["type"] == "blob"
+        assert messages[0]["content"][1]["modality"] == "image"
+        assert messages[0]["content"][1]["mime_type"] == "image/jpeg"
+        assert messages[0]["content"][1]["content"] == SENSITIVE_DATA_SUBSTITUTE
+
+    def test_redacts_multiple_blob_parts(self):
+        """Test that multiple blob parts in a single message are all redacted"""
+        messages = [
+            {
+                "role": "user",
+                "content": [
+                    {"text": "Compare these images", "type": "text"},
+                    {
+                        "type": "blob",
+                        "modality": "image",
+                        "mime_type": "image/jpeg",
+                        "content": "data:image/jpeg;base64,first_image",
+                    },
+                    {
+                        "type": "blob",
+                        "modality": "image",
+                        "mime_type": "image/png",
+                        "content": "data:image/png;base64,second_image",
+                    },
+                ],
+            }
+        ]
+
+        result = redact_blob_message_parts(messages)
+
+        assert result == messages
+        assert messages[0]["content"][0]["text"] == "Compare these images"
+        assert messages[0]["content"][1]["content"] == SENSITIVE_DATA_SUBSTITUTE
+        assert messages[0]["content"][2]["content"] == SENSITIVE_DATA_SUBSTITUTE
+
+    def test_redacts_blobs_in_multiple_messages(self):
+        """Test that blob parts are redacted across multiple messages"""
+        messages = [
+            {
+                "role": "user",
+                "content": [
+                    {"text": "First message", "type": "text"},
+                    {
+                        "type": "blob",
+                        "modality": "image",
+                        "content": "data:image/jpeg;base64,first",
+                    },
+                ],
+            },
+            {
+                "role": "assistant",
+                "content": "I see the image.",
+            },
+            {
+                "role": "user",
+                "content": [
+                    {"text": "Second message", "type": "text"},
+                    {
+                        "type": "blob",
+                        "modality": "image",
+                        "content": "data:image/jpeg;base64,second",
+                    },
+                ],
+            },
+        ]
+
+        result = redact_blob_message_parts(messages)
+
+        assert result == messages
+        assert messages[0]["content"][1]["content"] == SENSITIVE_DATA_SUBSTITUTE
+        assert messages[1]["content"] == "I see the image."  # Unchanged
+        assert messages[2]["content"][1]["content"] == SENSITIVE_DATA_SUBSTITUTE

From 795bcea241f7777e646a4da14c870a3049bdbe90 Mon Sep 17 00:00:00 2001
From: Fabian Schindler <fabian.schindler@sentry.io>
Date: Wed, 17 Dec 2025 11:05:04 +0100
Subject: [PATCH 02/14] fix(ai): skip non dict messages

---
 sentry_sdk/ai/utils.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/sentry_sdk/ai/utils.py b/sentry_sdk/ai/utils.py
index 73155b0305..ae507e898b 100644
--- a/sentry_sdk/ai/utils.py
+++ b/sentry_sdk/ai/utils.py
@@ -182,6 +182,9 @@ def redact_blob_message_parts(messages):
     """
 
     for message in messages:
+        if not isinstance(message, dict):
+            continue
+
         content = message.get("content")
         if isinstance(content, list):
             for item in content:

From a623e137d26e982c0d85258256c0ba013f9ecb24 Mon Sep 17 00:00:00 2001
From: Fabian Schindler <fabian.schindler@sentry.io>
Date: Wed, 17 Dec 2025 11:21:43 +0100
Subject: [PATCH 03/14] fix(ai): typing

---
 sentry_sdk/ai/utils.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/sentry_sdk/ai/utils.py b/sentry_sdk/ai/utils.py
index ae507e898b..1b61c7a113 100644
--- a/sentry_sdk/ai/utils.py
+++ b/sentry_sdk/ai/utils.py
@@ -143,8 +143,9 @@ def _find_truncation_index(messages: "List[Dict[str, Any]]", max_bytes: int) ->
     return 0
 
 
-def redact_blob_message_parts(messages):
-    # type: (List[Dict[str, Any]]) -> Tuple[List[Dict[str, Any]], int]
+def redact_blob_message_parts(
+    messages: "List[Dict[str, Any]]",
+) -> "List[Dict[str, Any]]":
     """
-    Redact blob message parts from the messages, by removing the "content" key.
+    Redact blob message parts in place, replacing their "content" with a placeholder.
     e.g:

From 3d3ce5bbdca43f14194edbbbee11d3b6dcd6d8a3 Mon Sep 17 00:00:00 2001
From: Fabian Schindler <fabian.schindler@sentry.io>
Date: Wed, 17 Dec 2025 11:37:12 +0100
Subject: [PATCH 04/14] fix(ai): content items may not be dicts

---
 sentry_sdk/ai/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sentry_sdk/ai/utils.py b/sentry_sdk/ai/utils.py
index 1b61c7a113..78a64ab737 100644
--- a/sentry_sdk/ai/utils.py
+++ b/sentry_sdk/ai/utils.py
@@ -189,7 +189,7 @@ def redact_blob_message_parts(
         content = message.get("content")
         if isinstance(content, list):
             for item in content:
-                if item.get("type") == "blob":
+                if isinstance(item, dict) and item.get("type") == "blob":
                     item["content"] = SENSITIVE_DATA_SUBSTITUTE
     return messages
 

From ce29e47a2aa0cf7b3bb58a0bfc4c47cc781bfe5b Mon Sep 17 00:00:00 2001
From: Fabian Schindler <fabian.schindler@sentry.io>
Date: Wed, 17 Dec 2025 12:21:28 +0100
Subject: [PATCH 05/14] fix(integrations): OpenAI input messages are now being
 converted to the schema we expect for the `gen_ai.request.messages`

---
 sentry_sdk/integrations/openai.py | 66 ++++++++++++++++++++++++++++++-
 1 file changed, 65 insertions(+), 1 deletion(-)

diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py
index 53d464c3c4..79724f389d 100644
--- a/sentry_sdk/integrations/openai.py
+++ b/sentry_sdk/integrations/openai.py
@@ -18,7 +18,7 @@
     safe_serialize,
 )
 
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, Dict
 
 if TYPE_CHECKING:
     from typing import Any, Iterable, List, Optional, Callable, AsyncIterator, Iterator
@@ -177,6 +177,68 @@ def _calculate_token_usage(
     )
 
 
+def _convert_message_parts(messages: "List[Dict[str, Any]]") -> "List[Dict[str, Any]]":
+    """
+    Convert the message parts from OpenAI format to the `gen_ai.request.messages` format.
+    e.g:
+    {
+        "role": "user",
+        "content": [
+            {
+                "text": "How many ponies do you see in the image?",
+                "type": "text"
+            },
+            {
+                "type": "image_url",
+                "image_url": {
+                    "url": "data:image/jpeg;base64,...",
+                    "detail": "high"
+                }
+            }
+        ]
+    }
+    becomes:
+    {
+        "role": "user",
+        "content": [
+            {
+                "text": "How many ponies do you see in the image?",
+                "type": "text"
+            },
+            {
+                "type": "blob",
+                "modality": "image",
+                "mime_type": "image/jpeg",
+                "content": "data:image/jpeg;base64,..."
+            }
+        ]
+    }
+    """
+
+    def _map_item(item: "Dict[str, Any]") -> "Dict[str, Any]":
+        if item.get("type") == "image_url":
+            image_url = item.get("image_url") or {}
+            if image_url.get("url", "").startswith("data:"):
+                return {
+                    "type": "blob",
+                    "modality": "image",
+                    "mime_type": item["image_url"]["url"].split(";base64,")[0],
+                    "content": item["image_url"]["url"].split(";base64,")[1],
+                }
+            else:
+                return {
+                    "type": "uri",
+                    "uri": item["image_url"]["url"],
+                }
+        return item
+
+    for message in messages:
+        content = message.get("content")
+        if isinstance(content, list):
+            message["content"] = [_map_item(item) for item in content]
+    return messages
+
+
 def _set_input_data(
     span: "Span",
     kwargs: "dict[str, Any]",
@@ -198,6 +260,8 @@ def _set_input_data(
         and integration.include_prompts
     ):
         normalized_messages = normalize_message_roles(messages)
+        normalized_messages = _convert_message_parts(normalized_messages)
+
         scope = sentry_sdk.get_current_scope()
         messages_data = truncate_and_annotate_messages(normalized_messages, span, scope)
         if messages_data is not None:

From 7074f0b78cd9b33acb552c93512c97c73922faf0 Mon Sep 17 00:00:00 2001
From: Fabian Schindler <fabian.schindler@sentry.io>
Date: Wed, 17 Dec 2025 13:50:47 +0100
Subject: [PATCH 06/14] test(integrations): add test for message conversion

---
 tests/integrations/openai/test_openai.py | 72 ++++++++++++++++++++++++
 1 file changed, 72 insertions(+)

diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py
index 814289c887..f4f616fad3 100644
--- a/tests/integrations/openai/test_openai.py
+++ b/tests/integrations/openai/test_openai.py
@@ -43,6 +43,7 @@
 from sentry_sdk.integrations.openai import (
     OpenAIIntegration,
     _calculate_token_usage,
+    _convert_message_parts,
 )
 from sentry_sdk.ai.utils import MAX_GEN_AI_MESSAGE_BYTES
 from sentry_sdk._types import AnnotatedValue
@@ -1509,6 +1510,77 @@ def test_openai_message_role_mapping(sentry_init, capture_events):
     assert "ai" not in roles
 
 
+def test_convert_message_parts_image_url_to_blob():
+    """Test that OpenAI image_url message parts are correctly converted to blob format"""
+    messages = [
+        {
+            "role": "user",
+            "content": [
+                {
+                    "text": "How many ponies do you see in the image?",
+                    "type": "text",
+                },
+                {
+                    "type": "image_url",
+                    "image_url": {
+                        "url": "data:image/jpeg;base64,/9j/4AAQSkZJRg==",
+                        "detail": "high",
+                    },
+                },
+            ],
+        }
+    ]
+
+    converted = _convert_message_parts(messages)
+
+    assert len(converted) == 1
+    assert converted[0]["role"] == "user"
+    assert isinstance(converted[0]["content"], list)
+    assert len(converted[0]["content"]) == 2
+
+    # First item (text) should remain unchanged
+    assert converted[0]["content"][0] == {
+        "text": "How many ponies do you see in the image?",
+        "type": "text",
+    }
+
+    # Second item (image_url) should be converted to blob format
+    blob_item = converted[0]["content"][1]
+    assert blob_item["type"] == "blob"
+    assert blob_item["modality"] == "image"
+    assert blob_item["mime_type"] == "data:image/jpeg"
+    assert blob_item["content"] == "/9j/4AAQSkZJRg=="
+    # Verify the original image_url structure is replaced
+    assert "image_url" not in blob_item
+
+
+def test_convert_message_parts_image_url_to_uri():
+    """Test that OpenAI image_url with non-data URLs are converted to uri format"""
+    messages = [
+        {
+            "role": "user",
+            "content": [
+                {
+                    "type": "image_url",
+                    "image_url": {
+                        "url": "https://example.com/image.jpg",
+                        "detail": "low",
+                    },
+                },
+            ],
+        }
+    ]
+
+    converted = _convert_message_parts(messages)
+
+    assert len(converted) == 1
+    uri_item = converted[0]["content"][0]
+    assert uri_item["type"] == "uri"
+    assert uri_item["uri"] == "https://example.com/image.jpg"
+    # Verify the original image_url structure is replaced
+    assert "image_url" not in uri_item
+
+
 def test_openai_message_truncation(sentry_init, capture_events):
     """Test that large messages are truncated properly in OpenAI integration."""
     sentry_init(

From e8a1adc45ac06c1b12dbab28df7703ed19bd5656 Mon Sep 17 00:00:00 2001
From: Fabian Schindler <fabian.schindler@sentry.io>
Date: Thu, 8 Jan 2026 09:34:15 +0100
Subject: [PATCH 07/14] feat(integrations): add transformation functions for
 OpenAI Agents content and update message handling

---
 .../openai_agents/spans/invoke_agent.py       |  52 +++++--
 .../integrations/openai_agents/utils.py       | 128 +++++++++++++++++-
 .../openai_agents/test_openai_agents.py       |  94 ++++++++++++-
 3 files changed, 258 insertions(+), 16 deletions(-)

diff --git a/sentry_sdk/integrations/openai_agents/spans/invoke_agent.py b/sentry_sdk/integrations/openai_agents/spans/invoke_agent.py
index c3a3a04dc9..1e2d7e758c 100644
--- a/sentry_sdk/integrations/openai_agents/spans/invoke_agent.py
+++ b/sentry_sdk/integrations/openai_agents/spans/invoke_agent.py
@@ -3,6 +3,7 @@
     get_start_span_function,
     set_data_normalized,
     normalize_message_roles,
+    normalize_message_role,
     truncate_and_annotate_messages,
 )
 from sentry_sdk.consts import OP, SPANDATA
@@ -10,7 +11,11 @@
 from sentry_sdk.utils import safe_serialize
 
 from ..consts import SPAN_ORIGIN
-from ..utils import _set_agent_data, _set_usage_data
+from ..utils import (
+    _set_agent_data,
+    _set_usage_data,
+    _transform_openai_agents_message_content,
+)
 
 from typing import TYPE_CHECKING
 
@@ -49,17 +54,40 @@ def invoke_agent_span(
 
         original_input = kwargs.get("original_input")
         if original_input is not None:
-            message = (
-                original_input
-                if isinstance(original_input, str)
-                else safe_serialize(original_input)
-            )
-            messages.append(
-                {
-                    "content": [{"text": message, "type": "text"}],
-                    "role": "user",
-                }
-            )
+            if isinstance(original_input, str):
+                # String input: wrap in text block
+                messages.append(
+                    {
+                        "content": [{"text": original_input, "type": "text"}],
+                        "role": "user",
+                    }
+                )
+            elif isinstance(original_input, list) and len(original_input) > 0:
+                # Check if list contains message objects (with type="message")
+                # or content parts (input_text, input_image, etc.)
+                first_item = original_input[0]
+                if isinstance(first_item, dict) and first_item.get("type") == "message":
+                    # List of message objects - process each individually
+                    for msg in original_input:
+                        if isinstance(msg, dict) and msg.get("type") == "message":
+                            role = normalize_message_role(msg.get("role", "user"))
+                            content = msg.get("content")
+                            transformed = _transform_openai_agents_message_content(
+                                content
+                            )
+                            if isinstance(transformed, str):
+                                transformed = [{"text": transformed, "type": "text"}]
+                            elif not isinstance(transformed, list):
+                                transformed = [
+                                    {"text": str(transformed), "type": "text"}
+                                ]
+                            messages.append({"content": transformed, "role": role})
+                else:
+                    # List of content parts - transform and wrap as user message
+                    content = _transform_openai_agents_message_content(original_input)
+                    if not isinstance(content, list):
+                        content = [{"text": str(content), "type": "text"}]
+                    messages.append({"content": content, "role": "user"})
 
         if len(messages) > 0:
             normalized_messages = normalize_message_roles(messages)
diff --git a/sentry_sdk/integrations/openai_agents/utils.py b/sentry_sdk/integrations/openai_agents/utils.py
index a24d0e909d..1f78856512 100644
--- a/sentry_sdk/integrations/openai_agents/utils.py
+++ b/sentry_sdk/integrations/openai_agents/utils.py
@@ -27,6 +27,126 @@
     raise DidNotEnable("OpenAI Agents not installed")
 
 
+def _transform_openai_agents_content_part(
+    content_part: "dict[str, Any]",
+) -> "dict[str, Any]":
+    """
+    Transform an OpenAI Agents content part to Sentry-compatible format.
+
+    Handles multimodal content (images, audio, files) by converting them
+    to the standardized format:
+    - base64 encoded data -> type: "blob"
+    - URL references -> type: "uri"
+    - file_id references -> type: "file"
+    """
+    if not isinstance(content_part, dict):
+        return content_part
+
+    part_type = content_part.get("type")
+
+    # Handle input_text (OpenAI Agents SDK text format) -> normalize to standard text format
+    if part_type == "input_text":
+        return {
+            "type": "text",
+            "text": content_part.get("text", ""),
+        }
+
+    # Handle image_url (OpenAI vision format) and input_image (OpenAI Agents SDK format)
+    if part_type in ("image_url", "input_image"):
+        # Get URL from either format
+        if part_type == "image_url":
+            image_url = content_part.get("image_url") or {}
+            url = image_url.get("url") if isinstance(image_url, dict) else image_url
+        else:
+            # input_image format has image_url directly
+            url = content_part.get("image_url")
+
+        # A present-but-None (or otherwise non-string) URL must not reach
+        # str.startswith() below; coerce it so the part is still recorded.
+        url = "" if url is None else str(url)
+
+        if url.startswith("data:"):
+            # Parse data URI: data:image/jpeg;base64,/9j/4AAQ...
+            try:
+                header, content = url.split(",", 1)
+                mime_type = header.split(":")[1].split(";")[0] if ":" in header else ""
+                return {
+                    "type": "blob",
+                    "modality": "image",
+                    "mime_type": mime_type,
+                    "content": content,
+                }
+            except (ValueError, IndexError):
+                # If parsing fails, return as URI
+                return {
+                    "type": "uri",
+                    "modality": "image",
+                    "mime_type": "",
+                    "uri": url,
+                }
+        else:
+            return {
+                "type": "uri",
+                "modality": "image",
+                "mime_type": "",
+                "uri": url,
+            }
+
+    # Handle input_audio (OpenAI audio input format)
+    if part_type == "input_audio":
+        input_audio = content_part.get("input_audio", {})
+        audio_format = input_audio.get("format", "")
+        mime_type = f"audio/{audio_format}" if audio_format else ""
+        return {
+            "type": "blob",
+            "modality": "audio",
+            "mime_type": mime_type,
+            "content": input_audio.get("data", ""),
+        }
+
+    # Handle image_file (Assistants API file-based images)
+    if part_type == "image_file":
+        image_file = content_part.get("image_file", {})
+        return {
+            "type": "file",
+            "modality": "image",
+            "mime_type": "",
+            "file_id": image_file.get("file_id", ""),
+        }
+
+    # Handle file (document attachments)
+    if part_type == "file":
+        file_data = content_part.get("file", {})
+        return {
+            "type": "file",
+            "modality": "document",
+            "mime_type": "",
+            "file_id": file_data.get("file_id", ""),
+        }
+
+    return content_part
+
+
+def _transform_openai_agents_message_content(content: "Any") -> "Any":
+    """
+    Transform OpenAI Agents message content, handling both string content and
+    list of content parts.
+    """
+    if isinstance(content, str):
+        return content
+
+    if isinstance(content, (list, tuple)):
+        transformed = []
+        for item in content:
+            if isinstance(item, dict):
+                transformed.append(_transform_openai_agents_content_part(item))
+            else:
+                transformed.append(item)
+        return transformed
+
+    return content
+
+
 def _capture_exception(exc: "Any") -> None:
     set_span_errored()
 
@@ -128,13 +248,15 @@ def _set_input_data(
         if "role" in message:
             normalized_role = normalize_message_role(message.get("role"))
             content = message.get("content")
+            # Transform content to handle multimodal data (images, audio, files)
+            transformed_content = _transform_openai_agents_message_content(content)
             request_messages.append(
                 {
                     "role": normalized_role,
                     "content": (
-                        [{"type": "text", "text": content}]
-                        if isinstance(content, str)
-                        else content
+                        [{"type": "text", "text": transformed_content}]
+                        if isinstance(transformed_content, str)
+                        else transformed_content
                     ),
                 }
             )
diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py
index c5cb25dfee..9eede6c54b 100644
--- a/tests/integrations/openai_agents/test_openai_agents.py
+++ b/tests/integrations/openai_agents/test_openai_agents.py
@@ -9,7 +9,12 @@
 from sentry_sdk import start_span
 from sentry_sdk.consts import SPANDATA
 from sentry_sdk.integrations.openai_agents import OpenAIAgentsIntegration
-from sentry_sdk.integrations.openai_agents.utils import _set_input_data, safe_serialize
+from sentry_sdk.integrations.openai_agents.utils import (
+    _set_input_data,
+    safe_serialize,
+    _transform_openai_agents_content_part,
+    _transform_openai_agents_message_content,
+)
 from sentry_sdk.utils import parse_version
 
 import agents
@@ -1998,3 +2003,90 @@ def test_openai_agents_message_truncation(sentry_init, capture_events):
         assert len(parsed_messages) == 2
         assert "small message 4" in str(parsed_messages[0])
         assert "small message 5" in str(parsed_messages[1])
+
+
+def test_transform_image_url_to_blob():
+    """Test that OpenAI image_url with data URI is converted to blob format."""
+    content_part = {
+        "type": "image_url",
+        "image_url": {
+            "url": "data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQABAAD",
+            "detail": "high",
+        },
+    }
+    result = _transform_openai_agents_content_part(content_part)
+    assert result == {
+        "type": "blob",
+        "modality": "image",
+        "mime_type": "image/jpeg",
+        "content": "/9j/4AAQSkZJRgABAQAAAQABAAD",
+    }
+
+
+def test_transform_image_url_to_uri():
+    """Test that OpenAI image_url with HTTP URL is converted to uri format."""
+    content_part = {
+        "type": "image_url",
+        "image_url": {
+            "url": "https://example.com/image.jpg",
+            "detail": "low",
+        },
+    }
+    result = _transform_openai_agents_content_part(content_part)
+    assert result == {
+        "type": "uri",
+        "modality": "image",
+        "mime_type": "",
+        "uri": "https://example.com/image.jpg",
+    }
+
+
+def test_transform_message_content_with_image():
+    """Test that message content with image is properly transformed."""
+    content = [
+        {"type": "text", "text": "What is in this image?"},
+        {
+            "type": "image_url",
+            "image_url": {
+                "url": "data:image/png;base64,iVBORw0KGgoAAAANSUhEUg==",
+            },
+        },
+    ]
+    result = _transform_openai_agents_message_content(content)
+    assert len(result) == 2
+    assert result[0] == {"type": "text", "text": "What is in this image?"}
+    assert result[1] == {
+        "type": "blob",
+        "modality": "image",
+        "mime_type": "image/png",
+        "content": "iVBORw0KGgoAAAANSUhEUg==",
+    }
+
+
+def test_transform_input_image_to_blob():
+    """Test that OpenAI Agents SDK input_image format is converted to blob format."""
+    # OpenAI Agents SDK uses input_image type with image_url as a direct string
+    content_part = {
+        "type": "input_image",
+        "image_url": "data:image/png;base64,iVBORw0KGgoAAAANSUhEUg==",
+    }
+    result = _transform_openai_agents_content_part(content_part)
+    assert result == {
+        "type": "blob",
+        "modality": "image",
+        "mime_type": "image/png",
+        "content": "iVBORw0KGgoAAAANSUhEUg==",
+    }
+
+
+def test_transform_input_text_to_text():
+    """Test that OpenAI Agents SDK input_text format is normalized to text format."""
+    content_part = {
+        "type": "input_text",
+        "text": "Hello, world!",
+    }
+    result = _transform_openai_agents_content_part(content_part)
+    assert result == {
+        "type": "text",
+        "text": "Hello, world!",
+    }

From c1a2239c7946ef95703c22bb6d879deed7368895 Mon Sep 17 00:00:00 2001
From: Fabian Schindler <fabian.schindler@sentry.io>
Date: Thu, 8 Jan 2026 15:19:04 +0100
Subject: [PATCH 08/14] feat(ai): implement parse_data_uri function and
 integrate it into OpenAI message handling

---
 sentry_sdk/ai/utils.py                        | 33 +++++++++
 sentry_sdk/integrations/openai.py             | 29 +++++---
 .../integrations/openai_agents/utils.py       |  7 +-
 tests/integrations/openai/test_openai.py      | 30 ++++++++-
 tests/test_ai_monitoring.py                   | 67 +++++++++++++++++++
 5 files changed, 153 insertions(+), 13 deletions(-)

diff --git a/sentry_sdk/ai/utils.py b/sentry_sdk/ai/utils.py
index 78a64ab737..6fdf763ad6 100644
--- a/sentry_sdk/ai/utils.py
+++ b/sentry_sdk/ai/utils.py
@@ -40,6 +40,39 @@ class GEN_AI_ALLOWED_MESSAGE_ROLES:
         GEN_AI_MESSAGE_ROLE_MAPPING[source_role] = target_role
 
 
+def parse_data_uri(url):
+    # type: (str) -> Tuple[str, str]
+    """
+    Parse a data URI and return (mime_type, content).
+
+    Data URI format (RFC 2397): data:[<mediatype>][;base64],<data>
+
+    Examples:
+        data:image/jpeg;base64,/9j/4AAQ... → ("image/jpeg", "/9j/4AAQ...")
+        data:text/plain,Hello → ("text/plain", "Hello")
+        data:;base64,SGVsbG8= → ("", "SGVsbG8=")
+
+    Raises:
+        ValueError: If the URL is not a valid data URI (missing comma separator)
+    """
+    if "," not in url:
+        raise ValueError("Invalid data URI: missing comma separator")
+
+    header, content = url.split(",", 1)
+
+    # Extract mime type from header
+    # Format: "data:<mime>[;param1][;param2]..." e.g. "data:image/jpeg;base64"
+    # Remove "data:" prefix, then take everything before the first semicolon
+    if header.startswith("data:"):
+        mime_part = header[5:]  # Remove "data:" prefix
+    else:
+        mime_part = header
+
+    mime_type = mime_part.split(";")[0]
+
+    return mime_type, content
+
+
 def _normalize_data(data: "Any", unpack: bool = True) -> "Any":
     # convert pydantic data (e.g. OpenAI v1+) to json compatible format
     if hasattr(data, "model_dump"):
diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py
index 79724f389d..fc41d79bf8 100644
--- a/sentry_sdk/integrations/openai.py
+++ b/sentry_sdk/integrations/openai.py
@@ -6,6 +6,7 @@
 from sentry_sdk.ai.utils import (
     set_data_normalized,
     normalize_message_roles,
+    parse_data_uri,
     truncate_and_annotate_messages,
 )
 from sentry_sdk.consts import SPANDATA
@@ -218,21 +219,33 @@ def _convert_message_parts(messages: "List[Dict[str, Any]]") -> "List[Dict[str,
     def _map_item(item: "Dict[str, Any]") -> "Dict[str, Any]":
         if item.get("type") == "image_url":
             image_url = item.get("image_url") or {}
-            if image_url.get("url", "").startswith("data:"):
-                return {
-                    "type": "blob",
-                    "modality": "image",
-                    "mime_type": item["image_url"]["url"].split(";base64,")[0],
-                    "content": item["image_url"]["url"].split(";base64,")[1],
-                }
+            url = image_url.get("url", "")
+            if url.startswith("data:"):
+                try:
+                    mime_type, content = parse_data_uri(url)
+                    return {
+                        "type": "blob",
+                        "modality": "image",
+                        "mime_type": mime_type,
+                        "content": content,
+                    }
+                except ValueError:
+                    # If parsing fails, return as URI
+                    return {
+                        "type": "uri",
+                        "modality": "image",
+                        "uri": url,
+                    }
             else:
                 return {
                     "type": "uri",
-                    "uri": item["image_url"]["url"],
+                    "uri": url,
                 }
         return item
 
     for message in messages:
+        if not isinstance(message, dict):
+            continue
         content = message.get("content")
         if isinstance(content, list):
             message["content"] = [_map_item(item) for item in content]
diff --git a/sentry_sdk/integrations/openai_agents/utils.py b/sentry_sdk/integrations/openai_agents/utils.py
index 1f78856512..a95fac422a 100644
--- a/sentry_sdk/integrations/openai_agents/utils.py
+++ b/sentry_sdk/integrations/openai_agents/utils.py
@@ -2,6 +2,7 @@
 from sentry_sdk.ai.utils import (
     GEN_AI_ALLOWED_MESSAGE_ROLES,
     normalize_message_roles,
+    parse_data_uri,
     set_data_normalized,
     normalize_message_role,
     truncate_and_annotate_messages,
@@ -66,17 +67,15 @@ def _transform_openai_agents_content_part(
             url = content_part.get("image_url", "")
 
         if url.startswith("data:"):
-            # Parse data URI: data:image/jpeg;base64,/9j/4AAQ...
             try:
-                header, content = url.split(",", 1)
-                mime_type = header.split(":")[1].split(";")[0] if ":" in header else ""
+                mime_type, content = parse_data_uri(url)
                 return {
                     "type": "blob",
                     "modality": "image",
                     "mime_type": mime_type,
                     "content": content,
                 }
-            except (ValueError, IndexError):
+            except ValueError:
                 # If parsing fails, return as URI
                 return {
                     "type": "uri",
diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py
index f4f616fad3..3f971afaee 100644
--- a/tests/integrations/openai/test_openai.py
+++ b/tests/integrations/openai/test_openai.py
@@ -1548,7 +1548,7 @@ def test_convert_message_parts_image_url_to_blob():
     blob_item = converted[0]["content"][1]
     assert blob_item["type"] == "blob"
     assert blob_item["modality"] == "image"
-    assert blob_item["mime_type"] == "data:image/jpeg"
+    assert blob_item["mime_type"] == "image/jpeg"
     assert blob_item["content"] == "/9j/4AAQSkZJRg=="
     # Verify the original image_url structure is replaced
     assert "image_url" not in blob_item
@@ -1581,6 +1581,34 @@ def test_convert_message_parts_image_url_to_uri():
     assert "image_url" not in uri_item
 
 
+def test_convert_message_parts_malformed_data_uri():
+    """Test that malformed data URIs are handled gracefully without crashing"""
+    messages = [
+        {
+            "role": "user",
+            "content": [
+                {
+                    "type": "image_url",
+                    "image_url": {
+                        # Malformed: missing ;base64, and comma separator
+                        "url": "data:image/jpeg",
+                    },
+                },
+            ],
+        }
+    ]
+
+    # Should not raise an exception
+    converted = _convert_message_parts(messages)
+
+    assert len(converted) == 1
+    # Malformed data URI should fall back to uri type
+    item = converted[0]["content"][0]
+    assert item["type"] == "uri"
+    assert item["uri"] == "data:image/jpeg"
+    assert item["modality"] == "image"
+
+
 def test_openai_message_truncation(sentry_init, capture_events):
     """Test that large messages are truncated properly in OpenAI integration."""
     sentry_init(
diff --git a/tests/test_ai_monitoring.py b/tests/test_ai_monitoring.py
index e9f3712cd3..9e86aac5d4 100644
--- a/tests/test_ai_monitoring.py
+++ b/tests/test_ai_monitoring.py
@@ -13,6 +13,7 @@
     truncate_and_annotate_messages,
     truncate_messages_by_size,
     _find_truncation_index,
+    parse_data_uri,
     redact_blob_message_parts,
 )
 from sentry_sdk.serializer import serialize
@@ -646,3 +647,69 @@ def test_redacts_blobs_in_multiple_messages(self):
         assert messages[0]["content"][1]["content"] == SENSITIVE_DATA_SUBSTITUTE
         assert messages[1]["content"] == "I see the image."  # Unchanged
         assert messages[2]["content"][1]["content"] == SENSITIVE_DATA_SUBSTITUTE
+
+
+class TestParseDataUri:
+    """Tests for the parse_data_uri utility function."""
+
+    def test_standard_base64_image(self):
+        """Test parsing a standard base64 encoded image data URI."""
+        url = "data:image/jpeg;base64,/9j/4AAQSkZJRg=="
+        mime_type, content = parse_data_uri(url)
+        assert mime_type == "image/jpeg"
+        assert content == "/9j/4AAQSkZJRg=="
+
+    def test_png_image(self):
+        """Test parsing a PNG image data URI."""
+        url = "data:image/png;base64,iVBORw0KGgo="
+        mime_type, content = parse_data_uri(url)
+        assert mime_type == "image/png"
+        assert content == "iVBORw0KGgo="
+
+    def test_plain_text_without_base64(self):
+        """Test parsing a plain text data URI without base64 encoding."""
+        url = "data:text/plain,Hello%20World"
+        mime_type, content = parse_data_uri(url)
+        assert mime_type == "text/plain"
+        assert content == "Hello%20World"
+
+    def test_no_mime_type_with_base64(self):
+        """Test parsing a data URI with no mime type but base64 encoding."""
+        url = "data:;base64,SGVsbG8="
+        mime_type, content = parse_data_uri(url)
+        assert mime_type == ""
+        assert content == "SGVsbG8="
+
+    def test_no_mime_type_no_base64(self):
+        """Test parsing a minimal data URI."""
+        url = "data:,Hello"
+        mime_type, content = parse_data_uri(url)
+        assert mime_type == ""
+        assert content == "Hello"
+
+    def test_content_with_commas(self):
+        """Test that content with commas is handled correctly."""
+        url = "data:text/csv,a,b,c,d"
+        mime_type, content = parse_data_uri(url)
+        assert mime_type == "text/csv"
+        assert content == "a,b,c,d"
+
+    def test_missing_comma_raises_value_error(self):
+        """Test that a data URI without a comma raises ValueError."""
+        url = "data:image/jpeg"
+        with pytest.raises(ValueError, match="missing comma separator"):
+            parse_data_uri(url)
+
+    def test_empty_content(self):
+        """Test parsing a data URI with empty content."""
+        url = "data:text/plain,"
+        mime_type, content = parse_data_uri(url)
+        assert mime_type == "text/plain"
+        assert content == ""
+
+    def test_mime_type_with_charset(self):
+        """Test parsing a data URI with charset parameter."""
+        url = "data:text/html;charset=utf-8,<h1>Hello</h1>"
+        mime_type, content = parse_data_uri(url)
+        assert mime_type == "text/html"
+        assert content == "<h1>Hello</h1>"

From 04b27f4c183b080582f06dc6e8e0d07a23cc12fc Mon Sep 17 00:00:00 2001
From: Fabian Schindler <fabian.schindler@sentry.io>
Date: Tue, 13 Jan 2026 14:17:01 +0100
Subject: [PATCH 09/14] fix: review comment

---
 sentry_sdk/integrations/openai.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py
index 5cdd674a93..ea7ee25063 100644
--- a/sentry_sdk/integrations/openai.py
+++ b/sentry_sdk/integrations/openai.py
@@ -220,6 +220,9 @@ def _convert_message_parts(messages: "List[Dict[str, Any]]") -> "List[Dict[str,
     """
 
     def _map_item(item: "Dict[str, Any]") -> "Dict[str, Any]":
+        if not isinstance(item, dict):
+            return item
+
         if item.get("type") == "image_url":
             image_url = item.get("image_url") or {}
             url = image_url.get("url", "")
@@ -242,6 +245,7 @@ def _map_item(item: "Dict[str, Any]") -> "Dict[str, Any]":
             else:
                 return {
                     "type": "uri",
+                    "modality": "image",
                     "uri": url,
                 }
         return item

From b74bdb9c00f4252cfdf8578bf4eb94864d4b2537 Mon Sep 17 00:00:00 2001
From: Fabian Schindler <fabian.schindler@sentry.io>
Date: Wed, 14 Jan 2026 17:19:40 +0100
Subject: [PATCH 10/14] fix(integrations): addressing review comments

---
 .../integrations/openai_agents/utils.py       | 59 ++++++++-------
 .../openai_agents/test_openai_agents.py       | 72 +++++++++++++++++++
 2 files changed, 106 insertions(+), 25 deletions(-)

diff --git a/sentry_sdk/integrations/openai_agents/utils.py b/sentry_sdk/integrations/openai_agents/utils.py
index a95fac422a..7e31d4621a 100644
--- a/sentry_sdk/integrations/openai_agents/utils.py
+++ b/sentry_sdk/integrations/openai_agents/utils.py
@@ -56,7 +56,7 @@ def _transform_openai_agents_content_part(
     if part_type in ("image_url", "input_image"):
         # Get URL from either format
         if part_type == "image_url":
-            image_url = content_part.get("image_url", {})
+            image_url = content_part.get("image_url") or {}
             url = (
                 image_url.get("url", "")
                 if isinstance(image_url, dict)
@@ -64,7 +64,7 @@ def _transform_openai_agents_content_part(
             )
         else:
             # input_image format has image_url directly
-            url = content_part.get("image_url", "")
+            url = content_part.get("image_url") or ""
 
         if url.startswith("data:"):
             try:
@@ -93,35 +93,44 @@ def _transform_openai_agents_content_part(
 
     # Handle input_audio (OpenAI audio input format)
     if part_type == "input_audio":
-        input_audio = content_part.get("input_audio", {})
-        audio_format = input_audio.get("format", "")
-        mime_type = f"audio/{audio_format}" if audio_format else ""
-        return {
-            "type": "blob",
-            "modality": "audio",
-            "mime_type": mime_type,
-            "content": input_audio.get("data", ""),
-        }
+        input_audio = content_part.get("input_audio") or {}
+        if isinstance(input_audio, dict):
+            audio_format = input_audio.get("format", "")
+            mime_type = f"audio/{audio_format}" if audio_format else ""
+            return {
+                "type": "blob",
+                "modality": "audio",
+                "mime_type": mime_type,
+                "content": input_audio.get("data", ""),
+            }
+        else:
+            return content_part
 
     # Handle image_file (Assistants API file-based images)
     if part_type == "image_file":
-        image_file = content_part.get("image_file", {})
-        return {
-            "type": "file",
-            "modality": "image",
-            "mime_type": "",
-            "file_id": image_file.get("file_id", ""),
-        }
+        image_file = content_part.get("image_file") or {}
+        if isinstance(image_file, dict):
+            return {
+                "type": "file",
+                "modality": "image",
+                "mime_type": "",
+                "file_id": image_file.get("file_id", ""),
+            }
+        else:
+            return content_part
 
     # Handle file (document attachments)
     if part_type == "file":
-        file_data = content_part.get("file", {})
-        return {
-            "type": "file",
-            "modality": "document",
-            "mime_type": "",
-            "file_id": file_data.get("file_id", ""),
-        }
+        file_data = content_part.get("file") or {}
+        if isinstance(file_data, dict):
+            return {
+                "type": "file",
+                "modality": "document",
+                "mime_type": "",
+                "file_id": file_data.get("file_id", ""),
+            }
+        else:
+            return content_part
 
     return content_part
 
diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py
index 9eede6c54b..43fabb8d60 100644
--- a/tests/integrations/openai_agents/test_openai_agents.py
+++ b/tests/integrations/openai_agents/test_openai_agents.py
@@ -2005,6 +2005,78 @@ def test_openai_agents_message_truncation(sentry_init, capture_events):
         assert "small message 5" in str(parsed_messages[1])
 
 
+def test_transform_does_not_modify_original():
+    """Test that transformation does not modify the original content."""
+    import copy
+
+    content_part = {
+        "type": "image_url",
+        "image_url": {
+            "url": "data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQABAAD",
+            "detail": "high",
+        },
+    }
+    original = copy.deepcopy(content_part)
+    _transform_openai_agents_content_part(content_part)
+    assert content_part == original, "Original content_part should not be modified"
+
+    content = [
+        {"type": "text", "text": "What is in this image?"},
+        {
+            "type": "image_url",
+            "image_url": {
+                "url": "data:image/png;base64,iVBORw0KGgoAAAANSUhEUg==",
+            },
+        },
+    ]
+    original_content = copy.deepcopy(content)
+    _transform_openai_agents_message_content(content)
+    assert content == original_content, "Original content list should not be modified"
+
+
+def test_transform_handles_none_values():
+    """Test that transformation handles None values gracefully without crashing."""
+    # input_image with image_url explicitly set to None - should not crash
+    content_part = {"type": "input_image", "image_url": None}
+    result = _transform_openai_agents_content_part(content_part)
+    assert result == {"type": "uri", "modality": "image", "mime_type": "", "uri": ""}
+
+    # image_url with nested dict set to None - should not crash
+    content_part = {"type": "image_url", "image_url": None}
+    result = _transform_openai_agents_content_part(content_part)
+    assert result == {"type": "uri", "modality": "image", "mime_type": "", "uri": ""}
+
+    # input_audio with None value - gracefully returns empty blob
+    content_part = {"type": "input_audio", "input_audio": None}
+    result = _transform_openai_agents_content_part(content_part)
+    assert result == {
+        "type": "blob",
+        "modality": "audio",
+        "mime_type": "",
+        "content": "",
+    }
+
+    # image_file with None value - gracefully returns empty file reference
+    content_part = {"type": "image_file", "image_file": None}
+    result = _transform_openai_agents_content_part(content_part)
+    assert result == {
+        "type": "file",
+        "modality": "image",
+        "mime_type": "",
+        "file_id": "",
+    }
+
+    # file with None value - gracefully returns empty file reference
+    content_part = {"type": "file", "file": None}
+    result = _transform_openai_agents_content_part(content_part)
+    assert result == {
+        "type": "file",
+        "modality": "document",
+        "mime_type": "",
+        "file_id": "",
+    }
+
+
 def test_transform_image_url_to_blob():
     """Test that OpenAI image_url with data URI is converted to blob format."""
     content_part = {

From 80809048e19777a1e35c0fcf540b4c2c69082a9a Mon Sep 17 00:00:00 2001
From: Fabian Schindler <fabian.schindler@sentry.io>
Date: Thu, 15 Jan 2026 09:38:25 +0100
Subject: [PATCH 11/14] fix: review comment

---
 sentry_sdk/integrations/openai.py        |  9 ++++-
 tests/integrations/openai/test_openai.py | 49 ++++++++++++++++++++++++
 2 files changed, 56 insertions(+), 2 deletions(-)

diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py
index ea7ee25063..d848a95575 100644
--- a/sentry_sdk/integrations/openai.py
+++ b/sentry_sdk/integrations/openai.py
@@ -224,8 +224,13 @@ def _map_item(item: "Dict[str, Any]") -> "Dict[str, Any]":
             return item
 
         if item.get("type") == "image_url":
-            image_url = item.get("image_url") or {}
-            url = image_url.get("url", "")
+            image_url = item.get("image_url")
+            if isinstance(image_url, str):
+                url = image_url
+            elif isinstance(image_url, dict):
+                url = image_url.get("url", "")
+            else:
+                url = ""
             if url.startswith("data:"):
                 try:
                     mime_type, content = parse_data_uri(url)
diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py
index 3f971afaee..6e62fc5ec0 100644
--- a/tests/integrations/openai/test_openai.py
+++ b/tests/integrations/openai/test_openai.py
@@ -1609,6 +1609,55 @@ def test_convert_message_parts_malformed_data_uri():
     assert item["modality"] == "image"
 
 
+def test_convert_message_parts_image_url_as_string():
+    """Test that image_url as a string (instead of dict) is handled gracefully"""
+    messages = [
+        {
+            "role": "user",
+            "content": [
+                {
+                    "type": "image_url",
+                    # Some implementations pass image_url as a string directly
+                    "image_url": "https://example.com/image.jpg",
+                },
+            ],
+        }
+    ]
+
+    # Should not raise an exception
+    converted = _convert_message_parts(messages)
+
+    assert len(converted) == 1
+    item = converted[0]["content"][0]
+    assert item["type"] == "uri"
+    assert item["modality"] == "image"
+    assert item["uri"] == "https://example.com/image.jpg"
+
+
+def test_convert_message_parts_image_url_as_string_data_uri():
+    """Test that image_url as a data URI string is correctly converted to blob"""
+    messages = [
+        {
+            "role": "user",
+            "content": [
+                {
+                    "type": "image_url",
+                    "image_url": "data:image/png;base64,iVBORw0KGgo=",
+                },
+            ],
+        }
+    ]
+
+    converted = _convert_message_parts(messages)
+
+    assert len(converted) == 1
+    item = converted[0]["content"][0]
+    assert item["type"] == "blob"
+    assert item["modality"] == "image"
+    assert item["mime_type"] == "image/png"
+    assert item["content"] == "iVBORw0KGgo="
+
+
 def test_openai_message_truncation(sentry_init, capture_events):
     """Test that large messages are truncated properly in OpenAI integration."""
     sentry_init(

From 05b1a7999332c15a6f11594abb2250f8046403b4 Mon Sep 17 00:00:00 2001
From: Fabian Schindler <fabian.schindler@sentry.io>
Date: Thu, 15 Jan 2026 12:54:58 +0100
Subject: [PATCH 12/14] fix(integrations): extract text content from OpenAI
 responses instead of full message dicts

- Extract choice.message.content for gen_ai.response.text instead of model_dump()
- Add separate gen_ai.response.tool_calls extraction for Chat Completions API
- Handle audio transcripts in responses
- Extract shared extract_response_output() to ai/utils.py for Responses API output
- Refactor OpenAI and OpenAI Agents integrations to use shared utility
---
 sentry_sdk/ai/utils.py                        |  40 +++
 sentry_sdk/integrations/openai.py             |  68 +++--
 .../integrations/openai_agents/utils.py       |  29 +-
 tests/integrations/openai/test_openai.py      | 263 ++++++++++++++++++
 4 files changed, 351 insertions(+), 49 deletions(-)

diff --git a/sentry_sdk/ai/utils.py b/sentry_sdk/ai/utils.py
index 71f7544a1c..afc550331f 100644
--- a/sentry_sdk/ai/utils.py
+++ b/sentry_sdk/ai/utils.py
@@ -107,6 +107,46 @@ def set_data_normalized(
         span.set_data(key, json.dumps(normalized))
 
 
+def extract_response_output(
+    output_items: "Any",
+) -> "Tuple[List[Any], List[Dict[str, Any]]]":
+    """
+    Extract response text and tool calls from OpenAI Responses API output.
+
+    This handles the output format from OpenAI's Responses API where each output
+    item has a `type` field that can be "message" or "function_call".
+
+    Args:
+        output_items: Iterable of output items from the response
+
+    Returns:
+        Tuple of (response_texts, tool_calls) where:
+        - response_texts: List of text strings or dicts for unknown message types
+        - tool_calls: List of tool call dicts
+    """
+    response_texts = []  # type: List[Any]
+    tool_calls = []  # type: List[Dict[str, Any]]
+
+    for output in output_items:
+        if output.type == "function_call":
+            if hasattr(output, "model_dump"):
+                tool_calls.append(output.model_dump())
+            elif hasattr(output, "dict"):
+                tool_calls.append(output.dict())
+        elif output.type == "message":
+            for output_message in output.content:
+                try:
+                    response_texts.append(output_message.text)
+                except AttributeError:
+                    # Unknown output message type, just return the json
+                    if hasattr(output_message, "model_dump"):
+                        response_texts.append(output_message.model_dump())
+                    elif hasattr(output_message, "dict"):
+                        response_texts.append(output_message.dict())
+
+    return response_texts, tool_calls
+
+
 def normalize_message_role(role: str) -> str:
     """
     Normalize a message role to one of the 4 allowed gen_ai role values.
diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py
index d848a95575..acbf411ae0 100644
--- a/sentry_sdk/integrations/openai.py
+++ b/sentry_sdk/integrations/openai.py
@@ -4,6 +4,7 @@
 from sentry_sdk import consts
 from sentry_sdk.ai.monitoring import record_token_usage
 from sentry_sdk.ai.utils import (
+    extract_response_output,
     set_data_normalized,
     normalize_message_roles,
     parse_data_uri,
@@ -349,14 +350,45 @@ def _set_output_data(
 
     if hasattr(response, "choices"):
         if should_send_default_pii() and integration.include_prompts:
-            response_text = [
-                choice.message.model_dump()
-                for choice in response.choices
-                if choice.message is not None
-            ]
+            response_text = []  # type: list[str]
+            tool_calls = []  # type: list[Any]
+
+            for choice in response.choices:
+                if choice.message is None:
+                    continue
+
+                # Extract text content
+                content = getattr(choice.message, "content", None)
+                if content is not None:
+                    response_text.append(content)
+
+                # Extract audio transcript if available
+                audio = getattr(choice.message, "audio", None)
+                if audio is not None:
+                    transcript = getattr(audio, "transcript", None)
+                    if transcript is not None:
+                        response_text.append(transcript)
+
+                # Extract tool calls
+                message_tool_calls = getattr(choice.message, "tool_calls", None)
+                if message_tool_calls is not None:
+                    for tool_call in message_tool_calls:
+                        if hasattr(tool_call, "model_dump"):
+                            tool_calls.append(tool_call.model_dump())
+                        elif hasattr(tool_call, "dict"):
+                            tool_calls.append(tool_call.dict())
+
             if len(response_text) > 0:
                 set_data_normalized(span, SPANDATA.GEN_AI_RESPONSE_TEXT, response_text)
 
+            if len(tool_calls) > 0:
+                set_data_normalized(
+                    span,
+                    SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS,
+                    tool_calls,
+                    unpack=False,
+                )
+
         _calculate_token_usage(messages, response, span, None, integration.count_tokens)
 
         if finish_span:
@@ -364,34 +396,18 @@ def _set_output_data(
 
     elif hasattr(response, "output"):
         if should_send_default_pii() and integration.include_prompts:
-            output_messages: "dict[str, list[Any]]" = {
-                "response": [],
-                "tool": [],
-            }
+            response_texts, tool_calls = extract_response_output(response.output)
 
-            for output in response.output:
-                if output.type == "function_call":
-                    output_messages["tool"].append(output.dict())
-                elif output.type == "message":
-                    for output_message in output.content:
-                        try:
-                            output_messages["response"].append(output_message.text)
-                        except AttributeError:
-                            # Unknown output message type, just return the json
-                            output_messages["response"].append(output_message.dict())
-
-            if len(output_messages["tool"]) > 0:
+            if len(tool_calls) > 0:
                 set_data_normalized(
                     span,
                     SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS,
-                    output_messages["tool"],
+                    tool_calls,
                     unpack=False,
                 )
 
-            if len(output_messages["response"]) > 0:
-                set_data_normalized(
-                    span, SPANDATA.GEN_AI_RESPONSE_TEXT, output_messages["response"]
-                )
+            if len(response_texts) > 0:
+                set_data_normalized(span, SPANDATA.GEN_AI_RESPONSE_TEXT, response_texts)
 
         _calculate_token_usage(messages, response, span, None, integration.count_tokens)
 
diff --git a/sentry_sdk/integrations/openai_agents/utils.py b/sentry_sdk/integrations/openai_agents/utils.py
index 7e31d4621a..afa16dc609 100644
--- a/sentry_sdk/integrations/openai_agents/utils.py
+++ b/sentry_sdk/integrations/openai_agents/utils.py
@@ -1,6 +1,7 @@
 import sentry_sdk
 from sentry_sdk.ai.utils import (
     GEN_AI_ALLOWED_MESSAGE_ROLES,
+    extract_response_output,
     normalize_message_roles,
     parse_data_uri,
     set_data_normalized,
@@ -300,31 +301,13 @@ def _set_output_data(span: "sentry_sdk.tracing.Span", result: "Any") -> None:
     if not should_send_default_pii():
         return
 
-    output_messages: "dict[str, list[Any]]" = {
-        "response": [],
-        "tool": [],
-    }
+    response_texts, tool_calls = extract_response_output(result.output)
 
-    for output in result.output:
-        if output.type == "function_call":
-            output_messages["tool"].append(output.dict())
-        elif output.type == "message":
-            for output_message in output.content:
-                try:
-                    output_messages["response"].append(output_message.text)
-                except AttributeError:
-                    # Unknown output message type, just return the json
-                    output_messages["response"].append(output_message.dict())
-
-    if len(output_messages["tool"]) > 0:
-        span.set_data(
-            SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS, safe_serialize(output_messages["tool"])
-        )
+    if len(tool_calls) > 0:
+        span.set_data(SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS, safe_serialize(tool_calls))
 
-    if len(output_messages["response"]) > 0:
-        set_data_normalized(
-            span, SPANDATA.GEN_AI_RESPONSE_TEXT, output_messages["response"]
-        )
+    if len(response_texts) > 0:
+        set_data_normalized(span, SPANDATA.GEN_AI_RESPONSE_TEXT, response_texts)
 
 
 def _create_mcp_execute_tool_spans(
diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py
index 6e62fc5ec0..10cf625ab7 100644
--- a/tests/integrations/openai/test_openai.py
+++ b/tests/integrations/openai/test_openai.py
@@ -17,6 +17,10 @@
 from openai.types.chat import ChatCompletion, ChatCompletionMessage, ChatCompletionChunk
 from openai.types.chat.chat_completion import Choice
 from openai.types.chat.chat_completion_chunk import ChoiceDelta, Choice as DeltaChoice
+from openai.types.chat.chat_completion_message_tool_call import (
+    ChatCompletionMessageToolCall,
+    Function as ToolCallFunction,
+)
 from openai.types.create_embedding_response import Usage as EmbeddingTokenUsage
 
 SKIP_RESPONSES_TESTS = False
@@ -1708,3 +1712,262 @@ def test_openai_message_truncation(sentry_init, capture_events):
             if SPANDATA.GEN_AI_REQUEST_MESSAGES in span_meta:
                 messages_meta = span_meta[SPANDATA.GEN_AI_REQUEST_MESSAGES]
                 assert "len" in messages_meta.get("", {})
+
+
+def test_response_text_is_string_not_dict(sentry_init, capture_events):
+    """Test that gen_ai.response.text is a string, not a message dict.
+
+    With set_data_normalized, a single-element list is unpacked to the element,
+    so ["the model response"] becomes just "the model response".
+    """
+    sentry_init(
+        integrations=[OpenAIIntegration(include_prompts=True)],
+        traces_sample_rate=1.0,
+        send_default_pii=True,
+    )
+    events = capture_events()
+
+    client = OpenAI(api_key="z")
+    client.chat.completions._post = mock.Mock(return_value=EXAMPLE_CHAT_COMPLETION)
+
+    with start_transaction(name="openai tx"):
+        client.chat.completions.create(
+            model="some-model", messages=[{"role": "system", "content": "hello"}]
+        )
+
+    (event,) = events
+    span = event["spans"][0]
+
+    # Verify response text is in span data
+    assert SPANDATA.GEN_AI_RESPONSE_TEXT in span["data"]
+
+    response_text = span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT]
+
+    # For a single response, set_data_normalized unpacks the list, so it's the string directly
+    assert isinstance(response_text, str)
+    assert response_text == "the model response"
+
+    # Make sure it's NOT a JSON string containing a dict (the old buggy format)
+    # The old format was like '{"content": "...", "role": "assistant", ...}'
+    try:
+        parsed = json.loads(response_text)
+        # If it parses as JSON, it should NOT be a dict
+        assert not isinstance(parsed, dict), "Response text should not be a dict"
+    except json.JSONDecodeError:
+        # If it's not valid JSON, that's fine - it's just the raw string
+        pass
+
+
+def test_chat_completion_with_tool_calls(sentry_init, capture_events):
+    """Test that tool calls are properly extracted to gen_ai.response.tool_calls."""
+    sentry_init(
+        integrations=[OpenAIIntegration(include_prompts=True)],
+        traces_sample_rate=1.0,
+        send_default_pii=True,
+    )
+    events = capture_events()
+
+    # Create a response with tool calls using proper OpenAI types
+    tool_call_response = ChatCompletion(
+        id="chat-id",
+        choices=[
+            Choice(
+                index=0,
+                finish_reason="tool_calls",
+                message=ChatCompletionMessage(
+                    role="assistant",
+                    content=None,  # Content is None when there are tool calls
+                    tool_calls=[
+                        ChatCompletionMessageToolCall(
+                            id="call_123",
+                            type="function",
+                            function=ToolCallFunction(
+                                name="get_weather",
+                                arguments='{"location": "Paris"}',
+                            ),
+                        ),
+                    ],
+                ),
+            )
+        ],
+        created=10000000,
+        model="response-model-id",
+        object="chat.completion",
+        usage=CompletionUsage(
+            completion_tokens=10,
+            prompt_tokens=20,
+            total_tokens=30,
+        ),
+    )
+
+    client = OpenAI(api_key="z")
+    client.chat.completions._post = mock.Mock(return_value=tool_call_response)
+
+    with start_transaction(name="openai tx"):
+        client.chat.completions.create(
+            model="some-model",
+            messages=[{"role": "user", "content": "What's the weather in Paris?"}],
+        )
+
+    (event,) = events
+    span = event["spans"][0]
+
+    # Response text should NOT be present when content is None
+    assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"]
+
+    # Tool calls should be extracted
+    assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in span["data"]
+    tool_calls_data = span["data"][SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS]
+
+    # Should be serialized as JSON
+    assert isinstance(tool_calls_data, str)
+    parsed_tool_calls = json.loads(tool_calls_data)
+
+    assert isinstance(parsed_tool_calls, list)
+    assert len(parsed_tool_calls) == 1
+    assert parsed_tool_calls[0]["id"] == "call_123"
+    assert parsed_tool_calls[0]["type"] == "function"
+    assert parsed_tool_calls[0]["function"]["name"] == "get_weather"
+
+
+def test_chat_completion_with_content_and_tool_calls(sentry_init, capture_events):
+    """Test that both content and tool calls are captured when both are present."""
+    sentry_init(
+        integrations=[OpenAIIntegration(include_prompts=True)],
+        traces_sample_rate=1.0,
+        send_default_pii=True,
+    )
+    events = capture_events()
+
+    # Create a response with both content and tool calls using proper OpenAI types
+    response_with_both = ChatCompletion(
+        id="chat-id",
+        choices=[
+            Choice(
+                index=0,
+                finish_reason="tool_calls",
+                message=ChatCompletionMessage(
+                    role="assistant",
+                    content="I'll check the weather for you.",
+                    tool_calls=[
+                        ChatCompletionMessageToolCall(
+                            id="call_456",
+                            type="function",
+                            function=ToolCallFunction(
+                                name="get_weather",
+                                arguments='{"location": "London"}',
+                            ),
+                        ),
+                    ],
+                ),
+            )
+        ],
+        created=10000000,
+        model="response-model-id",
+        object="chat.completion",
+        usage=CompletionUsage(
+            completion_tokens=15,
+            prompt_tokens=25,
+            total_tokens=40,
+        ),
+    )
+
+    client = OpenAI(api_key="z")
+    client.chat.completions._post = mock.Mock(return_value=response_with_both)
+
+    with start_transaction(name="openai tx"):
+        client.chat.completions.create(
+            model="some-model",
+            messages=[{"role": "user", "content": "What's the weather in London?"}],
+        )
+
+    (event,) = events
+    span = event["spans"][0]
+
+    # Both should be present
+    assert SPANDATA.GEN_AI_RESPONSE_TEXT in span["data"]
+    assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in span["data"]
+
+    # Verify response text - single element list gets unpacked to the element
+    response_text = span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT]
+    assert response_text == "I'll check the weather for you."
+
+    # Verify tool calls - serialized to a JSON string; the list is kept, not unpacked
+    tool_calls_data = span["data"][SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS]
+    assert isinstance(tool_calls_data, str)
+    tool_calls = json.loads(tool_calls_data)
+    assert isinstance(tool_calls, list)
+    assert len(tool_calls) == 1
+    assert tool_calls[0]["function"]["name"] == "get_weather"
+
+
+def test_chat_completion_multiple_choices(sentry_init, capture_events):
+    """Test that multiple choices are all captured in the response text."""
+    sentry_init(
+        integrations=[OpenAIIntegration(include_prompts=True)],
+        traces_sample_rate=1.0,
+        send_default_pii=True,
+    )
+    events = capture_events()
+
+    # Create a response with multiple choices
+    multi_choice_response = ChatCompletion(
+        id="chat-id",
+        choices=[
+            Choice(
+                index=0,
+                finish_reason="stop",
+                message=ChatCompletionMessage(
+                    role="assistant", content="Response option 1"
+                ),
+            ),
+            Choice(
+                index=1,
+                finish_reason="stop",
+                message=ChatCompletionMessage(
+                    role="assistant", content="Response option 2"
+                ),
+            ),
+            Choice(
+                index=2,
+                finish_reason="stop",
+                message=ChatCompletionMessage(
+                    role="assistant", content="Response option 3"
+                ),
+            ),
+        ],
+        created=10000000,
+        model="response-model-id",
+        object="chat.completion",
+        usage=CompletionUsage(
+            completion_tokens=30,
+            prompt_tokens=20,
+            total_tokens=50,
+        ),
+    )
+
+    client = OpenAI(api_key="z")
+    client.chat.completions._post = mock.Mock(return_value=multi_choice_response)
+
+    with start_transaction(name="openai tx"):
+        client.chat.completions.create(
+            model="some-model",
+            messages=[{"role": "user", "content": "Give me options"}],
+            n=3,
+        )
+
+    (event,) = events
+    span = event["spans"][0]
+
+    assert SPANDATA.GEN_AI_RESPONSE_TEXT in span["data"]
+    response_text = json.loads(span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT])
+
+    # Should have all 3 responses as strings
+    assert len(response_text) == 3
+    assert response_text[0] == "Response option 1"
+    assert response_text[1] == "Response option 2"
+    assert response_text[2] == "Response option 3"
+
+    # All should be strings
+    for item in response_text:
+        assert isinstance(item, str)

From bd781654c11ef4f1892ad8891296da92e250bb60 Mon Sep 17 00:00:00 2001
From: Fabian Schindler <fabian.schindler@sentry.io>
Date: Thu, 15 Jan 2026 14:01:42 +0100
Subject: [PATCH 13/14] feat(ai): Add shared content transformation functions
 for multimodal AI messages

Add transform_content_part() and transform_message_content() functions
to standardize content part handling across all AI integrations.

These functions transform various SDK-specific formats (OpenAI, Anthropic,
Google, LangChain) into a unified format:
- blob: base64-encoded binary data
- uri: URL references (including file URIs)
- file: file ID references

Also adds get_modality_from_mime_type() helper to infer content modality
(image/audio/video/document) from MIME types.
---
 sentry_sdk/ai/utils.py      | 237 ++++++++++++++++++
 tests/test_ai_monitoring.py | 484 ++++++++++++++++++++++++++++++++++++
 2 files changed, 721 insertions(+)

diff --git a/sentry_sdk/ai/utils.py b/sentry_sdk/ai/utils.py
index 71f7544a1c..b7b3b790d2 100644
--- a/sentry_sdk/ai/utils.py
+++ b/sentry_sdk/ai/utils.py
@@ -72,6 +72,243 @@ def parse_data_uri(url: str) -> "Tuple[str, str]":
     return mime_type, content
 
 
+def get_modality_from_mime_type(mime_type: str) -> str:
+    """
+    Infer the content modality from a MIME type string.
+
+    Args:
+        mime_type: A MIME type string (e.g., "image/jpeg", "audio/mp3")
+
+    Returns:
+        One of: "image", "audio", "video", or "document"
+        Defaults to "image" for unknown or empty MIME types.
+
+    Examples:
+        "image/jpeg" -> "image"
+        "audio/mp3" -> "audio"
+        "video/mp4" -> "video"
+        "application/pdf" -> "document"
+        "text/plain" -> "document"
+    """
+    if not mime_type:
+        return "image"  # Default fallback
+
+    mime_lower = mime_type.lower()
+    if mime_lower.startswith("image/"):
+        return "image"
+    elif mime_lower.startswith("audio/"):
+        return "audio"
+    elif mime_lower.startswith("video/"):
+        return "video"
+    elif mime_lower.startswith("application/") or mime_lower.startswith("text/"):
+        return "document"
+    else:
+        return "image"  # Default fallback for unknown types
+
+
+def transform_content_part(
+    content_part: "Dict[str, Any]",
+) -> "Optional[Dict[str, Any]]":
+    """
+    Transform a content part from various AI SDK formats to Sentry's standardized format.
+
+    Supported input formats:
+    - OpenAI/LiteLLM: {"type": "image_url", "image_url": {"url": "..."}}
+    - Anthropic: {"type": "image|document", "source": {"type": "base64|url|file", ...}}
+    - Google: {"inline_data": {...}} or {"file_data": {...}}
+    - Generic: {"type": "image|audio|video|file", "base64|url|file_id": "...", "mime_type": "..."}
+
+    Output format (one of):
+    - {"type": "blob", "modality": "...", "mime_type": "...", "content": "..."}
+    - {"type": "uri", "modality": "...", "mime_type": "...", "uri": "..."}
+    - {"type": "file", "modality": "...", "mime_type": "...", "file_id": "..."}
+
+    Args:
+        content_part: A dictionary representing a content part from an AI SDK
+
+    Returns:
+        A transformed dictionary in standardized format, or None if the format
+        is unrecognized or transformation fails.
+    """
+    if not isinstance(content_part, dict):
+        return None
+
+    block_type = content_part.get("type")
+
+    # Handle OpenAI/LiteLLM image_url format
+    # {"type": "image_url", "image_url": {"url": "..."}} or {"type": "image_url", "image_url": "..."}
+    if block_type == "image_url":
+        image_url_data = content_part.get("image_url")
+        if isinstance(image_url_data, str):
+            url = image_url_data
+        elif isinstance(image_url_data, dict):
+            url = image_url_data.get("url", "")
+        else:
+            return None
+
+        if not url:
+            return None
+
+        # Check if it's a data URI (base64 encoded)
+        if url.startswith("data:"):
+            try:
+                mime_type, content = parse_data_uri(url)
+                return {
+                    "type": "blob",
+                    "modality": get_modality_from_mime_type(mime_type),
+                    "mime_type": mime_type,
+                    "content": content,
+                }
+            except ValueError:
+                # If parsing fails, return as URI
+                return {
+                    "type": "uri",
+                    "modality": "image",
+                    "mime_type": "",
+                    "uri": url,
+                }
+        else:
+            # Regular URL
+            return {
+                "type": "uri",
+                "modality": "image",
+                "mime_type": "",
+                "uri": url,
+            }
+
+    # Handle Anthropic format with source dict
+    # {"type": "image|document", "source": {"type": "base64|url|file", "media_type": "...", "data|url|file_id": "..."}}
+    if block_type in ("image", "document") and "source" in content_part:
+        source = content_part.get("source")
+        if not isinstance(source, dict):
+            return None
+
+        source_type = source.get("type")
+        media_type = source.get("media_type", "")
+        modality = (
+            "document"
+            if block_type == "document"
+            else get_modality_from_mime_type(media_type)
+        )
+
+        if source_type == "base64":
+            return {
+                "type": "blob",
+                "modality": modality,
+                "mime_type": media_type,
+                "content": source.get("data", ""),
+            }
+        elif source_type == "url":
+            return {
+                "type": "uri",
+                "modality": modality,
+                "mime_type": media_type,
+                "uri": source.get("url", ""),
+            }
+        elif source_type == "file":
+            return {
+                "type": "file",
+                "modality": modality,
+                "mime_type": media_type,
+                "file_id": source.get("file_id", ""),
+            }
+        return None
+
+    # Handle Google inline_data format
+    # {"inline_data": {"mime_type": "...", "data": "..."}}
+    if "inline_data" in content_part:
+        inline_data = content_part.get("inline_data")
+        if isinstance(inline_data, dict):
+            mime_type = inline_data.get("mime_type", "")
+            return {
+                "type": "blob",
+                "modality": get_modality_from_mime_type(mime_type),
+                "mime_type": mime_type,
+                "content": inline_data.get("data", ""),
+            }
+        return None
+
+    # Handle Google file_data format
+    # {"file_data": {"mime_type": "...", "file_uri": "..."}}
+    if "file_data" in content_part:
+        file_data = content_part.get("file_data")
+        if isinstance(file_data, dict):
+            mime_type = file_data.get("mime_type", "")
+            return {
+                "type": "uri",
+                "modality": get_modality_from_mime_type(mime_type),
+                "mime_type": mime_type,
+                "uri": file_data.get("file_uri", ""),
+            }
+        return None
+
+    # Handle generic format with direct fields (LangChain style)
+    # {"type": "image|audio|video|file", "base64|url|file_id": "...", "mime_type": "..."}
+    if block_type in ("image", "audio", "video", "file"):
+        mime_type = content_part.get("mime_type", "")
+        modality = block_type if block_type != "file" else "document"
+
+        # Check for base64 encoded content
+        if "base64" in content_part:
+            return {
+                "type": "blob",
+                "modality": modality,
+                "mime_type": mime_type,
+                "content": content_part.get("base64", ""),
+            }
+        # Check for URL reference
+        elif "url" in content_part:
+            return {
+                "type": "uri",
+                "modality": modality,
+                "mime_type": mime_type,
+                "uri": content_part.get("url", ""),
+            }
+        # Check for file_id reference
+        elif "file_id" in content_part:
+            return {
+                "type": "file",
+                "modality": modality,
+                "mime_type": mime_type,
+                "file_id": content_part.get("file_id", ""),
+            }
+
+    # Unrecognized format
+    return None
+
+
+def transform_message_content(content: "Any") -> "Any":
+    """
+    Transform message content, handling both string content and list of content blocks.
+
+    For list content, each item is transformed using transform_content_part().
+    Items that cannot be transformed (return None) are kept as-is.
+
+    Args:
+        content: Message content - can be a string, list/tuple of content blocks, or other
+
+    Returns:
+        - String content: returned as-is
+        - List/tuple content: new list with each transformable item converted to standardized format
+        - Other: returned as-is
+    """
+    if isinstance(content, str):
+        return content
+
+    if isinstance(content, (list, tuple)):
+        transformed = []
+        for item in content:
+            if isinstance(item, dict):
+                result = transform_content_part(item)
+                # If transformation succeeded, use the result; otherwise keep original
+                transformed.append(result if result is not None else item)
+            else:
+                transformed.append(item)
+        return transformed
+
+    return content
+
+
 def _normalize_data(data: "Any", unpack: bool = True) -> "Any":
     # convert pydantic data (e.g. OpenAI v1+) to json compatible format
     if hasattr(data, "model_dump"):
diff --git a/tests/test_ai_monitoring.py b/tests/test_ai_monitoring.py
index 1ff354f473..209d24e502 100644
--- a/tests/test_ai_monitoring.py
+++ b/tests/test_ai_monitoring.py
@@ -19,6 +19,9 @@
     _find_truncation_index,
     parse_data_uri,
     redact_blob_message_parts,
+    get_modality_from_mime_type,
+    transform_content_part,
+    transform_message_content,
 )
 from sentry_sdk.serializer import serialize
 from sentry_sdk.utils import safe_serialize
@@ -842,3 +845,484 @@ def test_handles_uri_without_data_prefix(self):
 
         assert mime_type == "image/jpeg"
         assert content == "/9j/4AAQ"
+
+
+class TestGetModalityFromMimeType:
+    def test_image_mime_types(self):
+        """Test that image MIME types return 'image' modality"""
+        assert get_modality_from_mime_type("image/jpeg") == "image"
+        assert get_modality_from_mime_type("image/png") == "image"
+        assert get_modality_from_mime_type("image/gif") == "image"
+        assert get_modality_from_mime_type("image/webp") == "image"
+        assert get_modality_from_mime_type("IMAGE/JPEG") == "image"  # case insensitive
+
+    def test_audio_mime_types(self):
+        """Test that audio MIME types return 'audio' modality"""
+        assert get_modality_from_mime_type("audio/mp3") == "audio"
+        assert get_modality_from_mime_type("audio/wav") == "audio"
+        assert get_modality_from_mime_type("audio/ogg") == "audio"
+        assert get_modality_from_mime_type("AUDIO/MP3") == "audio"  # case insensitive
+
+    def test_video_mime_types(self):
+        """Test that video MIME types return 'video' modality"""
+        assert get_modality_from_mime_type("video/mp4") == "video"
+        assert get_modality_from_mime_type("video/webm") == "video"
+        assert get_modality_from_mime_type("video/quicktime") == "video"
+        assert get_modality_from_mime_type("VIDEO/MP4") == "video"  # case insensitive
+
+    def test_document_mime_types(self):
+        """Test that application and text MIME types return 'document' modality"""
+        assert get_modality_from_mime_type("application/pdf") == "document"
+        assert get_modality_from_mime_type("application/json") == "document"
+        assert get_modality_from_mime_type("text/plain") == "document"
+        assert get_modality_from_mime_type("text/html") == "document"
+
+    def test_empty_mime_type_returns_image(self):
+        """Test that empty MIME type defaults to 'image'"""
+        assert get_modality_from_mime_type("") == "image"
+
+    def test_none_mime_type_returns_image(self):
+        """Test that a None MIME type defaults to 'image'"""
+        assert get_modality_from_mime_type(None) == "image"
+
+    def test_unknown_mime_type_returns_image(self):
+        """Test that unknown MIME types default to 'image'"""
+        assert get_modality_from_mime_type("unknown/type") == "image"
+        assert get_modality_from_mime_type("custom/format") == "image"
+
+
+class TestTransformContentPart:
+    # OpenAI/LiteLLM format tests
+    def test_openai_image_url_with_data_uri(self):
+        """Test transforming OpenAI image_url with base64 data URI"""
+        content_part = {
+            "type": "image_url",
+            "image_url": {"url": "data:image/jpeg;base64,/9j/4AAQSkZJRg=="},
+        }
+        result = transform_content_part(content_part)
+
+        assert result == {
+            "type": "blob",
+            "modality": "image",
+            "mime_type": "image/jpeg",
+            "content": "/9j/4AAQSkZJRg==",
+        }
+
+    def test_openai_image_url_with_regular_url(self):
+        """Test transforming OpenAI image_url with regular URL"""
+        content_part = {
+            "type": "image_url",
+            "image_url": {"url": "https://example.com/image.jpg"},
+        }
+        result = transform_content_part(content_part)
+
+        assert result == {
+            "type": "uri",
+            "modality": "image",
+            "mime_type": "",
+            "uri": "https://example.com/image.jpg",
+        }
+
+    def test_openai_image_url_string_format(self):
+        """Test transforming OpenAI image_url where image_url is a string"""
+        content_part = {
+            "type": "image_url",
+            "image_url": "https://example.com/image.jpg",
+        }
+        result = transform_content_part(content_part)
+
+        assert result == {
+            "type": "uri",
+            "modality": "image",
+            "mime_type": "",
+            "uri": "https://example.com/image.jpg",
+        }
+
+    def test_openai_image_url_invalid_data_uri(self):
+        """Test transforming OpenAI image_url with invalid data URI falls back to URI"""
+        content_part = {
+            "type": "image_url",
+            "image_url": {"url": "data:image/jpeg;base64"},  # Missing comma
+        }
+        result = transform_content_part(content_part)
+
+        assert result == {
+            "type": "uri",
+            "modality": "image",
+            "mime_type": "",
+            "uri": "data:image/jpeg;base64",
+        }
+
+    # Anthropic format tests
+    def test_anthropic_image_base64(self):
+        """Test transforming Anthropic image with base64 source"""
+        content_part = {
+            "type": "image",
+            "source": {
+                "type": "base64",
+                "media_type": "image/png",
+                "data": "iVBORw0KGgo=",
+            },
+        }
+        result = transform_content_part(content_part)
+
+        assert result == {
+            "type": "blob",
+            "modality": "image",
+            "mime_type": "image/png",
+            "content": "iVBORw0KGgo=",
+        }
+
+    def test_anthropic_image_url(self):
+        """Test transforming Anthropic image with URL source"""
+        content_part = {
+            "type": "image",
+            "source": {
+                "type": "url",
+                "media_type": "image/jpeg",
+                "url": "https://example.com/image.jpg",
+            },
+        }
+        result = transform_content_part(content_part)
+
+        assert result == {
+            "type": "uri",
+            "modality": "image",
+            "mime_type": "image/jpeg",
+            "uri": "https://example.com/image.jpg",
+        }
+
+    def test_anthropic_image_file(self):
+        """Test transforming Anthropic image with file source"""
+        content_part = {
+            "type": "image",
+            "source": {
+                "type": "file",
+                "media_type": "image/jpeg",
+                "file_id": "file_123",
+            },
+        }
+        result = transform_content_part(content_part)
+
+        assert result == {
+            "type": "file",
+            "modality": "image",
+            "mime_type": "image/jpeg",
+            "file_id": "file_123",
+        }
+
+    def test_anthropic_document_base64(self):
+        """Test transforming Anthropic document with base64 source"""
+        content_part = {
+            "type": "document",
+            "source": {
+                "type": "base64",
+                "media_type": "application/pdf",
+                "data": "JVBERi0xLjQ=",
+            },
+        }
+        result = transform_content_part(content_part)
+
+        assert result == {
+            "type": "blob",
+            "modality": "document",
+            "mime_type": "application/pdf",
+            "content": "JVBERi0xLjQ=",
+        }
+
+    def test_anthropic_document_url(self):
+        """Test transforming Anthropic document with URL source"""
+        content_part = {
+            "type": "document",
+            "source": {
+                "type": "url",
+                "media_type": "application/pdf",
+                "url": "https://example.com/doc.pdf",
+            },
+        }
+        result = transform_content_part(content_part)
+
+        assert result == {
+            "type": "uri",
+            "modality": "document",
+            "mime_type": "application/pdf",
+            "uri": "https://example.com/doc.pdf",
+        }
+
+    # Google format tests
+    def test_google_inline_data(self):
+        """Test transforming Google inline_data format"""
+        content_part = {
+            "inline_data": {
+                "mime_type": "image/jpeg",
+                "data": "/9j/4AAQSkZJRg==",
+            }
+        }
+        result = transform_content_part(content_part)
+
+        assert result == {
+            "type": "blob",
+            "modality": "image",
+            "mime_type": "image/jpeg",
+            "content": "/9j/4AAQSkZJRg==",
+        }
+
+    def test_google_file_data(self):
+        """Test transforming Google file_data format"""
+        content_part = {
+            "file_data": {
+                "mime_type": "video/mp4",
+                "file_uri": "gs://bucket/video.mp4",
+            }
+        }
+        result = transform_content_part(content_part)
+
+        assert result == {
+            "type": "uri",
+            "modality": "video",
+            "mime_type": "video/mp4",
+            "uri": "gs://bucket/video.mp4",
+        }
+
+    def test_google_inline_data_audio(self):
+        """Test transforming Google inline_data with audio"""
+        content_part = {
+            "inline_data": {
+                "mime_type": "audio/wav",
+                "data": "UklGRiQA",
+            }
+        }
+        result = transform_content_part(content_part)
+
+        assert result == {
+            "type": "blob",
+            "modality": "audio",
+            "mime_type": "audio/wav",
+            "content": "UklGRiQA",
+        }
+
+    # Generic format tests (LangChain style)
+    def test_generic_image_base64(self):
+        """Test transforming generic format with base64"""
+        content_part = {
+            "type": "image",
+            "base64": "/9j/4AAQSkZJRg==",
+            "mime_type": "image/jpeg",
+        }
+        result = transform_content_part(content_part)
+
+        assert result == {
+            "type": "blob",
+            "modality": "image",
+            "mime_type": "image/jpeg",
+            "content": "/9j/4AAQSkZJRg==",
+        }
+
+    def test_generic_audio_url(self):
+        """Test transforming generic format with URL"""
+        content_part = {
+            "type": "audio",
+            "url": "https://example.com/audio.mp3",
+            "mime_type": "audio/mp3",
+        }
+        result = transform_content_part(content_part)
+
+        assert result == {
+            "type": "uri",
+            "modality": "audio",
+            "mime_type": "audio/mp3",
+            "uri": "https://example.com/audio.mp3",
+        }
+
+    def test_generic_file_with_file_id(self):
+        """Test transforming generic format with file_id"""
+        content_part = {
+            "type": "file",
+            "file_id": "file_456",
+            "mime_type": "application/pdf",
+        }
+        result = transform_content_part(content_part)
+
+        assert result == {
+            "type": "file",
+            "modality": "document",
+            "mime_type": "application/pdf",
+            "file_id": "file_456",
+        }
+
+    def test_generic_video_base64(self):
+        """Test transforming generic video format"""
+        content_part = {
+            "type": "video",
+            "base64": "AAAA",
+            "mime_type": "video/mp4",
+        }
+        result = transform_content_part(content_part)
+
+        assert result == {
+            "type": "blob",
+            "modality": "video",
+            "mime_type": "video/mp4",
+            "content": "AAAA",
+        }
+
+    # Edge cases and error handling
+    def test_text_block_returns_none(self):
+        """Test that text blocks return None (not transformed)"""
+        content_part = {"type": "text", "text": "Hello world"}
+        result = transform_content_part(content_part)
+
+        assert result is None
+
+    def test_non_dict_returns_none(self):
+        """Test that non-dict input returns None"""
+        assert transform_content_part("string") is None
+        assert transform_content_part(123) is None
+        assert transform_content_part(None) is None
+        assert transform_content_part([1, 2, 3]) is None
+
+    def test_empty_dict_returns_none(self):
+        """Test that empty dict returns None"""
+        assert transform_content_part({}) is None
+
+    def test_unknown_type_returns_none(self):
+        """Test that unknown type returns None"""
+        content_part = {"type": "unknown", "data": "something"}
+        assert transform_content_part(content_part) is None
+
+    def test_openai_image_url_empty_url_returns_none(self):
+        """Test that image_url with empty URL returns None"""
+        content_part = {"type": "image_url", "image_url": {"url": ""}}
+        assert transform_content_part(content_part) is None
+
+    def test_anthropic_invalid_source_returns_none(self):
+        """Test that Anthropic format with invalid source returns None"""
+        content_part = {"type": "image", "source": "not_a_dict"}
+        assert transform_content_part(content_part) is None
+
+    def test_anthropic_unknown_source_type_returns_none(self):
+        """Test that Anthropic format with unknown source type returns None"""
+        content_part = {
+            "type": "image",
+            "source": {"type": "unknown", "data": "something"},
+        }
+        assert transform_content_part(content_part) is None
+
+    def test_google_inline_data_not_dict_returns_none(self):
+        """Test that Google inline_data with non-dict value returns None"""
+        content_part = {"inline_data": "not_a_dict"}
+        assert transform_content_part(content_part) is None
+
+    def test_google_file_data_not_dict_returns_none(self):
+        """Test that Google file_data with non-dict value returns None"""
+        content_part = {"file_data": "not_a_dict"}
+        assert transform_content_part(content_part) is None
+
+
+class TestTransformMessageContent:
+    def test_string_content_returned_as_is(self):
+        """Test that string content is returned unchanged"""
+        content = "Hello, world!"
+        result = transform_message_content(content)
+
+        assert result == "Hello, world!"
+
+    def test_list_with_transformable_items(self):
+        """Test transforming a list with transformable content parts"""
+        content = [
+            {"type": "text", "text": "What's in this image?"},
+            {
+                "type": "image_url",
+                "image_url": {"url": "data:image/jpeg;base64,/9j/4AAQ"},
+            },
+        ]
+        result = transform_message_content(content)
+
+        assert len(result) == 2
+        # Text block should be unchanged (transform returns None, so original kept)
+        assert result[0] == {"type": "text", "text": "What's in this image?"}
+        # Image should be transformed
+        assert result[1] == {
+            "type": "blob",
+            "modality": "image",
+            "mime_type": "image/jpeg",
+            "content": "/9j/4AAQ",
+        }
+
+    def test_list_with_non_dict_items(self):
+        """Test that non-dict items in list are kept as-is"""
+        content = ["text string", 123, {"type": "text", "text": "hi"}]
+        result = transform_message_content(content)
+
+        assert result == ["text string", 123, {"type": "text", "text": "hi"}]
+
+    def test_tuple_content(self):
+        """Test that tuple content is transformed item by item, like a list"""
+        content = (
+            {"type": "text", "text": "Hello"},
+            {
+                "type": "image_url",
+                "image_url": {"url": "https://example.com/img.jpg"},
+            },
+        )
+        result = transform_message_content(content)
+
+        assert len(result) == 2
+        assert result[0] == {"type": "text", "text": "Hello"}
+        assert result[1] == {
+            "type": "uri",
+            "modality": "image",
+            "mime_type": "",
+            "uri": "https://example.com/img.jpg",
+        }
+
+    def test_other_types_returned_as_is(self):
+        """Test that other types are returned unchanged"""
+        assert transform_message_content(123) == 123
+        assert transform_message_content(None) is None
+        assert transform_message_content({"key": "value"}) == {"key": "value"}
+
+    def test_mixed_content_types(self):
+        """Test transforming mixed content with multiple formats"""
+        content = [
+            {"type": "text", "text": "Look at these:"},
+            {
+                "type": "image_url",
+                "image_url": {"url": "data:image/png;base64,iVBORw0"},
+            },
+            {
+                "type": "image",
+                "source": {
+                    "type": "base64",
+                    "media_type": "image/jpeg",
+                    "data": "/9j/4AAQ",
+                },
+            },
+            {"inline_data": {"mime_type": "audio/wav", "data": "UklGRiQA"}},
+        ]
+        result = transform_message_content(content)
+
+        assert len(result) == 4
+        assert result[0] == {"type": "text", "text": "Look at these:"}
+        assert result[1] == {
+            "type": "blob",
+            "modality": "image",
+            "mime_type": "image/png",
+            "content": "iVBORw0",
+        }
+        assert result[2] == {
+            "type": "blob",
+            "modality": "image",
+            "mime_type": "image/jpeg",
+            "content": "/9j/4AAQ",
+        }
+        assert result[3] == {
+            "type": "blob",
+            "modality": "audio",
+            "mime_type": "audio/wav",
+            "content": "UklGRiQA",
+        }
+
+    def test_empty_list(self):
+        """Test that empty list is returned as empty list"""
+        assert transform_message_content([]) == []

From df59f49159950fd031480108f60ddbdd47ef5833 Mon Sep 17 00:00:00 2001
From: Fabian Schindler <fabian.schindler@sentry.io>
Date: Thu, 15 Jan 2026 14:03:50 +0100
Subject: [PATCH 14/14] refactor(openai): Use shared transform_message_content
 from ai/utils

Replace local _convert_message_parts function with the shared
transform_message_content function to deduplicate code across
AI integrations.
---
 sentry_sdk/integrations/openai.py        |  92 +---------------
 tests/integrations/openai/test_openai.py | 134 +++++++++--------------
 2 files changed, 59 insertions(+), 167 deletions(-)

diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py
index 8937867ba4..9a69f601db 100644
--- a/sentry_sdk/integrations/openai.py
+++ b/sentry_sdk/integrations/openai.py
@@ -8,7 +8,7 @@
     extract_response_output,
     set_data_normalized,
     normalize_message_roles,
-    parse_data_uri,
+    transform_message_content,
     truncate_and_annotate_messages,
 )
 from sentry_sdk.consts import SPANDATA
@@ -22,7 +22,7 @@
     reraise,
 )
 
-from typing import TYPE_CHECKING, Dict
+from typing import TYPE_CHECKING
 
 if TYPE_CHECKING:
     from typing import Any, Iterable, List, Optional, Callable, AsyncIterator, Iterator
@@ -184,89 +184,6 @@ def _calculate_token_usage(
     )
 
 
-def _convert_message_parts(messages: "List[Dict[str, Any]]") -> "List[Dict[str, Any]]":
-    """
-    Convert the message parts from OpenAI format to the `gen_ai.request.messages` format.
-    e.g:
-    {
-        "role": "user",
-        "content": [
-            {
-                "text": "How many ponies do you see in the image?",
-                "type": "text"
-            },
-            {
-                "type": "image_url",
-                "image_url": {
-                    "url": "data:image/jpeg;base64,...",
-                    "detail": "high"
-                }
-            }
-        ]
-    }
-    becomes:
-    {
-        "role": "user",
-        "content": [
-            {
-                "text": "How many ponies do you see in the image?",
-                "type": "text"
-            },
-            {
-                "type": "blob",
-                "modality": "image",
-                "mime_type": "image/jpeg",
-                "content": "data:image/jpeg;base64,..."
-            }
-        ]
-    }
-    """
-
-    def _map_item(item: "Dict[str, Any]") -> "Dict[str, Any]":
-        if not isinstance(item, dict):
-            return item
-
-        if item.get("type") == "image_url":
-            image_url = item.get("image_url")
-            if isinstance(image_url, str):
-                url = image_url
-            elif isinstance(image_url, dict):
-                url = image_url.get("url", "")
-            else:
-                url = ""
-            if url.startswith("data:"):
-                try:
-                    mime_type, content = parse_data_uri(url)
-                    return {
-                        "type": "blob",
-                        "modality": "image",
-                        "mime_type": mime_type,
-                        "content": content,
-                    }
-                except ValueError:
-                    # If parsing fails, return as URI
-                    return {
-                        "type": "uri",
-                        "modality": "image",
-                        "uri": url,
-                    }
-            else:
-                return {
-                    "type": "uri",
-                    "modality": "image",
-                    "uri": url,
-                }
-        return item
-
-    for message in messages:
-        if not isinstance(message, dict):
-            continue
-        content = message.get("content")
-        if isinstance(content, list):
-            message["content"] = [_map_item(item) for item in content]
-    return messages
-
-
 def _set_input_data(
     span: "Span",
     kwargs: "dict[str, Any]",
@@ -288,7 +205,10 @@ def _set_input_data(
         and integration.include_prompts
     ):
         normalized_messages = normalize_message_roles(messages)
-        normalized_messages = _convert_message_parts(normalized_messages)
+        # Transform content parts to standardized format
+        for message in normalized_messages:
+            if isinstance(message, dict) and "content" in message:
+                message["content"] = transform_message_content(message["content"])
 
         scope = sentry_sdk.get_current_scope()
         messages_data = truncate_and_annotate_messages(normalized_messages, span, scope)
diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py
index 10cf625ab7..2c1e32b1e4 100644
--- a/tests/integrations/openai/test_openai.py
+++ b/tests/integrations/openai/test_openai.py
@@ -47,9 +47,8 @@
 from sentry_sdk.integrations.openai import (
     OpenAIIntegration,
     _calculate_token_usage,
-    _convert_message_parts,
 )
-from sentry_sdk.ai.utils import MAX_GEN_AI_MESSAGE_BYTES
+from sentry_sdk.ai.utils import MAX_GEN_AI_MESSAGE_BYTES, transform_message_content
 from sentry_sdk._types import AnnotatedValue
 from sentry_sdk.serializer import serialize
 
@@ -1514,42 +1513,35 @@ def test_openai_message_role_mapping(sentry_init, capture_events):
     assert "ai" not in roles
 
 
-def test_convert_message_parts_image_url_to_blob():
+def test_transform_message_content_image_url_to_blob():
     """Test that OpenAI image_url message parts are correctly converted to blob format"""
-    messages = [
+    content = [
         {
-            "role": "user",
-            "content": [
-                {
-                    "text": "How many ponies do you see in the image?",
-                    "type": "text",
-                },
-                {
-                    "type": "image_url",
-                    "image_url": {
-                        "url": "data:image/jpeg;base64,/9j/4AAQSkZJRg==",
-                        "detail": "high",
-                    },
-                },
-            ],
-        }
+            "text": "How many ponies do you see in the image?",
+            "type": "text",
+        },
+        {
+            "type": "image_url",
+            "image_url": {
+                "url": "data:image/jpeg;base64,/9j/4AAQSkZJRg==",
+                "detail": "high",
+            },
+        },
     ]
 
-    converted = _convert_message_parts(messages)
+    converted = transform_message_content(content)
 
-    assert len(converted) == 1
-    assert converted[0]["role"] == "user"
-    assert isinstance(converted[0]["content"], list)
-    assert len(converted[0]["content"]) == 2
+    assert isinstance(converted, list)
+    assert len(converted) == 2
 
     # First item (text) should remain unchanged
-    assert converted[0]["content"][0] == {
+    assert converted[0] == {
         "text": "How many ponies do you see in the image?",
         "type": "text",
     }
 
     # Second item (image_url) should be converted to blob format
-    blob_item = converted[0]["content"][1]
+    blob_item = converted[1]
     assert blob_item["type"] == "blob"
     assert blob_item["modality"] == "image"
     assert blob_item["mime_type"] == "image/jpeg"
@@ -1558,104 +1550,84 @@ def test_convert_message_parts_image_url_to_blob():
     assert "image_url" not in blob_item
 
 
-def test_convert_message_parts_image_url_to_uri():
+def test_transform_message_content_image_url_to_uri():
     """Test that OpenAI image_url with non-data URLs are converted to uri format"""
-    messages = [
+    content = [
         {
-            "role": "user",
-            "content": [
-                {
-                    "type": "image_url",
-                    "image_url": {
-                        "url": "https://example.com/image.jpg",
-                        "detail": "low",
-                    },
-                },
-            ],
-        }
+            "type": "image_url",
+            "image_url": {
+                "url": "https://example.com/image.jpg",
+                "detail": "low",
+            },
+        },
     ]
 
-    converted = _convert_message_parts(messages)
+    converted = transform_message_content(content)
 
     assert len(converted) == 1
-    uri_item = converted[0]["content"][0]
+    uri_item = converted[0]
     assert uri_item["type"] == "uri"
     assert uri_item["uri"] == "https://example.com/image.jpg"
     # Verify the original image_url structure is replaced
     assert "image_url" not in uri_item
 
 
-def test_convert_message_parts_malformed_data_uri():
+def test_transform_message_content_malformed_data_uri():
     """Test that malformed data URIs are handled gracefully without crashing"""
-    messages = [
+    content = [
         {
-            "role": "user",
-            "content": [
-                {
-                    "type": "image_url",
-                    "image_url": {
-                        # Malformed: missing ;base64, and comma separator
-                        "url": "data:image/jpeg",
-                    },
-                },
-            ],
-        }
+            "type": "image_url",
+            "image_url": {
+                # Malformed: missing ;base64, and comma separator
+                "url": "data:image/jpeg",
+            },
+        },
     ]
 
     # Should not raise an exception
-    converted = _convert_message_parts(messages)
+    converted = transform_message_content(content)
 
     assert len(converted) == 1
     # Malformed data URI should fall back to uri type
-    item = converted[0]["content"][0]
+    item = converted[0]
     assert item["type"] == "uri"
     assert item["uri"] == "data:image/jpeg"
     assert item["modality"] == "image"
 
 
-def test_convert_message_parts_image_url_as_string():
+def test_transform_message_content_image_url_as_string():
     """Test that image_url as a string (instead of dict) is handled gracefully"""
-    messages = [
+    content = [
         {
-            "role": "user",
-            "content": [
-                {
-                    "type": "image_url",
-                    # Some implementations pass image_url as a string directly
-                    "image_url": "https://example.com/image.jpg",
-                },
-            ],
-        }
+            "type": "image_url",
+            # Some implementations pass image_url as a string directly
+            "image_url": "https://example.com/image.jpg",
+        },
     ]
 
     # Should not raise an exception
-    converted = _convert_message_parts(messages)
+    converted = transform_message_content(content)
 
     assert len(converted) == 1
-    item = converted[0]["content"][0]
+    item = converted[0]
     assert item["type"] == "uri"
     assert item["modality"] == "image"
     assert item["uri"] == "https://example.com/image.jpg"
 
 
-def test_convert_message_parts_image_url_as_string_data_uri():
+def test_transform_message_content_image_url_as_string_data_uri():
     """Test that image_url as a data URI string is correctly converted to blob"""
-    messages = [
+    content = [
         {
-            "role": "user",
-            "content": [
-                {
-                    "type": "image_url",
-                    "image_url": "data:image/png;base64,iVBORw0KGgo=",
-                },
-            ],
-        }
+            "type": "image_url",
+            "image_url": "data:image/png;base64,iVBORw0KGgo=",
+        },
     ]
 
-    converted = _convert_message_parts(messages)
+    converted = transform_message_content(content)
 
     assert len(converted) == 1
-    item = converted[0]["content"][0]
+    item = converted[0]
     assert item["type"] == "blob"
     assert item["modality"] == "image"
     assert item["mime_type"] == "image/png"