Skip to content

Commit 8a48ad2

Browse files
fix(integrations): langchain add multimodal content transformation functions for images, audio, and files (#5278)
### Description
Add support for more message types in `gen_ai.request.messages`.

#### Issues
Closes: https://linear.app/getsentry/issue/TET-1637/redact-images-langchain
1 parent 2abe9f4 commit 8a48ad2

File tree

4 files changed

+1667
-1
lines changed

4 files changed

+1667
-1
lines changed

sentry_sdk/ai/utils.py

Lines changed: 391 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,397 @@ def parse_data_uri(url: str) -> "Tuple[str, str]":
7272
return mime_type, content
7373

7474

75+
def get_modality_from_mime_type(mime_type: str) -> str:
    """
    Infer the content modality from a MIME type string.

    Matching is case-insensitive and only looks at the top-level type
    (the part before the slash).

    Args:
        mime_type: A MIME type string (e.g., "image/jpeg", "audio/mp3")

    Returns:
        One of: "image", "audio", "video", or "document"
        Defaults to "image" for unknown, empty, or non-string MIME types.

    Examples:
        "image/jpeg" -> "image"
        "audio/mp3" -> "audio"
        "video/mp4" -> "video"
        "application/pdf" -> "document"
        "text/plain" -> "document"
    """
    # The value is typically read straight out of a content-part dict, so it
    # may be None or some non-string object. Treat anything that is not a
    # non-empty string as "unknown" instead of raising from .lower().
    if not mime_type or not isinstance(mime_type, str):
        return "image"  # Default fallback

    mime_lower = mime_type.lower()
    if mime_lower.startswith("audio/"):
        return "audio"
    if mime_lower.startswith("video/"):
        return "video"
    if mime_lower.startswith(("application/", "text/")):
        return "document"

    # "image/*" and every unrecognized top-level type map to "image".
    return "image"
107+
108+
109+
def transform_openai_content_part(
    content_part: "Dict[str, Any]",
) -> "Optional[Dict[str, Any]]":
    """
    Transform an OpenAI/LiteLLM content part to Sentry's standardized format.

    This handles the OpenAI image_url format used by OpenAI and LiteLLM SDKs.

    Input format:
    - {"type": "image_url", "image_url": {"url": "..."}}
    - {"type": "image_url", "image_url": "..."} (string shorthand)

    Output format (one of):
    - {"type": "blob", "modality": "image", "mime_type": "...", "content": "..."}
    - {"type": "uri", "modality": "image", "mime_type": "", "uri": "..."}

    Args:
        content_part: A dictionary representing a content part from OpenAI/LiteLLM

    Returns:
        A transformed dictionary in standardized format, or None if the format
        is not OpenAI image_url format, the URL is missing/empty or not a
        string, or transformation fails.
    """
    if not isinstance(content_part, dict):
        return None

    if content_part.get("type") != "image_url":
        return None

    image_url_data = content_part.get("image_url")
    if isinstance(image_url_data, dict):
        url = image_url_data.get("url", "")
    else:
        # String shorthand; any other type is rejected by the check below.
        url = image_url_data

    # A non-string URL (bytes, int, ...) cannot be inspected with
    # str.startswith; report "not transformable" instead of raising.
    if not url or not isinstance(url, str):
        return None

    # Check if it's a data URI (base64 encoded)
    if url.startswith("data:"):
        try:
            mime_type, content = parse_data_uri(url)
        except ValueError:
            # If parsing fails, fall through and report it as a plain URI
            pass
        else:
            return {
                "type": "blob",
                "modality": get_modality_from_mime_type(mime_type),
                "mime_type": mime_type,
                "content": content,
            }

    # Regular URL (or a data URI that could not be parsed)
    return {
        "type": "uri",
        "modality": "image",
        "mime_type": "",
        "uri": url,
    }
177+
178+
179+
def transform_anthropic_content_part(
    content_part: "Dict[str, Any]",
) -> "Optional[Dict[str, Any]]":
    """
    Transform an Anthropic content part to Sentry's standardized format.

    This handles the Anthropic image and document formats with source dictionaries.

    Input format:
    - {"type": "image", "source": {"type": "base64", "media_type": "...", "data": "..."}}
    - {"type": "image", "source": {"type": "url", "media_type": "...", "url": "..."}}
    - {"type": "image", "source": {"type": "file", "media_type": "...", "file_id": "..."}}
    - {"type": "document", "source": {...}} (same source formats)

    Output format (one of):
    - {"type": "blob", "modality": "...", "mime_type": "...", "content": "..."}
    - {"type": "uri", "modality": "...", "mime_type": "...", "uri": "..."}
    - {"type": "file", "modality": "...", "mime_type": "...", "file_id": "..."}

    A missing, None, or non-string "media_type" is reported as an empty
    "mime_type" string.

    Args:
        content_part: A dictionary representing a content part from Anthropic

    Returns:
        A transformed dictionary in standardized format, or None if the format
        is not Anthropic format (including unknown source types) or
        transformation fails.
    """
    if not isinstance(content_part, dict):
        return None

    block_type = content_part.get("type")
    if block_type not in ("image", "document") or "source" not in content_part:
        return None

    source = content_part.get("source")
    if not isinstance(source, dict):
        return None

    # Map the Anthropic source type onto the standardized output type, the
    # output key carrying the payload, and the payload itself.
    source_type = source.get("type")
    if source_type == "base64":
        out_type, payload_key, payload = "blob", "content", source.get("data", "")
    elif source_type == "url":
        out_type, payload_key, payload = "uri", "uri", source.get("url", "")
    elif source_type == "file":
        out_type, payload_key, payload = "file", "file_id", source.get("file_id", "")
    else:
        return None

    # "media_type" may be absent or explicitly None; normalize so the output
    # always carries a string and the modality lookup gets a string.
    raw_media_type = source.get("media_type")
    media_type = raw_media_type if isinstance(raw_media_type, str) else ""

    # Document blocks are always documents; image blocks defer to the MIME type.
    if block_type == "document":
        modality = "document"
    else:
        modality = get_modality_from_mime_type(media_type)

    return {
        "type": out_type,
        "modality": modality,
        "mime_type": media_type,
        payload_key: payload,
    }
248+
249+
250+
def transform_google_content_part(
    content_part: "Dict[str, Any]",
) -> "Optional[Dict[str, Any]]":
    """
    Transform a Google GenAI content part to Sentry's standardized format.

    This handles the Google GenAI inline_data and file_data formats.

    Input format:
    - {"inline_data": {"mime_type": "...", "data": "..."}}
    - {"file_data": {"mime_type": "...", "file_uri": "..."}}

    Output format (one of):
    - {"type": "blob", "modality": "...", "mime_type": "...", "content": "..."}
    - {"type": "uri", "modality": "...", "mime_type": "...", "uri": "..."}

    "inline_data" takes precedence when both keys hold dictionaries. A key
    whose value is not a dictionary (e.g. None) is ignored, so a part carrying
    {"inline_data": None, "file_data": {...}} is still recognized.

    Args:
        content_part: A dictionary representing a content part from Google GenAI

    Returns:
        A transformed dictionary in standardized format, or None if the format
        is not Google format or transformation fails.
    """
    if not isinstance(content_part, dict):
        return None

    # Handle Google inline_data format. A non-dict value must not end the
    # search: the part may carry both keys with only one of them populated.
    inline_data = content_part.get("inline_data")
    if isinstance(inline_data, dict):
        mime_type = inline_data.get("mime_type", "")
        return {
            "type": "blob",
            "modality": get_modality_from_mime_type(mime_type),
            "mime_type": mime_type,
            "content": inline_data.get("data", ""),
        }

    # Handle Google file_data format
    file_data = content_part.get("file_data")
    if isinstance(file_data, dict):
        mime_type = file_data.get("mime_type", "")
        return {
            "type": "uri",
            "modality": get_modality_from_mime_type(mime_type),
            "mime_type": mime_type,
            "uri": file_data.get("file_uri", ""),
        }

    return None
303+
304+
305+
def transform_generic_content_part(
    content_part: "Dict[str, Any]",
) -> "Optional[Dict[str, Any]]":
    """
    Transform a generic/LangChain-style content part to Sentry's standardized format.

    This handles generic formats where the type indicates the modality and
    the data is provided via direct base64, url, or file_id fields.

    Input format:
    - {"type": "image", "base64": "...", "mime_type": "..."}
    - {"type": "audio", "url": "...", "mime_type": "..."}
    - {"type": "video", "base64": "...", "mime_type": "..."}
    - {"type": "file", "file_id": "...", "mime_type": "..."}

    Output format (one of):
    - {"type": "blob", "modality": "...", "mime_type": "...", "content": "..."}
    - {"type": "uri", "modality": "...", "mime_type": "...", "uri": "..."}
    - {"type": "file", "modality": "...", "mime_type": "...", "file_id": "..."}

    The payload fields are checked in the order base64, url, file_id, and the
    first one holding a value other than None is used. A field that is present
    but None is skipped, so {"base64": None, "url": "..."} becomes a "uri".

    Args:
        content_part: A dictionary representing a content part in generic format

    Returns:
        A transformed dictionary in standardized format, or None if the format
        is not generic format (including when no payload field has a value)
        or transformation fails.
    """
    if not isinstance(content_part, dict):
        return None

    block_type = content_part.get("type")
    if block_type not in ("image", "audio", "video", "file"):
        return None

    # Ensure it's not Anthropic format (which also uses type: "image")
    if "source" in content_part:
        return None

    mime_type = content_part.get("mime_type", "")
    # The block type doubles as the modality, except that "file" is reported
    # as a document.
    modality = "document" if block_type == "file" else block_type

    # (input key, standardized output type, standardized output key), in
    # priority order.
    payload_fields = (
        ("base64", "blob", "content"),
        ("url", "uri", "uri"),
        ("file_id", "file", "file_id"),
    )
    for source_key, out_type, out_key in payload_fields:
        payload = content_part.get(source_key)
        if payload is None:
            # Missing or explicitly empty; try the next kind of reference
            # rather than emitting a result with no payload.
            continue
        return {
            "type": out_type,
            "modality": modality,
            "mime_type": mime_type,
            out_key: payload,
        }

    return None
373+
374+
375+
def transform_content_part(
    content_part: "Dict[str, Any]",
) -> "Optional[Dict[str, Any]]":
    """
    Convert a content part from any supported AI SDK format to Sentry's standardized format.

    This is a heuristic dispatcher: it hands the part to each SDK-specific
    transformer in turn and returns the first result that is not None. When
    the originating SDK is known, call the matching transformer directly:
    - transform_openai_content_part() for OpenAI/LiteLLM
    - transform_anthropic_content_part() for Anthropic
    - transform_google_content_part() for Google GenAI
    - transform_generic_content_part() for LangChain and other generic formats

    Detection order:
    1. OpenAI: type == "image_url"
    2. Google: "inline_data" or "file_data" keys present
    3. Anthropic: type in ("image", "document") with "source" key
    4. Generic: type in ("image", "audio", "video", "file") with base64/url/file_id

    Output format (one of):
    - {"type": "blob", "modality": "...", "mime_type": "...", "content": "..."}
    - {"type": "uri", "modality": "...", "mime_type": "...", "uri": "..."}
    - {"type": "file", "modality": "...", "mime_type": "...", "file_id": "..."}

    Args:
        content_part: A dictionary representing a content part from an AI SDK

    Returns:
        A transformed dictionary in standardized format, or None if the format
        is unrecognized or transformation fails.
    """
    if not isinstance(content_part, dict):
        return None

    # Tried strictly in the documented detection order; first match wins.
    transformers = (
        transform_openai_content_part,
        transform_google_content_part,
        transform_anthropic_content_part,
        transform_generic_content_part,
    )
    for transformer in transformers:
        standardized = transformer(content_part)
        if standardized is not None:
            return standardized

    # Unrecognized format
    return None
432+
433+
434+
def transform_message_content(content: "Any") -> "Any":
    """
    Standardize the content of a message.

    Only list/tuple content is rewritten: every dictionary item is passed
    through transform_content_part() and replaced by the result, unless that
    result is None, in which case the original item is kept. Non-dictionary
    items are copied over unchanged.

    Args:
        content: Message content - can be a string, list of content blocks, or other

    Returns:
        - String content: returned as-is
        - List or tuple content: a new list with each transformable item
          converted to standardized format
        - Other: returned as-is
    """
    # Strings and every other non-sequence value pass straight through.
    if not isinstance(content, (list, tuple)):
        return content

    def _standardize(part: "Any") -> "Any":
        if not isinstance(part, dict):
            return part
        converted = transform_content_part(part)
        # None means "not recognized": keep the original block untouched.
        return part if converted is None else converted

    return [_standardize(part) for part in content]
464+
465+
75466
def _normalize_data(data: "Any", unpack: bool = True) -> "Any":
76467
# convert pydantic data (e.g. OpenAI v1+) to json compatible format
77468
if hasattr(data, "model_dump"):

0 commit comments

Comments
 (0)