@@ -72,6 +72,397 @@ def parse_data_uri(url: str) -> "Tuple[str, str]":
7272 return mime_type , content
7373
7474
def get_modality_from_mime_type(mime_type: str) -> str:
    """
    Infer the content modality from a MIME type string.

    Only the top-level type (the part before the "/") is inspected, and the
    comparison is case-insensitive.

    Args:
        mime_type: A MIME type string (e.g., "image/jpeg", "audio/mp3").
            Empty values and values that are not strings are tolerated and
            treated as unknown instead of raising.

    Returns:
        One of: "image", "audio", "video", or "document".
        Defaults to "image" for unknown, empty, or non-string MIME types.

    Examples:
        "image/jpeg" -> "image"
        "audio/mp3" -> "audio"
        "video/mp4" -> "video"
        "application/pdf" -> "document"
        "text/plain" -> "document"
    """
    # The value may come straight from an arbitrary payload; a truthy
    # non-string (int, bytes, list, ...) must not blow up on .lower().
    if not mime_type or not isinstance(mime_type, str):
        return "image"  # Default fallback

    mime_lower = mime_type.lower()
    if mime_lower.startswith("image/"):
        return "image"
    if mime_lower.startswith("audio/"):
        return "audio"
    if mime_lower.startswith("video/"):
        return "video"
    if mime_lower.startswith(("application/", "text/")):
        return "document"

    return "image"  # Default fallback for unknown types
107+
108+
def transform_openai_content_part(
    content_part: "Dict[str, Any]",
) -> "Optional[Dict[str, Any]]":
    """
    Transform an OpenAI/LiteLLM content part to Sentry's standardized format.

    This handles the OpenAI image_url format used by OpenAI and LiteLLM SDKs.

    Input format:
    - {"type": "image_url", "image_url": {"url": "..."}}
    - {"type": "image_url", "image_url": "..."} (string shorthand)

    Output format (one of):
    - {"type": "blob", "modality": "image", "mime_type": "...", "content": "..."}
    - {"type": "uri", "modality": "image", "mime_type": "", "uri": "..."}

    Args:
        content_part: A dictionary representing a content part from OpenAI/LiteLLM

    Returns:
        A transformed dictionary in standardized format, or None if the format
        is not OpenAI image_url format, the URL is missing, empty or not a
        string, or transformation fails.
    """
    if not isinstance(content_part, dict):
        return None

    if content_part.get("type") != "image_url":
        return None

    image_url_data = content_part.get("image_url")
    if isinstance(image_url_data, dict):
        url = image_url_data.get("url")
    else:
        # String shorthand; any other type is rejected by the check below.
        url = image_url_data

    # Only a non-empty string can be inspected with startswith(); anything
    # else (None, numbers, bytes, ...) is not a usable URL.
    if not isinstance(url, str) or not url:
        return None

    uri_part = {
        "type": "uri",
        "modality": "image",
        "mime_type": "",
        "uri": url,
    }

    # Regular URL: report it as a reference.
    if not url.startswith("data:"):
        return uri_part

    # Data URI (base64 encoded): try to split it into MIME type and payload.
    try:
        mime_type, content = parse_data_uri(url)
    except ValueError:
        # If parsing fails, return as URI
        return uri_part

    return {
        "type": "blob",
        "modality": get_modality_from_mime_type(mime_type),
        "mime_type": mime_type,
        "content": content,
    }
177+
178+
def transform_anthropic_content_part(
    content_part: "Dict[str, Any]",
) -> "Optional[Dict[str, Any]]":
    """
    Transform an Anthropic content part to Sentry's standardized format.

    This handles the Anthropic image and document formats with source dictionaries.

    Input format:
    - {"type": "image", "source": {"type": "base64", "media_type": "...", "data": "..."}}
    - {"type": "image", "source": {"type": "url", "media_type": "...", "url": "..."}}
    - {"type": "image", "source": {"type": "file", "media_type": "...", "file_id": "..."}}
    - {"type": "document", "source": {...}} (same source formats)

    Output format (one of):
    - {"type": "blob", "modality": "...", "mime_type": "...", "content": "..."}
    - {"type": "uri", "modality": "...", "mime_type": "...", "uri": "..."}
    - {"type": "file", "modality": "...", "mime_type": "...", "file_id": "..."}

    A missing, None or non-string "media_type" is reported as "" and a
    missing or None payload ("data" / "url" / "file_id") is reported as "".

    Args:
        content_part: A dictionary representing a content part from Anthropic

    Returns:
        A transformed dictionary in standardized format, or None if the format
        is not Anthropic format (wrong block type, no "source" dictionary, or
        an unsupported source type).
    """
    if not isinstance(content_part, dict):
        return None

    block_type = content_part.get("type")
    if block_type not in ("image", "document"):
        return None

    source = content_part.get("source")
    if not isinstance(source, dict):
        return None

    # Map the source type to (output type, output key, key holding the payload).
    source_type = source.get("type")
    if source_type == "base64":
        out_type, out_key, payload_key = "blob", "content", "data"
    elif source_type == "url":
        out_type, out_key, payload_key = "uri", "uri", "url"
    elif source_type == "file":
        out_type, out_key, payload_key = "file", "file_id", "file_id"
    else:
        return None

    # "mime_type" is always emitted as a string: a key that is present but
    # set to None (or to some non-string value) is normalized to "".
    media_type = source.get("media_type")
    if not isinstance(media_type, str):
        media_type = ""

    # Document blocks are always documents; image blocks derive the modality
    # from the media type.
    if block_type == "document":
        modality = "document"
    else:
        modality = get_modality_from_mime_type(media_type)

    payload = source.get(payload_key)

    return {
        "type": out_type,
        "modality": modality,
        "mime_type": media_type,
        out_key: "" if payload is None else payload,
    }
248+
249+
def transform_google_content_part(
    content_part: "Dict[str, Any]",
) -> "Optional[Dict[str, Any]]":
    """
    Transform a Google GenAI content part to Sentry's standardized format.

    This handles the Google GenAI inline_data and file_data formats.

    Input format:
    - {"inline_data": {"mime_type": "...", "data": "..."}}
    - {"file_data": {"mime_type": "...", "file_uri": "..."}}

    Output format (one of):
    - {"type": "blob", "modality": "...", "mime_type": "...", "content": "..."}
    - {"type": "uri", "modality": "...", "mime_type": "...", "uri": "..."}

    "inline_data" takes precedence when both keys hold dictionaries. A key
    that is present but does not hold a dictionary (for example it is None)
    is skipped, so a part such as {"inline_data": None, "file_data": {...}}
    is still recognized through its "file_data" entry.

    Args:
        content_part: A dictionary representing a content part from Google GenAI

    Returns:
        A transformed dictionary in standardized format, or None if the format
        is not Google format or transformation fails.
    """
    if not isinstance(content_part, dict):
        return None

    # (input key, output type, output key, key holding the payload), in
    # order of precedence.
    layouts = (
        ("inline_data", "blob", "content", "data"),
        ("file_data", "uri", "uri", "file_uri"),
    )

    for part_key, out_type, out_key, payload_key in layouts:
        part_data = content_part.get(part_key)
        if not isinstance(part_data, dict):
            # Absent, or present but unset: do not give up yet, the other
            # key may still carry the data.
            continue

        mime_type = part_data.get("mime_type", "")
        return {
            "type": out_type,
            "modality": get_modality_from_mime_type(mime_type),
            "mime_type": mime_type,
            out_key: part_data.get(payload_key, ""),
        }

    return None
303+
304+
def transform_generic_content_part(
    content_part: "Dict[str, Any]",
) -> "Optional[Dict[str, Any]]":
    """
    Convert a generic/LangChain-style content part to Sentry's standardized format.

    In this format the "type" field names the modality directly and the data
    lives in a top-level "base64", "url" or "file_id" field.

    Input format:
    - {"type": "image", "base64": "...", "mime_type": "..."}
    - {"type": "audio", "url": "...", "mime_type": "..."}
    - {"type": "video", "base64": "...", "mime_type": "..."}
    - {"type": "file", "file_id": "...", "mime_type": "..."}

    Output format (one of):
    - {"type": "blob", "modality": "...", "mime_type": "...", "content": "..."}
    - {"type": "uri", "modality": "...", "mime_type": "...", "uri": "..."}
    - {"type": "file", "modality": "...", "mime_type": "...", "file_id": "..."}

    Args:
        content_part: A dictionary representing a content part in generic format

    Returns:
        The standardized dictionary, or None when the part is not in generic
        format (not a dict, unsupported type, Anthropic-style "source" key,
        or none of the data fields present).
    """
    if not isinstance(content_part, dict):
        return None

    kind = content_part.get("type")
    supported = kind in ("image", "audio", "video", "file")
    # A "source" key marks the Anthropic layout, which shares type "image".
    if not supported or "source" in content_part:
        return None

    # (input field, output type, output field); the first field present wins.
    field_layouts = (
        ("base64", "blob", "content"),
        ("url", "uri", "uri"),
        ("file_id", "file", "file_id"),
    )

    for in_field, out_type, out_field in field_layouts:
        if in_field not in content_part:
            continue
        return {
            "type": out_type,
            # Files are reported as documents; other types are the modality.
            "modality": "document" if kind == "file" else kind,
            "mime_type": content_part.get("mime_type", ""),
            out_field: content_part[in_field],
        }

    return None
373+
374+
def transform_content_part(
    content_part: "Dict[str, Any]",
) -> "Optional[Dict[str, Any]]":
    """
    Convert a content part from any supported AI SDK format to Sentry's standardized format.

    This is a heuristic dispatcher: it hands the part to each SDK-specific
    transformer in turn and returns the first non-None result. When the SDK
    is known, calling the specific transformer directly is preferable:
    - transform_openai_content_part() for OpenAI/LiteLLM
    - transform_anthropic_content_part() for Anthropic
    - transform_google_content_part() for Google GenAI
    - transform_generic_content_part() for LangChain and other generic formats

    Detection order:
    1. OpenAI: type == "image_url"
    2. Google: "inline_data" or "file_data" keys present
    3. Anthropic: type in ("image", "document") with "source" key
    4. Generic: type in ("image", "audio", "video", "file") with base64/url/file_id

    Output format (one of):
    - {"type": "blob", "modality": "...", "mime_type": "...", "content": "..."}
    - {"type": "uri", "modality": "...", "mime_type": "...", "uri": "..."}
    - {"type": "file", "modality": "...", "mime_type": "...", "file_id": "..."}

    Args:
        content_part: A dictionary representing a content part from an AI SDK

    Returns:
        The standardized dictionary, or None if no transformer recognizes
        the part.
    """
    if not isinstance(content_part, dict):
        return None

    # Order matters: it is the detection order documented above.
    candidates = (
        transform_openai_content_part,
        transform_google_content_part,
        transform_anthropic_content_part,
        transform_generic_content_part,
    )

    for transformer in candidates:
        converted = transformer(content_part)
        if converted is not None:
            return converted

    # Unrecognized format
    return None
432+
433+
def transform_message_content(content: "Any") -> "Any":
    """
    Standardize message content that may be a string or a sequence of content blocks.

    Each dictionary in a list/tuple is passed through transform_content_part();
    dictionaries it cannot transform (result is None) and non-dictionary
    items are kept unchanged.

    Args:
        content: Message content - can be a string, list of content blocks, or other

    Returns:
        - String content: returned as-is
        - List or tuple content: a new list with each transformable item
          converted to standardized format
        - Other: returned as-is
    """
    # Strings and any other non-sequence value pass through untouched.
    if not isinstance(content, (list, tuple)):
        return content

    def _standardize(block):
        if not isinstance(block, dict):
            return block
        converted = transform_content_part(block)
        # Keep the original block when it is not a recognized format.
        return block if converted is None else converted

    return [_standardize(block) for block in content]
464+
465+
75466def _normalize_data (data : "Any" , unpack : bool = True ) -> "Any" :
76467 # convert pydantic data (e.g. OpenAI v1+) to json compatible format
77468 if hasattr (data , "model_dump" ):
0 commit comments