feat: add span processor for tool input.value and output.value

andrei-rusu · andrei-rusu · commit fe39a23dec0d · 2025-12-03T18:16:19.000+02:00
diff --git a/src/uipath_llamaindex/_cli/_tracing/_attribute_normalizer.py b/src/uipath_llamaindex/_cli/_tracing/_attribute_normalizer.py
@@ -0,0 +1,87 @@
+"""OpenTelemetry SpanProcessor for normalizing LlamaIndex tool call attributes.
+
+LlamaIndex wraps tool arguments in {"kwargs": {...}} which differs from other
+frameworks like LangChain that use flat {"arg": value} format. This processor
+normalizes the format at the span level before exporters or dev terminal read it.
+"""
+
+import json
+import logging
+from typing import Any, Optional
+
+from opentelemetry.context import Context
+from opentelemetry.sdk.trace import ReadableSpan, Span, SpanProcessor
+
+logger = logging.getLogger(__name__)
+
+
+class AttributeNormalizingSpanProcessor(SpanProcessor):
+    """Rewrite LlamaIndex TOOL span payloads into a flat, LangChain-like shape.
+
+    ``input.value`` loses its ``{"kwargs": {...}}`` wrapper and ``output.value``
+    is reduced to ``{"content", "status", "tool_call_id"}``; any value that
+    needs no rewrite is left exactly as it was recorded.
+    """
+
+    def on_start(self, span: Span, parent_context: Optional[Context] = None) -> None:
+        """Do nothing; attributes are only rewritten once the span has ended."""
+
+    def on_end(self, span: ReadableSpan) -> None:
+        """Normalize ``input.value``/``output.value`` of TOOL spans in place.
+
+        Writes to the span's private ``_attributes`` mapping. Each key is handled
+        on its own and failures are only logged, so one bad value cannot block
+        the other key or break span processing.
+        """
+        attrs: Any = getattr(span, "_attributes", None)
+        if not attrs or attrs.get("openinference.span.kind") != "TOOL":
+            return
+
+        for key in ("input.value", "output.value"):
+            try:
+                if key not in attrs:
+                    continue
+                original = attrs[key]
+                normalized = self._normalize_attribute(key, original)
+                if normalized != original:
+                    attrs[key] = normalized
+                    # %.50s truncates any value type; slicing would need a str.
+                    msg = "Normalized %s in span '%s': %.50s... → %.50s..."
+                    logger.debug(msg, key, span.name, original, normalized)
+            except Exception as e:
+                # Best effort: normalization must never crash span processing.
+                name = getattr(span, "name", "unknown")
+                logger.debug("Failed to normalize span '%s': %s", name, e)
+
+    def _normalize_attribute(self, key: str, value: Any) -> Any:
+        """Return the normalized form of ``value`` for attribute ``key``.
+
+        A new JSON string is returned only when a rewrite applies. Otherwise the
+        original ``value`` is returned untouched, so JSON that needs no change
+        is not re-rendered through ``str()`` (e.g. ``null`` becoming ``None``).
+        """
+        data = value
+        if isinstance(value, str):
+            try:
+                data = json.loads(value)
+            except ValueError:
+                return value  # not JSON, nothing to unwrap
+        if not isinstance(data, dict):
+            return value
+        if key == "input.value" and "kwargs" in data:
+            return json.dumps(data["kwargs"])
+        # An already-normalized payload is skipped so a second pass keeps content.
+        done = set(data) == {"content", "status", "tool_call_id"}
+        if key == "output.value" and not done:
+            status = "error" if data.get("is_error", False) else "success"
+            content, call_id = data.get("raw_output"), data.get("tool_call_id")
+            flat = {"content": content, "status": status, "tool_call_id": call_id}
+            return json.dumps(flat)
+        return value
+
+    def shutdown(self) -> None:
+        """Nothing to release on shutdown."""
+
+    def force_flush(self, timeout_millis: int = 30000) -> bool:
+        """Report success immediately; this processor buffers nothing."""
+        return True
diff --git a/src/uipath_llamaindex/runtime/factory.py b/src/uipath_llamaindex/runtime/factory.py
@@ -17,6 +17,9 @@
 from uipath.runtime.errors import UiPathErrorCategory
 from workflows import Workflow
 
+from uipath_llamaindex._cli._tracing._attribute_normalizer import (
+    AttributeNormalizingSpanProcessor,
+)
 from uipath_llamaindex.runtime.config import LlamaIndexConfig
 from uipath_llamaindex.runtime.errors import (
     UiPathLlamaIndexErrorCode,
@@ -55,6 +58,11 @@ def _setup_instrumentation(self, trace_manager: UiPathTraceManager | None) -> No
         LlamaIndexInstrumentor().instrument()
         UiPathSpanUtils.register_current_span_provider(get_current_span)
 
+        if trace_manager:
+            trace_manager.tracer_provider.add_span_processor(
+                AttributeNormalizingSpanProcessor()
+            )
+
     def _get_storage_path(self) -> str:
         """Get the storage path for workflow state."""
         if self._storage_path is None: