Skip to content

Commit d47bfef

Browse files
committed
fix(langchain): normalize tool definitions and tool_calls for Langfuse UI
1. Add _to_langfuse_tool() to convert Anthropic (input_schema) and OpenAI (function wrapper) formats to the flat {name, description, parameters} shape.
2. Structure LLM input as {messages, tools} when tools are present, so that extractToolsFromObservation finds definitions at the top-level tools key.
3. Convert AIMessage.tool_calls and invalid_tool_calls from {name, args, id} to {id, type, name, arguments}, with args serialized as a JSON string. Only assign to message_dict when the converted list is non-empty.

Fixes: langfuse/langfuse#11850
1 parent caddeff commit d47bfef

2 files changed

Lines changed: 158 additions & 15 deletions

File tree

langfuse/langchain/CallbackHandler.py

Lines changed: 98 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import json
12
from contextvars import Token
23
from typing import (
34
Any,
@@ -35,6 +36,38 @@
3536
from langfuse.logger import langfuse_logger
3637
from langfuse.types import TraceContext
3738

39+
40+
def _to_langfuse_tool(tool: Any) -> Any:
41+
"""Normalize a tool definition to Langfuse's LLMToolDefinitionSchema: {name, description, parameters}.
42+
43+
LangChain providers serialize tools differently depending on the backend:
44+
- Anthropic (ChatAnthropic): {name, description, input_schema}
45+
- OpenAI / LiteLLM: {type: "function", function: {name, description, parameters}}
46+
47+
Langfuse's backend (extractToolsFromObservation) validates tools against
48+
LLMToolDefinitionSchema, which expects the flat format {name, description, parameters}.
49+
Both provider formats are normalized here into that canonical shape.
50+
"""
51+
if not isinstance(tool, dict):
52+
return tool
53+
# OpenAI wrapper: {type: "function", function: {name, description, parameters}}
54+
if tool.get("type") == "function" and "function" in tool:
55+
fn = tool["function"]
56+
return {
57+
"name": fn.get("name", ""),
58+
"description": fn.get("description", ""),
59+
"parameters": fn.get("parameters", {}),
60+
}
61+
# Anthropic format: {name, description, input_schema}
62+
if "name" in tool and "input_schema" in tool:
63+
return {
64+
"name": tool["name"],
65+
"description": tool.get("description", ""),
66+
"parameters": tool["input_schema"],
67+
}
68+
return tool
69+
70+
3871
try:
3972
import langchain
4073

@@ -841,9 +874,16 @@ def __on_llm_action(
841874
self._child_to_parent_run_id_map[run_id] = parent_run_id
842875

843876
try:
877+
observation_input: Any = prompts
844878
tools = kwargs.get("invocation_params", {}).get("tools", None)
845879
if tools and isinstance(tools, list):
846-
prompts.extend([{"role": "tool", "content": tool} for tool in tools])
880+
# Structure input as {messages, tools} so extractToolsFromObservation
881+
# can find tool definitions at the top-level tools key — the canonical
882+
# format expected by the backend's LLMToolDefinitionSchema.
883+
observation_input = {
884+
"messages": prompts,
885+
"tools": [_to_langfuse_tool(t) for t in tools],
886+
}
847887

848888
model_name = self._parse_model_and_log_errors(
849889
serialized=serialized, metadata=metadata, kwargs=kwargs
@@ -868,7 +908,7 @@ def __on_llm_action(
868908

869909
content = {
870910
"name": self.get_langchain_run_name(serialized, **kwargs),
871-
"input": prompts,
911+
"input": observation_input,
872912
"metadata": self.__join_tags_and_metadata(
873913
tags,
874914
metadata,
@@ -1056,14 +1096,66 @@ def _convert_message_to_dict(self, message: BaseMessage) -> Dict[str, Any]:
10561096
and message.tool_calls is not None
10571097
and len(message.tool_calls) > 0
10581098
):
1059-
message_dict["tool_calls"] = message.tool_calls
1099+
# Convert LangChain's tool_calls format {name, args, id} to
1100+
# the flat ToolCallSchema: {id, type, name, arguments}.
1101+
# Langfuse's frontend ToolCallSchema expects the flat format with
1102+
# arguments as a JSON string.
1103+
converted_tool_calls = []
1104+
for tc in message.tool_calls:
1105+
if not isinstance(tc, dict):
1106+
langfuse_logger.debug(
1107+
"Skipping tool_call entry that is not a dict: %s", tc
1108+
)
1109+
continue
1110+
try:
1111+
arguments = json.dumps(tc.get("args", {}))
1112+
except (TypeError, ValueError) as e:
1113+
langfuse_logger.debug(
1114+
"Failed to serialize tool call args to JSON: %s", e
1115+
)
1116+
arguments = "{}"
1117+
converted_tool_calls.append(
1118+
{
1119+
"id": tc.get("id", ""),
1120+
"type": "function",
1121+
"name": tc.get("name", ""),
1122+
"arguments": arguments,
1123+
}
1124+
)
1125+
if converted_tool_calls:
1126+
message_dict["tool_calls"] = converted_tool_calls
10601127

10611128
if (
1062-
hasattr(message, "invalid_tool_calls")
1063-
and message.invalid_tool_calls is not None
1129+
hasattr(message, "invalid_tool_calls")
1130+
and message.invalid_tool_calls is not None
10641131
and len(message.invalid_tool_calls) > 0
10651132
):
1066-
message_dict["invalid_tool_calls"] = message.invalid_tool_calls
1133+
converted_invalid_tool_calls = []
1134+
for tc in message.invalid_tool_calls:
1135+
if not isinstance(tc, dict):
1136+
langfuse_logger.debug(
1137+
"Skipping invalid_tool_call entry that is not a dict: %s",
1138+
tc,
1139+
)
1140+
continue
1141+
try:
1142+
arguments = json.dumps(tc.get("args", {}))
1143+
except (TypeError, ValueError) as e:
1144+
langfuse_logger.debug(
1145+
"Failed to serialize invalid tool call args to JSON: %s", e
1146+
)
1147+
arguments = "{}"
1148+
converted_invalid_tool_calls.append(
1149+
{
1150+
"id": tc.get("id", ""),
1151+
"type": "function",
1152+
"name": tc.get("name", ""),
1153+
"arguments": arguments,
1154+
"error": tc.get("error", ""),
1155+
}
1156+
)
1157+
if converted_invalid_tool_calls:
1158+
message_dict["invalid_tool_calls"] = converted_invalid_tool_calls
10671159

10681160
elif isinstance(message, SystemMessage):
10691161
message_dict = {"role": "system", "content": message.content}

tests/test_langchain.py

Lines changed: 60 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,58 @@
1818

1919
from langfuse._client.client import Langfuse
2020
from langfuse.langchain import CallbackHandler
21+
from langfuse.langchain.CallbackHandler import _to_langfuse_tool
2122
from tests.utils import create_uuid, encode_file_to_base64, get_api
2223

2324

25+
# --- Unit tests for _to_langfuse_tool ---
26+
27+
28+
def test_to_langfuse_tool_openai_format():
    """An OpenAI-style function wrapper is flattened to {name, description, parameters}."""
    wrapped = {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Get the weather",
            "parameters": {"type": "object", "properties": {}},
        },
    }
    expected = {
        "name": "get_weather",
        "description": "Get the weather",
        "parameters": {"type": "object", "properties": {}},
    }
    assert _to_langfuse_tool(wrapped) == expected
43+
44+
45+
def test_to_langfuse_tool_anthropic_format():
    """An Anthropic-style definition has its input_schema exposed as parameters."""
    anthropic_tool = {
        "name": "get_weather",
        "description": "Get the weather",
        "input_schema": {"type": "object", "properties": {}},
    }
    expected = {
        "name": "get_weather",
        "description": "Get the weather",
        "parameters": {"type": "object", "properties": {}},
    }
    assert _to_langfuse_tool(anthropic_tool) == expected
57+
58+
59+
def test_to_langfuse_tool_passthrough_unknown_dict():
    """A dict matching neither provider format comes back equal to the input."""
    unrecognized = {"name": "my_tool", "custom_field": "value"}
    assert _to_langfuse_tool(unrecognized) == unrecognized
63+
64+
65+
def test_to_langfuse_tool_passthrough_non_dict():
    """A non-dict tool value is handed back as-is."""
    raw_value = "not a dict"
    assert _to_langfuse_tool(raw_value) == "not a dict"
68+
69+
70+
# --- End unit tests ---
71+
72+
2473
def test_callback_generated_from_trace_chat():
2574
langfuse = Langfuse()
2675

@@ -762,15 +811,17 @@ class GetWeather(BaseModel):
762811

763812
for generation in generations:
764813
assert generation.input is not None
765-
tool_messages = [msg for msg in generation.input if msg["role"] == "tool"]
766-
assert len(tool_messages) == 2
767-
assert any(
768-
"standardize_address" == msg["content"]["function"]["name"]
769-
for msg in tool_messages
770-
)
771-
assert any(
772-
"get_weather" == msg["content"]["function"]["name"] for msg in tool_messages
773-
)
814+
# Input is structured as {messages, tools} for extractToolsFromObservation
815+
assert "messages" in generation.input
816+
assert "tools" in generation.input
817+
tool_names = [t["name"] for t in generation.input["tools"]]
818+
assert "standardize_address" in tool_names
819+
assert "get_weather" in tool_names
820+
# Each tool must conform to LLMToolDefinitionSchema
821+
for t in generation.input["tools"]:
822+
assert "name" in t
823+
assert "description" in t
824+
assert "parameters" in t
774825

775826
assert generation.output is not None
776827

0 commit comments

Comments
 (0)