Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/api/providers/base-openai-compatible-provider.ts
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ export abstract class BaseOpenAiCompatibleProvider<ModelName extends string>
const stream = await this.createStream(systemPrompt, messages, metadata)

const matcher = new TagMatcher(
"think",
["think", "thought"],
(chunk) =>
({
type: chunk.matched ? "reasoning" : "text",
Expand Down
2 changes: 1 addition & 1 deletion src/api/providers/lm-studio.ts
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ export class LmStudioHandler extends BaseProvider implements SingleCompletionHan
}

const matcher = new TagMatcher(
"think",
["think", "thought"],
(chunk) =>
({
type: chunk.matched ? "reasoning" : "text",
Expand Down
2 changes: 1 addition & 1 deletion src/api/providers/native-ollama.ts
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,7 @@ export class NativeOllamaHandler extends BaseProvider implements SingleCompletio
]

const matcher = new TagMatcher(
"think",
["think", "thought"],
(chunk) =>
({
type: chunk.matched ? "reasoning" : "text",
Expand Down
2 changes: 1 addition & 1 deletion src/api/providers/openai.ts
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
}

const matcher = new TagMatcher(
"think",
["think", "thought"],
(chunk) =>
({
type: chunk.matched ? "reasoning" : "text",
Expand Down
4 changes: 3 additions & 1 deletion src/core/assistant-message/presentAssistantMessage.ts
Original file line number Diff line number Diff line change
Expand Up @@ -287,9 +287,11 @@ export async function presentAssistantMessage(cline: Task) {
// Have to do this for both partial and complete blocks, since
// content inside thinking tags is automatically removed when it
// is sent to the markdown renderer.
// Strip any streamed <thinking> tags from text output.
// Strip any streamed <thinking> or <thought> tags from text output.
content = content.replace(/<thinking>\s?/g, "")
content = content.replace(/\s?<\/thinking>/g, "")
content = content.replace(/<thought>\s?/g, "")
content = content.replace(/\s?<\/thought>/g, "")
}

await cline.say("text", content, undefined, block.partial)
Expand Down
135 changes: 135 additions & 0 deletions src/utils/__tests__/tag-matcher.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
import { TagMatcher } from "../tag-matcher"

/**
 * Concatenate chunk data into one string per bucket (inside vs. outside a
 * matched tag) so streaming assertions do not depend on how the matcher
 * happens to split its output across calls.
 */
function joinByMatch(chunks: ReadonlyArray<{ matched: boolean; data: string }>): {
	matched: string
	unmatched: string
} {
	let matched = ""
	let unmatched = ""
	for (const chunk of chunks) {
		if (chunk.matched) {
			matched += chunk.data
		} else {
			unmatched += chunk.data
		}
	}
	return { matched, unmatched }
}

describe("TagMatcher", () => {
	describe("single tag name (backward compatibility)", () => {
		it("should extract content inside <think> tags", () => {
			const matcher = new TagMatcher("think")
			const result = matcher.final("<think>reasoning here</think> output text")
			expect(result).toEqual([
				{ matched: true, data: "reasoning here" },
				{ matched: false, data: " output text" },
			])
		})

		it("should handle streamed chunks", () => {
			const matcher = new TagMatcher("think")
			// The opening tag is deliberately split across two updates.
			const chunks = [
				...matcher.update("<thi"),
				...matcher.update("nk>reason"),
				...matcher.update("ing</think>"),
				...matcher.final(" done"),
			]
			const allData = joinByMatch(chunks)
			expect(allData.matched).toBe("reasoning")
			expect(allData.unmatched).toBe(" done")
		})

		it("should pass through text with no tags", () => {
			const matcher = new TagMatcher("think")
			const result = matcher.final("just some text")
			expect(result).toEqual([{ matched: false, data: "just some text" }])
		})

		it("tagName getter returns first tag name", () => {
			const matcher = new TagMatcher("think")
			expect(matcher.tagName).toBe("think")
		})
	})

	describe("multiple tag names", () => {
		it("should extract content inside <thought> tags", () => {
			const matcher = new TagMatcher(["think", "thought"])
			const result = matcher.final("<thought>reasoning here</thought> output text")
			expect(result).toEqual([
				{ matched: true, data: "reasoning here" },
				{ matched: false, data: " output text" },
			])
		})

		it("should still extract content inside <think> tags", () => {
			const matcher = new TagMatcher(["think", "thought"])
			const result = matcher.final("<think>reasoning here</think> output text")
			expect(result).toEqual([
				{ matched: true, data: "reasoning here" },
				{ matched: false, data: " output text" },
			])
		})

		it("should handle streamed <thought> tags across chunks", () => {
			const matcher = new TagMatcher(["think", "thought"])
			// "<thou" is still ambiguous-free but incomplete; the tag only
			// completes in the second update.
			const chunks = [
				...matcher.update("<thou"),
				...matcher.update("ght>my rea"),
				...matcher.update("soning</thought>"),
				...matcher.final(" answer"),
			]
			const allData = joinByMatch(chunks)
			expect(allData.matched).toBe("my reasoning")
			expect(allData.unmatched).toBe(" answer")
		})

		it("should not match mismatched open/close tags", () => {
			// <think> opened but </thought> close - should not match as valid close
			const matcher = new TagMatcher(["think", "thought"])
			const result = matcher.final("<think>content</thought>more")
			// The close tag won't match because activeTagName is "think"
			// so </thought> is not recognized as closing it
			const { matched, unmatched } = joinByMatch(result)
			// Content stays matched because the tag was never properly closed
			expect(matched).toContain("content")
			expect(unmatched).not.toContain("content")
		})

		// Previously titled "should handle text before thought tag", but the
		// input has no text before the tag; what this case actually pins is
		// that an explicit position of 0 works together with an array of names.
		it("should match a leading thought tag with an explicit position of 0", () => {
			const matcher = new TagMatcher(["think", "thought"], undefined, 0)
			const result = matcher.final("<thought>reasoning</thought>answer")
			expect(result).toEqual([
				{ matched: true, data: "reasoning" },
				{ matched: false, data: "answer" },
			])
		})

		it("should ignore non-matching tags", () => {
			const matcher = new TagMatcher(["think", "thought"])
			const result = matcher.final("<div>not a match</div>")
			expect(result).toEqual([{ matched: false, data: "<div>not a match</div>" }])
		})

		it("tagName getter returns first tag name from array", () => {
			const matcher = new TagMatcher(["think", "thought"])
			expect(matcher.tagName).toBe("think")
		})

		it("tagNames contains all provided tag names", () => {
			const matcher = new TagMatcher(["think", "thought"])
			expect(matcher.tagNames).toEqual(["think", "thought"])
		})
	})

	describe("with transform", () => {
		it("should apply transform to thought tag results", () => {
			const matcher = new TagMatcher(["think", "thought"], (chunk) => ({
				type: chunk.matched ? "reasoning" : "text",
				text: chunk.data,
			}))
			const result = matcher.final("<thought>my reasoning</thought>my answer")
			expect(result).toEqual([
				{ type: "reasoning", text: "my reasoning" },
				{ type: "text", text: "my answer" },
			])
		})
	})
})
73 changes: 65 additions & 8 deletions src/utils/tag-matcher.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,23 @@ export class TagMatcher<Result = TagMatcherResult> {
state: "TEXT" | "TAG_OPEN" | "TAG_CLOSE" = "TEXT"
depth = 0
pointer = 0
readonly tagNames: string[]
private candidates: number[] = []
private activeTagName: string | undefined
constructor(
readonly tagName: string,
tagName: string | string[],
readonly transform?: (chunks: TagMatcherResult) => Result,
readonly position = 0,
) {}
) {
this.tagNames = Array.isArray(tagName) ? tagName : [tagName]
}

/**
 * Kept for backward compatibility: returns the first entry of `tagNames`.
 */
get tagName(): string {
	const [primaryTagName] = this.tagNames
	return primaryTagName
}
private collect() {
if (!this.cached.length) {
return
Expand All @@ -48,6 +60,47 @@ export class TagMatcher<Result = TagMatcherResult> {
return chunks.map(this.transform)
}

/**
 * Report whether at least one remaining candidate has been matched in full,
 * i.e. the length of its tag name equals the current `index`.
 */
private _anyCompletedCandidate(): boolean {
	for (const candidateIdx of this.candidates) {
		if (this.tagNames[candidateIdx].length === this.index) {
			return true
		}
	}
	return false
}

/**
 * Return the tag name of the first remaining candidate whose length equals
 * the current `index`, or `undefined` when no candidate is fully matched.
 */
private _getCompletedCandidate(): string | undefined {
	const completedIdx = this.candidates.find(
		(candidateIdx) => this.tagNames[candidateIdx].length === this.index,
	)
	// Compare against undefined explicitly: candidate index 0 is valid.
	return completedIdx === undefined ? undefined : this.tagNames[completedIdx]
}

/**
 * Narrow `candidates` to the tag names whose character at the current
 * `index` equals `char`.
 *
 * @param char The character just read from the input.
 * @returns `true` if at least one candidate survives the narrowing.
 */
private _filterCandidates(char: string): boolean {
	const surviving: number[] = []
	for (const candidateIdx of this.candidates) {
		if (this.tagNames[candidateIdx][this.index] === char) {
			surviving.push(candidateIdx)
		}
	}
	this.candidates = surviving
	return surviving.length > 0
}

/**
 * Re-seed `candidates` before a tag name is read.
 *
 * @param closeTag When `true` and a tag is currently active, only that
 * tag's index is kept so a close tag must name the tag that was opened.
 * In every other case all tag name indices become candidates.
 */
private _resetCandidates(closeTag: boolean) {
	const everyIndex = this.tagNames.map((_name, position) => position)

	if (!closeTag || this.activeTagName === undefined) {
		this.candidates = everyIndex
		return
	}

	// Fall back to every index if the active name is not in tagNames.
	const activeIdx = this.tagNames.indexOf(this.activeTagName)
	this.candidates = activeIdx >= 0 ? [activeIdx] : everyIndex
}

private _update(chunk: string) {
for (const char of chunk) {
this.cached.push(char)
Expand All @@ -57,38 +110,42 @@ export class TagMatcher<Result = TagMatcherResult> {
if (char === "<" && (this.pointer <= this.position + 1 || this.matched)) {
this.state = "TAG_OPEN"
this.index = 0
this._resetCandidates(false)
} else {
this.collect()
}
} else if (this.state === "TAG_OPEN") {
if (char === ">" && this.index === this.tagName.length) {
if (char === ">" && this._anyCompletedCandidate()) {
this.state = "TEXT"
if (!this.matched) {
this.cached = []
}
this.activeTagName = this._getCompletedCandidate()
this.depth++
this.matched = true
} else if (this.index === 0 && char === "/") {
this.state = "TAG_CLOSE"
} else if (char === " " && (this.index === 0 || this.index === this.tagName.length)) {
this._resetCandidates(true)
} else if (char === " " && (this.index === 0 || this._anyCompletedCandidate())) {
continue
} else if (this.tagName[this.index] === char) {
} else if (this._filterCandidates(char)) {
this.index++
} else {
this.state = "TEXT"
this.collect()
}
} else if (this.state === "TAG_CLOSE") {
if (char === ">" && this.index === this.tagName.length) {
if (char === ">" && this._anyCompletedCandidate()) {
this.state = "TEXT"
this.depth--
this.matched = this.depth > 0
if (!this.matched) {
this.cached = []
this.activeTagName = undefined
}
} else if (char === " " && (this.index === 0 || this.index === this.tagName.length)) {
} else if (char === " " && (this.index === 0 || this._anyCompletedCandidate())) {
continue
} else if (this.tagName[this.index] === char) {
} else if (this._filterCandidates(char)) {
this.index++
} else {
this.state = "TEXT"
Expand Down
Loading