diff --git a/src/api/providers/base-openai-compatible-provider.ts b/src/api/providers/base-openai-compatible-provider.ts index fc3d769ae2a..fb2b36c20bf 100644 --- a/src/api/providers/base-openai-compatible-provider.ts +++ b/src/api/providers/base-openai-compatible-provider.ts @@ -118,7 +118,7 @@ export abstract class BaseOpenAiCompatibleProvider const stream = await this.createStream(systemPrompt, messages, metadata) const matcher = new TagMatcher( - "think", + ["think", "thought"], (chunk) => ({ type: chunk.matched ? "reasoning" : "text", diff --git a/src/api/providers/lm-studio.ts b/src/api/providers/lm-studio.ts index a771394c535..6f72ea0ce52 100644 --- a/src/api/providers/lm-studio.ts +++ b/src/api/providers/lm-studio.ts @@ -105,7 +105,7 @@ export class LmStudioHandler extends BaseProvider implements SingleCompletionHan } const matcher = new TagMatcher( - "think", + ["think", "thought"], (chunk) => ({ type: chunk.matched ? "reasoning" : "text", diff --git a/src/api/providers/native-ollama.ts b/src/api/providers/native-ollama.ts index 99c1dc03cfa..7ee91282a42 100644 --- a/src/api/providers/native-ollama.ts +++ b/src/api/providers/native-ollama.ts @@ -215,7 +215,7 @@ export class NativeOllamaHandler extends BaseProvider implements SingleCompletio ] const matcher = new TagMatcher( - "think", + ["think", "thought"], (chunk) => ({ type: chunk.matched ? "reasoning" : "text", diff --git a/src/api/providers/openai.ts b/src/api/providers/openai.ts index 33b29abcafe..8842514a5b6 100644 --- a/src/api/providers/openai.ts +++ b/src/api/providers/openai.ts @@ -178,7 +178,7 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl } const matcher = new TagMatcher( - "think", + ["think", "thought"], (chunk) => ({ type: chunk.matched ? "reasoning" : "text", diff --git a/src/core/assistant-message/presentAssistantMessage.ts b/src/core/assistant-message/presentAssistantMessage.ts index 7f5862be154..8e18cfb5fac 100644 --- a/src/core/assistant-message/presentAssistantMessage.ts +++ b/src/core/assistant-message/presentAssistantMessage.ts @@ -287,9 +287,11 @@ export async function presentAssistantMessage(cline: Task) { // Have to do this for partial and complete since sending // content in thinking tags to markdown renderer will // automatically be removed. - // Strip any streamed tags from text output. + // Strip any streamed or tags from text output. content = content.replace(/\s?/g, "") content = content.replace(/\s?<\/thinking>/g, "") + content = content.replace(/\s?/g, "") + content = content.replace(/\s?<\/thought>/g, "") } await cline.say("text", content, undefined, block.partial) diff --git a/src/utils/__tests__/tag-matcher.spec.ts b/src/utils/__tests__/tag-matcher.spec.ts new file mode 100644 index 00000000000..fadd449afd8 --- /dev/null +++ b/src/utils/__tests__/tag-matcher.spec.ts @@ -0,0 +1,135 @@ +import { TagMatcher } from "../tag-matcher" + +describe("TagMatcher", () => { + describe("single tag name (backward compatibility)", () => { + it("should extract content inside tags", () => { + const matcher = new TagMatcher("think") + const result = matcher.final("reasoning here output text") + expect(result).toEqual([ + { matched: true, data: "reasoning here" }, + { matched: false, data: " output text" }, + ]) + }) + + it("should handle streamed chunks", () => { + const matcher = new TagMatcher("think") + const chunks = [] + chunks.push(...matcher.update("reason")) + chunks.push(...matcher.update("ing")) + chunks.push(...matcher.final(" done")) + const allData = chunks.reduce( + (acc, c) => { + const key = c.matched ? "matched" : "unmatched" + acc[key] += c.data + return acc + }, + { matched: "", unmatched: "" }, + ) + expect(allData.matched).toBe("reasoning") + expect(allData.unmatched).toBe(" done") + }) + + it("should pass through text with no tags", () => { + const matcher = new TagMatcher("think") + const result = matcher.final("just some text") + expect(result).toEqual([{ matched: false, data: "just some text" }]) + }) + + it("tagName getter returns first tag name", () => { + const matcher = new TagMatcher("think") + expect(matcher.tagName).toBe("think") + }) + }) + + describe("multiple tag names", () => { + it("should extract content inside tags", () => { + const matcher = new TagMatcher(["think", "thought"]) + const result = matcher.final("reasoning here output text") + expect(result).toEqual([ + { matched: true, data: "reasoning here" }, + { matched: false, data: " output text" }, + ]) + }) + + it("should still extract content inside tags", () => { + const matcher = new TagMatcher(["think", "thought"]) + const result = matcher.final("reasoning here output text") + expect(result).toEqual([ + { matched: true, data: "reasoning here" }, + { matched: false, data: " output text" }, + ]) + }) + + it("should handle streamed tags across chunks", () => { + const matcher = new TagMatcher(["think", "thought"]) + const chunks = [] + chunks.push(...matcher.update("my rea")) + chunks.push(...matcher.update("soning")) + chunks.push(...matcher.final(" answer")) + const allData = chunks.reduce( + (acc, c) => { + const key = c.matched ? "matched" : "unmatched" + acc[key] += c.data + return acc + }, + { matched: "", unmatched: "" }, + ) + expect(allData.matched).toBe("my reasoning") + expect(allData.unmatched).toBe(" answer") + }) + + it("should not match mismatched open/close tags", () => { + // opened but close - should not match as valid close + const matcher = new TagMatcher(["think", "thought"]) + const result = matcher.final("contentmore") + // The close tag won't match because activeTagName is "think" + // so is not recognized as closing it + const matchedData = result.filter((c) => c.matched).map((c) => c.data) + const unmatchedData = result.filter((c) => !c.matched).map((c) => c.data) + // Content stays matched because the tag was never properly closed + expect(matchedData.join("")).toContain("content") + expect(unmatchedData.join("")).not.toContain("content") + }) + + it("should handle text before thought tag", () => { + const matcher = new TagMatcher(["think", "thought"], undefined, 0) + const result = matcher.final("reasoninganswer") + expect(result).toEqual([ + { matched: true, data: "reasoning" }, + { matched: false, data: "answer" }, + ]) + }) + + it("should ignore non-matching tags", () => { + const matcher = new TagMatcher(["think", "thought"]) + const result = matcher.final("
not a match
") + expect(result).toEqual([{ matched: false, data: "
not a match
" }]) + }) + + it("tagName getter returns first tag name from array", () => { + const matcher = new TagMatcher(["think", "thought"]) + expect(matcher.tagName).toBe("think") + }) + + it("tagNames contains all provided tag names", () => { + const matcher = new TagMatcher(["think", "thought"]) + expect(matcher.tagNames).toEqual(["think", "thought"]) + }) + }) + + describe("with transform", () => { + it("should apply transform to thought tag results", () => { + const matcher = new TagMatcher(["think", "thought"], (chunk) => ({ + type: chunk.matched ? "reasoning" : "text", + text: chunk.data, + })) + const result = matcher.final("my reasoningmy answer") + expect(result).toEqual([ + { type: "reasoning", text: "my reasoning" }, + { type: "text", text: "my answer" }, + ]) + }) + }) +}) diff --git a/src/utils/tag-matcher.ts b/src/utils/tag-matcher.ts index 38d99a2904d..ba1778b87fb 100644 --- a/src/utils/tag-matcher.ts +++ b/src/utils/tag-matcher.ts @@ -17,11 +17,23 @@ export class TagMatcher { state: "TEXT" | "TAG_OPEN" | "TAG_CLOSE" = "TEXT" depth = 0 pointer = 0 + readonly tagNames: string[] + private candidates: number[] = [] + private activeTagName: string | undefined constructor( - readonly tagName: string, + tagName: string | string[], readonly transform?: (chunks: TagMatcherResult) => Result, readonly position = 0, - ) {} + ) { + this.tagNames = Array.isArray(tagName) ? tagName : [tagName] + } + + /** + * For backward compatibility, return the first tag name. + */ + get tagName(): string { + return this.tagNames[0] + } private collect() { if (!this.cached.length) { return @@ -48,6 +60,47 @@ export class TagMatcher { return chunks.map(this.transform) } + /** + * Check if any remaining candidate tag name has the given length. + */ + private _anyCompletedCandidate(): boolean { + return this.candidates.some((i) => this.tagNames[i].length === this.index) + } + + /** + * Get the first completed candidate tag name (fully matched at current index). + */ + private _getCompletedCandidate(): string | undefined { + for (const i of this.candidates) { + if (this.tagNames[i].length === this.index) { + return this.tagNames[i] + } + } + return undefined + } + + /** + * Filter candidates to only those matching the given char at the current index. + */ + private _filterCandidates(char: string): boolean { + this.candidates = this.candidates.filter((i) => this.tagNames[i][this.index] === char) + return this.candidates.length > 0 + } + + /** + * Reset candidates to all tag name indices (for open tags) or + * only the active tag name (for close tags). + */ + private _resetCandidates(closeTag: boolean) { + if (closeTag && this.activeTagName !== undefined) { + // For closing tags, only match the tag that was opened + const idx = this.tagNames.indexOf(this.activeTagName) + this.candidates = idx >= 0 ? [idx] : this.tagNames.map((_, i) => i) + } else { + this.candidates = this.tagNames.map((_, i) => i) + } + } + private _update(chunk: string) { for (const char of chunk) { this.cached.push(char) @@ -57,38 +110,42 @@ export class TagMatcher { if (char === "<" && (this.pointer <= this.position + 1 || this.matched)) { this.state = "TAG_OPEN" this.index = 0 + this._resetCandidates(false) } else { this.collect() } } else if (this.state === "TAG_OPEN") { - if (char === ">" && this.index === this.tagName.length) { + if (char === ">" && this._anyCompletedCandidate()) { this.state = "TEXT" if (!this.matched) { this.cached = [] } + this.activeTagName = this._getCompletedCandidate() this.depth++ this.matched = true } else if (this.index === 0 && char === "/") { this.state = "TAG_CLOSE" - } else if (char === " " && (this.index === 0 || this.index === this.tagName.length)) { + this._resetCandidates(true) + } else if (char === " " && (this.index === 0 || this._anyCompletedCandidate())) { continue - } else if (this.tagName[this.index] === char) { + } else if (this._filterCandidates(char)) { this.index++ } else { this.state = "TEXT" this.collect() } } else if (this.state === "TAG_CLOSE") { - if (char === ">" && this.index === this.tagName.length) { + if (char === ">" && this._anyCompletedCandidate()) { this.state = "TEXT" this.depth-- this.matched = this.depth > 0 if (!this.matched) { this.cached = [] + this.activeTagName = undefined } - } else if (char === " " && (this.index === 0 || this.index === this.tagName.length)) { + } else if (char === " " && (this.index === 0 || this._anyCompletedCandidate())) { continue - } else if (this.tagName[this.index] === char) { + } else if (this._filterCandidates(char)) { this.index++ } else { this.state = "TEXT"