Skip to content

Commit cad0c2c

Browse files
committed
fix(knowledge): simplify embedding billing — use calculateCost, return modelName
- Use calculateCost() from @/providers/utils instead of inline formula, consistent with how LLM billing works throughout the platform
- Return modelName from GenerateEmbeddingsResult so billing uses the actual model (handles custom Azure deployments) instead of a hardcoded fallback string
- Fix docs-chunker.ts empty-path fallback to satisfy full GenerateEmbeddingsResult type
1 parent 3b3b5b7 commit cad0c2c

3 files changed

Lines changed: 29 additions & 10 deletions

File tree

apps/sim/lib/chunkers/docs-chunker.ts

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,12 @@ export class DocsChunker {
8484
const { embeddings } =
8585
textChunks.length > 0
8686
? await generateEmbeddings(textChunks)
87-
: { embeddings: [] as number[][] }
87+
: {
88+
embeddings: [] as number[][],
89+
totalTokens: 0,
90+
isBYOK: false,
91+
modelName: 'text-embedding-3-small',
92+
}
8893
const embeddingModel = 'text-embedding-3-small'
8994

9095
const chunks: DocChunk[] = []

apps/sim/lib/knowledge/documents/service.ts

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ import type { ProcessedDocumentTags } from '@/lib/knowledge/types'
4444
import { deleteFile } from '@/lib/uploads/core/storage-service'
4545
import { extractStorageKey } from '@/lib/uploads/utils/file-utils'
4646
import type { DocumentProcessingPayload } from '@/background/knowledge-processing'
47-
import { getEmbeddingModelPricing } from '@/providers/models'
47+
import { calculateCost } from '@/providers/utils'
4848

4949
const logger = createLogger('DocumentService')
5050

@@ -464,6 +464,7 @@ export async function processDocumentAsync(
464464

465465
let totalEmbeddingTokens = 0
466466
let embeddingIsBYOK = false
467+
let embeddingModelName = 'text-embedding-3-small'
467468

468469
await withTimeout(
469470
(async () => {
@@ -509,12 +510,14 @@ export async function processDocumentAsync(
509510
embeddings: batchEmbeddings,
510511
totalTokens: batchTokens,
511512
isBYOK,
513+
modelName,
512514
} = await generateEmbeddings(batch, undefined, kb[0].workspaceId)
513515
for (const emb of batchEmbeddings) {
514516
embeddings.push(emb)
515517
}
516518
totalEmbeddingTokens += batchTokens
517519
embeddingIsBYOK = isBYOK
520+
embeddingModelName = modelName
518521
}
519522
}
520523

@@ -652,18 +655,23 @@ export async function processDocumentAsync(
652655

653656
if (!embeddingIsBYOK && totalEmbeddingTokens > 0 && kb[0].userId) {
654657
try {
655-
const embeddingModel = 'text-embedding-3-small'
656-
const pricing = getEmbeddingModelPricing(embeddingModel)
657-
if (pricing) {
658-
const cost = (totalEmbeddingTokens / 1_000_000) * pricing.input * getCostMultiplier()
658+
const costMultiplier = getCostMultiplier()
659+
const { total: cost } = calculateCost(
660+
embeddingModelName,
661+
totalEmbeddingTokens,
662+
0,
663+
false,
664+
costMultiplier
665+
)
666+
if (cost > 0) {
659667
await recordUsage({
660668
userId: kb[0].userId,
661669
workspaceId: kb[0].workspaceId ?? undefined,
662670
entries: [
663671
{
664672
category: 'model',
665673
source: 'knowledge-base',
666-
description: embeddingModel,
674+
description: embeddingModelName,
667675
cost,
668676
metadata: { inputTokens: totalEmbeddingTokens, outputTokens: 0 },
669677
},

apps/sim/lib/knowledge/embeddings.ts

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -193,12 +193,13 @@ export interface GenerateEmbeddingsResult {
193193
embeddings: number[][]
194194
totalTokens: number
195195
isBYOK: boolean
196+
modelName: string
196197
}
197198

198199
/**
199200
* Generate embeddings for multiple texts with token-aware batching and parallel processing.
200-
* Returns embeddings alongside the actual token count from the API and whether a BYOK key was used.
201-
* Callers should use `totalTokens` and `isBYOK` to record billing via `recordUsage`.
201+
* Returns embeddings alongside actual token count, model name, and whether a workspace BYOK key
202+
* was used (vs. the platform's shared key) — enabling callers to make correct billing decisions.
202203
*/
203204
export async function generateEmbeddings(
204205
texts: string[],
@@ -231,7 +232,12 @@ export async function generateEmbeddings(
231232
totalTokens += batch.totalTokens
232233
}
233234

234-
return { embeddings: allEmbeddings, totalTokens, isBYOK: config.isBYOK }
235+
return {
236+
embeddings: allEmbeddings,
237+
totalTokens,
238+
isBYOK: config.isBYOK,
239+
modelName: config.modelName,
240+
}
235241
}
236242

237243
/**

0 commit comments

Comments
 (0)