@@ -35,6 +35,7 @@ interface EmbeddingConfig {
3535 apiUrl : string
3636 headers : Record < string , string >
3737 modelName : string
38+ isBYOK : boolean
3839}
3940
4041interface EmbeddingResponseItem {
@@ -71,16 +72,19 @@ async function getEmbeddingConfig(
7172 'Content-Type' : 'application/json' ,
7273 } ,
7374 modelName : kbModelName ,
75+ isBYOK : false ,
7476 }
7577 }
7678
7779 let openaiApiKey = env . OPENAI_API_KEY
80+ let isBYOK = false
7881
7982 if ( workspaceId ) {
8083 const byokResult = await getBYOKKey ( workspaceId , 'openai' )
8184 if ( byokResult ) {
8285 logger . info ( 'Using workspace BYOK key for OpenAI embeddings' )
8386 openaiApiKey = byokResult . apiKey
87+ isBYOK = true
8488 }
8589 }
8690
@@ -98,12 +102,16 @@ async function getEmbeddingConfig(
98102 'Content-Type' : 'application/json' ,
99103 } ,
100104 modelName : embeddingModel ,
105+ isBYOK,
101106 }
102107}
103108
104109const EMBEDDING_REQUEST_TIMEOUT_MS = 60_000
105110
106- async function callEmbeddingAPI ( inputs : string [ ] , config : EmbeddingConfig ) : Promise < number [ ] [ ] > {
111+ async function callEmbeddingAPI (
112+ inputs : string [ ] ,
113+ config : EmbeddingConfig
114+ ) : Promise < { embeddings : number [ ] [ ] ; totalTokens : number } > {
107115 return retryWithExponentialBackoff (
108116 async ( ) => {
109117 const useDimensions = supportsCustomDimensions ( config . modelName )
@@ -140,7 +148,10 @@ async function callEmbeddingAPI(inputs: string[], config: EmbeddingConfig): Prom
140148 }
141149
142150 const data : EmbeddingAPIResponse = await response . json ( )
143- return data . data . map ( ( item ) => item . embedding )
151+ return {
152+ embeddings : data . data . map ( ( item ) => item . embedding ) ,
153+ totalTokens : data . usage . total_tokens ,
154+ }
144155 } ,
145156 {
146157 maxRetries : 3 ,
@@ -178,14 +189,22 @@ async function processWithConcurrency<T, R>(
178189 return results
179190}
180191
192+ export interface GenerateEmbeddingsResult {
193+ embeddings : number [ ] [ ]
194+ totalTokens : number
195+ isBYOK : boolean
196+ }
197+
181198/**
182- * Generate embeddings for multiple texts with token-aware batching and parallel processing
199+ * Generate embeddings for multiple texts with token-aware batching and parallel processing.
200+ * Returns embeddings alongside the actual token count from the API and whether a BYOK key was used.
201+ * Callers should use `totalTokens` and `isBYOK` to record billing via `recordUsage`.
183202 */
184203export async function generateEmbeddings (
185204 texts : string [ ] ,
186205 embeddingModel = 'text-embedding-3-small' ,
187206 workspaceId ?: string | null
188- ) : Promise < number [ ] [ ] > {
207+ ) : Promise < GenerateEmbeddingsResult > {
189208 const config = await getEmbeddingConfig ( embeddingModel , workspaceId )
190209
191210 const batches = batchByTokenLimit ( texts , MAX_TOKENS_PER_REQUEST , embeddingModel )
@@ -204,13 +223,15 @@ export async function generateEmbeddings(
204223 )
205224
206225 const allEmbeddings : number [ ] [ ] = [ ]
226+ let totalTokens = 0
207227 for ( const batch of batchResults ) {
208- for ( const emb of batch ) {
228+ for ( const emb of batch . embeddings ) {
209229 allEmbeddings . push ( emb )
210230 }
231+ totalTokens += batch . totalTokens
211232 }
212233
213- return allEmbeddings
234+ return { embeddings : allEmbeddings , totalTokens , isBYOK : config . isBYOK }
214235}
215236
216237/**
@@ -227,6 +248,6 @@ export async function generateSearchEmbedding(
227248 `Using ${ config . useAzure ? 'Azure OpenAI' : 'OpenAI' } for search embedding generation`
228249 )
229250
230- const embeddings = await callEmbeddingAPI ( [ query ] , config )
251+ const { embeddings } = await callEmbeddingAPI ( [ query ] , config )
231252 return embeddings [ 0 ]
232253}