@@ -1,6 +1,6 @@
 import logging
 import os
-from typing import Any, Optional
+from typing import Any, Generator, Optional, Sequence

 import httpx
 from llama_index.core.callbacks import CallbackManager
@@ -37,7 +37,20 @@ def _check_vertex_dependencies() -> None: |

 import google.genai  # noqa: E402
 import google.genai.types as genai_types  # noqa: E402
+from llama_index.core.base.llms.types import (  # noqa: E402
+    ChatMessage,
+    ChatResponse,
+    ChatResponseAsyncGen,
+    ChatResponseGen,
+    CompletionResponse,
+    CompletionResponseAsyncGen,
+    CompletionResponseGen,
+)
 from llama_index.core.bridge.pydantic import PrivateAttr  # noqa: E402
+from llama_index.core.llms.callbacks import (  # noqa: E402
+    llm_chat_callback,
+    llm_completion_callback,
+)
 from llama_index.llms.google_genai import GoogleGenAI  # noqa: E402


@@ -254,3 +267,97 @@ def _build_base_url_static(model: str) -> str: |
             model=model,
         )
         return f"{env_uipath_url.rstrip('/')}/{formatted_endpoint}"
+
+    # Streaming fallback methods - call non-streaming and yield a single response.
+    # This works around backend streaming bugs in UiPath Gateway.
+
+    @llm_completion_callback()
+    def complete(
+        self, prompt: str, formatted: bool = False, **kwargs: Any
+    ) -> CompletionResponse:
+        """Completion endpoint - delegates to chat."""
+        response = self.chat([ChatMessage(role="user", content=prompt)], **kwargs)
+        return CompletionResponse(
+            text=response.message.content or "",
+            raw=response.raw,
+            additional_kwargs=response.additional_kwargs,
+        )
+
+    @llm_completion_callback()
+    def stream_complete(
+        self, prompt: str, formatted: bool = False, **kwargs: Any
+    ) -> CompletionResponseGen:
+        """Streaming completion fallback - calls complete and yields a single response."""
+
+        def gen() -> Generator[CompletionResponse, None, None]:
+            response = self.complete(prompt, formatted=formatted, **kwargs)
+            # Yield the full response as a single "chunk" with delta = full text
+            yield CompletionResponse(
+                text=response.text,
+                raw=response.raw,
+                delta=response.text,
+                additional_kwargs=response.additional_kwargs,
+            )
+
+        return gen()
+
+    @llm_completion_callback()
+    async def astream_complete(
+        self, prompt: str, formatted: bool = False, **kwargs: Any
+    ) -> CompletionResponseAsyncGen:
+        """Async streaming completion fallback - calls acomplete and yields a single response."""
+
+        async def gen() -> CompletionResponseAsyncGen:
+            response = await self.acomplete(prompt, formatted=formatted, **kwargs)
+            # Yield the full response as a single "chunk" with delta = full text
+            yield CompletionResponse(
+                text=response.text,
+                raw=response.raw,
+                delta=response.text,
+                additional_kwargs=response.additional_kwargs,
+            )
+
+        return gen()
+
+    @llm_chat_callback()
+    def chat(self, messages: Sequence[ChatMessage], **kwargs: Any) -> ChatResponse:
+        """Chat endpoint - delegates to the parent's async chat via a sync bridge."""
+        import asyncio
+
+        # Reuse the current event loop when one exists; create one otherwise
+        # (calling asyncio.get_event_loop() with no loop set is deprecated).
+        try:
+            loop = asyncio.get_event_loop()
+        except RuntimeError:
+            loop = asyncio.new_event_loop()
+            asyncio.set_event_loop(loop)
+        return loop.run_until_complete(self.achat(messages, **kwargs))
+
+    @llm_chat_callback()
+    def stream_chat(
+        self, messages: Sequence[ChatMessage], **kwargs: Any
+    ) -> ChatResponseGen:
+        """Streaming chat fallback - calls chat and yields a single response."""
+
+        def gen() -> Generator[ChatResponse, None, None]:
+            response = self.chat(messages, **kwargs)
+            # Yield the full response as a single "chunk" with delta = full content
+            yield ChatResponse(
+                message=response.message,
+                raw=response.raw,
+                delta=response.message.content or "",
+                additional_kwargs=response.additional_kwargs,
+            )
+
+        return gen()
+
+    @llm_chat_callback()
+    async def astream_chat(
+        self, messages: Sequence[ChatMessage], **kwargs: Any
+    ) -> ChatResponseAsyncGen:
+        """Async streaming chat fallback - calls achat and yields a single response."""
+
+        async def gen() -> ChatResponseAsyncGen:
+            response = await self.achat(messages, **kwargs)
+            # Yield the full response as a single "chunk" with delta = full content
+            yield ChatResponse(
+                message=response.message,
+                raw=response.raw,
+                delta=response.message.content or "",
+                additional_kwargs=response.additional_kwargs,
+            )
+
+        return gen()
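
For reference, a minimal usage sketch (not part of the commit) of how the fallback behaves from the caller's side: the stream yields exactly one chunk whose `delta` carries the full, non-streamed reply, so existing streaming call sites keep working. The `collect_stream` helper and the assumption that `llm` is an instance of the LLM wrapper class this module defines are illustrative only.

```python
# Minimal sketch, not part of the commit. Assumes `llm` is an instance of the
# LLM wrapper class this module defines; `collect_stream` is a hypothetical helper.
from llama_index.core.base.llms.types import ChatMessage


def collect_stream(llm) -> str:
    # With the fallback above, stream_chat yields exactly one chunk whose
    # delta (and message content) is the complete, non-streamed reply.
    messages = [ChatMessage(role="user", content="Hello")]
    return "".join(chunk.delta or "" for chunk in llm.stream_chat(messages))
```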