@@ -1,6 +1,6 @@
 import logging
 import os
-from typing import Any, Optional
+from typing import Any, Generator, Optional, Sequence

 import httpx
 from llama_index.core.callbacks import CallbackManager
@@ -37,7 +37,20 @@ def _check_vertex_dependencies() -> None: |

 import google.genai  # noqa: E402
 import google.genai.types as genai_types  # noqa: E402
+from llama_index.core.base.llms.types import (  # noqa: E402
+    ChatMessage,
+    ChatResponse,
+    ChatResponseAsyncGen,
+    ChatResponseGen,
+    CompletionResponse,
+    CompletionResponseAsyncGen,
+    CompletionResponseGen,
+)
 from llama_index.core.bridge.pydantic import PrivateAttr  # noqa: E402
+from llama_index.core.llms.callbacks import (  # noqa: E402
+    llm_chat_callback,
+    llm_completion_callback,
+)
 from llama_index.llms.google_genai import GoogleGenAI  # noqa: E402


@@ -254,3 +267,97 @@ def _build_base_url_static(model: str) -> str: |
             model=model,
         )
         return f"{env_uipath_url.rstrip('/')}/{formatted_endpoint}"
+
+    # Streaming fallback methods - call non-streaming and yield a single response.
+    # This works around backend streaming bugs in UiPath Gateway.
+
+    @llm_completion_callback()
+    def complete(
+        self, prompt: str, formatted: bool = False, **kwargs: Any
+    ) -> CompletionResponse:
+        """Completion endpoint - delegates to chat."""
+        response = self.chat([ChatMessage(role="user", content=prompt)], **kwargs)
+        return CompletionResponse(
+            text=response.message.content or "",
+            raw=response.raw,
+            additional_kwargs=response.additional_kwargs,
+        )
+
+    @llm_completion_callback()
+    def stream_complete(
+        self, prompt: str, formatted: bool = False, **kwargs: Any
+    ) -> CompletionResponseGen:
+        """Streaming completion fallback - calls complete and yields a single response."""
+
+        def gen() -> Generator[CompletionResponse, None, None]:
+            response = self.complete(prompt, formatted=formatted, **kwargs)
+            # Yield the full response as a single "chunk" with delta = full text
+            yield CompletionResponse(
+                text=response.text,
+                raw=response.raw,
+                delta=response.text,
+                additional_kwargs=response.additional_kwargs,
+            )
+
+        return gen()
+
+    @llm_completion_callback()
+    async def astream_complete(
+        self, prompt: str, formatted: bool = False, **kwargs: Any
+    ) -> CompletionResponseAsyncGen:
+        """Async streaming completion fallback - calls acomplete and yields a single response."""
+
+        async def gen() -> CompletionResponseAsyncGen:
+            response = await self.acomplete(prompt, formatted=formatted, **kwargs)
+            # Yield the full response as a single "chunk" with delta = full text
+            yield CompletionResponse(
+                text=response.text,
+                raw=response.raw,
+                delta=response.text,
+                additional_kwargs=response.additional_kwargs,
+            )
+
+        return gen()
+
+    @llm_chat_callback()
+    def chat(self, messages: Sequence[ChatMessage], **kwargs: Any) -> ChatResponse:
+        """Chat endpoint - delegates to the parent's async chat via a sync bridge."""
+        import asyncio
+
+        # Reuse the current event loop when one exists; create one otherwise
+        # (calling asyncio.get_event_loop() with no loop set is deprecated).
+        try:
+            loop = asyncio.get_event_loop()
+        except RuntimeError:
+            loop = asyncio.new_event_loop()
+            asyncio.set_event_loop(loop)
+        return loop.run_until_complete(self.achat(messages, **kwargs))
+
+    @llm_chat_callback()
+    def stream_chat(
+        self, messages: Sequence[ChatMessage], **kwargs: Any
+    ) -> ChatResponseGen:
+        """Streaming chat fallback - calls chat and yields a single response."""
+
+        def gen() -> Generator[ChatResponse, None, None]:
+            response = self.chat(messages, **kwargs)
+            # Yield the full response as a single "chunk" with delta = full content
+            yield ChatResponse(
+                message=response.message,
+                raw=response.raw,
+                delta=response.message.content or "",
+                additional_kwargs=response.additional_kwargs,
+            )
+
+        return gen()
+
+    @llm_chat_callback()
+    async def astream_chat(
+        self, messages: Sequence[ChatMessage], **kwargs: Any
+    ) -> ChatResponseAsyncGen:
+        """Async streaming chat fallback - calls achat and yields a single response."""
+
+        async def gen() -> ChatResponseAsyncGen:
+            response = await self.achat(messages, **kwargs)
+            # Yield the full response as a single "chunk" with delta = full content
+            yield ChatResponse(
+                message=response.message,
+                raw=response.raw,
+                delta=response.message.content or "",
+                additional_kwargs=response.additional_kwargs,
+            )
+
+        return gen()
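
For reference, a minimal usage sketch (not part of the commit) of how the fallback behaves from the caller's side: the stream yields exactly one chunk whose `delta` carries the full, non-streamed reply, so existing streaming call sites keep working. The `collect_stream` helper and the assumption that `llm` is an instance of the LLM wrapper class this module defines are illustrative only.

```python
# Minimal sketch, not part of the commit. Assumes `llm` is an instance of the
# LLM wrapper class this module defines; `collect_stream` is a hypothetical helper.
from llama_index.core.base.llms.types import ChatMessage


def collect_stream(llm) -> str:
    # With the fallback above, stream_chat yields exactly one chunk whose
    # delta (and message content) is the complete, non-streamed reply.
    messages = [ChatMessage(role="user", content="Hello")]
    return "".join(chunk.delta or "" for chunk in llm.stream_chat(messages))
```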