Skip to content

Commit 0bff8d4

Browse files
committed
Merge branch 'feat/add-chat-models' of https://github.com/UiPath/uipath-llamaindex-python into feat/add-chat-models
2 parents 103deb1 + 9ba6735 commit 0bff8d4

3 files changed

Lines changed: 89 additions & 212 deletions

File tree

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -177,3 +177,4 @@ cython_debug/
177177
**/.uipath
178178
**/**.nupkg
179179
**/__uipath/
180+
.claude/settings.local.json

playground.py

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
from llama_index.core.llms import ChatMessage
2+
from uipath_llamaindex.llms import UiPathVertex, GeminiModels
3+
4+
5+
def _run_check(name: str, fn) -> str:
    """Run one smoke check, print its outcome, and return "PASS" or "FAIL".

    Args:
        name: Label used in the console output.
        fn: Zero-argument callable that performs the call and returns the
            text to display on success.

    Returns:
        "PASS" if ``fn`` ran without raising, "FAIL" otherwise.
    """
    print(f"Testing {name}...")
    try:
        output = fn()
        print(f" {name}: {output.strip()}")
        return "PASS"
    # Broad catch is deliberate: this is a manual smoke script and we want
    # every method exercised even if an earlier one fails.
    except Exception as e:
        print(f" {name}: FAILED - {e}")
        return "FAIL"


def test_all_methods():
    """Smoke-test the four core UiPathVertex entry points.

    Exercises complete, chat, stream_complete and stream_chat against a
    live UiPath LLM Gateway endpoint, then prints a PASS/FAIL summary.
    Requires valid UiPath credentials in the environment.
    """
    llm = UiPathVertex(model=GeminiModels.gemini_2_5_flash, max_tokens=1024)
    prompt = "What is 2+2? Answer in one word."
    messages = [ChatMessage(role="user", content=prompt)]

    # Map each method name to a callable producing the text to display.
    # Streaming deltas can be None (e.g. empty first/last chunks), so they
    # are coalesced to "" before joining to avoid a TypeError.
    checks = {
        "complete": lambda: llm.complete(prompt).text,
        "chat": lambda: llm.chat(messages).message.content,
        "stream_complete": lambda: "".join(
            chunk.delta or "" for chunk in llm.stream_complete(prompt)
        ),
        "stream_chat": lambda: "".join(
            chunk.delta or "" for chunk in llm.stream_chat(messages)
        ),
    }

    results = {name: _run_check(name, fn) for name, fn in checks.items()}

    # Print summary
    print("\n" + "=" * 50)
    print("SUMMARY")
    print("=" * 50)

    passed = sum(1 for v in results.values() if v == "PASS")
    failed = sum(1 for v in results.values() if v == "FAIL")

    for method, status in results.items():
        icon = "+" if status == "PASS" else "x"
        print(f" [{icon}] {method}: {status}")

    print("-" * 50)
    print(f" Total: {len(results)} | Passed: {passed} | Failed: {failed}")
    print("=" * 50)


if __name__ == "__main__":
    test_all_methods()

src/uipath_llamaindex/llms/vertex.py

Lines changed: 14 additions & 212 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,6 @@ def _check_vertex_dependencies() -> None:
1818
if importlib.util.find_spec("llama_index.llms.vertex") is None:
1919
missing_packages.append("llama-index-llms-vertex")
2020

21-
if importlib.util.find_spec("httpx") is None:
22-
missing_packages.append("httpx")
23-
2421
if missing_packages:
2522
packages_str = ", ".join(missing_packages)
2623
raise ImportError(
@@ -35,126 +32,17 @@ def _check_vertex_dependencies() -> None:
3532

3633
_check_vertex_dependencies()
3734

38-
import httpx
39-
from google.auth.aio import credentials as aio_credentials
4035
from google.auth.credentials import AnonymousCredentials
4136
from google.cloud.aiplatform_v1beta1.services.prediction_service import (
4237
PredictionServiceClient as v1beta1PredictionServiceClient,
4338
)
4439
from google.cloud.aiplatform_v1beta1.services.prediction_service.transports.rest import (
4540
PredictionServiceRestTransport,
4641
)
47-
from google.cloud.aiplatform_v1beta1.services.prediction_service.transports.rest_asyncio import (
48-
AsyncPredictionServiceRestTransport,
49-
)
5042
from llama_index.core.bridge.pydantic import PrivateAttr
5143
from llama_index.llms.vertex import Vertex
5244

5345

54-
class AsyncAnonymousCredentials(aio_credentials.Credentials):
55-
"""Async-compatible anonymous credentials that don't provide authentication.
56-
57-
Used to satisfy Google's credential requirements while we handle
58-
authentication separately via UiPath Gateway headers.
59-
"""
60-
61-
# Class-level attribute to ensure _token exists before parent __init__ runs
62-
_token: Optional[str] = None
63-
64-
def __init__(self) -> None:
65-
"""Initialize credentials, ensuring _token is set before parent init."""
66-
# Set instance attribute before parent __init__ tries to set self.token
67-
self._token = None
68-
# Now call parent - it will call self.token = None which uses our setter
69-
super().__init__()
70-
71-
@property
72-
def token(self) -> Optional[str]:
73-
return self._token
74-
75-
@token.setter
76-
def token(self, value: Optional[str]) -> None:
77-
self._token = value
78-
79-
@property
80-
def expired(self) -> bool:
81-
return False
82-
83-
@property
84-
def valid(self) -> bool:
85-
return True
86-
87-
async def refresh(self, _request) -> None:
88-
"""No-op refresh for anonymous credentials."""
89-
pass
90-
91-
92-
class CustomSyncSession:
93-
"""
94-
Custom sync session that redirects all requests to UiPath LLM Gateway.
95-
96-
Uses httpx for HTTP requests, bypassing Google's AuthorizedSession.
97-
"""
98-
99-
def __init__(self, llmgw_url: str, custom_headers: dict[str, str]):
100-
self.llmgw_url = llmgw_url
101-
self.custom_headers = custom_headers or {}
102-
self._client = httpx.Client()
103-
104-
def request(self, method: str, url: str, **kwargs):
105-
"""Make an HTTP request, redirecting to UiPath gateway."""
106-
# Get headers from kwargs or use empty dict
107-
headers = kwargs.pop("headers", {}) or {}
108-
109-
# Update with our custom headers (including Authorization)
110-
headers.update(self.custom_headers)
111-
112-
# Detect streaming from kwargs
113-
is_streaming = kwargs.get("stream", False)
114-
headers["X-UiPath-Streaming-Enabled"] = "true" if is_streaming else "false"
115-
116-
# Convert 'data' to 'content' for httpx
117-
if "data" in kwargs:
118-
kwargs["content"] = kwargs.pop("data")
119-
120-
# Make request to our gateway URL instead of the original URL
121-
response = self._client.request(
122-
method, self.llmgw_url, headers=headers, **kwargs
123-
)
124-
125-
# Return a response wrapper compatible with Google's expectations
126-
return HttpxSyncResponseWrapper(response)
127-
128-
@property
129-
def verify(self):
130-
return self._client._transport._pool._ssl_context is not None
131-
132-
@verify.setter
133-
def verify(self, _value):
134-
# httpx doesn't support changing verify after creation
135-
pass
136-
137-
def close(self):
138-
self._client.close()
139-
140-
141-
class HttpxSyncResponseWrapper:
142-
"""Wrapper to make httpx sync response compatible with requests.Response interface."""
143-
144-
def __init__(self, response: httpx.Response):
145-
self._response = response
146-
self.status_code = response.status_code
147-
self.headers = dict(response.headers)
148-
self.content = response.content
149-
self.text = response.text
150-
151-
def json(self):
152-
return self._response.json()
153-
154-
def raise_for_status(self):
155-
self._response.raise_for_status()
156-
157-
15846
class CustomPredictionServiceRestTransport(PredictionServiceRestTransport):
15947
"""Custom REST transport that redirects requests to UiPath LLM Gateway."""
16048

@@ -165,9 +53,6 @@ def __init__(self, llmgw_url: str, custom_headers: dict[str, str], **kwargs):
16553
kwargs.setdefault("credentials", AnonymousCredentials())
16654
super().__init__(**kwargs)
16755

168-
# Disable SSL verification
169-
self._session.verify = False
170-
17156
# Monkey-patch the session's request method to redirect to UiPath Gateway
17257
# This preserves the session object identity while redirecting all requests
17358
original_request = self._session.request
@@ -176,6 +61,9 @@ def redirected_request(method, url, **kwargs_inner):
17661
headers = kwargs_inner.pop("headers", {})
17762
headers.update(self.custom_headers)
17863

64+
# Remove Google's internal query parameters - UiPath gateway doesn't need them
65+
kwargs_inner.pop("params", None)
66+
17967
is_streaming = kwargs_inner.get("stream", False)
18068
headers["X-UiPath-Streaming-Enabled"] = "true" if is_streaming else "false"
18169

@@ -184,92 +72,6 @@ def redirected_request(method, url, **kwargs_inner):
18472
self._session.request = redirected_request # type: ignore[method-assign]
18573

18674

187-
class CustomAsyncSession:
188-
"""
189-
Custom async session for redirecting requests to UiPath LLM Gateway.
190-
191-
Uses httpx for async HTTP requests, bypassing Google's AsyncAuthorizedSession.
192-
"""
193-
194-
def __init__(self, llmgw_url: str, custom_headers: dict[str, str]):
195-
self.llmgw_url = llmgw_url
196-
self.custom_headers = custom_headers or {}
197-
198-
async def request(
199-
self,
200-
method: str,
201-
url: str,
202-
data: Optional[bytes] = None,
203-
headers: Optional[dict] = None,
204-
**kwargs,
205-
):
206-
"""Make an async HTTP request, redirecting to UiPath gateway."""
207-
request_headers = dict(headers) if headers else {}
208-
209-
# Update with our custom headers (including Authorization)
210-
request_headers.update(self.custom_headers)
211-
212-
# Detect streaming from URL pattern
213-
is_streaming = "stream" in url.lower()
214-
request_headers["X-UiPath-Streaming-Enabled"] = (
215-
"true" if is_streaming else "false"
216-
)
217-
218-
async with httpx.AsyncClient() as client:
219-
response = await client.request(
220-
method,
221-
self.llmgw_url,
222-
content=data,
223-
headers=request_headers,
224-
**kwargs,
225-
)
226-
# Return a response wrapper compatible with Google's expectations
227-
return HttpxAsyncResponseWrapper(
228-
status=response.status_code,
229-
headers=dict(response.headers),
230-
body=response.content,
231-
)
232-
233-
async def close(self):
234-
"""Close the session (no-op for our implementation)."""
235-
pass
236-
237-
238-
class HttpxAsyncResponseWrapper:
239-
"""Wrapper to make httpx async response compatible with Google's expected interface."""
240-
241-
def __init__(self, status: int, headers: dict, body: bytes):
242-
self.status = status
243-
self.headers = headers
244-
self._body = body
245-
246-
async def read(self) -> bytes:
247-
"""Read the response body."""
248-
return self._body
249-
250-
async def content(self) -> bytes:
251-
"""Read the response content."""
252-
return self._body
253-
254-
255-
class CustomAsyncPredictionServiceRestTransport(AsyncPredictionServiceRestTransport):
256-
"""Custom async REST transport that redirects requests to UiPath LLM Gateway."""
257-
258-
def __init__(self, llmgw_url: str, custom_headers: dict[str, str], **kwargs):
259-
self.llmgw_url = llmgw_url
260-
self.custom_headers = custom_headers or {}
261-
262-
# Use async-compatible credentials for the async transport
263-
kwargs.setdefault("credentials", AsyncAnonymousCredentials())
264-
super().__init__(**kwargs)
265-
266-
# Replace the session with a custom one that redirects requests
267-
self._session = CustomAsyncSession(
268-
llmgw_url=llmgw_url,
269-
custom_headers=self.custom_headers,
270-
)
271-
272-
27375
class UiPathVertex(Vertex):
27476
"""
27577
UiPath Vertex AI LLM that routes requests through UiPath's LLM Gateway.
@@ -350,25 +152,25 @@ def _patch_generative_model_client(self) -> None:
350152
llmgw_url = self._build_base_url()
351153
custom_headers = self._build_headers(self._uipath_token)
352154

353-
# Create custom REST transport that routes to UiPath Gateway
354-
rest_transport = CustomPredictionServiceRestTransport(
155+
# Create custom sync REST transport that routes to UiPath Gateway
156+
sync_transport = CustomPredictionServiceRestTransport(
355157
llmgw_url=llmgw_url, custom_headers=custom_headers
356158
)
357159

358-
# Create the prediction client with our custom transport
359-
custom_prediction_client = v1beta1PredictionServiceClient(
360-
transport=rest_transport,
160+
# Create the sync prediction client with our custom transport
161+
custom_sync_client = v1beta1PredictionServiceClient(
162+
transport=sync_transport,
361163
)
362164

363165
# Inject our custom client into the GenerativeModel instance
364166
# This bypasses the cached_property and uses our client directly
365-
# The GenerativeModel uses _prediction_client as a @functools.cached_property
366-
# Setting it in __dict__ ensures it's used instead of creating a new one
367-
if hasattr(self, '_client') and self._client is not None:
368-
self._client.__dict__['_prediction_client'] = custom_prediction_client
167+
# The GenerativeModel uses _prediction_client as @functools.cached_property
168+
# Setting in __dict__ ensures it's used
169+
if hasattr(self, "_client") and self._client is not None:
170+
self._client.__dict__["_prediction_client"] = custom_sync_client
369171

370-
if hasattr(self, '_chat_client') and self._chat_client is not None:
371-
self._chat_client.__dict__['_prediction_client'] = custom_prediction_client
172+
if hasattr(self, "_chat_client") and self._chat_client is not None:
173+
self._chat_client.__dict__["_prediction_client"] = custom_sync_client
372174

373175
@property
374176
def _uipath_endpoint(self) -> str:

0 commit comments

Comments
 (0)