11import logging
22import os
3- from typing import Optional
3+ from typing import Any , Optional
44
5+ import httpx
6+ from llama_index .core .callbacks import CallbackManager
7+ from llama_index .core .constants import DEFAULT_NUM_OUTPUTS , DEFAULT_TEMPERATURE
58from uipath .utils import EndpointManager
69
710from .supported_models import GeminiModels
@@ -15,8 +18,8 @@ def _check_vertex_dependencies() -> None:
1518
1619 missing_packages = []
1720
18- if importlib .util .find_spec ("llama_index.llms.vertex " ) is None :
19- missing_packages .append ("llama-index-llms-vertex " )
21+ if importlib .util .find_spec ("llama_index.llms.google_genai " ) is None :
22+ missing_packages .append ("llama-index-llms-google-genai " )
2023
2124 if missing_packages :
2225 packages_str = ", " .join (missing_packages )
@@ -32,61 +35,62 @@ def _check_vertex_dependencies() -> None:
3235
3336_check_vertex_dependencies ()
3437
35- from google .auth .credentials import AnonymousCredentials
36- from google .cloud .aiplatform_v1beta1 .services .prediction_service import (
37- PredictionServiceClient as v1beta1PredictionServiceClient ,
38- )
39- from google .cloud .aiplatform_v1beta1 .services .prediction_service .transports .rest import (
40- PredictionServiceRestTransport ,
41- )
38+ import google .genai
39+ import google .genai .types as genai_types
4240from llama_index .core .bridge .pydantic import PrivateAttr
43- from llama_index .llms .vertex import Vertex
41+ from llama_index .llms .google_genai import GoogleGenAI
4442
4543
class UiPathHttpxClient(httpx.Client):
    """Custom httpx client that reroutes Gemini content-generation calls to the UiPath LLM Gateway.

    Only ``send`` is overridden.  Every synchronous httpx call — including
    ``Client.request`` — is ultimately dispatched through ``send``, so doing the
    rewrite there covers both entry points.  (Rewriting the URL earlier, in a
    ``request`` override, would hand ``send`` a URL that no longer contains
    ``generateContent``, silently skipping the streaming-flag and Host fixes.)
    """

    def __init__(self, gateway_url: str, **kwargs):
        """Initialize the client.

        Args:
            gateway_url: Fully-built UiPath LLM Gateway URL that replaces the
                Google ``generateContent`` / ``streamGenerateContent`` endpoint.
            **kwargs: Forwarded unchanged to ``httpx.Client`` (headers,
                timeouts, redirect policy, ...).
        """
        self.gateway_url = gateway_url
        super().__init__(**kwargs)

    def send(self, request: httpx.Request, **kwargs) -> httpx.Response:
        """Redirect generateContent/streamGenerateContent requests to the UiPath gateway.

        Any other request (e.g. model metadata lookups) passes through
        untouched.
        """
        url_str = str(request.url)
        if "generateContent" in url_str or "streamGenerateContent" in url_str:
            is_streaming = "streamGenerateContent" in url_str
            headers = dict(request.headers)
            # Set the streaming flag explicitly in BOTH cases so the gateway
            # never has to fall back to a default for non-streaming calls.
            headers["X-UiPath-Streaming-Enabled"] = "true" if is_streaming else "false"
            # The Host header was computed from the Google endpoint when the
            # request was built; it must match the gateway instead.
            headers["host"] = httpx.URL(self.gateway_url).host
            # Rebuild the request against the gateway URL, preserving method,
            # body and extensions.
            # NOTE(review): ``request.content`` assumes the body is fully
            # buffered (non-streamed upload) — confirm streamed request bodies
            # are never used on this path.
            request = httpx.Request(
                method=request.method,
                url=self.gateway_url,
                headers=headers,
                content=request.content,
                extensions=request.extensions,
            )
        return super().send(request, **kwargs)
7579
7680
77- class UiPathVertex (Vertex ):
81+ class UiPathVertex (GoogleGenAI ):
7882 """
7983 UiPath Vertex AI LLM that routes requests through UiPath's LLM Gateway.
8084
81- This class wraps LlamaIndex's Vertex class and redirects all API calls
85+ This class wraps LlamaIndex's GoogleGenAI class and redirects all API calls
8286 to UiPath's LLM Gateway for authentication and routing.
8387
8488 Args:
8589 org_id: UiPath organization ID. Falls back to UIPATH_ORGANIZATION_ID env var.
8690 tenant_id: UiPath tenant ID. Falls back to UIPATH_TENANT_ID env var.
8791 token: UiPath access token. Falls back to UIPATH_ACCESS_TOKEN env var.
8892 model: Model identifier. Defaults to gemini-2.5-flash.
89- **kwargs: Additional arguments passed to the Vertex base class.
93+ **kwargs: Additional arguments passed to the GoogleGenAI base class.
9094
9195 Example:
9296 ```python
@@ -110,7 +114,14 @@ def __init__(
110114 tenant_id : Optional [str ] = None ,
111115 token : Optional [str ] = None ,
112116 model : str = GeminiModels .gemini_2_5_flash ,
113- ** kwargs ,
117+ temperature : float = DEFAULT_TEMPERATURE ,
118+ max_tokens : Optional [int ] = None ,
119+ context_window : Optional [int ] = None ,
120+ max_retries : int = 3 ,
121+ generation_config : Optional [genai_types .GenerateContentConfig ] = None ,
122+ callback_manager : Optional [CallbackManager ] = None ,
123+ is_function_calling_model : bool = True ,
124+ ** kwargs : Any ,
114125 ):
115126 org_id = org_id or os .getenv ("UIPATH_ORGANIZATION_ID" )
116127 tenant_id = tenant_id or os .getenv ("UIPATH_TENANT_ID" )
@@ -129,62 +140,72 @@ def __init__(
129140 "UIPATH_ACCESS_TOKEN environment variable or token parameter is required"
130141 )
131142
132- # Initialize base Vertex class with dummy credentials
133- # The actual auth is handled by UiPath Gateway
134- super ().__init__ (
135- model = model ,
136- project = os .getenv ("VERTEXAI_PROJECT" , "none" ),
137- location = os .getenv ("VERTEXAI_LOCATION" , "us-central1" ),
138- credentials = AnonymousCredentials (),
139- ** kwargs ,
140- )
141-
142- # Set private attributes after super().__init__
143- self ._uipath_vendor = "vertexai"
144- self ._uipath_model_name = model
145- self ._uipath_url = None
146- self ._uipath_token = token
147-
148- # After super().__init__, self._client is a GenerativeModel instance
149- # We need to patch its _prediction_client to use our custom transport
150- self ._patch_generative_model_client ()
143+ # Build UiPath gateway URL and headers
144+ uipath_url = self ._build_base_url_static (model )
145+ headers = self ._build_headers_static (token )
151146
152- def _patch_generative_model_client (self ) -> None :
153- """Patch the GenerativeModel's internal prediction client to use UiPath Gateway."""
154- llmgw_url = self ._build_base_url ()
155- custom_headers = self ._build_headers (self ._uipath_token )
156-
157- # Create custom sync REST transport that routes to UiPath Gateway
158- sync_transport = CustomPredictionServiceRestTransport (
159- llmgw_url = llmgw_url , custom_headers = custom_headers
147+ # Create custom httpx client that redirects requests to UiPath gateway
148+ custom_httpx_client = UiPathHttpxClient (
149+ gateway_url = uipath_url ,
150+ headers = headers ,
151+ follow_redirects = True ,
160152 )
161153
162- # Create the sync prediction client with our custom transport
163- custom_sync_client = v1beta1PredictionServiceClient (
164- transport = sync_transport ,
154+ # Configure HTTP options with our custom client
155+ http_options = genai_types . HttpOptions (
156+ httpxClient = custom_httpx_client ,
165157 )
166158
167- # Inject our custom client into the GenerativeModel instance
168- # This bypasses the cached_property and uses our client directly
169- # The GenerativeModel uses _prediction_client as @functools.cached_property
170- # Setting in __dict__ ensures it's used
171- if hasattr ( self , "_client" ) and self . _client is not None :
172- self . _client . __dict__ [ "_prediction_client" ] = custom_sync_client
159+ # Create google.genai client with custom httpx client
160+ # We pass a dummy api_key since auth is handled by UiPath headers
161+ client = google . genai . Client (
162+ api_key = "uipath-gateway" ,
163+ http_options = http_options ,
164+ )
173165
174- if hasattr (self , "_chat_client" ) and self ._chat_client is not None :
175- self ._chat_client .__dict__ ["_prediction_client" ] = custom_sync_client
166+ # Skip calling GoogleGenAI.__init__ which tries to fetch model metadata
167+ # Instead, initialize the grandparent (FunctionCallingLLM) directly
168+ # and set up the attributes ourselves
169+ from llama_index .core .llms .function_calling import FunctionCallingLLM
176170
177- @property
178- def _uipath_endpoint (self ) -> str :
179- """Get the UiPath LLM Gateway endpoint for this model."""
180- vendor_endpoint = EndpointManager .get_vendor_endpoint ()
181- formatted_endpoint = vendor_endpoint .format (
182- vendor = self ._uipath_vendor ,
183- model = self ._uipath_model_name ,
171+ FunctionCallingLLM .__init__ (
172+ self ,
173+ callback_manager = callback_manager ,
174+ ** kwargs ,
184175 )
185- return formatted_endpoint
186176
187- def _build_headers (self , token : str ) -> dict [str , str ]:
177+ # Set GoogleGenAI public attributes
178+ self .model = model
179+ self .temperature = temperature
180+ self .context_window = context_window
181+ self .max_retries = max_retries
182+ self .is_function_calling_model = is_function_calling_model
183+ self .cached_content = None
184+ self .built_in_tool = None
185+ self .file_mode = "hybrid"
186+
187+ # Set GoogleGenAI private attributes
188+ self ._client = client
189+ self ._model_meta = None # We skip model metadata fetch
190+ self ._max_tokens = max_tokens or DEFAULT_NUM_OUTPUTS
191+
192+ # Set up generation config
193+ if generation_config :
194+ self ._generation_config = generation_config .model_dump ()
195+ else :
196+ self ._generation_config = genai_types .GenerateContentConfig (
197+ temperature = temperature ,
198+ max_output_tokens = max_tokens ,
199+ ).model_dump ()
200+
201+ # Set UiPath private attributes
202+ self ._uipath_vendor = "vertexai"
203+ self ._uipath_model_name = model
204+ self ._uipath_url = uipath_url
205+ self ._uipath_token = token
206+
207+ @staticmethod
208+ def _build_headers_static (token : str ) -> dict [str , str ]:
188209 """Build HTTP headers for UiPath Gateway requests."""
189210 headers = {
190211 "Authorization" : f"Bearer { token } " ,
@@ -195,16 +216,17 @@ def _build_headers(self, token: str) -> dict[str, str]:
195216 headers ["X-UiPath-ProcessKey" ] = process_key
196217 return headers
197218
198- def _build_base_url (self ) -> str :
219+ @staticmethod
220+ def _build_base_url_static (model : str ) -> str :
199221 """Build the full URL for the UiPath LLM Gateway."""
200- if not self ._uipath_url :
201- env_uipath_url = os .getenv ("UIPATH_URL" )
222+ env_uipath_url = os .getenv ("UIPATH_URL" )
202223
203- if env_uipath_url :
204- self ._uipath_url = (
205- f"{ env_uipath_url .rstrip ('/' )} /{ self ._uipath_endpoint } "
206- )
207- else :
208- raise ValueError ("UIPATH_URL environment variable is required" )
224+ if not env_uipath_url :
225+ raise ValueError ("UIPATH_URL environment variable is required" )
209226
210- return self ._uipath_url
227+ vendor_endpoint = EndpointManager .get_vendor_endpoint ()
228+ formatted_endpoint = vendor_endpoint .format (
229+ vendor = "vertexai" ,
230+ model = model ,
231+ )
232+ return f"{ env_uipath_url .rstrip ('/' )} /{ formatted_endpoint } "
0 commit comments