Skip to content

Commit b9ce55f

Browse files
authored
Merge pull request #1741 from radofuchs/LCORE_2035_konflux_fix
LCORE-2035 Add the logic for updating proper run.yaml file in konflux
2 parents f5cafb5 + de60965 commit b9ce55f

11 files changed

Lines changed: 283 additions & 90 deletions

File tree

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
2

tests/e2e-prow/rhoai/pipeline-konflux.sh

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -345,6 +345,12 @@ export E2E_LSC_HOSTNAME="localhost"
345345
export E2E_JWKS_HOSTNAME="localhost"
346346
export E2E_LLAMA_HOSTNAME="localhost"
347347
export E2E_LLAMA_PORT="8321"
348+
# Same pattern as tests/e2e-prow/rhoai/pipeline.sh and .github/workflows/e2e_tests_*.yaml:
349+
# Behave {MODEL}/{PROVIDER} use these when set; avoids wrong fallbacks if /v1/models
350+
# discovery in before_all is empty (matches run-ci.yaml openai + E2E_OPENAI_MODEL).
351+
: "${E2E_DEFAULT_PROVIDER_OVERRIDE:=openai}"
352+
: "${E2E_DEFAULT_MODEL_OVERRIDE:=${E2E_OPENAI_MODEL:-gpt-4o-mini}}"
353+
export E2E_DEFAULT_PROVIDER_OVERRIDE E2E_DEFAULT_MODEL_OVERRIDE
348354
log "LCS accessible at: http://$E2E_LSC_HOSTNAME:8080"
349355
log "Mock JWKS accessible at: http://$E2E_JWKS_HOSTNAME:8000"
350356
log "Llama Stack (e2e client hooks) at: http://$E2E_LLAMA_HOSTNAME:$E2E_LLAMA_PORT"

tests/e2e-prow/rhoai/scripts/e2e-ops.sh

Lines changed: 28 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -195,14 +195,34 @@ verify_connectivity() {
195195
# First check /readiness to see if port-forward is alive (accept 200, 401, or 503)
196196
http_code=$(curl -s -o /dev/null -w '%{http_code}' --max-time 5 "http://localhost:$local_port/readiness" 2>/dev/null) || http_code="000"
197197

198-
if [[ "$http_code" == "200" || "$http_code" == "401" || "$http_code" == "503" ]]; then
198+
# LCS returns 503 when provider health fails (see health.py). Intentionally broken
199+
# Llama proxy e2e stays 503 forever while the tunnel is still fine. Only accept 503
200+
# on the last attempt so normal restarts keep retrying while providers warm up
201+
# (transient 503 then 200) and we do not short-circuit other suites on first 503.
202+
if [[ "$http_code" == "503" ]]; then
203+
if [[ "$attempt" -eq "$max_attempts" ]]; then
204+
echo "[e2e-ops] /readiness=503 after $max_attempts attempts — LCS reachable; providers still unhealthy (expected for some e2e)"
205+
return 0
206+
fi
207+
echo "[e2e-ops] /readiness=503 (attempt $attempt/$max_attempts); retrying in case providers recover..."
208+
fi
209+
210+
if [[ "$http_code" == "200" || "$http_code" == "401" ]]; then
199211
# Port-forward works; now verify the app is fully initialized by hitting
200212
# a real endpoint. /v1/models requires the Llama Stack handshake to complete.
201-
# Accept 200 (no auth) or 401 (auth enabled) — both prove the full app
202-
# stack is up, not just the TCP socket.
213+
# Accept 200 (no auth) or 401/403 (auth) — both prove the full app stack is up.
214+
#
215+
# Proxy/TLS e2e scenarios intentionally misconfigure Llama (e.g. unreachable
216+
# HTTP proxy). LCS still answers /v1/models with 5xx once the route exists;
217+
# treating those as success avoids false failures on restart-lightspeed while
218+
# still rejecting connection errors (000).
203219
local models_code
204220
models_code=$(curl -s -o /dev/null -w '%{http_code}' --max-time 10 "http://localhost:$local_port/v1/models" 2>/dev/null) || models_code="000"
205-
if [[ "$models_code" == "200" || "$models_code" == "401" ]]; then
221+
if [[ "$models_code" == "200" || "$models_code" == "401" || "$models_code" == "403" ]]; then
222+
return 0
223+
fi
224+
if [[ "$models_code" =~ ^5[0-9][0-9]$ ]]; then
225+
echo "[e2e-ops] /v1/models=$models_code (LCS reachable; Llama/provider error expected in some e2e)"
206226
return 0
207227
fi
208228
echo "[e2e-ops] /readiness=$http_code but /v1/models=$models_code (app still initializing, attempt $attempt/$max_attempts)"
@@ -563,6 +583,7 @@ cmd_wait_for_pod() {
563583
cmd_update_configmap() {
564584
local configmap_name="${1:?ConfigMap name required}"
565585
local source_file="${2:?Source file required}"
586+
local configmap_key="${3:-lightspeed-stack.yaml}"
566587

567588
echo "Updating ConfigMap $configmap_name from $source_file..."
568589

@@ -575,7 +596,7 @@ cmd_update_configmap() {
575596
# If delete succeeds but create fails the ConfigMap is gone and every
576597
# subsequent attempt cascades into failure.
577598
if ! oc create configmap "$configmap_name" -n "$NAMESPACE" \
578-
--from-file="lightspeed-stack.yaml=$source_file" \
599+
--from-file="${configmap_key}=${source_file}" \
579600
--dry-run=client -o yaml | oc apply -n "$NAMESPACE" -f -; then
580601
echo "ERROR: oc apply for ConfigMap $configmap_name failed" >&2
581602
return 1
@@ -586,8 +607,9 @@ cmd_update_configmap() {
586607

587608
cmd_get_configmap_content() {
588609
local configmap_name="${1:?ConfigMap name required}"
610+
local configmap_key="${2:-lightspeed-stack.yaml}"
589611
oc get configmap "$configmap_name" -n "$NAMESPACE" \
590-
-o 'jsonpath={.data.lightspeed-stack\.yaml}'
612+
-o "go-template={{index .data \"$configmap_key\"}}"
591613
}
592614

593615
cmd_disrupt_llama_stack() {

tests/e2e/features/environment.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ def _fetch_models_from_service() -> dict:
6060
host_env = os.getenv("E2E_LSC_HOSTNAME", "localhost")
6161
port_env = os.getenv("E2E_LSC_PORT", "8080")
6262
url = f"http://{host_env}:{port_env}/v1/models"
63-
response = requests.get(url, timeout=5)
63+
response = requests.get(url, params={"model_type": "llm"}, timeout=15)
6464
response.raise_for_status()
6565
data = response.json()
6666

@@ -87,7 +87,7 @@ def before_all(context: Context) -> None:
8787
Attempts to detect a default LLM model and provider via
8888
_fetch_models_from_service() and stores results in context.default_model
8989
and context.default_provider; if detection fails, falls back to
90-
"gpt-4-turbo" and "openai".
90+
``FALLBACK_MODEL`` / ``FALLBACK_PROVIDER`` (aligned with server-mode e2e YAML).
9191
9292
Parameters:
9393
----------

tests/e2e/features/proxy.feature

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
@e2e_group_3 @skip-in-library-mode @skip-in-prow
1+
@e2e_group_3 @skip-in-library-mode
22
Feature: Proxy and TLS networking tests for Llama Stack providers
33

44
Verify that the Lightspeed Stack works correctly when Llama Stack's
@@ -21,7 +21,7 @@ Feature: Proxy and TLS networking tests for Llama Stack providers
2121

2222
# --- AC1: Tunnel proxy routing ---
2323

24-
@TunnelProxy
24+
@TunnelProxy @skip-in-prow
2525
Scenario: LLM traffic is routed through a configured tunnel proxy
2626
Given A tunnel proxy is running on port 8888
2727
And Llama Stack is configured to route inference through the tunnel proxy
@@ -47,12 +47,13 @@ Feature: Proxy and TLS networking tests for Llama Stack providers
4747
"""
4848
{"query": "What is 2+2?", "model": "{MODEL}", "provider": "{PROVIDER}", "shield_ids": []}
4949
"""
50-
Then The status code of the response is 500
50+
#will be fixed in https://redhat.atlassian.net/browse/LCORE-2255
51+
Then The status code of the response is one of 404 or 500
5152

5253

5354
# --- AC2: Interception proxy with CA certificate ---
5455

55-
@InterceptionProxy
56+
@InterceptionProxy @skip-in-prow
5657
Scenario: LLM traffic works through interception proxy with correct CA
5758
Given An interception proxy with trustme CA is running on port 8889
5859
And Llama Stack is configured to route inference through the interception proxy with CA cert
@@ -65,7 +66,7 @@ Feature: Proxy and TLS networking tests for Llama Stack providers
6566
Then The status code of the response is 200
6667
And The interception proxy intercepted at least 1 connection
6768

68-
@InterceptionProxy
69+
@InterceptionProxy @skip-in-prow
6970
Scenario: LLM query fails when interception proxy CA is not provided
7071
Given An interception proxy with trustme CA is running on port 8890
7172
And Llama Stack is configured to route inference through the interception proxy without CA cert

tests/e2e/features/steps/common_http.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,23 @@ def check_status_code(context: Context, status: int) -> None:
4040
)
4141

4242

43+
@step("The status code of the response is one of {first:d} or {second:d}")
44+
def check_status_code_one_of(context: Context, first: int, second: int) -> None:
45+
"""Assert the response status is one of two allowed codes (order does not matter)."""
46+
assert context.response is not None, "Request needs to be performed first"
47+
allowed = {first, second}
48+
actual = context.response.status_code
49+
if actual not in allowed:
50+
try:
51+
error_body = context.response.json()
52+
except Exception:
53+
error_body = context.response.text
54+
assert False, (
55+
f"Status code is {actual}, expected one of {sorted(allowed)}. "
56+
f"Response: {error_body}"
57+
)
58+
59+
4360
@then('Content type of response is set to "{content_type}"')
4461
def check_content_type(context: Context, content_type: str) -> None:
4562
"""Check the HTTP content type for latest response from tested service."""

tests/e2e/features/steps/proxy.py

Lines changed: 26 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,6 @@
1313
"""
1414

1515
import asyncio
16-
import os
17-
import shutil
1816
import subprocess
1917
import tempfile
2018
import threading
@@ -23,20 +21,21 @@
2321
from typing import Any, Optional
2422

2523
import trustme
26-
import yaml
2724
from behave import given, then # pyright: ignore[reportAttributeAccessIssue]
2825
from behave.runner import Context
2926

27+
from tests.e2e.utils.llama_config_utils import (
28+
backup_llama_config,
29+
load_llama_config,
30+
restore_llama_config_if_modified,
31+
write_llama_config,
32+
)
3033
from tests.e2e.utils.utils import (
3134
is_prow_environment,
3235
restart_container,
3336
wait_for_lightspeed_stack_http_ready,
3437
)
3538

36-
# Llama Stack config — mounted into the container from the host
37-
_LLAMA_STACK_CONFIG = "run.yaml"
38-
_LLAMA_STACK_CONFIG_BACKUP = "run.yaml.proxy-backup"
39-
4039

4140
def _is_docker_mode() -> bool:
4241
"""Check if services are running in Docker containers (local e2e)."""
@@ -126,18 +125,6 @@ def _get_proxy_host(is_docker: bool) -> str:
126125
return "172.17.0.1"
127126

128127

129-
def _load_llama_config() -> dict[str, Any]:
130-
"""Load the base Llama Stack run config."""
131-
with open(_LLAMA_STACK_CONFIG, encoding="utf-8") as f:
132-
return yaml.safe_load(f)
133-
134-
135-
def _write_config(config: dict[str, Any], path: str) -> None:
136-
"""Write a YAML config file."""
137-
with open(path, "w", encoding="utf-8") as f:
138-
yaml.dump(config, f, default_flow_style=False)
139-
140-
141128
def _find_inference_provider(
142129
context: Context, config: dict[str, Any]
143130
) -> dict[str, Any]:
@@ -175,12 +162,6 @@ def _find_inference_provider(
175162
)
176163

177164

178-
def _backup_llama_config() -> None:
179-
"""Create a backup of the current run.yaml if not already backed up."""
180-
if not os.path.exists(_LLAMA_STACK_CONFIG_BACKUP):
181-
shutil.copy(_LLAMA_STACK_CONFIG, _LLAMA_STACK_CONFIG_BACKUP)
182-
183-
184165
# --- Background Steps ---
185166

186167

@@ -214,11 +195,8 @@ def restore_if_modified(context: Context) -> None:
214195
_stop_proxy(context, "tunnel_proxy", "proxy_loop")
215196
_stop_proxy(context, "interception_proxy", "interception_proxy_loop")
216197

217-
if os.path.exists(_LLAMA_STACK_CONFIG_BACKUP):
218-
print(
219-
f"Restoring original Llama Stack config from {_LLAMA_STACK_CONFIG_BACKUP}..."
220-
)
221-
shutil.move(_LLAMA_STACK_CONFIG_BACKUP, _LLAMA_STACK_CONFIG)
198+
if restore_llama_config_if_modified():
199+
print("Restoring original Llama Stack config from backup...")
222200

223201

224202
# --- Service Restart Steps ---
@@ -264,10 +242,10 @@ def run_proxy() -> None:
264242
@given("Llama Stack is configured to route inference through the tunnel proxy")
265243
def configure_llama_tunnel_proxy(context: Context) -> None:
266244
"""Modify run.yaml with proxy config pointing to the tunnel proxy."""
267-
_backup_llama_config()
245+
backup_llama_config()
268246
proxy = context.tunnel_proxy
269247
proxy_host = _get_proxy_host(context.is_docker_mode)
270-
config = _load_llama_config()
248+
config = load_llama_config()
271249
provider = _find_inference_provider(context, config)
272250

273251
if "config" not in provider:
@@ -278,14 +256,14 @@ def configure_llama_tunnel_proxy(context: Context) -> None:
278256
}
279257
}
280258

281-
_write_config(config, _LLAMA_STACK_CONFIG)
259+
write_llama_config(config)
282260

283261

284262
@given('Llama Stack is configured to route inference through proxy "{proxy_url}"')
285263
def configure_llama_unreachable_proxy(context: Context, proxy_url: str) -> None:
286264
"""Modify run.yaml with a proxy URL (may be unreachable)."""
287-
_backup_llama_config()
288-
config = _load_llama_config()
265+
backup_llama_config()
266+
config = load_llama_config()
289267
provider = _find_inference_provider(context, config)
290268

291269
if "config" not in provider:
@@ -296,7 +274,7 @@ def configure_llama_unreachable_proxy(context: Context, proxy_url: str) -> None:
296274
}
297275
}
298276

299-
_write_config(config, _LLAMA_STACK_CONFIG)
277+
write_llama_config(config)
300278

301279

302280
# --- Interception Proxy Steps ---
@@ -346,10 +324,10 @@ def run_proxy() -> None:
346324
)
347325
def configure_llama_interception_with_ca(context: Context) -> None:
348326
"""Modify run.yaml with interception proxy and CA cert config."""
349-
_backup_llama_config()
327+
backup_llama_config()
350328
proxy = context.interception_proxy
351329
proxy_host = _get_proxy_host(context.is_docker_mode)
352-
config = _load_llama_config()
330+
config = load_llama_config()
353331
provider = _find_inference_provider(context, config)
354332

355333
if "config" not in provider:
@@ -364,7 +342,7 @@ def configure_llama_interception_with_ca(context: Context) -> None:
364342
},
365343
}
366344

367-
_write_config(config, _LLAMA_STACK_CONFIG)
345+
write_llama_config(config)
368346

369347

370348
@given(
@@ -373,10 +351,10 @@ def configure_llama_interception_with_ca(context: Context) -> None:
373351
)
374352
def configure_llama_interception_no_ca(context: Context) -> None:
375353
"""Modify run.yaml with interception proxy but NO CA cert."""
376-
_backup_llama_config()
354+
backup_llama_config()
377355
proxy = context.interception_proxy
378356
proxy_host = _get_proxy_host(context.is_docker_mode)
379-
config = _load_llama_config()
357+
config = load_llama_config()
380358
provider = _find_inference_provider(context, config)
381359

382360
if "config" not in provider:
@@ -387,7 +365,7 @@ def configure_llama_interception_no_ca(context: Context) -> None:
387365
},
388366
}
389367

390-
_write_config(config, _LLAMA_STACK_CONFIG)
368+
write_llama_config(config)
391369

392370

393371
# --- TLS Steps ---
@@ -396,8 +374,8 @@ def configure_llama_interception_no_ca(context: Context) -> None:
396374
@given('Llama Stack is configured with minimum TLS version "{version}"')
397375
def configure_llama_tls_version(context: Context, version: str) -> None:
398376
"""Modify run.yaml with TLS version config."""
399-
_backup_llama_config()
400-
config = _load_llama_config()
377+
backup_llama_config()
378+
config = load_llama_config()
401379
provider = _find_inference_provider(context, config)
402380

403381
if "config" not in provider:
@@ -408,14 +386,14 @@ def configure_llama_tls_version(context: Context, version: str) -> None:
408386
}
409387
}
410388

411-
_write_config(config, _LLAMA_STACK_CONFIG)
389+
write_llama_config(config)
412390

413391

414392
@given('Llama Stack is configured with ciphers "{ciphers}"')
415393
def configure_llama_ciphers(context: Context, ciphers: str) -> None:
416394
"""Modify run.yaml with cipher suite config."""
417-
_backup_llama_config()
418-
config = _load_llama_config()
395+
backup_llama_config()
396+
config = load_llama_config()
419397
provider = _find_inference_provider(context, config)
420398

421399
if "config" not in provider:
@@ -426,7 +404,7 @@ def configure_llama_ciphers(context: Context, ciphers: str) -> None:
426404
}
427405
}
428406

429-
_write_config(config, _LLAMA_STACK_CONFIG)
407+
write_llama_config(config)
430408

431409

432410
# --- Proxy Verification Steps ---

0 commit comments

Comments
 (0)