From f21725f2c9d26da7647d6ee0134c43a3d596c5c6 Mon Sep 17 00:00:00 2001 From: Nick Larsen Date: Fri, 10 Apr 2026 12:34:15 +0200 Subject: [PATCH 1/5] test: Add final step to scale down kafka This ensures Kafka pods don't hang when zookeeper is shutdown before kafka on namespace deletion by kuttl --- .../kuttl/cluster-operation/90-shutdown-kafka.yaml | 11 +++++++++++ .../kuttl/delete-rolegroup/90-shutdown-kafka.yaml | 11 +++++++++++ tests/templates/kuttl/kerberos/90-shutdown-kafka.yaml | 11 +++++++++++ tests/templates/kuttl/logging/90-shutdown-kafka.yaml | 11 +++++++++++ tests/templates/kuttl/opa/90-shutdown-kafka.yaml | 11 +++++++++++ tests/templates/kuttl/smoke/90-shutdown-kafka.yaml | 11 +++++++++++ tests/templates/kuttl/tls/90-shutdown-kafka.yaml | 11 +++++++++++ 7 files changed, 77 insertions(+) create mode 100644 tests/templates/kuttl/cluster-operation/90-shutdown-kafka.yaml create mode 100644 tests/templates/kuttl/delete-rolegroup/90-shutdown-kafka.yaml create mode 100644 tests/templates/kuttl/kerberos/90-shutdown-kafka.yaml create mode 100644 tests/templates/kuttl/logging/90-shutdown-kafka.yaml create mode 100644 tests/templates/kuttl/opa/90-shutdown-kafka.yaml create mode 100644 tests/templates/kuttl/smoke/90-shutdown-kafka.yaml create mode 100644 tests/templates/kuttl/tls/90-shutdown-kafka.yaml diff --git a/tests/templates/kuttl/cluster-operation/90-shutdown-kafka.yaml b/tests/templates/kuttl/cluster-operation/90-shutdown-kafka.yaml new file mode 100644 index 00000000..acce2cee --- /dev/null +++ b/tests/templates/kuttl/cluster-operation/90-shutdown-kafka.yaml @@ -0,0 +1,11 @@ +--- +# Scale Kafka down before kuttl deletes the namespace. +# Without this, ZooKeeper and Kafka are terminated simultaneously, +# and Kafka hangs on controlled-shutdown ZooKeeper timeouts. 
+apiVersion: kuttl.dev/v1beta1 +kind: TestStep +commands: + - script: | + kubectl patch kafkacluster test-kafka -n $NAMESPACE --type merge -p '{"spec":{"brokers":{"roleGroups":{"default":{"replicas":0}}}}}' + - script: | + kubectl wait --for=delete pod -l app.kubernetes.io/instance=test-kafka -n $NAMESPACE --timeout=300s diff --git a/tests/templates/kuttl/delete-rolegroup/90-shutdown-kafka.yaml b/tests/templates/kuttl/delete-rolegroup/90-shutdown-kafka.yaml new file mode 100644 index 00000000..acce2cee --- /dev/null +++ b/tests/templates/kuttl/delete-rolegroup/90-shutdown-kafka.yaml @@ -0,0 +1,11 @@ +--- +# Scale Kafka down before kuttl deletes the namespace. +# Without this, ZooKeeper and Kafka are terminated simultaneously, +# and Kafka hangs on controlled-shutdown ZooKeeper timeouts. +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +commands: + - script: | + kubectl patch kafkacluster test-kafka -n $NAMESPACE --type merge -p '{"spec":{"brokers":{"roleGroups":{"default":{"replicas":0}}}}}' + - script: | + kubectl wait --for=delete pod -l app.kubernetes.io/instance=test-kafka -n $NAMESPACE --timeout=300s diff --git a/tests/templates/kuttl/kerberos/90-shutdown-kafka.yaml b/tests/templates/kuttl/kerberos/90-shutdown-kafka.yaml new file mode 100644 index 00000000..acce2cee --- /dev/null +++ b/tests/templates/kuttl/kerberos/90-shutdown-kafka.yaml @@ -0,0 +1,11 @@ +--- +# Scale Kafka down before kuttl deletes the namespace. +# Without this, ZooKeeper and Kafka are terminated simultaneously, +# and Kafka hangs on controlled-shutdown ZooKeeper timeouts. 
+apiVersion: kuttl.dev/v1beta1 +kind: TestStep +commands: + - script: | + kubectl patch kafkacluster test-kafka -n $NAMESPACE --type merge -p '{"spec":{"brokers":{"roleGroups":{"default":{"replicas":0}}}}}' + - script: | + kubectl wait --for=delete pod -l app.kubernetes.io/instance=test-kafka -n $NAMESPACE --timeout=300s diff --git a/tests/templates/kuttl/logging/90-shutdown-kafka.yaml b/tests/templates/kuttl/logging/90-shutdown-kafka.yaml new file mode 100644 index 00000000..809a3cf5 --- /dev/null +++ b/tests/templates/kuttl/logging/90-shutdown-kafka.yaml @@ -0,0 +1,11 @@ +--- +# Scale Kafka down before kuttl deletes the namespace. +# Without this, ZooKeeper and Kafka are terminated simultaneously, +# and Kafka hangs on controlled-shutdown ZooKeeper timeouts. +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +commands: + - script: | + kubectl patch kafkacluster test-kafka -n $NAMESPACE --type merge -p '{"spec":{"brokers":{"roleGroups":{"automatic-log-config":{"replicas":0}}}}}' + - script: | + kubectl wait --for=delete pod -l app.kubernetes.io/instance=test-kafka -n $NAMESPACE --timeout=300s diff --git a/tests/templates/kuttl/opa/90-shutdown-kafka.yaml b/tests/templates/kuttl/opa/90-shutdown-kafka.yaml new file mode 100644 index 00000000..acce2cee --- /dev/null +++ b/tests/templates/kuttl/opa/90-shutdown-kafka.yaml @@ -0,0 +1,11 @@ +--- +# Scale Kafka down before kuttl deletes the namespace. +# Without this, ZooKeeper and Kafka are terminated simultaneously, +# and Kafka hangs on controlled-shutdown ZooKeeper timeouts. 
+apiVersion: kuttl.dev/v1beta1 +kind: TestStep +commands: + - script: | + kubectl patch kafkacluster test-kafka -n $NAMESPACE --type merge -p '{"spec":{"brokers":{"roleGroups":{"default":{"replicas":0}}}}}' + - script: | + kubectl wait --for=delete pod -l app.kubernetes.io/instance=test-kafka -n $NAMESPACE --timeout=300s diff --git a/tests/templates/kuttl/smoke/90-shutdown-kafka.yaml b/tests/templates/kuttl/smoke/90-shutdown-kafka.yaml new file mode 100644 index 00000000..acce2cee --- /dev/null +++ b/tests/templates/kuttl/smoke/90-shutdown-kafka.yaml @@ -0,0 +1,11 @@ +--- +# Scale Kafka down before kuttl deletes the namespace. +# Without this, ZooKeeper and Kafka are terminated simultaneously, +# and Kafka hangs on controlled-shutdown ZooKeeper timeouts. +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +commands: + - script: | + kubectl patch kafkacluster test-kafka -n $NAMESPACE --type merge -p '{"spec":{"brokers":{"roleGroups":{"default":{"replicas":0}}}}}' + - script: | + kubectl wait --for=delete pod -l app.kubernetes.io/instance=test-kafka -n $NAMESPACE --timeout=300s diff --git a/tests/templates/kuttl/tls/90-shutdown-kafka.yaml b/tests/templates/kuttl/tls/90-shutdown-kafka.yaml new file mode 100644 index 00000000..acce2cee --- /dev/null +++ b/tests/templates/kuttl/tls/90-shutdown-kafka.yaml @@ -0,0 +1,11 @@ +--- +# Scale Kafka down before kuttl deletes the namespace. +# Without this, ZooKeeper and Kafka are terminated simultaneously, +# and Kafka hangs on controlled-shutdown ZooKeeper timeouts. 
+apiVersion: kuttl.dev/v1beta1 +kind: TestStep +commands: + - script: | + kubectl patch kafkacluster test-kafka -n $NAMESPACE --type merge -p '{"spec":{"brokers":{"roleGroups":{"default":{"replicas":0}}}}}' + - script: | + kubectl wait --for=delete pod -l app.kubernetes.io/instance=test-kafka -n $NAMESPACE --timeout=300s From cc853b99e2821c1df3dd68683114f06d3a15ca16 Mon Sep 17 00:00:00 2001 From: Nick Larsen Date: Fri, 10 Apr 2026 12:43:25 +0200 Subject: [PATCH 2/5] test: Add final step to scale down kafka when using KRaft This ensures Kafka brokers don't hang when KRaft controllers are shut down on namespace deletion by kuttl --- .../kuttl/configuration/90-shutdown-kafka.yaml | 16 ++++++++++++++++ .../operations-kraft/90-shutdown-kafka.yaml | 16 ++++++++++++++++ .../kuttl/smoke-kraft/90-shutdown-kafka.yaml | 16 ++++++++++++++++ .../kuttl/upgrade/90-shutdown-kafka.yaml | 16 ++++++++++++++++ 4 files changed, 64 insertions(+) create mode 100644 tests/templates/kuttl/configuration/90-shutdown-kafka.yaml create mode 100644 tests/templates/kuttl/operations-kraft/90-shutdown-kafka.yaml create mode 100644 tests/templates/kuttl/smoke-kraft/90-shutdown-kafka.yaml create mode 100644 tests/templates/kuttl/upgrade/90-shutdown-kafka.yaml diff --git a/tests/templates/kuttl/configuration/90-shutdown-kafka.yaml b/tests/templates/kuttl/configuration/90-shutdown-kafka.yaml new file mode 100644 index 00000000..97ed2393 --- /dev/null +++ b/tests/templates/kuttl/configuration/90-shutdown-kafka.yaml @@ -0,0 +1,16 @@ +--- +# Scale Kafka down before kuttl deletes the namespace. +# Without this, Kafka pods may hang during shutdown. +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +commands: + # Scale down brokers first, since they depend on controllers. 
+ - script: | + kubectl patch kafkacluster test-kafka -n $NAMESPACE --type merge -p '{"spec":{"brokers":{"roleGroups":{"default":{"replicas":0}}}}}' + - script: | + kubectl wait --for=delete pod -l app.kubernetes.io/instance=test-kafka,app.kubernetes.io/component=broker -n $NAMESPACE --timeout=300s + # Then scale down controllers once all brokers are gone. + - script: | + kubectl patch kafkacluster test-kafka -n $NAMESPACE --type merge -p '{"spec":{"controllers":{"roleGroups":{"default":{"replicas":0}}}}}' + - script: | + kubectl wait --for=delete pod -l app.kubernetes.io/instance=test-kafka -n $NAMESPACE --timeout=300s diff --git a/tests/templates/kuttl/operations-kraft/90-shutdown-kafka.yaml b/tests/templates/kuttl/operations-kraft/90-shutdown-kafka.yaml new file mode 100644 index 00000000..97ed2393 --- /dev/null +++ b/tests/templates/kuttl/operations-kraft/90-shutdown-kafka.yaml @@ -0,0 +1,16 @@ +--- +# Scale Kafka down before kuttl deletes the namespace. +# Without this, Kafka pods may hang during shutdown. +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +commands: + # Scale down brokers first, since they depend on controllers. + - script: | + kubectl patch kafkacluster test-kafka -n $NAMESPACE --type merge -p '{"spec":{"brokers":{"roleGroups":{"default":{"replicas":0}}}}}' + - script: | + kubectl wait --for=delete pod -l app.kubernetes.io/instance=test-kafka,app.kubernetes.io/component=broker -n $NAMESPACE --timeout=300s + # Then scale down controllers once all brokers are gone. 
+ - script: | + kubectl patch kafkacluster test-kafka -n $NAMESPACE --type merge -p '{"spec":{"controllers":{"roleGroups":{"default":{"replicas":0}}}}}' + - script: | + kubectl wait --for=delete pod -l app.kubernetes.io/instance=test-kafka -n $NAMESPACE --timeout=300s diff --git a/tests/templates/kuttl/smoke-kraft/90-shutdown-kafka.yaml b/tests/templates/kuttl/smoke-kraft/90-shutdown-kafka.yaml new file mode 100644 index 00000000..8d201b64 --- /dev/null +++ b/tests/templates/kuttl/smoke-kraft/90-shutdown-kafka.yaml @@ -0,0 +1,16 @@ +--- +# Scale Kafka down before kuttl deletes the namespace. +# Without this, Kafka pods may hang during shutdown. +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +commands: + # Scale down brokers first, since they depend on controllers. + - script: | + kubectl patch kafkacluster test-kafka -n $NAMESPACE --type merge -p '{"spec":{"brokers":{"roleGroups":{"default":{"replicas":0},"automatic-log-config":{"replicas":0},"custom-log-config":{"replicas":0}}}}}' + - script: | + kubectl wait --for=delete pod -l app.kubernetes.io/instance=test-kafka,app.kubernetes.io/component=broker -n $NAMESPACE --timeout=300s + # Then scale down controllers once all brokers are gone. + - script: | + kubectl patch kafkacluster test-kafka -n $NAMESPACE --type merge -p '{"spec":{"controllers":{"roleGroups":{"automatic-log-config":{"replicas":0},"custom-log-config":{"replicas":0}}}}}' + - script: | + kubectl wait --for=delete pod -l app.kubernetes.io/instance=test-kafka -n $NAMESPACE --timeout=300s diff --git a/tests/templates/kuttl/upgrade/90-shutdown-kafka.yaml b/tests/templates/kuttl/upgrade/90-shutdown-kafka.yaml new file mode 100644 index 00000000..97ed2393 --- /dev/null +++ b/tests/templates/kuttl/upgrade/90-shutdown-kafka.yaml @@ -0,0 +1,16 @@ +--- +# Scale Kafka down before kuttl deletes the namespace. +# Without this, Kafka pods may hang during shutdown. 
+apiVersion: kuttl.dev/v1beta1 +kind: TestStep +commands: + # Scale down brokers first, since they depend on controllers. + - script: | + kubectl patch kafkacluster test-kafka -n $NAMESPACE --type merge -p '{"spec":{"brokers":{"roleGroups":{"default":{"replicas":0}}}}}' + - script: | + kubectl wait --for=delete pod -l app.kubernetes.io/instance=test-kafka,app.kubernetes.io/component=broker -n $NAMESPACE --timeout=300s + # Then scale down controllers once all brokers are gone. + - script: | + kubectl patch kafkacluster test-kafka -n $NAMESPACE --type merge -p '{"spec":{"controllers":{"roleGroups":{"default":{"replicas":0}}}}}' + - script: | + kubectl wait --for=delete pod -l app.kubernetes.io/instance=test-kafka -n $NAMESPACE --timeout=300s From 810b1d103d36089f0e175cab23e37282126f7f2b Mon Sep 17 00:00:00 2001 From: Nick Larsen Date: Thu, 7 May 2026 17:45:23 +0200 Subject: [PATCH 3/5] Revert "test: Add final step to scale down kafka when using KRaft" This reverts commit cc853b99e2821c1df3dd68683114f06d3a15ca16. 
This relies on a bug fix that should come in with https://github.com/stackabletech/kafka-operator/issues/955 --- .../kuttl/configuration/90-shutdown-kafka.yaml | 16 ---------------- .../operations-kraft/90-shutdown-kafka.yaml | 16 ---------------- .../kuttl/smoke-kraft/90-shutdown-kafka.yaml | 16 ---------------- .../kuttl/upgrade/90-shutdown-kafka.yaml | 16 ---------------- 4 files changed, 64 deletions(-) delete mode 100644 tests/templates/kuttl/configuration/90-shutdown-kafka.yaml delete mode 100644 tests/templates/kuttl/operations-kraft/90-shutdown-kafka.yaml delete mode 100644 tests/templates/kuttl/smoke-kraft/90-shutdown-kafka.yaml delete mode 100644 tests/templates/kuttl/upgrade/90-shutdown-kafka.yaml diff --git a/tests/templates/kuttl/configuration/90-shutdown-kafka.yaml b/tests/templates/kuttl/configuration/90-shutdown-kafka.yaml deleted file mode 100644 index 97ed2393..00000000 --- a/tests/templates/kuttl/configuration/90-shutdown-kafka.yaml +++ /dev/null @@ -1,16 +0,0 @@ ---- -# Scale Kafka down before kuttl deletes the namespace. -# Without this, Kafka pods may hang during shutdown. -apiVersion: kuttl.dev/v1beta1 -kind: TestStep -commands: - # Scale down brokers first, since they depend on controllers. - - script: | - kubectl patch kafkacluster test-kafka -n $NAMESPACE --type merge -p '{"spec":{"brokers":{"roleGroups":{"default":{"replicas":0}}}}}' - - script: | - kubectl wait --for=delete pod -l app.kubernetes.io/instance=test-kafka,app.kubernetes.io/component=broker -n $NAMESPACE --timeout=300s - # Then scale down controllers once all brokers are gone. 
- - script: | - kubectl patch kafkacluster test-kafka -n $NAMESPACE --type merge -p '{"spec":{"controllers":{"roleGroups":{"default":{"replicas":0}}}}}' - - script: | - kubectl wait --for=delete pod -l app.kubernetes.io/instance=test-kafka -n $NAMESPACE --timeout=300s diff --git a/tests/templates/kuttl/operations-kraft/90-shutdown-kafka.yaml b/tests/templates/kuttl/operations-kraft/90-shutdown-kafka.yaml deleted file mode 100644 index 97ed2393..00000000 --- a/tests/templates/kuttl/operations-kraft/90-shutdown-kafka.yaml +++ /dev/null @@ -1,16 +0,0 @@ ---- -# Scale Kafka down before kuttl deletes the namespace. -# Without this, Kafka pods may hang during shutdown. -apiVersion: kuttl.dev/v1beta1 -kind: TestStep -commands: - # Scale down brokers first, since they depend on controllers. - - script: | - kubectl patch kafkacluster test-kafka -n $NAMESPACE --type merge -p '{"spec":{"brokers":{"roleGroups":{"default":{"replicas":0}}}}}' - - script: | - kubectl wait --for=delete pod -l app.kubernetes.io/instance=test-kafka,app.kubernetes.io/component=broker -n $NAMESPACE --timeout=300s - # Then scale down controllers once all brokers are gone. - - script: | - kubectl patch kafkacluster test-kafka -n $NAMESPACE --type merge -p '{"spec":{"controllers":{"roleGroups":{"default":{"replicas":0}}}}}' - - script: | - kubectl wait --for=delete pod -l app.kubernetes.io/instance=test-kafka -n $NAMESPACE --timeout=300s diff --git a/tests/templates/kuttl/smoke-kraft/90-shutdown-kafka.yaml b/tests/templates/kuttl/smoke-kraft/90-shutdown-kafka.yaml deleted file mode 100644 index 8d201b64..00000000 --- a/tests/templates/kuttl/smoke-kraft/90-shutdown-kafka.yaml +++ /dev/null @@ -1,16 +0,0 @@ ---- -# Scale Kafka down before kuttl deletes the namespace. -# Without this, Kafka pods may hang during shutdown. -apiVersion: kuttl.dev/v1beta1 -kind: TestStep -commands: - # Scale down brokers first, since they depend on controllers. 
- - script: | - kubectl patch kafkacluster test-kafka -n $NAMESPACE --type merge -p '{"spec":{"brokers":{"roleGroups":{"default":{"replicas":0},"automatic-log-config":{"replicas":0},"custom-log-config":{"replicas":0}}}}}' - - script: | - kubectl wait --for=delete pod -l app.kubernetes.io/instance=test-kafka,app.kubernetes.io/component=broker -n $NAMESPACE --timeout=300s - # Then scale down controllers once all brokers are gone. - - script: | - kubectl patch kafkacluster test-kafka -n $NAMESPACE --type merge -p '{"spec":{"controllers":{"roleGroups":{"automatic-log-config":{"replicas":0},"custom-log-config":{"replicas":0}}}}}' - - script: | - kubectl wait --for=delete pod -l app.kubernetes.io/instance=test-kafka -n $NAMESPACE --timeout=300s diff --git a/tests/templates/kuttl/upgrade/90-shutdown-kafka.yaml b/tests/templates/kuttl/upgrade/90-shutdown-kafka.yaml deleted file mode 100644 index 97ed2393..00000000 --- a/tests/templates/kuttl/upgrade/90-shutdown-kafka.yaml +++ /dev/null @@ -1,16 +0,0 @@ ---- -# Scale Kafka down before kuttl deletes the namespace. -# Without this, Kafka pods may hang during shutdown. -apiVersion: kuttl.dev/v1beta1 -kind: TestStep -commands: - # Scale down brokers first, since they depend on controllers. - - script: | - kubectl patch kafkacluster test-kafka -n $NAMESPACE --type merge -p '{"spec":{"brokers":{"roleGroups":{"default":{"replicas":0}}}}}' - - script: | - kubectl wait --for=delete pod -l app.kubernetes.io/instance=test-kafka,app.kubernetes.io/component=broker -n $NAMESPACE --timeout=300s - # Then scale down controllers once all brokers are gone. 
- - script: | - kubectl patch kafkacluster test-kafka -n $NAMESPACE --type merge -p '{"spec":{"controllers":{"roleGroups":{"default":{"replicas":0}}}}}' - - script: | - kubectl wait --for=delete pod -l app.kubernetes.io/instance=test-kafka -n $NAMESPACE --timeout=300s From 6982ad0b04b3c8a60f809f71d290cb357cce554a Mon Sep 17 00:00:00 2001 From: Nick Larsen Date: Fri, 8 May 2026 10:03:20 +0200 Subject: [PATCH 4/5] test(logging): Scale down the custom-log-config RoleGroup Co-authored-by: Andrew Kenworthy --- tests/templates/kuttl/logging/90-shutdown-kafka.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/templates/kuttl/logging/90-shutdown-kafka.yaml b/tests/templates/kuttl/logging/90-shutdown-kafka.yaml index 809a3cf5..c072e08c 100644 --- a/tests/templates/kuttl/logging/90-shutdown-kafka.yaml +++ b/tests/templates/kuttl/logging/90-shutdown-kafka.yaml @@ -6,6 +6,6 @@ apiVersion: kuttl.dev/v1beta1 kind: TestStep commands: - script: | - kubectl patch kafkacluster test-kafka -n $NAMESPACE --type merge -p '{"spec":{"brokers":{"roleGroups":{"automatic-log-config":{"replicas":0}}}}}' + kubectl patch kafkacluster test-kafka -n $NAMESPACE --type merge -p '{"spec":{"brokers":{"roleGroups":{"automatic-log-config":{"replicas":0}, "custom-log-config":{"replicas":0}}}}}' - script: | kubectl wait --for=delete pod -l app.kubernetes.io/instance=test-kafka -n $NAMESPACE --timeout=300s From 932431c68199ee2ebdcce7900db9a85133e584ea Mon Sep 17 00:00:00 2001 From: Nick Larsen Date: Fri, 8 May 2026 10:03:48 +0200 Subject: [PATCH 5/5] test(delete-role): Add note about not scaling down the already deleted RoleGroup --- tests/templates/kuttl/delete-rolegroup/90-shutdown-kafka.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/templates/kuttl/delete-rolegroup/90-shutdown-kafka.yaml b/tests/templates/kuttl/delete-rolegroup/90-shutdown-kafka.yaml index acce2cee..3339f085 100644 --- a/tests/templates/kuttl/delete-rolegroup/90-shutdown-kafka.yaml +++ 
b/tests/templates/kuttl/delete-rolegroup/90-shutdown-kafka.yaml @@ -5,6 +5,7 @@ apiVersion: kuttl.dev/v1beta1 kind: TestStep commands: + # Note: By the time this script runs, the secondary RoleGroup has already been deleted, therefore we don't scale it down. - script: | kubectl patch kafkacluster test-kafka -n $NAMESPACE --type merge -p '{"spec":{"brokers":{"roleGroups":{"default":{"replicas":0}}}}}' - script: |