Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 36 additions & 0 deletions test/e2e/features/rollout-restart.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
Feature: Rollout Restart User Changes
# Verifies that user-added pod template annotations persist after OLM reconciliation.
# Fixes: https://github.com/operator-framework/operator-lifecycle-manager/issues/3392

Background:
Given OLM is available
And ClusterCatalog "test" serves bundles
And ServiceAccount "olm-sa" with needed permissions is available in ${TEST_NAMESPACE}

Scenario: User-initiated deployment changes persist after OLM reconciliation
When ClusterExtension is applied
"""
apiVersion: olm.operatorframework.io/v1
kind: ClusterExtension
metadata:
name: ${NAME}
spec:
namespace: ${TEST_NAMESPACE}
serviceAccount:
name: olm-sa
source:
sourceType: Catalog
catalog:
packageName: test
selector:
matchLabels:
"olm.operatorframework.io/metadata.name": test-catalog
"""
Then ClusterExtension is available
And resource "deployment/test-operator" is available
When user performs rollout restart on "deployment/test-operator"
Then deployment "test-operator" has restart annotation
# Allow generous time for OLM reconciliation to act on the Deployment after the user change
# (no specific reconciliation cadence is guaranteed)
And I wait for "30" seconds
# Verify user changes persisted after OLM reconciliation
Comment on lines +33 to +35
Copy link

Copilot AI Mar 9, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This comment hard-codes an assumed reconciliation period (“controller runs every 10s”). If reconciliation is event-driven (e.g., triggered by the rollout restart changing the Deployment), this note is misleading and the fixed 30s sleep may be unnecessary. Consider rewording to avoid a specific cadence and/or replacing the fixed sleep with waiting for an observable reconciliation signal.

Copilot uses AI. Check for mistakes.
Then deployment "test-operator" has restart annotation
84 changes: 84 additions & 0 deletions test/e2e/steps/steps.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import (
"os/exec"
"path/filepath"
"reflect"
"strconv"
"strings"
"time"

Expand Down Expand Up @@ -93,6 +94,9 @@ func RegisterSteps(sc *godog.ScenarioContext) {
sc.Step(`^(?i)resource apply fails with error msg containing "([^"]+)"$`, ResourceApplyFails)
sc.Step(`^(?i)resource "([^"]+)" is eventually restored$`, ResourceRestored)
sc.Step(`^(?i)resource "([^"]+)" matches$`, ResourceMatches)
sc.Step(`^(?i)user performs rollout restart on "([^"]+)"$`, UserPerformsRolloutRestart)
sc.Step(`^(?i)deployment "([^"]+)" has restart annotation$`, DeploymentHasRestartAnnotation)
sc.Step(`^(?i)I wait for "([^"]+)" seconds$`, WaitForSeconds)

sc.Step(`^(?i)ServiceAccount "([^"]*)" with needed permissions is available in test namespace$`, ServiceAccountWithNeededPermissionsIsAvailableInNamespace)
sc.Step(`^(?i)ServiceAccount "([^"]*)" with needed permissions is available in \${TEST_NAMESPACE}$`, ServiceAccountWithNeededPermissionsIsAvailableInNamespace)
Expand Down Expand Up @@ -1312,3 +1316,83 @@ func latestActiveRevisionForExtension(extName string) (*ocv1.ClusterExtensionRev

return latest, nil
}

// UserPerformsRolloutRestart simulates a user running "kubectl rollout restart deployment/<name>".
// This adds a restart annotation to trigger a rolling restart of pods.
// This is used to test the generic fix - OLM should not undo ANY user-added annotations.
// In OLMv0, OLM would undo this change. In OLMv1, it should stay because kubectl owns it.
// See: https://github.com/operator-framework/operator-lifecycle-manager/issues/3392
func UserPerformsRolloutRestart(ctx context.Context, resourceName string) error {
sc := scenarioCtx(ctx)
resourceName = substituteScenarioVars(resourceName, sc)

kind, deploymentName, ok := strings.Cut(resourceName, "/")
if !ok {
return fmt.Errorf("invalid resource name format: %s (expected kind/name)", resourceName)
}

if kind != "deployment" {
return fmt.Errorf("only deployment resources are supported for restart annotation, got: %s", kind)
}

// Run kubectl rollout restart to add the restart annotation.
// This is the real command users run, so we test actual user behavior.
out, err := k8sClient("rollout", "restart", resourceName, "-n", sc.namespace)
if err != nil {
return fmt.Errorf("failed to rollout restart %s: %w; stderr: %s", resourceName, err, stderrOutput(err))
}
Comment on lines +1340 to +1343
Copy link

Copilot AI Mar 9, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On kubectl failure, this error drops useful diagnostics (stderr). Other steps include stderrOutput(err) in failures, which helps a lot when debugging CI flakes. Consider appending stderr (and possibly the command output) to this error message.

Copilot uses AI. Check for mistakes.

logger.V(1).Info("Rollout restart initiated", "deployment", deploymentName, "output", out)

return nil
}

// DeploymentHasRestartAnnotation checks that a deployment's pod template has
// the kubectl.kubernetes.io/restartedAt annotation. Fails immediately if absent,
// so a failing boxcutter scenario won't stall the entire suite.
func DeploymentHasRestartAnnotation(ctx context.Context, deploymentName string) error {
	sc := scenarioCtx(ctx)
	deploymentName = substituteScenarioVars(deploymentName, sc)

	// kubectl rollout restart records its timestamp under this pod-template annotation.
	const restartAnnotationKey = "kubectl.kubernetes.io/restartedAt"
	jsonPath := fmt.Sprintf("jsonpath={.spec.template.metadata.annotations['%s']}", restartAnnotationKey)

	out, err := k8sClient("get", "deployment", deploymentName, "-n", sc.namespace, "-o", jsonPath)
	if err != nil {
		return fmt.Errorf("failed to get restart annotation on deployment %s: %w; stderr: %s", deploymentName, err, stderrOutput(err))
	}

	restartedAt := strings.TrimSpace(out)
	if restartedAt == "" {
		return fmt.Errorf("deployment %s is missing expected annotation %s", deploymentName, restartAnnotationKey)
	}

	logger.V(1).Info("Restart annotation found", "deployment", deploymentName, "restartedAt", restartedAt)
	return nil
}

// WaitForSeconds blocks for the given number of seconds (passed as a string by
// the step definition) or until the scenario context is canceled, whichever
// comes first. Used when a test needs to ensure that at least one OLM
// reconciliation cycle has occurred before checking a condition; a generous
// fixed delay is used rather than polling for an observable signal.
func WaitForSeconds(ctx context.Context, seconds string) error {
	n, err := strconv.Atoi(seconds)
	if err != nil {
		return fmt.Errorf("invalid seconds value %s: %w", seconds, err)
	}
	if n <= 0 {
		return fmt.Errorf("seconds value must be greater than 0, got %d", n)
	}

	logger.V(1).Info("Waiting for reconciliation", "seconds", n)

	// Stop the timer on every exit path so its resources are released promptly.
	t := time.NewTimer(time.Duration(n) * time.Second)
	defer t.Stop()

	select {
	case <-ctx.Done():
		return fmt.Errorf("wait canceled: %w", ctx.Err())
	case <-t.C:
		logger.V(1).Info("Wait complete")
		return nil
	}
}
Loading