Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions cmd/hyperfleet-e2e/test/cmd.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ var args struct {
junitReport string
dryRun bool
flakeAttempts int
e2eRunID string
}

func init() {
Expand All @@ -48,6 +49,8 @@ func init() {
"List matching specs without executing them")
pfs.IntVar(&args.flakeAttempts, "flake-attempts", 1,
"Number of attempts for flaky tests (1 = no retries, 3 = up to 2 retries)")
pfs.StringVar(&args.e2eRunID, "e2e-run-id", "",
"E2E run ID for labeling test resources (overrides E2E_RUN_ID env var; optional)")
}

func run(cmd *cobra.Command, argv []string) {
Expand Down Expand Up @@ -88,6 +91,15 @@ func run(cmd *cobra.Command, argv []string) {
os.Exit(1)
}

// If --e2e-run-id was provided, inject it into the environment so
// GetE2ETestRunID() picks it up.
if args.e2eRunID != "" {
if err := os.Setenv("E2E_RUN_ID", args.e2eRunID); err != nil {
log.Printf("Failed to set E2E_RUN_ID: %v\n", err)
os.Exit(1)
}
}

e2e.SetSuiteConfig(cfg)

exitCode := e2e.RunTests(cmd.Context())
Expand Down
7 changes: 7 additions & 0 deletions deploy-scripts/deploy-clm.sh
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ fi

ACTION="${ACTION:-}"
NAMESPACE="${NAMESPACE:-}"
export E2E_RUN_ID="${E2E_RUN_ID:-}"
DRY_RUN="${DRY_RUN:-false}"
VERBOSE="${VERBOSE:-false}"

Expand Down Expand Up @@ -141,6 +142,8 @@ REQUIRED FLAGS:

OPTIONAL FLAGS:
--namespace <namespace> Kubernetes namespace (default from .env: ${NAMESPACE})
--e2e-run-id <id> E2E run ID for resource labeling
Set automatically in prow CI via E2E_RUN_ID env var

# Component Selection
--skip-api Skip API installation
Expand Down Expand Up @@ -256,6 +259,10 @@ parse_arguments() {
NAMESPACE="$2"
shift 2
;;
--e2e-run-id)
export E2E_RUN_ID="$2"
shift 2
;;
--skip-api)
INSTALL_API=false
shift
Expand Down
14 changes: 14 additions & 0 deletions deploy-scripts/lib/adapter.sh
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,16 @@ install_adapter_instance() {
return 1
fi

local e2e_run_id="${E2E_RUN_ID:-}"
if [[ -z "${e2e_run_id}" ]]; then
log_error "E2E_RUN_ID is not set. Pass --e2e-run-id to deploy-clm.sh or set the E2E_RUN_ID environment variable."
return 1
fi
if (( ${#e2e_run_id} > 63 )); then
log_error "E2E_RUN_ID '${e2e_run_id}' is ${#e2e_run_id} characters, exceeds the 63-character Kubernetes label value limit."
return 1
fi

Comment on lines +170 to +179

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🎯 Functional Correctness | 🟠 Major | ⚡ Quick win

Missing character validation for E2E_RUN_ID label value.

Lines 170-179 validate length (≤63 chars) but not allowed characters. Kubernetes label values must match (([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])?. Invalid characters (e.g., spaces, special chars) will cause Helm deployment failure at line 218.

🔒 Proposed validation fix
   if (( ${`#e2e_run_id`} > 63 )); then
     log_error "E2E_RUN_ID '${e2e_run_id}' is ${`#e2e_run_id`} characters, exceeds the 63-character Kubernetes label value limit."
     return 1
   fi
+  if [[ ! "${e2e_run_id}" =~ ^([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9]$ ]]; then
+    log_error "E2E_RUN_ID '${e2e_run_id}' contains invalid characters. Must match Kubernetes label value format: ([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9]"
+    return 1
+  fi
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@deploy-scripts/lib/adapter.sh` around lines 170 - 179, The E2E_RUN_ID
validation block is missing character validation for Kubernetes label values.
After the existing length check (which validates that e2e_run_id does not exceed
63 characters), add an additional validation check to ensure the e2e_run_id
matches the Kubernetes label value regex pattern which allows only alphanumeric
characters, hyphens, underscores, and periods, and must start and end with an
alphanumeric character. Use a conditional test to match the variable against the
required pattern and log an error with a clear message describing which
characters are allowed if the validation fails.

if [[ "${DRY_RUN}" == "true" ]]; then
log_info "[DRY-RUN] Would install adapter with:"
log_info " Release name: ${release_name}"
Expand All @@ -177,6 +187,7 @@ install_adapter_instance() {
log_info " Subscription ID: ${subscription_id}"
log_info " Topic: ${topic}"
log_info " Dead Letter Topic: ${dead_letter_topic}"
log_info " E2E Run ID: ${e2e_run_id}"
return 0
fi

Expand All @@ -202,6 +213,9 @@ install_adapter_instance() {
--set "broker.googlepubsub.subscriptionId=${subscription_id}"
--set "broker.googlepubsub.topic=${topic}"
--set "broker.googlepubsub.deadLetterTopic=${dead_letter_topic}"
--set "env[0].name=E2E_RUN_ID"
--set "env[0].value=${e2e_run_id}"
--set "labels.e2e\\.hyperfleet\\.io/run-id=${e2e_run_id}"
--labels "adapter-resource-type=${resource_type},adapter-name=${adapter_name}"
)

Expand Down
19 changes: 17 additions & 2 deletions pkg/e2e/suite.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,25 @@ var _ = ginkgo.BeforeSuite(func() {

cfg.Display()

logger.Info("starting hyperfleet-e2e test suite - each test creates temporary resources")
// Get run ID (optional - empty string if not set)
runID, err := helper.GetE2ETestRunID()
if err != nil {
log.Fatalf("Failed to get run ID: %v", err)
}
helper.SetRunID(runID)

if runID != "" {
logger.Info("starting hyperfleet-e2e test suite",
"run_id", runID,
"message", "each test creates temporary resources")
} else {
logger.Info("starting hyperfleet-e2e test suite",
"message", "each test creates temporary resources (no run-id set)")
}
})

var _ = ginkgo.AfterSuite(func() {
runID := helper.GetRunID()
helper.ClearSuiteConfig()
logger.Info("test suite completed")
logger.Info("test suite completed", "run_id", runID)
})
20 changes: 16 additions & 4 deletions pkg/helper/adapter.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,13 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// generateRandomString generates a random alphanumeric string of the specified length
func generateRandomString(length int) string {
// randomAlphanumeric returns a random lowercase alphanumeric string of the given length.
func randomAlphanumeric(length int) string {
const charset = "abcdefghijklmnopqrstuvwxyz0123456789"
b := make([]byte, length)
for i := range b {
n, err := rand.Int(rand.Reader, big.NewInt(int64(len(charset))))
if err != nil {
// Fallback: use current time nanoseconds for basic randomness
b[i] = charset[(time.Now().UnixNano()+int64(i))%int64(len(charset))]
} else {
b[i] = charset[n.Int64()]
Expand Down Expand Up @@ -191,6 +190,19 @@ func (h *Helper) DeployAdapter(ctx context.Context, opts AdapterDeploymentOption
"--set", fmt.Sprintf("fullnameOverride=%s", releaseName),
)

// The run ID (h.RunID) is passed to Helm via --set labels.e2e\.hyperfleet\.io/run-id=<run-id> to label deployed adapter
// and injected into the adapter container as env.E2E_RUN_ID so the adapter can read it via its task config.
if h.RunID != "" {
helmArgs = append(helmArgs,
"--set", fmt.Sprintf("labels.e2e\\.hyperfleet\\.io/run-id=%s", h.RunID),
"--set", "env[0].name=E2E_RUN_ID",
"--set", fmt.Sprintf("env[0].value=%s", h.RunID),
)
logger.Info("injecting run ID into Helm chart",
"run_id", h.RunID,
"release_name", releaseName)
}

// Override image pull policy if set (e.g. IfNotPresent for local kind clusters)
if policy := os.Getenv("IMAGE_PULL_POLICY"); policy != "" {
helmArgs = append(helmArgs, "--set", fmt.Sprintf("image.pullPolicy=%s", policy))
Expand Down Expand Up @@ -323,7 +335,7 @@ func (h *Helper) cleanupClusterScopedResources(ctx context.Context, releaseName
// outputDir is configured via OUTPUT_DIR env var or config file (defaults to "output")
func (h *Helper) saveDiagnosticLogs(ctx context.Context, adapterName, releaseName, namespace string) {
// Generate output directory with adapter name and random suffix
randomSuffix := generateRandomString(4)
randomSuffix := randomAlphanumeric(4)
outputDir := filepath.Join(h.Cfg.OutputDir, fmt.Sprintf("%s-%s", adapterName, randomSuffix))

// Create output directory
Expand Down
1 change: 1 addition & 0 deletions pkg/helper/helper.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ type Helper struct {
Client *client.HyperFleetClient
K8sClient *k8sclient.Client
MaestroClient *maestro.Client
RunID string
}

// TestDataPath resolves a relative path within the testdata directory
Expand Down
47 changes: 47 additions & 0 deletions pkg/helper/suite.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
package helper

import (
"fmt"
"log"
"os"
"regexp"
"sync"

"github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/client"
Expand All @@ -13,6 +16,7 @@ var (
// suiteConfig is loaded once in cmd layer before tests start
suiteConfig *config.Config
configMutex sync.RWMutex
runID string
)

// SetSuiteConfig sets the global suite configuration for the test suite
Expand All @@ -36,6 +40,48 @@ func ClearSuiteConfig() {
suiteConfig = nil
}

// SetRunID sets the global run ID for the test suite
func SetRunID(id string) {
configMutex.Lock()
defer configMutex.Unlock()
runID = id
}

// GetRunID returns the global run ID for the test suite
func GetRunID() string {
configMutex.RLock()
defer configMutex.RUnlock()
return runID
}

// maxRunIDLength is the maximum allowed length for a run ID.
// Kubernetes label values are limited to 63 characters.
const maxRunIDLength = 63

// labelValueRegex matches valid Kubernetes label values per
// https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/#syntax-and-character-set
var labelValueRegex = regexp.MustCompile(`^[a-zA-Z0-9]([-_.a-zA-Z0-9]*[a-zA-Z0-9])?$`)

// GetE2ETestRunID returns the run identifier for this E2E test suite execution.
// It reads the E2E_RUN_ID environment variable (set by CI/prow to the namespace name).
// Returns empty string if E2E_RUN_ID is not set (run-id is optional).
func GetE2ETestRunID() (string, error) {
id := os.Getenv("E2E_RUN_ID")
if id == "" {
// Run ID is optional - return empty string
return "", nil
}

// Validate run ID format and length
if len(id) > maxRunIDLength {
return "", fmt.Errorf("E2E_RUN_ID %q is %d characters, exceeds the %d-character Kubernetes label value limit", id, len(id), maxRunIDLength)
}
if !labelValueRegex.MatchString(id) {
return "", fmt.Errorf("E2E_RUN_ID %q contains characters invalid for a Kubernetes label value", id)
}
return id, nil
}

// New creates a helper instance for testing
// Creates a new helper per test
func New() *Helper {
Expand Down Expand Up @@ -67,6 +113,7 @@ func newHelper(cfg *config.Config) (*Helper, error) {
Cfg: cfg,
Client: cl,
K8sClient: k8sClient,
RunID: GetRunID(),
// MaestroClient is initialized lazily via GetMaestroClient() to avoid
// unnecessary K8s API calls in test suites that don't use Maestro
}, nil
Expand Down
5 changes: 5 additions & 0 deletions testdata/adapter-configs/cl-crash/adapter-task-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@ params:
source: "event.id"
type: "string"
required: true
- name: "e2eRunId"
source: "env.E2E_RUN_ID"
type: "string"
required: false

preconditions:
- name: "clusterStatus"
Expand Down Expand Up @@ -49,6 +53,7 @@ resources:
labels:
hyperfleet.io/cluster-id: "{{ .clusterId }}"
hyperfleet.io/cluster-name: "{{ .clusterName }}"
e2e.hyperfleet.io/run-id: "{{ .e2eRunId }}"

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🗄️ Data Integrity & Integration | 🟠 Major | ⚡ Quick win

Unify run label key across Helm and adapter resources

Line 56 uses e2e.hyperfleet.io/run-id, but Helm injection uses e2e.hyperfleet.io/run in pkg/helper/adapter.go (--set labels.e2e\\.hyperfleet\\.io/run=%s). This splits selectors across two keys and breaks single-key cleanup/reporting.

Proposed fix
-          e2e.hyperfleet.io/run-id: "{{ .e2eRunId }}"
+          e2e.hyperfleet.io/run: "{{ .e2eRunId }}"

Or standardize the Helm key to run-id everywhere instead.

🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@testdata/adapter-configs/cl-crash/adapter-task-config.yaml` at line 56, The
label key `e2e.hyperfleet.io/run-id` on line 56 of the adapter-task-config.yaml
file is inconsistent with the Helm injection that uses `e2e.hyperfleet.io/run`
in pkg/helper/adapter.go (set via --set labels.e2e\\.hyperfleet\\.io/run=%s).
Change the label key from `e2e.hyperfleet.io/run-id` to `e2e.hyperfleet.io/run`
to unify the label keys across both Helm and adapter resources, ensuring cleanup
and reporting use a single consistent selector key.

annotations:
hyperfleet.io/generation: "{{ .generationSpec }}"
discovery:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@ params:
source: "event.id"
type: "string"
required: true
- name: "e2eRunId"
source: "env.E2E_RUN_ID"
type: "string"
required: false

# Preconditions with valid operators and CEL expressions
preconditions:
Expand Down Expand Up @@ -35,7 +39,6 @@ preconditions:
? status.conditions.filter(c, c.type == "Reconciled")[0].last_transition_time
: now()
)).getSeconds() > 300

- name: "clusterAdapterStatus"
api_call:
method: "GET"
Expand All @@ -46,7 +49,6 @@ preconditions:
capture:
- name: "clusterJobStatus"
field: "{.items[?(@.adapter=='cl-job')].conditions[?(@.type=='Available')].status}"

- name: "validationCheck"
expression: |
is_deleting || (clusterJobStatus == "True" && (clusterNotReconciled || clusterReconciledTTL))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ metadata:
labels:
hyperfleet.io/cluster-id: "{{ .clusterId }}"
hyperfleet.io/resource-type: "deployment"
e2e.hyperfleet.io/run-id: "{{ .e2eRunId }}"
Comment thread
mliptak0 marked this conversation as resolved.
annotations:
hyperfleet.io/generation: "{{ .generationSpec }}"
spec:
Expand All @@ -20,10 +21,10 @@ spec:
labels:
app: test
hyperfleet.io/cluster-id: "{{ .clusterId }}"
e2e.hyperfleet.io/run-id: "{{ .e2eRunId }}"
spec:
containers:
- name: test
image: nginx:latest
ports:
- containerPort: 80

Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,10 @@ params:
source: "event.id"
type: "string"
required: true
- name: "testRunId"
source: "env.TEST_RUN_ID"
- name: "e2eRunId"
source: "env.E2E_RUN_ID"
type: "string"
required: false
default: "TEST_RUN_ID"
- name: "ci"
source: "env.CI"
type: "string"
Expand Down Expand Up @@ -62,7 +61,7 @@ resources:
labels:
hyperfleet.io/cluster-id: "{{ .clusterId }}"
hyperfleet.io/cluster-name: "{{ .clusterName }}"
e2e.hyperfleet.io/test-run-id: "{{ .testRunId }}"
e2e.hyperfleet.io/run-id: "{{ .e2eRunId }}"
e2e.hyperfleet.io/ci: "{{ .ci }}"
e2e.hyperfleet.io/managed-by: "test-framework"
annotations:
Expand Down
6 changes: 4 additions & 2 deletions testdata/adapter-configs/cl-job/adapter-task-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@ params:
source: "event.id"
type: "string"
required: true
- name: "e2eRunId"
source: "env.E2E_RUN_ID"
type: "string"
required: false

# Preconditions with valid operators and CEL expressions
preconditions:
Expand Down Expand Up @@ -35,7 +39,6 @@ preconditions:
? status.conditions.filter(c, c.type == "Reconciled")[0].last_transition_time
: now()
)).getSeconds() > 300

- name: "clusterAdapterStatus"
api_call:
method: "GET"
Expand All @@ -46,7 +49,6 @@ preconditions:
capture:
- name: "clusterNamespaceStatus"
field: "{.items[?(@.adapter=='cl-namespace')].data.namespace.status}"

- name: "validationCheck"
expression: |
is_deleting || (clusterNamespaceStatus == "Active" && (clusterNotReconciled || clusterReconciledTTL))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,15 @@ metadata:
hyperfleet.io/cluster-id: "{{ .clusterId }}"
hyperfleet.io/resource-type: "job"
app: test-job
e2e.hyperfleet.io/run-id: "{{ .e2eRunId }}"

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🗄️ Data Integrity & Integration | 🟠 Major | ⚡ Quick win

Add run-id label on Job Pod template

Line 11 labels only the Job object. Pods created by this Job are not guaranteed to carry e2e.hyperfleet.io/run-id unless it is set under spec.template.metadata.labels.

Proposed fix
 spec:
   backoffLimit: 0
   template:
+    metadata:
+      labels:
+        e2e.hyperfleet.io/run-id: "{{ .e2eRunId }}"
     spec:
       restartPolicy: Never
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@testdata/adapter-configs/cl-job/adapter-task-resource-job.yaml` at line 11,
The label e2e.hyperfleet.io/run-id is currently set only on the Job object
metadata, but Pods created by this Job will not inherit it. To fix this, add the
same label e2e.hyperfleet.io/run-id: "{{ .e2eRunId }}" under the Pod template
specification at spec.template.metadata.labels so that all Pods spawned by this
Job will carry the run-id label for proper tracking and identification.

annotations:
hyperfleet.io/generation: "{{ .generationSpec }}"
spec:
backoffLimit: 0
template:
metadata:
labels:
e2e.hyperfleet.io/run-id: "{{ .e2eRunId }}"
spec:
restartPolicy: Never
containers:
Expand Down
Loading