From 66165c180089e6cd377b89d5253e6faa9de86329 Mon Sep 17 00:00:00 2001 From: Andrii Chubatiuk Date: Fri, 22 May 2026 17:22:47 +0300 Subject: [PATCH 01/31] reconcile: update statefulset status on success for OnDelete update strategy --- docs/CHANGELOG.md | 2 ++ .../operator/factory/reconcile/statefulset.go | 33 +++++++++++++++---- .../vmalertmanager_reconcile_test.go | 6 ++-- .../vmanomaly/vmanomaly_reconcile_test.go | 6 ++-- .../vmcluster/vmcluster_reconcile_test.go | 12 ++++--- .../controller/operator/vlagent_controller.go | 1 + .../controller/operator/vmagent_controller.go | 1 + .../operator/vmalertmanager_controller.go | 1 + .../operator/vmanomaly_controller.go | 1 + .../operator/vmcluster_controller.go | 1 + 10 files changed, 49 insertions(+), 15 deletions(-) diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index fbcbfe834..de4c7e75a 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -13,6 +13,8 @@ aliases: ## tip +* BUGFIX: [vmoperator](https://docs.victoriametrics.com/operator/): update status currentRevision and currentReplicas for StatefulSet with OnDelete update strategy. See [#1242](https://github.com/VictoriaMetrics/operator/issues/1242). + ## [v0.68.5](https://github.com/VictoriaMetrics/operator/releases/tag/v0.68.5) **Release date:** 27 May 2026 diff --git a/internal/controller/operator/factory/reconcile/statefulset.go b/internal/controller/operator/factory/reconcile/statefulset.go index c46c26e8b..09c2e4f0d 100644 --- a/internal/controller/operator/factory/reconcile/statefulset.go +++ b/internal/controller/operator/factory/reconcile/statefulset.go @@ -212,13 +212,32 @@ type rollingUpdateOpts struct { delete bool } +// patchSTSCurrentRevision patches statefulset status.currentRevision to match status.updateRevision +// after all pods are updated. This is needed because Kubernetes does not update currentRevision +// for OnDelete strategy, leaving stale data visible to monitoring tools. +// See https://github.com/VictoriaMetrics/operator/issues/1242 +func patchSTSCurrentRevision(ctx context.Context, rclient client.Client, nsn types.NamespacedName, updateRevision string, replicas int32) error { + var sts appsv1.StatefulSet + if err := rclient.Get(ctx, nsn, &sts); err != nil { + return fmt.Errorf("cannot get statefulset for status update: %w", err) + } + if sts.Status.CurrentRevision == updateRevision { + return nil + } + logger.WithContext(ctx).Info(fmt.Sprintf("updating statefulset=%s/%s status currentRevision from %q to %q", nsn.Namespace, nsn.Name, sts.Status.CurrentRevision, updateRevision)) + sts.Status.CurrentRevision = updateRevision + // currentReplicas is not updated with OnDelete strategy too + sts.Status.CurrentReplicas = replicas + if err := rclient.Status().Update(ctx, &sts); err != nil { + return fmt.Errorf("cannot update statefulset=%s/%s currentRevision status: %w", nsn.Namespace, nsn.Name, err) + } + return nil +} + // we perform rolling update on sts by manually evicting pods one by one or in batches // we check sts revision (kubernetes controller-manager is responsible for that) // and compare pods revision label with sts revision // if it doesn't match - updated is needed -// -// we always check if sts.Status.CurrentRevision needs update, to keep it equal to UpdateRevision -// see https://github.com/kubernetes/kube-state-metrics/issues/1324#issuecomment-1779751992 func performRollingUpdateOnSts(ctx context.Context, rclient client.Client, obj *appsv1.StatefulSet, o rollingUpdateOpts) error { time.Sleep(podWaitReadyInterval) nsn := types.NamespacedName{ @@ -278,10 +297,10 @@ func performRollingUpdateOnSts(ctx context.Context, rclient client.Client, obj * return fmt.Errorf("actual pod count: %d less than needed: %d, possible statefulset misconfiguration", totalPodsCount, neededPodCount) } - updatedNeeded := len(podsForUpdate) != 0 || len(updatedPods) != 0 - if !updatedNeeded { + updateNeeded := len(podsForUpdate) != 0 || len(updatedPods) != 0 + if !updateNeeded { l.V(1).Info("no pod needs to be updated") - return nil + return patchSTSCurrentRevision(ctx, rclient, nsn, stsVersion, int32(neededPodCount)) } l.Info(fmt.Sprintf("discovered already updated pods=%d, pods needed to be update=%d", len(updatedPods), len(podsForUpdate))) @@ -355,7 +374,7 @@ func performRollingUpdateOnSts(ctx context.Context, rclient client.Client, obj * l.Info(fmt.Sprintf("finished statefulset update from revision=%q to revision=%q", sts.Status.CurrentRevision, stsVersion)) - return nil + return patchSTSCurrentRevision(ctx, rclient, nsn, stsVersion, int32(neededPodCount)) } // PodIsReady check is pod is ready diff --git a/internal/controller/operator/factory/vmalertmanager/vmalertmanager_reconcile_test.go b/internal/controller/operator/factory/vmalertmanager/vmalertmanager_reconcile_test.go index d5c499260..9c310499e 100644 --- a/internal/controller/operator/factory/vmalertmanager/vmalertmanager_reconcile_test.go +++ b/internal/controller/operator/factory/vmalertmanager/vmalertmanager_reconcile_test.go @@ -160,7 +160,8 @@ func Test_CreateOrUpdate_Actions(t *testing.T) { {Verb: "Get", Kind: "Service", Resource: vmalertmanagerName}, {Verb: "Get", Kind: "VMServiceScrape", Resource: vmalertmanagerName}, {Verb: "Get", Kind: "StatefulSet", Resource: vmalertmanagerName}, - {Verb: "Get", Kind: "StatefulSet", Resource: vmalertmanagerName}, + {Verb: "Get", Kind: "StatefulSet", Resource: vmalertmanagerName}, // getLatestStsState + {Verb: "Get", Kind: "StatefulSet", Resource: vmalertmanagerName}, // patchSTSCurrentRevision }, }) @@ -190,7 +191,8 @@ func Test_CreateOrUpdate_Actions(t *testing.T) { {Verb: "Get", Kind: "Service", Resource: vmalertmanagerName}, {Verb: "Get", Kind: "VMServiceScrape", Resource: vmalertmanagerName}, {Verb: "Get", Kind: "StatefulSet", Resource: vmalertmanagerName}, - {Verb: "Get", Kind: "StatefulSet", Resource: vmalertmanagerName}, + {Verb: "Get", Kind: "StatefulSet", Resource: vmalertmanagerName}, // getLatestStsState + {Verb: "Get", Kind: "StatefulSet", Resource: vmalertmanagerName}, // patchSTSCurrentRevision }, }) } diff --git a/internal/controller/operator/factory/vmanomaly/vmanomaly_reconcile_test.go b/internal/controller/operator/factory/vmanomaly/vmanomaly_reconcile_test.go index e89a7a45d..34c2d3862 100644 --- a/internal/controller/operator/factory/vmanomaly/vmanomaly_reconcile_test.go +++ b/internal/controller/operator/factory/vmanomaly/vmanomaly_reconcile_test.go @@ -198,7 +198,8 @@ schedulers: // StatefulSet {Verb: "Get", Kind: "StatefulSet", Resource: vmanomalyName}, {Verb: "Update", Kind: "StatefulSet", Resource: vmanomalyName}, - {Verb: "Get", Kind: "StatefulSet", Resource: vmanomalyName}, + {Verb: "Get", Kind: "StatefulSet", Resource: vmanomalyName}, // getLatestStsState + {Verb: "Get", Kind: "StatefulSet", Resource: vmanomalyName}, // patchSTSCurrentRevision }, }) @@ -247,7 +248,8 @@ schedulers: {Verb: "Get", Kind: "Secret", Resource: vmanomalyName}, // StatefulSet {Verb: "Get", Kind: "StatefulSet", Resource: vmanomalyName}, - {Verb: "Get", Kind: "StatefulSet", Resource: vmanomalyName}, + {Verb: "Get", Kind: "StatefulSet", Resource: vmanomalyName}, // getLatestStsState + {Verb: "Get", Kind: "StatefulSet", Resource: vmanomalyName}, // patchSTSCurrentRevision }, }) } diff --git a/internal/controller/operator/factory/vmcluster/vmcluster_reconcile_test.go b/internal/controller/operator/factory/vmcluster/vmcluster_reconcile_test.go index 59fbc4c47..14ad3050c 100644 --- a/internal/controller/operator/factory/vmcluster/vmcluster_reconcile_test.go +++ b/internal/controller/operator/factory/vmcluster/vmcluster_reconcile_test.go @@ -196,13 +196,15 @@ func Test_CreateOrUpdate_Actions(t *testing.T) { // VMStorage {Verb: "Get", Kind: "StatefulSet", Resource: vmstorageName}, - {Verb: "Get", Kind: "StatefulSet", Resource: vmstorageName}, // wait for ready + {Verb: "Get", Kind: "StatefulSet", Resource: vmstorageName}, // getLatestStsState + {Verb: "Get", Kind: "StatefulSet", Resource: vmstorageName}, // patchSTSCurrentRevision {Verb: "Get", Kind: "Service", Resource: vmstorageName}, {Verb: "Get", Kind: "VMServiceScrape", Resource: vmstorageName}, // VMSelect {Verb: "Get", Kind: "StatefulSet", Resource: vmselectName}, - {Verb: "Get", Kind: "StatefulSet", Resource: vmselectName}, // wait for ready + {Verb: "Get", Kind: "StatefulSet", Resource: vmselectName}, // getLatestStsState + {Verb: "Get", Kind: "StatefulSet", Resource: vmselectName}, // patchSTSCurrentRevision {Verb: "Get", Kind: "Service", Resource: vmselectName}, {Verb: "Get", Kind: "VMServiceScrape", Resource: vmselectName}, @@ -230,13 +232,15 @@ func Test_CreateOrUpdate_Actions(t *testing.T) { // VMStorage {Verb: "Get", Kind: "StatefulSet", Resource: vmstorageName}, - {Verb: "Get", Kind: "StatefulSet", Resource: vmstorageName}, // wait for ready + {Verb: "Get", Kind: "StatefulSet", Resource: vmstorageName}, // getLatestStsState + {Verb: "Get", Kind: "StatefulSet", Resource: vmstorageName}, // patchSTSCurrentRevision {Verb: "Get", Kind: "Service", Resource: vmstorageName}, {Verb: "Get", Kind: "VMServiceScrape", Resource: vmstorageName}, // VMSelect {Verb: "Get", Kind: "StatefulSet", Resource: vmselectName}, - {Verb: "Get", Kind: "StatefulSet", Resource: vmselectName}, // wait for ready + {Verb: "Get", Kind: "StatefulSet", Resource: vmselectName}, // getLatestStsState + {Verb: "Get", Kind: "StatefulSet", Resource: vmselectName}, // patchSTSCurrentRevision {Verb: "Get", Kind: "Service", Resource: vmselectName}, {Verb: "Get", Kind: "VMServiceScrape", Resource: vmselectName}, diff --git a/internal/controller/operator/vlagent_controller.go b/internal/controller/operator/vlagent_controller.go index d495c1b3c..a507da3b4 100644 --- a/internal/controller/operator/vlagent_controller.go +++ b/internal/controller/operator/vlagent_controller.go @@ -60,6 +60,7 @@ func (r *VLAgentReconciler) Init(rclient client.Client, l logr.Logger, sc *runti // +kubebuilder:rbac:groups="",resources=serviceaccounts,verbs=get;create,update;list // +kubebuilder:rbac:groups=apps,resources=daemonsets,verbs=* // +kubebuilder:rbac:groups=apps,resources=statefulsets,verbs=* +// +kubebuilder:rbac:groups=apps,resources=statefulsets/status,verbs=get;update;patch func (r *VLAgentReconciler) Reconcile(ctx context.Context, req ctrl.Request) (result ctrl.Result, err error) { l := r.Log.WithValues("vlagent", req.Name, "namespace", req.Namespace) ctx = logger.AddToContext(ctx, l) diff --git a/internal/controller/operator/vmagent_controller.go b/internal/controller/operator/vmagent_controller.go index eb6da2f5e..4671f4946 100644 --- a/internal/controller/operator/vmagent_controller.go +++ b/internal/controller/operator/vmagent_controller.go @@ -79,6 +79,7 @@ func (r *VMAgentReconciler) Init(rclient client.Client, l logr.Logger, sc *runti // +kubebuilder:rbac:groups="",resources=serviceaccounts,verbs=get;create,update;list // +kubebuilder:rbac:groups=apps,resources=deployments,verbs=* // +kubebuilder:rbac:groups=apps,resources=statefulsets,verbs=* +// +kubebuilder:rbac:groups=apps,resources=statefulsets/status,verbs=get;update;patch // +kubebuilder:rbac:groups=apps,resources=daemonsets,verbs=* func (r *VMAgentReconciler) Reconcile(ctx context.Context, req ctrl.Request) (result ctrl.Result, err error) { l := r.Log.WithValues("vmagent", req.Name, "namespace", req.Namespace) diff --git a/internal/controller/operator/vmalertmanager_controller.go b/internal/controller/operator/vmalertmanager_controller.go index d1bd82f45..824a2e72a 100644 --- a/internal/controller/operator/vmalertmanager_controller.go +++ b/internal/controller/operator/vmalertmanager_controller.go @@ -67,6 +67,7 @@ func (r *VMAlertmanagerReconciler) Scheme() *runtime.Scheme { // +kubebuilder:rbac:groups=operator.victoriametrics.com,resources=vmalertmanagers/status,verbs=get;update;patch // +kubebuilder:rbac:groups=operator.victoriametrics.com,resources=vmalertmanagers/finalizers,verbs=* // +kubebuilder:rbac:groups=apps,resources=statefulsets,verbs=* +// +kubebuilder:rbac:groups=apps,resources=statefulsets/status,verbs=get;update;patch // +kubebuilder:rbac:groups="",resources=configmaps,verbs=* // +kubebuilder:rbac:groups="",resources=secrets,verbs=* func (r *VMAlertmanagerReconciler) Reconcile(ctx context.Context, req ctrl.Request) (result ctrl.Result, err error) { diff --git a/internal/controller/operator/vmanomaly_controller.go b/internal/controller/operator/vmanomaly_controller.go index 1c252dc47..7222755c5 100644 --- a/internal/controller/operator/vmanomaly_controller.go +++ b/internal/controller/operator/vmanomaly_controller.go @@ -56,6 +56,7 @@ func (r *VMAnomalyReconciler) Init(rclient client.Client, l logr.Logger, sc *run // +kubebuilder:rbac:groups=operator.victoriametrics.com,resources=vmanomalies/finalizers,verbs=* // +kubebuilder:rbac:groups=apps,resources=deployments,verbs=* // +kubebuilder:rbac:groups=apps,resources=statefulsets,verbs=* +// +kubebuilder:rbac:groups=apps,resources=statefulsets/status,verbs=get;update;patch // +kubebuilder:rbac:groups="",resources=serviceaccounts,verbs=get;create,update;list // +kubebuilder:rbac:groups="",resources=events,verbs=* // +kubebuilder:rbac:groups="",resources=pods,verbs=* diff --git a/internal/controller/operator/vmcluster_controller.go b/internal/controller/operator/vmcluster_controller.go index d471bb22c..4208cea1b 100644 --- a/internal/controller/operator/vmcluster_controller.go +++ b/internal/controller/operator/vmcluster_controller.go @@ -44,6 +44,7 @@ func (r *VMClusterReconciler) Scheme() *runtime.Scheme { // +kubebuilder:rbac:groups=operator.victoriametrics.com,resources=vmclusters/status,verbs=get;update;patch // +kubebuilder:rbac:groups=operator.victoriametrics.com,resources=vmclusters/finalizers,verbs=* // +kubebuilder:rbac:groups=apps,resources=statefulsets,verbs=* +// +kubebuilder:rbac:groups=apps,resources=statefulsets/status,verbs=get;update;patch func (r *VMClusterReconciler) Reconcile(ctx context.Context, request ctrl.Request) (result ctrl.Result, err error) { l := r.Log.WithValues("vmcluster", request.Name, "namespace", request.Namespace) ctx = logger.AddToContext(ctx, l) From 5e4a3a20afd43a387005aa16610a7b34fb4ab4b0 Mon Sep 17 00:00:00 2001 From: Andrii Chubatiuk Date: Mon, 25 May 2026 09:47:23 +0300 Subject: [PATCH 02/31] olm: added stable channel, removed unneeded pipeline steps (#2201) Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com> Signed-off-by: Andrii Chubatiuk --- .github/workflows/operatorhub.yaml | 34 +++++++++---------- ...etrics-operator.clusterserviceversion.yaml | 2 +- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/.github/workflows/operatorhub.yaml b/.github/workflows/operatorhub.yaml index f120c99a9..37d310506 100644 --- a/.github/workflows/operatorhub.yaml +++ b/.github/workflows/operatorhub.yaml @@ -15,6 +15,8 @@ jobs: update: name: Publish new OperatorHub release runs-on: ubuntu-latest + env: + CHANNEL: ${{ startsWith(github.event.workflow_run.head_branch, 'release') && 'stable' || 'beta' }} if: ${{ (github.event.workflow_run.conclusion == 'success' && ! contains(github.event.workflow_run.head_branch, '-')) || github.event_name == 'workflow_dispatch' && github.event.workflow_run.release != 'prereleased' }} strategy: matrix: @@ -58,14 +60,6 @@ jobs: run_id: ${{ github.event.workflow_run.id }} path: bundle - - name: Install opm - run: | - OPM_VERSION=v1.65.0 - curl -fsSLO https://github.com/operator-framework/operator-registry/releases/download/${OPM_VERSION}/linux-amd64-opm - curl -fsSLO https://github.com/operator-framework/operator-registry/releases/download/${OPM_VERSION}/checksums.txt - grep ' linux-amd64-opm$' checksums.txt | sha256sum -c - - install -m 0755 linux-amd64-opm /usr/local/bin/opm - - name: Add operatorhub bundle id: update run: | @@ -78,8 +72,12 @@ jobs: export OPERATOR_DIR=__operatorhub-repo/operators/${OPERATOR_NAME} mkdir -p ${OPERATOR_DIR} - NEW_VERSION=$(ls bundle | head -1) - export OLD_VERSION=$(find ${OPERATOR_DIR}/* ! -path "*/catalog-templates" -maxdepth 0 -type d -exec basename {} \; | sort -V -r | head -1) + export NEW_VERSION=$(ls bundle | head -1) + export OLD_VERSION=$( + { find ${OPERATOR_DIR}/* ! -path "*/catalog-templates" -maxdepth 0 -type d -exec basename {} \; + echo "${NEW_VERSION}" + } | sort -V | grep -B1 "^${NEW_VERSION}$" | grep -v "^${NEW_VERSION}$" + ) export OLD_ENTRY="${OPERATOR_NAME}.v${OLD_VERSION}" if [ ! -z $OLD_VERSION ]; then @@ -89,14 +87,16 @@ jobs: mv bundle/${NEW_VERSION} ${OPERATOR_DIR}/ if [ -f ${OPERATOR_DIR}/Makefile ]; then - opm render ${OPERATOR_DIR}/${NEW_VERSION} --output=yaml \ - | yq 'select(.schema == "olm.bundle")' > /tmp/new-bundle.yaml - + yq -n '.catalog_templates = []' > ${OPERATOR_DIR}/${NEW_VERSION}/release-config.yaml for TEMPLATE in ${OPERATOR_DIR}/catalog-templates/*.yaml; do - PREV_HEAD=$(yq '.entries[] | select(.schema == "olm.channel") | .entries[-1].name' "${TEMPLATE}") - NEW_VERSION="${NEW_VERSION}" PREV_HEAD="${PREV_HEAD}" \ - yq -i '(.entries[] | select(.schema == "olm.channel") | .entries) += [{"name": "victoriametrics-operator.v" + strenv(NEW_VERSION), "replaces": strenv(PREV_HEAD)}]' "${TEMPLATE}" - yq -i '.entries += [load("/tmp/new-bundle.yaml")]' "${TEMPLATE}" + export TPL=$(basename ${TEMPLATE}) + if [ "$CHANNEL" = "stable" ] && ! yq -e '.entries[] | select(.schema == "olm.channel" and .name == "stable")' "$TEMPLATE" > /dev/null 2>&1 && [ -n "$OLD_VERSION" ]; then + yq -i '.catalog_templates += [{"template_name": strenv(TPL), "channels": ["stable","beta"], "replaces": strenv(OLD_ENTRY)}]' ${OPERATOR_DIR}/${NEW_VERSION}/release-config.yaml + elif [ -n "$OLD_VERSION" ]; then + yq -i '.catalog_templates += [{"template_name": strenv(TPL), "channels": [strenv(CHANNEL)], "replaces": strenv(OLD_ENTRY)}]' ${OPERATOR_DIR}/${NEW_VERSION}/release-config.yaml + else + yq -i '.catalog_templates += [{"template_name": strenv(TPL), "channels": [strenv(CHANNEL)]}]' ${OPERATOR_DIR}/${NEW_VERSION}/release-config.yaml + fi done fi diff --git a/config/manifests/bases/victoriametrics-operator.clusterserviceversion.yaml b/config/manifests/bases/victoriametrics-operator.clusterserviceversion.yaml index 3dbf2bee9..c996077cb 100644 --- a/config/manifests/bases/victoriametrics-operator.clusterserviceversion.yaml +++ b/config/manifests/bases/victoriametrics-operator.clusterserviceversion.yaml @@ -22,7 +22,7 @@ metadata: operatorhub.io/ui-metadata-max-k8s-version: "1.30" operators.openshift.io/infrastructure-features: '[fips]' operators.operatorframework.io.bundle.channel.default.v1: beta - operators.operatorframework.io.bundle.channels.v1: beta + operators.operatorframework.io.bundle.channels.v1: beta,stable operators.operatorframework.io/builder: operator-sdk-v1.35.0 operators.operatorframework.io/project_layout: go.kubebuilder.io/v4 repository: https://github.com/VictoriaMetrics/operator From 8474ea17688930fbfc2ddfabd0822fad3aed605d Mon Sep 17 00:00:00 2001 From: Andrii Chubatiuk Date: Mon, 11 May 2026 14:56:12 +0300 Subject: [PATCH 03/31] reconcile: prevent from staying in expanding state (#2124) Signed-off-by: Vadim Rutkovsky Co-authored-by: Vadim Rutkovsky --- docs/CHANGELOG.md | 1 + internal/controller/operator/controllers.go | 2 ++ 2 files changed, 3 insertions(+) diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index de4c7e75a..95c88beeb 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -14,6 +14,7 @@ aliases: ## tip * BUGFIX: [vmoperator](https://docs.victoriametrics.com/operator/): update status currentRevision and currentReplicas for StatefulSet with OnDelete update strategy. See [#1242](https://github.com/VictoriaMetrics/operator/issues/1242). +* BUGFIX: [vmoperator](https://docs.victoriametrics.com/operator/): retry reconcile errors, that may lead to expanding state, before resource could hang in expanding state. ## [v0.68.5](https://github.com/VictoriaMetrics/operator/releases/tag/v0.68.5) **Release date:** 27 May 2026 diff --git a/internal/controller/operator/controllers.go b/internal/controller/operator/controllers.go index d5c8bb01d..c0ec26c7a 100644 --- a/internal/controller/operator/controllers.go +++ b/internal/controller/operator/controllers.go @@ -358,6 +358,8 @@ func reconcileAndTrackStatus[T client.Object, ST reconcile.StatusWithMetadata[ST resultStatus = vmv1beta1.UpdateStatusFailed if reconcile.IsRetryable(err) { resultStatus = vmv1beta1.UpdateStatusExpanding + } else { + resultStatus = vmv1beta1.UpdateStatusFailed } resultErr = err return From 9e1a9ff98388e6b642522d41d57e214d9a3dad94 Mon Sep 17 00:00:00 2001 From: Andrii Chubatiuk Date: Mon, 11 May 2026 16:54:16 +0300 Subject: [PATCH 04/31] [vm|vl|vt]cluster: properly generate storageNode command line flags when HPA enabled on a storage (#2119) * [vm|vl|vt]cluster: properly generate storageNode command line flags when HPA enabled on a storage * Update docs/CHANGELOG.md Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com> Signed-off-by: Andrii Chubatiuk * apply suggestions --------- Signed-off-by: Andrii Chubatiuk Signed-off-by: Vadim Rutkovsky Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com> Co-authored-by: Vadim Rutkovsky --- api/operator/v1/cluster_types_test.go | 22 ++++++------ api/operator/v1/vlcluster_types.go | 20 +++++++---- api/operator/v1/vtcluster_types.go | 20 +++++++---- api/operator/v1beta1/vmcluster_types.go | 20 +++++++---- api/operator/v1beta1/vmcluster_types_test.go | 10 +++--- api/operator/v1beta1/vmextra_types.go | 8 +++++ docs/CHANGELOG.md | 1 + .../operator/factory/vlcluster/vlinsert.go | 17 +++++---- .../operator/factory/vlcluster/vlselect.go | 10 +++--- .../operator/factory/vlcluster/vlstorage.go | 1 + .../operator/factory/vmcluster/vmcluster.go | 36 +++++++++---------- .../operator/factory/vtcluster/insert.go | 16 ++++----- .../operator/factory/vtcluster/select.go | 10 +++--- .../operator/factory/vtcluster/storage.go | 1 + 14 files changed, 108 insertions(+), 84 deletions(-) diff --git a/api/operator/v1/cluster_types_test.go b/api/operator/v1/cluster_types_test.go index 0b3e53871..1f2f3e42d 100644 --- a/api/operator/v1/cluster_types_test.go +++ b/api/operator/v1/cluster_types_test.go @@ -9,10 +9,11 @@ import ( vmv1beta1 "github.com/VictoriaMetrics/operator/api/operator/v1beta1" ) +//nolint:dupl func TestVTCluster_AvailableStorageNodeIDs(t *testing.T) { - f := func(cr *VTCluster, requestsType string, want []int32) { + f := func(cr *VTCluster, kind vmv1beta1.ClusterComponent, want []int32) { t.Helper() - assert.Equal(t, want, cr.AvailableStorageNodeIDs(requestsType)) + assert.Equal(t, want, cr.AvailableStorageNodeIDs(kind)) } cr := &VTCluster{ @@ -28,10 +29,10 @@ func TestVTCluster_AvailableStorageNodeIDs(t *testing.T) { } // select excludes maintenance nodes - f(cr, "select", []int32{0, 2, 4}) + f(cr, vmv1beta1.ClusterComponentSelect, []int32{0, 2, 4}) // insert excludes maintenance nodes - f(cr, "insert", []int32{1, 2, 3}) + f(cr, vmv1beta1.ClusterComponentInsert, []int32{1, 2, 3}) // no maintenance nodes f(&VTCluster{ @@ -40,13 +41,14 @@ func TestVTCluster_AvailableStorageNodeIDs(t *testing.T) { CommonAppsParams: vmv1beta1.CommonAppsParams{ReplicaCount: ptr.To(int32(3))}, }, }, - }, "select", []int32{0, 1, 2}) + }, vmv1beta1.ClusterComponentSelect, []int32{0, 1, 2}) } +//nolint:dupl func TestVLCluster_AvailableStorageNodeIDs(t *testing.T) { - f := func(cr *VLCluster, requestsType string, want []int32) { + f := func(cr *VLCluster, kind vmv1beta1.ClusterComponent, want []int32) { t.Helper() - assert.Equal(t, want, cr.AvailableStorageNodeIDs(requestsType)) + assert.Equal(t, want, cr.AvailableStorageNodeIDs(kind)) } cr := &VLCluster{ @@ -62,10 +64,10 @@ func TestVLCluster_AvailableStorageNodeIDs(t *testing.T) { } // select excludes maintenance nodes - f(cr, "select", []int32{0, 2, 4}) + f(cr, vmv1beta1.ClusterComponentSelect, []int32{0, 2, 4}) // insert excludes maintenance nodes - f(cr, "insert", []int32{1, 2, 3}) + f(cr, vmv1beta1.ClusterComponentInsert, []int32{1, 2, 3}) // no maintenance nodes f(&VLCluster{ @@ -74,5 +76,5 @@ func TestVLCluster_AvailableStorageNodeIDs(t *testing.T) { CommonAppsParams: vmv1beta1.CommonAppsParams{ReplicaCount: ptr.To(int32(3))}, }, }, - }, "select", []int32{0, 1, 2}) + }, vmv1beta1.ClusterComponentSelect, []int32{0, 1, 2}) } diff --git a/api/operator/v1/vlcluster_types.go b/api/operator/v1/vlcluster_types.go index 339918e38..702ca8e33 100644 --- a/api/operator/v1/vlcluster_types.go +++ b/api/operator/v1/vlcluster_types.go @@ -776,21 +776,27 @@ func (cr *VLCluster) Validate() error { } // AvailableStorageNodeIDs returns ids of the storage nodes for the provided component -func (cr *VLCluster) AvailableStorageNodeIDs(requestsType string) []int32 { +func (cr *VLCluster) AvailableStorageNodeIDs(kind vmv1beta1.ClusterComponent) []int32 { var result []int32 - if cr.Spec.VLStorage == nil || cr.Spec.VLStorage.ReplicaCount == nil { + if cr.Spec.VLStorage == nil || (cr.Spec.VLStorage.ReplicaCount == nil && cr.Spec.VLStorage.HPA == nil) { return result } maintenanceNodes := sets.New[int32]() - switch requestsType { - case "select": + switch kind { + case vmv1beta1.ClusterComponentSelect: maintenanceNodes.Insert(cr.Spec.VLStorage.MaintenanceSelectNodeIDs...) - case "insert": + case vmv1beta1.ClusterComponentInsert: maintenanceNodes.Insert(cr.Spec.VLStorage.MaintenanceInsertNodeIDs...) default: - panic("BUG unsupported requestsType: " + requestsType) + panic("BUG unsupported kind: " + string(kind)) + } + var replicaCount int32 + if cr.Spec.VLStorage.ReplicaCount != nil { + replicaCount = *cr.Spec.VLStorage.ReplicaCount + } else if cr.Spec.VLStorage.HPA != nil { + replicaCount = cr.Spec.VLStorage.HPA.GetMinReplicas() } - for i := int32(0); i < *cr.Spec.VLStorage.ReplicaCount; i++ { + for i := int32(0); i < replicaCount; i++ { if maintenanceNodes.Has(i) { continue } diff --git a/api/operator/v1/vtcluster_types.go b/api/operator/v1/vtcluster_types.go index 023649f88..b106399ca 100644 --- a/api/operator/v1/vtcluster_types.go +++ b/api/operator/v1/vtcluster_types.go @@ -687,21 +687,27 @@ func (cr *VTCluster) Validate() error { } // AvailableStorageNodeIDs returns ids of the storage nodes for the provided component -func (cr *VTCluster) AvailableStorageNodeIDs(requestsType string) []int32 { +func (cr *VTCluster) AvailableStorageNodeIDs(kind vmv1beta1.ClusterComponent) []int32 { var result []int32 - if cr.Spec.Storage == nil || cr.Spec.Storage.ReplicaCount == nil { + if cr.Spec.Storage == nil || (cr.Spec.Storage.ReplicaCount == nil && cr.Spec.Storage.HPA == nil) { return result } maintenanceNodes := sets.New[int32]() - switch requestsType { - case "select": + switch kind { + case vmv1beta1.ClusterComponentSelect: maintenanceNodes.Insert(cr.Spec.Storage.MaintenanceSelectNodeIDs...) - case "insert": + case vmv1beta1.ClusterComponentInsert: maintenanceNodes.Insert(cr.Spec.Storage.MaintenanceInsertNodeIDs...) default: - panic("BUG unsupported requestsType: " + requestsType) + panic("BUG unsupported kind: " + string(kind)) + } + var replicaCount int32 + if cr.Spec.Storage.ReplicaCount != nil { + replicaCount = *cr.Spec.Storage.ReplicaCount + } else if cr.Spec.Storage.HPA != nil { + replicaCount = cr.Spec.Storage.HPA.GetMinReplicas() } - for i := int32(0); i < *cr.Spec.Storage.ReplicaCount; i++ { + for i := int32(0); i < replicaCount; i++ { if maintenanceNodes.Has(i) { continue } diff --git a/api/operator/v1beta1/vmcluster_types.go b/api/operator/v1beta1/vmcluster_types.go index d4b2b1dad..f404f15a5 100644 --- a/api/operator/v1beta1/vmcluster_types.go +++ b/api/operator/v1beta1/vmcluster_types.go @@ -715,21 +715,27 @@ func (cr *VMCluster) Validate() error { } // AvailableStorageNodeIDs returns ids of the storage nodes for the provided component -func (cr *VMCluster) AvailableStorageNodeIDs(requestsType string) []int32 { +func (cr *VMCluster) AvailableStorageNodeIDs(kind ClusterComponent) []int32 { var result []int32 - if cr.Spec.VMStorage == nil || cr.Spec.VMStorage.ReplicaCount == nil { + if cr.Spec.VMStorage == nil || (cr.Spec.VMStorage.ReplicaCount == nil && cr.Spec.VMStorage.HPA == nil) { return result } maintenanceNodes := sets.New[int32]() - switch requestsType { - case "select": + switch kind { + case ClusterComponentSelect: maintenanceNodes.Insert(cr.Spec.VMStorage.MaintenanceSelectNodeIDs...) - case "insert": + case ClusterComponentInsert: maintenanceNodes.Insert(cr.Spec.VMStorage.MaintenanceInsertNodeIDs...) default: - panic("BUG unsupported requestsType: " + requestsType) + panic("BUG unsupported kind: " + string(kind)) + } + var replicaCount int32 + if cr.Spec.VMStorage.ReplicaCount != nil { + replicaCount = *cr.Spec.VMStorage.ReplicaCount + } else if cr.Spec.VMStorage.HPA != nil { + replicaCount = cr.Spec.VMStorage.HPA.GetMinReplicas() } - for i := int32(0); i < *cr.Spec.VMStorage.ReplicaCount; i++ { + for i := int32(0); i < replicaCount; i++ { if maintenanceNodes.Has(i) { continue } diff --git a/api/operator/v1beta1/vmcluster_types_test.go b/api/operator/v1beta1/vmcluster_types_test.go index 032f8f591..8cbace183 100644 --- a/api/operator/v1beta1/vmcluster_types_test.go +++ b/api/operator/v1beta1/vmcluster_types_test.go @@ -89,9 +89,9 @@ func TestVMBackup_SnapshotCreatePathWithFlags(t *testing.T) { } func TestVMCluster_AvailableStorageNodeIDs(t *testing.T) { - f := func(cr *VMCluster, requestsType string, want []int32) { + f := func(cr *VMCluster, kind ClusterComponent, want []int32) { t.Helper() - assert.Equal(t, want, cr.AvailableStorageNodeIDs(requestsType)) + assert.Equal(t, want, cr.AvailableStorageNodeIDs(kind)) } cr := &VMCluster{ @@ -107,10 +107,10 @@ func TestVMCluster_AvailableStorageNodeIDs(t *testing.T) { } // select excludes maintenance nodes - f(cr, "select", []int32{0, 2, 4}) + f(cr, ClusterComponentSelect, []int32{0, 2, 4}) // insert excludes maintenance nodes - f(cr, "insert", []int32{1, 2, 3}) + f(cr, ClusterComponentInsert, []int32{1, 2, 3}) // no maintenance nodes f(&VMCluster{ @@ -119,5 +119,5 @@ func TestVMCluster_AvailableStorageNodeIDs(t *testing.T) { CommonAppsParams: CommonAppsParams{ReplicaCount: ptr.To(int32(3))}, }, }, - }, "select", []int32{0, 1, 2}) + }, ClusterComponentSelect, []int32{0, 1, 2}) } diff --git a/api/operator/v1beta1/vmextra_types.go b/api/operator/v1beta1/vmextra_types.go index 61642e1d2..92c0599ff 100644 --- a/api/operator/v1beta1/vmextra_types.go +++ b/api/operator/v1beta1/vmextra_types.go @@ -435,6 +435,14 @@ type EmbeddedHPA struct { Behaviour *autoscalingv2.HorizontalPodAutoscalerBehavior `json:"behaviour,omitempty"` } +// GetMinReplicas returns default minReplicas value +func (cr *EmbeddedHPA) GetMinReplicas() int32 { + if cr.MinReplicas != nil { + return *cr.MinReplicas + } + return 1 +} + // Validate validates resource configuration func (cr *EmbeddedHPA) Validate() error { if cr.MinReplicas != nil && *cr.MinReplicas > cr.MaxReplicas { diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 95c88beeb..f21cdafcf 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -15,6 +15,7 @@ aliases: * BUGFIX: [vmoperator](https://docs.victoriametrics.com/operator/): update status currentRevision and currentReplicas for StatefulSet with OnDelete update strategy. See [#1242](https://github.com/VictoriaMetrics/operator/issues/1242). * BUGFIX: [vmoperator](https://docs.victoriametrics.com/operator/): retry reconcile errors, that may lead to expanding state, before resource could hang in expanding state. +* BUGFIX: [vmcluster](https://docs.victoriametrics.com/operator/resources/vmcluster/), [vlcluster](https://docs.victoriametrics.com/operator/resources/vlcluster/) and [vtcluster](https://docs.victoriametrics.com/operator/resources/vtcluster/): when storage HPA was enabled, generated `-storageNode` flags could become incorrect after scaling, which could break expected routing to storage nodes; now the operator derives storage node count from the current StatefulSet state so generated flags stay correct during HPA-driven scaling. See [#2117](https://github.com/VictoriaMetrics/operator/issues/2117). ## [v0.68.5](https://github.com/VictoriaMetrics/operator/releases/tag/v0.68.5) **Release date:** 27 May 2026 diff --git a/internal/controller/operator/factory/vlcluster/vlinsert.go b/internal/controller/operator/factory/vlcluster/vlinsert.go index bae883096..900d05893 100644 --- a/internal/controller/operator/factory/vlcluster/vlinsert.go +++ b/internal/controller/operator/factory/vlcluster/vlinsert.go @@ -79,6 +79,7 @@ func createOrUpdateVLInsertDeployment(ctx context.Context, rclient client.Client PatchSpec: func(existingSpec, newSpec *appsv1.DeploymentSpec) { if cr.Spec.VLInsert.HPA != nil { newSpec.Replicas = existingSpec.Replicas + cr.Spec.VLInsert.ReplicaCount = existingSpec.Replicas } }, } @@ -138,16 +139,14 @@ func buildVLInsertPodSpec(cr *vmv1.VLCluster) (*corev1.PodTemplateSpec, error) { args = append(args, fmt.Sprintf("-loggerFormat=%s", cr.Spec.VLInsert.LogFormat)) } - if cr.Spec.VLStorage != nil && cr.Spec.VLStorage.ReplicaCount != nil { - storageNodeFlag := build.NewFlag("-storageNode", "") - storageNodeIds := cr.AvailableStorageNodeIDs("insert") - for idx, i := range storageNodeIds { - // TODO: introduce TLS webserver config for storage nodes - storageNodeFlag.Add(build.PodDNSAddress(cr.PrefixedName(vmv1beta1.ClusterComponentStorage), i, cr.Namespace, cr.Spec.VLStorage.Port, cr.Spec.ClusterDomainName), idx) - } - totalNodes := len(storageNodeIds) - args = build.AppendFlagsToArgs(args, totalNodes, storageNodeFlag) + storageNodeFlag := build.NewFlag("-storageNode", "") + storageNodeIds := cr.AvailableStorageNodeIDs(vmv1beta1.ClusterComponentInsert) + for idx, i := range storageNodeIds { + // TODO: introduce TLS webserver config for storage nodes + storageNodeFlag.Add(build.PodDNSAddress(cr.PrefixedName(vmv1beta1.ClusterComponentStorage), i, cr.Namespace, cr.Spec.VLStorage.Port, cr.Spec.ClusterDomainName), idx) } + totalNodes := len(storageNodeIds) + args = build.AppendFlagsToArgs(args, totalNodes, storageNodeFlag) if len(cr.Spec.VLInsert.ExtraEnvs) > 0 || len(cr.Spec.VLInsert.ExtraEnvsFrom) > 0 { args = append(args, "-envflag.enable=true") } diff --git a/internal/controller/operator/factory/vlcluster/vlselect.go b/internal/controller/operator/factory/vlcluster/vlselect.go index 1a5cab8fa..1b9f344e8 100644 --- a/internal/controller/operator/factory/vlcluster/vlselect.go +++ b/internal/controller/operator/factory/vlcluster/vlselect.go @@ -190,6 +190,7 @@ func createOrUpdateVLSelectDeployment(ctx context.Context, rclient client.Client PatchSpec: func(existingSpec, newSpec *appsv1.DeploymentSpec) { if cr.Spec.VLSelect.HPA != nil { newSpec.Replicas = existingSpec.Replicas + cr.Spec.VLSelect.ReplicaCount = existingSpec.Replicas } }, } @@ -245,12 +246,9 @@ func buildVLSelectPodSpec(cr *vmv1.VLCluster) (*corev1.PodTemplateSpec, error) { } storageNodeFlag := build.NewFlag("-storageNode", "") - storageNodeIds := cr.AvailableStorageNodeIDs("select") - if cr.Spec.VLStorage != nil && cr.Spec.VLStorage.ReplicaCount != nil { - // TODO: check TLS - for idx, i := range storageNodeIds { - storageNodeFlag.Add(build.PodDNSAddress(cr.PrefixedName(vmv1beta1.ClusterComponentStorage), i, cr.Namespace, cr.Spec.VLStorage.Port, cr.Spec.ClusterDomainName), idx) - } + storageNodeIds := cr.AvailableStorageNodeIDs(vmv1beta1.ClusterComponentSelect) + for idx, i := range storageNodeIds { + storageNodeFlag.Add(build.PodDNSAddress(cr.PrefixedName(vmv1beta1.ClusterComponentStorage), i, cr.Namespace, cr.Spec.VLStorage.Port, cr.Spec.ClusterDomainName), idx) } if len(cr.Spec.VLSelect.ExtraStorageNodes) > 0 { for i, node := range cr.Spec.VLSelect.ExtraStorageNodes { diff --git a/internal/controller/operator/factory/vlcluster/vlstorage.go b/internal/controller/operator/factory/vlcluster/vlstorage.go index 2a2b491b4..6d789f52e 100644 --- a/internal/controller/operator/factory/vlcluster/vlstorage.go +++ b/internal/controller/operator/factory/vlcluster/vlstorage.go @@ -172,6 +172,7 @@ func createOrUpdateVLStorageSTS(ctx context.Context, rclient client.Client, cr, PatchSpec: func(existingSpec, newSpec *appsv1.StatefulSetSpec) { if cr.Spec.VLStorage.HPA != nil { newSpec.Replicas = existingSpec.Replicas + cr.Spec.VLStorage.ReplicaCount = existingSpec.Replicas } }, } diff --git a/internal/controller/operator/factory/vmcluster/vmcluster.go b/internal/controller/operator/factory/vmcluster/vmcluster.go index c9cd88a14..a56c299f7 100644 --- a/internal/controller/operator/factory/vmcluster/vmcluster.go +++ b/internal/controller/operator/factory/vmcluster/vmcluster.go @@ -171,6 +171,7 @@ func createOrUpdateVMSelect(ctx context.Context, rclient client.Client, cr, prev PatchSpec: func(existingSpec, newSpec *appsv1.StatefulSetSpec) { if cr.Spec.VMSelect.HPA != nil { newSpec.Replicas = existingSpec.Replicas + cr.Spec.VMSelect.ReplicaCount = existingSpec.Replicas } }, } @@ -320,6 +321,7 @@ func createOrUpdateVMInsert(ctx context.Context, rclient client.Client, cr, prev PatchSpec: func(existingSpec, newSpec *appsv1.DeploymentSpec) { if cr.Spec.VMInsert.HPA != nil { newSpec.Replicas = existingSpec.Replicas + cr.Spec.VMInsert.ReplicaCount = existingSpec.Replicas } }, } @@ -431,6 +433,7 @@ func createOrUpdateVMStorage(ctx context.Context, rclient client.Client, cr, pre PatchSpec: func(existingSpec, newSpec *appsv1.StatefulSetSpec) { if cr.Spec.VMStorage.HPA != nil { newSpec.Replicas = existingSpec.Replicas + cr.Spec.VMStorage.ReplicaCount = existingSpec.Replicas } }, } @@ -586,16 +589,15 @@ func makePodSpecForVMSelect(cr *vmv1beta1.VMCluster) (*corev1.PodTemplateSpec, e } } - if cr.Spec.VMStorage != nil && cr.Spec.VMStorage.ReplicaCount != nil { - storageNodeFlag := build.NewFlag("-storageNode", "") - storageNodeIds := cr.AvailableStorageNodeIDs("select") - for idx, i := range storageNodeIds { - storageName := cr.PrefixedName(vmv1beta1.ClusterComponentStorage) - storageNodeFlag.Add(build.PodDNSAddress(storageName, i, cr.Namespace, cr.Spec.VMStorage.VMSelectPort, cr.Spec.ClusterDomainName), idx) - } - totalNodes := len(storageNodeIds) - args = build.AppendFlagsToArgs(args, totalNodes, storageNodeFlag) + storageNodeFlag := build.NewFlag("-storageNode", "") + storageNodeIds := cr.AvailableStorageNodeIDs(vmv1beta1.ClusterComponentSelect) + for idx, i := range storageNodeIds { + storageName := cr.PrefixedName(vmv1beta1.ClusterComponentStorage) + storageNodeFlag.Add(build.PodDNSAddress(storageName, i, cr.Namespace, cr.Spec.VMStorage.VMSelectPort, cr.Spec.ClusterDomainName), idx) } + totalNodes := len(storageNodeIds) + args = build.AppendFlagsToArgs(args, totalNodes, storageNodeFlag) + // selectNode arg add for deployments without HPA // HPA leads to rolling restart for vmselect statefulset in case of replicas count changes if cr.Spec.VMSelect.HPA == nil && cr.Spec.VMSelect.ReplicaCount != nil { @@ -792,16 +794,14 @@ func makePodSpecForVMInsert(cr *vmv1beta1.VMCluster) (*corev1.PodTemplateSpec, e args = append(args, fmt.Sprintf("--clusternativeListenAddr=:%s", cr.Spec.VMInsert.ClusterNativePort)) } - if cr.Spec.VMStorage != nil && cr.Spec.VMStorage.ReplicaCount != nil { - storageNodeFlag := build.NewFlag("-storageNode", "") - storageNodeIds := cr.AvailableStorageNodeIDs("insert") - for idx, i := range storageNodeIds { - storageName := cr.PrefixedName(vmv1beta1.ClusterComponentStorage) - storageNodeFlag.Add(build.PodDNSAddress(storageName, i, cr.Namespace, cr.Spec.VMStorage.VMInsertPort, cr.Spec.ClusterDomainName), idx) - } - totalNodes := len(storageNodeIds) - args = build.AppendFlagsToArgs(args, totalNodes, storageNodeFlag) + storageNodeFlag := build.NewFlag("-storageNode", "") + storageNodeIds := cr.AvailableStorageNodeIDs(vmv1beta1.ClusterComponentInsert) + for idx, i := range storageNodeIds { + storageName := cr.PrefixedName(vmv1beta1.ClusterComponentStorage) + storageNodeFlag.Add(build.PodDNSAddress(storageName, i, cr.Namespace, cr.Spec.VMStorage.VMInsertPort, cr.Spec.ClusterDomainName), idx) } + totalNodes := len(storageNodeIds) + args = build.AppendFlagsToArgs(args, totalNodes, storageNodeFlag) if cr.Spec.ReplicationFactor != nil { args = append(args, fmt.Sprintf("-replicationFactor=%d", *cr.Spec.ReplicationFactor)) diff --git a/internal/controller/operator/factory/vtcluster/insert.go b/internal/controller/operator/factory/vtcluster/insert.go index 0bbc70994..c7cca090a 100644 --- a/internal/controller/operator/factory/vtcluster/insert.go +++ b/internal/controller/operator/factory/vtcluster/insert.go @@ -78,6 +78,7 @@ func createOrUpdateVTInsertDeployment(ctx context.Context, rclient client.Client PatchSpec: func(existingSpec, newSpec *appsv1.DeploymentSpec) { if cr.Spec.Insert.HPA != nil { newSpec.Replicas = existingSpec.Replicas + cr.Spec.Insert.ReplicaCount = existingSpec.Replicas } }, } @@ -136,16 +137,13 @@ func buildVTInsertPodSpec(cr *vmv1.VTCluster) (*corev1.PodTemplateSpec, error) { args = append(args, fmt.Sprintf("-loggerFormat=%s", cr.Spec.Insert.LogFormat)) } - if cr.Spec.Storage != nil && cr.Spec.Storage.ReplicaCount != nil { - // TODO: check TLS - storageNodeFlag := build.NewFlag("-storageNode", "") - storageNodeIds := cr.AvailableStorageNodeIDs("insert") - for idx, i := range storageNodeIds { - storageNodeFlag.Add(build.PodDNSAddress(cr.PrefixedName(vmv1beta1.ClusterComponentStorage), i, cr.Namespace, cr.Spec.Storage.Port, cr.Spec.ClusterDomainName), idx) - } - totalNodes := len(storageNodeIds) - args = build.AppendFlagsToArgs(args, totalNodes, storageNodeFlag) + storageNodeFlag := build.NewFlag("-storageNode", "") + storageNodeIds := cr.AvailableStorageNodeIDs(vmv1beta1.ClusterComponentInsert) + for idx, i := range storageNodeIds { + storageNodeFlag.Add(build.PodDNSAddress(cr.PrefixedName(vmv1beta1.ClusterComponentStorage), i, cr.Namespace, cr.Spec.Storage.Port, cr.Spec.ClusterDomainName), idx) } + totalNodes := len(storageNodeIds) + args = build.AppendFlagsToArgs(args, totalNodes, storageNodeFlag) if len(cr.Spec.Insert.ExtraEnvs) > 0 || len(cr.Spec.Insert.ExtraEnvsFrom) > 0 { args = append(args, "-envflag.enable=true") diff --git a/internal/controller/operator/factory/vtcluster/select.go b/internal/controller/operator/factory/vtcluster/select.go index 66374b132..1d926c1b3 100644 --- a/internal/controller/operator/factory/vtcluster/select.go +++ b/internal/controller/operator/factory/vtcluster/select.go @@ -189,6 +189,7 @@ func createOrUpdateVTSelectDeployment(ctx context.Context, rclient client.Client PatchSpec: func(existingSpec, newSpec *appsv1.DeploymentSpec) { if cr.Spec.Select.HPA != nil { newSpec.Replicas = existingSpec.Replicas + cr.Spec.Select.ReplicaCount = existingSpec.Replicas } }, } @@ -244,12 +245,9 @@ func buildVTSelectPodSpec(cr *vmv1.VTCluster) (*corev1.PodTemplateSpec, error) { } storageNodeFlag := build.NewFlag("-storageNode", "") - storageNodeIds := cr.AvailableStorageNodeIDs("select") - if cr.Spec.Storage != nil && cr.Spec.Storage.ReplicaCount != nil { - // TODO: check TLS - for idx, i := range storageNodeIds { - storageNodeFlag.Add(build.PodDNSAddress(cr.PrefixedName(vmv1beta1.ClusterComponentStorage), i, cr.Namespace, cr.Spec.Storage.Port, cr.Spec.ClusterDomainName), idx) - } + storageNodeIds := cr.AvailableStorageNodeIDs(vmv1beta1.ClusterComponentSelect) + for idx, i := range storageNodeIds { + storageNodeFlag.Add(build.PodDNSAddress(cr.PrefixedName(vmv1beta1.ClusterComponentStorage), i, cr.Namespace, cr.Spec.Storage.Port, cr.Spec.ClusterDomainName), idx) } if len(cr.Spec.Select.ExtraStorageNodes) > 0 { for i, node := range cr.Spec.Select.ExtraStorageNodes { diff --git a/internal/controller/operator/factory/vtcluster/storage.go b/internal/controller/operator/factory/vtcluster/storage.go index 9222d7a3e..594a551af 100644 --- a/internal/controller/operator/factory/vtcluster/storage.go +++ b/internal/controller/operator/factory/vtcluster/storage.go @@ -169,6 +169,7 @@ func createOrUpdateVTStorageSTS(ctx context.Context, rclient client.Client, cr, PatchSpec: func(existingSpec, newSpec *appsv1.StatefulSetSpec) { if cr.Spec.Storage.HPA != nil { newSpec.Replicas = existingSpec.Replicas + cr.Spec.Storage.ReplicaCount = existingSpec.Replicas } }, } From 28310d3ed14c1c43fa82b4eb85286524a5e4d517 Mon Sep 17 00:00:00 2001 From: Andrii Chubatiuk Date: Tue, 12 May 2026 10:22:39 +0300 Subject: [PATCH 05/31] chore: switch env repo back to origin (#2130) --- go.mod | 2 -- go.sum | 4 ++-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/go.mod b/go.mod index 210400d00..c5ee4f80e 100644 --- a/go.mod +++ b/go.mod @@ -125,5 +125,3 @@ require ( ) replace github.com/VictoriaMetrics/operator/api => ./api - -replace github.com/caarlos0/env/v11 => github.com/AndrewChubatiuk/env/v11 v11.0.0-20260302065400-14d0354881b6 diff --git a/go.sum b/go.sum index e87561340..1ed8676aa 100644 --- a/go.sum +++ b/go.sum @@ -1,5 +1,3 @@ -github.com/AndrewChubatiuk/env/v11 v11.0.0-20260302065400-14d0354881b6 h1:5CPOPjp7co7TgffUQ/jOVlw6IX8uHXDHt0W85Mwd7Zw= -github.com/AndrewChubatiuk/env/v11 v11.0.0-20260302065400-14d0354881b6/go.mod h1:qupehSf/Y0TUTsxKywqRt/vJjN5nz6vauiYEUUr8P4U= github.com/Masterminds/semver/v3 v3.4.0 h1:Zog+i5UMtVoCU8oKka5P7i9q9HgrJeGzI9SA1Xbatp0= github.com/Masterminds/semver/v3 v3.4.0/go.mod h1:4V+yj/TJE1HU9XfppCwVMZq3I84lprf4nC11bSS5beM= github.com/VictoriaMetrics/VictoriaLogs v1.36.2-0.20251008164716-21c0fb3de84d h1:fV15mhBCGpCCBbuOAbOflO8Air+tLklMt8bG35FimzQ= @@ -44,6 +42,8 @@ github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= github.com/bmatcuk/doublestar/v4 v4.9.1 h1:X8jg9rRZmJd4yRy7ZeNDRnM+T3ZfHv15JiBJ/avrEXE= github.com/bmatcuk/doublestar/v4 v4.9.1/go.mod h1:xBQ8jztBU6kakFMg+8WGxn0c6z1fTSPVIjEY1Wr7jzc= +github.com/caarlos0/env/v11 v11.3.1 h1:cArPWC15hWmEt+gWk7YBi7lEXTXCvpaSdCiZE2X5mCA= +github.com/caarlos0/env/v11 v11.3.1/go.mod h1:qupehSf/Y0TUTsxKywqRt/vJjN5nz6vauiYEUUr8P4U= github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= From d326c3325c422ab2dfb91f26f93defccc9b8a5e5 Mon Sep 17 00:00:00 2001 From: Vadim Rutkovsky Date: Tue, 12 May 2026 12:16:19 +0200 Subject: [PATCH 06/31] tests: add more reconcile status change e2e tests (#2140) * tests: add more reconcile status change e2e tests * fix: nitpick in status update logic --- internal/controller/operator/controllers.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/internal/controller/operator/controllers.go b/internal/controller/operator/controllers.go index c0ec26c7a..d5c8bb01d 100644 --- a/internal/controller/operator/controllers.go +++ b/internal/controller/operator/controllers.go @@ -358,8 +358,6 @@ func reconcileAndTrackStatus[T client.Object, ST reconcile.StatusWithMetadata[ST resultStatus = vmv1beta1.UpdateStatusFailed if reconcile.IsRetryable(err) { resultStatus = vmv1beta1.UpdateStatusExpanding - } else { - resultStatus = vmv1beta1.UpdateStatusFailed } resultErr = err return From 1c9d37c18d6a8720556968620c8760360b66fb10 Mon Sep 17 00:00:00 2001 From: Vadim Rutkovsky Date: Tue, 12 May 2026 22:04:06 +0200 Subject: [PATCH 07/31] chore: skip building allure-report when ./allure-results didn't get created --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index ef3832589..d2a0cd282 100644 --- a/Makefile +++ b/Makefile @@ -458,7 +458,7 @@ $(MIRRORD_BIN): $(LOCALBIN) .PHONY: allure-report allure-report: - npx allure awesome --single-file ./allure-results -o ./allure-report + @[ -d ./allure-results ] && npx allure awesome --single-file ./allure-results -o ./allure-report || echo "allure-results dir not found, skipping report generation" # go-install-tool will 'go install' any package with custom target and name of binary, if it doesn't exist # $1 - target path with name of binary (ideally with version) From a95e254ba740e0030b9d25586f940e72daac9a28 Mon Sep 17 00:00:00 2001 From: Rudransh Shrivastava Date: Thu, 14 May 2026 16:12:31 +0530 Subject: [PATCH 08/31] pin actions to full-length commit SHAs (#2168) Signed-off-by: Rudransh Shrivastava --- .github/workflows/crds.yaml | 8 ++++---- .github/workflows/docs.yaml | 8 ++++---- .github/workflows/main.yaml | 14 +++++++------- .github/workflows/operatorhub.yaml | 8 ++++---- .github/workflows/release.yaml | 8 ++++---- .github/workflows/sandbox.yaml | 8 ++++---- .github/workflows/upgrade-tests.yaml | 10 +++++----- 7 files changed, 32 insertions(+), 32 deletions(-) diff --git a/.github/workflows/crds.yaml b/.github/workflows/crds.yaml index 376cfcf0a..83ac09ac6 100644 --- a/.github/workflows/crds.yaml +++ b/.github/workflows/crds.yaml @@ -16,14 +16,14 @@ jobs: name: docs steps: - name: Check out operator code - uses: actions/checkout@v5 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: repository: VictoriaMetrics/operator token: ${{ secrets.VM_BOT_GH_TOKEN }} path: __vm-operator-repo - name: Check out VM code - uses: actions/checkout@v5 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: repository: VictoriaMetrics/helm-charts ref: master @@ -31,7 +31,7 @@ jobs: path: __vm-charts-repo - name: Import GPG key - uses: crazy-max/ghaction-import-gpg@v6 + uses: crazy-max/ghaction-import-gpg@2dc316deee8e90f13e1a351ab510b4d5bc0c82cd # v7.0.0 id: import-gpg with: gpg_private_key: ${{ secrets.VM_BOT_GPG_PRIVATE_KEY }} @@ -52,7 +52,7 @@ jobs: working-directory: __vm-charts-repo - name: Create Pull Request - uses: peter-evans/create-pull-request@v7 + uses: peter-evans/create-pull-request@c0f553fe549906ede9cf27b5156039d195d2ece0 # v8.1.0 with: add-paths: charts commit-message: Automatic update operator crds from ${{ github.repository }}@${{ steps.update.outputs.SHORT_SHA }} diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml index 728003172..a47c2a063 100644 --- a/.github/workflows/docs.yaml +++ b/.github/workflows/docs.yaml @@ -20,12 +20,12 @@ jobs: url: https://docs.victoriametrics.com/operator steps: - name: Checkout operator repo - uses: actions/checkout@v5 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: path: __vm-operator - name: Checkout docs repo - uses: actions/checkout@v5 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: repository: VictoriaMetrics/vmdocs ref: main @@ -33,7 +33,7 @@ jobs: path: __vm-docs - name: Setup Go - uses: actions/setup-go@v6 + uses: actions/setup-go@4b73464bb391d4059bd26b0524d20df3927bd417 # v6.3.0 with: go-version-file: '__vm-operator/go.mod' check-latest: true @@ -42,7 +42,7 @@ jobs: - name: Import GPG key id: import-gpg - uses: crazy-max/ghaction-import-gpg@v6 + uses: crazy-max/ghaction-import-gpg@2dc316deee8e90f13e1a351ab510b4d5bc0c82cd # v7.0.0 with: gpg_private_key: ${{ secrets.VM_BOT_GPG_PRIVATE_KEY }} passphrase: ${{ secrets.VM_BOT_PASSPHRASE }} diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index 70bac6e19..2cc4e4ad9 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -60,14 +60,14 @@ jobs: SAVED=$((AFTER-BEFORE)) echo "Saved $(formatByteCount $SAVED)" - name: Checkout code - uses: actions/checkout@v5 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - name: Prepare binary cache - uses: actions/cache@v4 + uses: actions/cache@668228422ae6a00e4ad889ee87cd7109ec5666a7 # v5.0.4 with: path: ./bin key: binary - name: Setup Go - uses: actions/setup-go@v6 + uses: actions/setup-go@4b73464bb391d4059bd26b0524d20df3927bd417 # v6.3.0 with: go-version-file: "go.mod" check-latest: true @@ -75,7 +75,7 @@ jobs: id: go - name: Run Trivy vulnerability scanner in repo mode - uses: aquasecurity/trivy-action@master + uses: aquasecurity/trivy-action@57a97c7e7821a5776cebc9bb87c984fa69cba8f1 # v0.35.0 with: scan-type: "fs" ignore-unfixed: true @@ -83,7 +83,7 @@ jobs: output: "trivy-results.sarif" - name: Upload Trivy scan results to GitHub Security tab - uses: github/codeql-action/upload-sarif@v4 + uses: github/codeql-action/upload-sarif@c6f931105cb2c34c8f901cc885ba1e2e259cf745 # v4.34.0 with: sarif_file: "trivy-results.sarif" @@ -119,7 +119,7 @@ jobs: git fetch origin ${{ github.base_ref || 'master' }} BASE_REF=origin/${{ github.base_ref || 'master' }} TAG=${TAG} make test-e2e - name: Publish Test Report - uses: mikepenz/action-junit-report@v6 + uses: mikepenz/action-junit-report@49b2ca06f62aa7ef83ae6769a2179271e160d8e4 # v6.3.1 if: success() || failure() with: report_paths: 'report.xml' @@ -128,7 +128,7 @@ jobs: run: make allure-report - name: Archive Allure report if: github.event.pull_request.draft == false && failure() - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0 with: name: allure-report path: ./allure-report diff --git a/.github/workflows/operatorhub.yaml b/.github/workflows/operatorhub.yaml index 37d310506..7e7c2c72b 100644 --- a/.github/workflows/operatorhub.yaml +++ b/.github/workflows/operatorhub.yaml @@ -35,7 +35,7 @@ jobs: GH_TOKEN: ${{ secrets.VM_BOT_GH_TOKEN }} - name: Check out OperatorHub operators repo fork - uses: actions/checkout@v5 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: repository: ${{ matrix.repo.upstream }} ref: main @@ -43,7 +43,7 @@ jobs: path: __operatorhub-repo - name: Import GPG key - uses: crazy-max/ghaction-import-gpg@v6 + uses: crazy-max/ghaction-import-gpg@2dc316deee8e90f13e1a351ab510b4d5bc0c82cd # v7.0.0 id: import-gpg with: gpg_private_key: ${{ secrets.VM_BOT_GPG_PRIVATE_KEY }} @@ -52,7 +52,7 @@ jobs: git_commit_gpgsign: true workdir: __operatorhub-repo - - uses: dawidd6/action-download-artifact@v11 + - uses: dawidd6/action-download-artifact@1f8785ff7a5130826f848e7f72725c85d241860f # v18 with: name: olm workflow: release.yaml @@ -104,7 +104,7 @@ jobs: - name: Create Pull Request if: ${{ steps.update.outputs.VERSION != '' }} - uses: peter-evans/create-pull-request@v7 + uses: peter-evans/create-pull-request@c0f553fe549906ede9cf27b5156039d195d2ece0 # v8.1.0 with: add-paths: operators/victoriametrics-operator commit-message: 'victoriametrics-operator: ${{ steps.update.outputs.VERSION }}' diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 7f27e0853..2848cc1bb 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -14,14 +14,14 @@ jobs: pages: write steps: - name: Checkout code - uses: actions/checkout@v5 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - name: Prepare binary cache - uses: actions/cache@v4 + uses: actions/cache@668228422ae6a00e4ad889ee87cd7109ec5666a7 # v5.0.4 with: path: ./bin key: binary - name: Setup Go - uses: actions/setup-go@v6 + uses: actions/setup-go@4b73464bb391d4059bd26b0524d20df3927bd417 # v6.3.0 with: go-version-file: 'go.mod' check-latest: true @@ -42,7 +42,7 @@ jobs: gh release upload ${{github.event.release.tag_name}} ./dist/install-no-webhook.yaml#install-no-webhook.yaml --clobber || echo "fix me NOT enough security permissions" gh release upload ${{github.event.release.tag_name}} ./dist/install-with-webhook.yaml#install-with-webhook.yaml --clobber || echo "fix me NOT enough security permissions" gh release upload ${{github.event.release.tag_name}} ./config/crd/overlay/crd.yaml#crd.yaml --clobber || echo "fix me NOT enough security permissions" - - uses: actions/upload-artifact@v5 + - uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0 with: name: olm path: bundle diff --git a/.github/workflows/sandbox.yaml b/.github/workflows/sandbox.yaml index 551e28313..5a4d772a8 100644 --- a/.github/workflows/sandbox.yaml +++ b/.github/workflows/sandbox.yaml @@ -25,7 +25,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout operator - uses: actions/checkout@v5 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: repository: VictoriaMetrics/operator ref: ${{ github.event.inputs.branch }} @@ -40,7 +40,7 @@ jobs: TAG=$IMAGE_TAG make docker-push - name: Checkout ops - uses: actions/checkout@v5 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: repository: VictoriaMetrics/ops ref: main @@ -48,7 +48,7 @@ jobs: path: __vm-ops-repo - name: Import GPG key - uses: crazy-max/ghaction-import-gpg@v6 + uses: crazy-max/ghaction-import-gpg@2dc316deee8e90f13e1a351ab510b4d5bc0c82cd # v7.0.0 id: import-gpg with: gpg_private_key: ${{ secrets.VM_BOT_GPG_PRIVATE_KEY }} @@ -66,7 +66,7 @@ jobs: working-directory: __vm-ops-repo - name: Create Pull Request - uses: peter-evans/create-pull-request@v7 + uses: peter-evans/create-pull-request@c0f553fe549906ede9cf27b5156039d195d2ece0 # v8.1.0 with: add-paths: ${{ steps.update.outputs.OPERATOR_PATH }} commit-message: Automatic update operator version on sandbox from ${{ github.repository }}@${{ env.IMAGE_TAG }} diff --git a/.github/workflows/upgrade-tests.yaml b/.github/workflows/upgrade-tests.yaml index 13795c1e3..02727a8fa 100644 --- a/.github/workflows/upgrade-tests.yaml +++ b/.github/workflows/upgrade-tests.yaml @@ -26,14 +26,14 @@ jobs: SAVED=$((AFTER-BEFORE)) echo "Saved $(formatByteCount $SAVED)" - name: Checkout code - uses: actions/checkout@v6 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - name: Prepare binary cache - uses: actions/cache@v5 + uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5 with: path: ./bin key: binary - name: Setup Go - uses: actions/setup-go@v6 + uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6.4.0 with: go-version-file: "go.mod" check-latest: true @@ -55,7 +55,7 @@ jobs: sudo apt install -y libgpgme-dev TAG=${TAG} make test-e2e-upgrade - name: Publish Test Report - uses: mikepenz/action-junit-report@v6 + uses: mikepenz/action-junit-report@bccf2e31636835cf0874589931c4116687171386 # v6.4.0 if: success() || failure() with: report_paths: 'report.xml' @@ -64,7 +64,7 @@ jobs: run: make allure-report - name: Archive Allure report if: failure() - uses: actions/upload-artifact@v7 + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 with: name: allure-report path: ./allure-report From 525fb3cfa572ed7ad1252a936dbfb5a1b716c697 Mon Sep 17 00:00:00 2001 From: Matt Van Horn Date: Thu, 14 May 2026 23:38:22 -0700 Subject: [PATCH 09/31] feat(vmrule): add patchMergeKey to Groups for kubectl/kustomize patches (#2169) --- api/operator/v1beta1/vmrule_types.go | 6 +++++- config/crd/overlay/crd.descriptionless.yaml | 3 +++ config/crd/overlay/crd.yaml | 3 +++ 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/api/operator/v1beta1/vmrule_types.go b/api/operator/v1beta1/vmrule_types.go index b292a4905..7a2d98750 100644 --- a/api/operator/v1beta1/vmrule_types.go +++ b/api/operator/v1beta1/vmrule_types.go @@ -26,7 +26,11 @@ var initVMAlertTemplatesOnce sync.Once // VMRuleSpec defines the desired state of VMRule type VMRuleSpec struct { // Groups list of group rules - Groups []RuleGroup `json:"groups"` + // +patchMergeKey=name + // +patchStrategy=merge + // +listType=map + // +listMapKey=name + Groups []RuleGroup `json:"groups" patchStrategy:"merge" patchMergeKey:"name"` } // RuleGroup is a list of sequentially evaluated recording and alerting rules. diff --git a/config/crd/overlay/crd.descriptionless.yaml b/config/crd/overlay/crd.descriptionless.yaml index bdf7e8d0d..e7d2a3a6b 100644 --- a/config/crd/overlay/crd.descriptionless.yaml +++ b/config/crd/overlay/crd.descriptionless.yaml @@ -31762,6 +31762,9 @@ spec: - rules type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map required: - groups type: object diff --git a/config/crd/overlay/crd.yaml b/config/crd/overlay/crd.yaml index 6af1e81e7..e098ad8f5 100644 --- a/config/crd/overlay/crd.yaml +++ b/config/crd/overlay/crd.yaml @@ -65239,6 +65239,9 @@ spec: - rules type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map required: - groups type: object From a6a89928df8b6ed1a0ccfc6916e4ea62ae75441d Mon Sep 17 00:00:00 2001 From: Vadim Rutkovsky Date: Fri, 15 May 2026 14:47:32 +0200 Subject: [PATCH 10/31] Halt emitting config-reloader signals when delay-interval is customized (#2172) * fix: emitting config-reloader signals when delay-interval is customized Each reload called a separate goroutine, instead we should use delay-interval * fix: rework to avoid using goto * Update cmd/config-reloader/main.go Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com> Signed-off-by: Andrii Chubatiuk --------- Signed-off-by: Andrii Chubatiuk Co-authored-by: Andrii Chubatiuk Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com> --- cmd/config-reloader/main.go | 52 ++++++----- cmd/config-reloader/main_test.go | 152 +++++++++++++++++++++++++++++++ 2 files changed, 183 insertions(+), 21 deletions(-) diff --git a/cmd/config-reloader/main.go b/cmd/config-reloader/main.go index b715c0d48..1f5822ee3 100644 --- a/cmd/config-reloader/main.go +++ b/cmd/config-reloader/main.go @@ -251,30 +251,21 @@ func (c *cfgWatcher) start(ctx context.Context) { for { select { case <-c.updates: - go func() { - if *delayInterval > 0 { - t := time.NewTimer(*delayInterval) - defer t.Stop() - select { - case <-t.C: - case <-ctx.Done(): - return - } - } - if err := c.reloader(ctx); err != nil { - logger.Errorf("cannot trigger api reload: %s", err.Error()) - configLastReloadSuccess.Set(0) - configReloadErrorsTotal.Inc() - return - } - configLastReloadSuccess.Set(1) - configLastOkReloadTime.Set(uint64(time.Now().UnixMilli())) - logger.Infof("reload config ok.") - }() - case <-ctx.Done(): return } + if !c.waitDelay(ctx) { + return + } + if err := c.reloader(ctx); err != nil { + logger.Errorf("cannot trigger api reload: %s", err.Error()) + configLastReloadSuccess.Set(0) + configReloadErrorsTotal.Inc() + continue + } + configLastReloadSuccess.Set(1) + configLastOkReloadTime.Set(uint64(time.Now().UnixMilli())) + logger.Infof("reload config ok.") } }() } @@ -283,6 +274,25 @@ func (c *cfgWatcher) close() { c.wg.Wait() } +func (c *cfgWatcher) waitDelay(ctx context.Context) bool { + if *delayInterval > 0 { + t := time.NewTimer(*delayInterval) + defer t.Stop() + select { + case <-t.C: + case <-ctx.Done(): + return false + } + } + for { + select { + case <-c.updates: + default: + return true + } + } +} + type watcher interface { load(ctx context.Context) error start(ctx context.Context, updates chan struct{}) diff --git a/cmd/config-reloader/main_test.go b/cmd/config-reloader/main_test.go index 8c0d8e0f0..05d1680e6 100644 --- a/cmd/config-reloader/main_test.go +++ b/cmd/config-reloader/main_test.go @@ -1,8 +1,11 @@ package main import ( + "context" "flag" + "sync/atomic" "testing" + "time" ) func TestLogFormatAlias(t *testing.T) { @@ -31,3 +34,152 @@ func TestLogFormatAlias(t *testing.T) { // log-format is empty f("", "json", "json") } + +// TestCfgWatcherSignalSentOnce verifies that a burst of updates results in +// exactly one reloader call (channel drained before reload). +func TestCfgWatcherSignalSentOnce(t *testing.T) { + origDelay := *delayInterval + *delayInterval = 0 + defer func() { *delayInterval = origDelay }() + + var reloadCount atomic.Int64 + updates := make(chan struct{}, 10) + w := cfgWatcher{ + updates: updates, + reloader: func(_ context.Context) error { + reloadCount.Add(1) + return nil + }, + } + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + w.start(ctx) + + for range 5 { + select { + case updates <- struct{}{}: + default: + } + } + + time.Sleep(100 * time.Millisecond) + cancel() + w.close() + + if got := reloadCount.Load(); got != 1 { + t.Fatalf("expected 1 reload call, got %d", got) + } +} + +// TestCfgWatcherDelayIntervalHonoured verifies that the reloader is not called +// before delayInterval elapses after an update signal. +func TestCfgWatcherDelayIntervalHonoured(t *testing.T) { + delay := 150 * time.Millisecond + origDelay := *delayInterval + *delayInterval = delay + defer func() { *delayInterval = origDelay }() + + var reloadCount atomic.Int64 + updates := make(chan struct{}, 10) + w := cfgWatcher{ + updates: updates, + reloader: func(_ context.Context) error { + reloadCount.Add(1) + return nil + }, + } + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + w.start(ctx) + + updates <- struct{}{} + + // before delay elapses - no reload yet + time.Sleep(50 * time.Millisecond) + if got := reloadCount.Load(); got != 0 { + t.Fatalf("expected 0 reload calls before delay, got %d", got) + } + + // after delay elapses - exactly one reload + time.Sleep(200 * time.Millisecond) + if got := reloadCount.Load(); got != 1 { + t.Fatalf("expected 1 reload call after delay, got %d", got) + } +} + +// TestCfgWatcherDelayIntervalDebouncesUpdates verifies that multiple updates +// arriving within the delay window are coalesced into a single reload call. +func TestCfgWatcherDelayIntervalDebouncesUpdates(t *testing.T) { + delay := 150 * time.Millisecond + origDelay := *delayInterval + *delayInterval = delay + defer func() { *delayInterval = origDelay }() + + var reloadCount atomic.Int64 + updates := make(chan struct{}, 10) + w := cfgWatcher{ + updates: updates, + reloader: func(_ context.Context) error { + reloadCount.Add(1) + return nil + }, + } + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + w.start(ctx) + + // send first signal then more signals before delay elapses + updates <- struct{}{} + time.Sleep(20 * time.Millisecond) + for range 4 { + select { + case updates <- struct{}{}: + default: + } + } + + // wait for delay + processing + time.Sleep(300 * time.Millisecond) + cancel() + w.close() + + if got := reloadCount.Load(); got != 1 { + t.Fatalf("expected 1 reload call for burst within delay window, got %d", got) + } +} + +// TestCfgWatcherDelayIntervalCancelledContext verifies that cancelling context +// during delay window prevents reloader from being called. +func TestCfgWatcherDelayIntervalCancelledContext(t *testing.T) { + delay := 500 * time.Millisecond + origDelay := *delayInterval + *delayInterval = delay + defer func() { *delayInterval = origDelay }() + + var reloadCount atomic.Int64 + updates := make(chan struct{}, 10) + w := cfgWatcher{ + updates: updates, + reloader: func(_ context.Context) error { + reloadCount.Add(1) + return nil + }, + } + + ctx, cancel := context.WithCancel(context.Background()) + w.start(ctx) + + updates <- struct{}{} + + // cancel before delay elapses + time.Sleep(50 * time.Millisecond) + cancel() + w.close() + + if got := reloadCount.Load(); got != 0 { + t.Fatalf("expected 0 reload calls after context cancel, got %d", got) + } +} From ddf80193675b0a916b72071fe761378c8ca90f8f Mon Sep 17 00:00:00 2001 From: Andrii Chubatiuk Date: Wed, 27 May 2026 11:44:28 +0300 Subject: [PATCH 11/31] reconcile: update statefulset status on success for OnDelete update strategy (#2207) --- docs/CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index f21cdafcf..34f9cc1c4 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -16,6 +16,7 @@ aliases: * BUGFIX: [vmoperator](https://docs.victoriametrics.com/operator/): update status currentRevision and currentReplicas for StatefulSet with OnDelete update strategy. See [#1242](https://github.com/VictoriaMetrics/operator/issues/1242). * BUGFIX: [vmoperator](https://docs.victoriametrics.com/operator/): retry reconcile errors, that may lead to expanding state, before resource could hang in expanding state. * BUGFIX: [vmcluster](https://docs.victoriametrics.com/operator/resources/vmcluster/), [vlcluster](https://docs.victoriametrics.com/operator/resources/vlcluster/) and [vtcluster](https://docs.victoriametrics.com/operator/resources/vtcluster/): when storage HPA was enabled, generated `-storageNode` flags could become incorrect after scaling, which could break expected routing to storage nodes; now the operator derives storage node count from the current StatefulSet state so generated flags stay correct during HPA-driven scaling. See [#2117](https://github.com/VictoriaMetrics/operator/issues/2117). +* BUGFIX: [vmoperator](https://docs.victoriametrics.com/operator/): update status currentRevision and currentReplicas for StatefulSet with OnDelete update strategy. See [#1242](https://github.com/VictoriaMetrics/operator/issues/1242). ## [v0.68.5](https://github.com/VictoriaMetrics/operator/releases/tag/v0.68.5) **Release date:** 27 May 2026 From 6664cd32d41c5f5e4fe6abc962ab5a62779d599a Mon Sep 17 00:00:00 2001 From: Immanuel Tikhonov <122638311+immanuwell@users.noreply.github.com> Date: Fri, 29 May 2026 22:35:31 +0400 Subject: [PATCH 12/31] fix: use seconds for config-reloader success timestamp metric (#2228) Signed-off-by: immanuwell --- cmd/config-reloader/main.go | 2 +- cmd/config-reloader/main_test.go | 36 ++++++++++++++++++++++++++++++++ docs/CHANGELOG.md | 1 + 3 files changed, 38 insertions(+), 1 deletion(-) diff --git a/cmd/config-reloader/main.go b/cmd/config-reloader/main.go index 1f5822ee3..6b5ad5d0f 100644 --- a/cmd/config-reloader/main.go +++ b/cmd/config-reloader/main.go @@ -264,7 +264,7 @@ func (c *cfgWatcher) start(ctx context.Context) { continue } configLastReloadSuccess.Set(1) - configLastOkReloadTime.Set(uint64(time.Now().UnixMilli())) + configLastOkReloadTime.Set(uint64(time.Now().Unix())) logger.Infof("reload config ok.") } }() diff --git a/cmd/config-reloader/main_test.go b/cmd/config-reloader/main_test.go index 05d1680e6..b873dd0bf 100644 --- a/cmd/config-reloader/main_test.go +++ b/cmd/config-reloader/main_test.go @@ -183,3 +183,39 @@ func TestCfgWatcherDelayIntervalCancelledContext(t *testing.T) { t.Fatalf("expected 0 reload calls after context cancel, got %d", got) } } + +func TestCfgWatcherSuccessTimestampUsesSeconds(t *testing.T) { + origDelay := *delayInterval + *delayInterval = 0 + defer func() { *delayInterval = origDelay }() + + configLastOkReloadTime.Set(0) + configLastReloadSuccess.Set(0) + + updates := make(chan struct{}, 1) + w := cfgWatcher{ + updates: updates, + reloader: func(_ context.Context) error { + return nil + }, + } + + ctx, cancel := context.WithCancel(context.Background()) + w.start(ctx) + start := time.Now().Unix() + + updates <- struct{}{} + time.Sleep(100 * time.Millisecond) + cancel() + w.close() + + got := int64(configLastOkReloadTime.Get()) + end := time.Now().Unix() + + if got < start || got > end { + t.Fatalf("expected success timestamp in unix seconds between %d and %d, got %d", start, end, got) + } + if configLastReloadSuccess.Get() != 1 { + t.Fatalf("expected reload success metric to be 1, got %d", configLastReloadSuccess.Get()) + } +} diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 34f9cc1c4..b88ee790c 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -17,6 +17,7 @@ aliases: * BUGFIX: [vmoperator](https://docs.victoriametrics.com/operator/): retry reconcile errors, that may lead to expanding state, before resource could hang in expanding state. * BUGFIX: [vmcluster](https://docs.victoriametrics.com/operator/resources/vmcluster/), [vlcluster](https://docs.victoriametrics.com/operator/resources/vlcluster/) and [vtcluster](https://docs.victoriametrics.com/operator/resources/vtcluster/): when storage HPA was enabled, generated `-storageNode` flags could become incorrect after scaling, which could break expected routing to storage nodes; now the operator derives storage node count from the current StatefulSet state so generated flags stay correct during HPA-driven scaling. See [#2117](https://github.com/VictoriaMetrics/operator/issues/2117). * BUGFIX: [vmoperator](https://docs.victoriametrics.com/operator/): update status currentRevision and currentReplicas for StatefulSet with OnDelete update strategy. See [#1242](https://github.com/VictoriaMetrics/operator/issues/1242). +* BUGFIX: [config-reloader](https://docs.victoriametrics.com/operator/): fix `configreloader_last_reload_success_timestamp_seconds` metric to report time in seconds instead of milliseconds. ## [v0.68.5](https://github.com/VictoriaMetrics/operator/releases/tag/v0.68.5) **Release date:** 27 May 2026 From eb866c3b7151d1e8a431d291ff00b15a20dc1164 Mon Sep 17 00:00:00 2001 From: Andrii Chubatiuk Date: Tue, 2 Jun 2026 11:46:55 +0300 Subject: [PATCH 13/31] ignore not found errors during attempt to update resource status (#2240) --- docs/CHANGELOG.md | 1 + internal/controller/operator/factory/reconcile/status.go | 7 +++++++ 2 files changed, 8 insertions(+) diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index b88ee790c..50d9be34f 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -18,6 +18,7 @@ aliases: * BUGFIX: [vmcluster](https://docs.victoriametrics.com/operator/resources/vmcluster/), [vlcluster](https://docs.victoriametrics.com/operator/resources/vlcluster/) and [vtcluster](https://docs.victoriametrics.com/operator/resources/vtcluster/): when storage HPA was enabled, generated `-storageNode` flags could become incorrect after scaling, which could break expected routing to storage nodes; now the operator derives storage node count from the current StatefulSet state so generated flags stay correct during HPA-driven scaling. See [#2117](https://github.com/VictoriaMetrics/operator/issues/2117). * BUGFIX: [vmoperator](https://docs.victoriametrics.com/operator/): update status currentRevision and currentReplicas for StatefulSet with OnDelete update strategy. See [#1242](https://github.com/VictoriaMetrics/operator/issues/1242). * BUGFIX: [config-reloader](https://docs.victoriametrics.com/operator/): fix `configreloader_last_reload_success_timestamp_seconds` metric to report time in seconds instead of milliseconds. +* BUGFIX: [vmoperator](https://docs.victoriametrics.com/operator/): ignore `NotFound` errors, that may occur during attempt to update status on a missing resource. ## [v0.68.5](https://github.com/VictoriaMetrics/operator/releases/tag/v0.68.5) **Release date:** 27 May 2026 diff --git a/internal/controller/operator/factory/reconcile/status.go b/internal/controller/operator/factory/reconcile/status.go index d269b96f8..32aa6e300 100644 --- a/internal/controller/operator/factory/reconcile/status.go +++ b/internal/controller/operator/factory/reconcile/status.go @@ -11,6 +11,7 @@ import ( "time" "k8s.io/apimachinery/pkg/api/equality" + k8serrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" utilerrors "k8s.io/apimachinery/pkg/util/errors" @@ -87,6 +88,9 @@ func updateChildStatusConditions[T any, PT interface { return retryOnConflict(func() error { dst := PT(new(T)) if err := rclient.Get(ctx, nsn, dst); err != nil { + if k8serrors.IsNotFound(err) { + return nil + } return err } st := dst.GetStatusMetadata() @@ -98,6 +102,9 @@ func updateChildStatusConditions[T any, PT interface { writeAggregatedStatus(st, vmv1beta1.ConditionDomainTypeAppliedSuffix) if !reflect.DeepEqual(prevSt, st) { if err := rclient.Status().Update(ctx, dst); err != nil { + if k8serrors.IsNotFound(err) { + return nil + } return fmt.Errorf("failed to patch status of broken VMAlertmanagerConfig=%q: %w", childObject.GetName(), err) } } From ea9f1e63e24ed6f846b1240c40817bfa31f90751 Mon Sep 17 00:00:00 2001 From: Max Kotliar Date: Mon, 8 Jun 2026 15:23:44 +0300 Subject: [PATCH 14/31] internal/config: update VM_METRICS_VERSION to v1.145.0 (#2260) Changelog [v1.145.0](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/docs/victoriametrics/changelog/CHANGELOG.md#v11450) Signed-off-by: Max Kotliar --- docs/CHANGELOG.md | 2 ++ docs/env.md | 2 +- internal/config/config.go | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 50d9be34f..5e9e04356 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -13,6 +13,8 @@ aliases: ## tip +* Dependency: [vmoperator](https://docs.victoriametrics.com/operator/): Updated default versions for VM apps to [v1.145.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.145.0) version + * BUGFIX: [vmoperator](https://docs.victoriametrics.com/operator/): update status currentRevision and currentReplicas for StatefulSet with OnDelete update strategy. See [#1242](https://github.com/VictoriaMetrics/operator/issues/1242). * BUGFIX: [vmoperator](https://docs.victoriametrics.com/operator/): retry reconcile errors, that may lead to expanding state, before resource could hang in expanding state. * BUGFIX: [vmcluster](https://docs.victoriametrics.com/operator/resources/vmcluster/), [vlcluster](https://docs.victoriametrics.com/operator/resources/vlcluster/) and [vtcluster](https://docs.victoriametrics.com/operator/resources/vtcluster/): when storage HPA was enabled, generated `-storageNode` flags could become incorrect after scaling, which could break expected routing to storage nodes; now the operator derives storage node count from the current StatefulSet state so generated flags stay correct during HPA-driven scaling. See [#2117](https://github.com/VictoriaMetrics/operator/issues/2117). diff --git a/docs/env.md b/docs/env.md index bc6c9d297..2091a9719 100644 --- a/docs/env.md +++ b/docs/env.md @@ -1,6 +1,6 @@ | Environment variables | | --- | -| VM_METRICS_VERSION: `v1.144.0` # | +| VM_METRICS_VERSION: `v1.145.0` # | | VM_LOGS_VERSION: `v1.50.0` # | | VM_ANOMALY_VERSION: `v1.29.3` # | | VM_TRACES_VERSION: `v0.7.0` # | diff --git a/internal/config/config.go b/internal/config/config.go index 2405fff36..40816f9d0 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -35,7 +35,7 @@ var ( initConf sync.Once defaultEnvs = map[string]string{ - "VM_METRICS_VERSION": "v1.144.0", + "VM_METRICS_VERSION": "v1.145.0", "VM_LOGS_VERSION": "v1.50.0", "VM_ANOMALY_VERSION": "v1.29.3", "VM_TRACES_VERSION": "v0.7.0", From ed623050cd1b499352e21225c50a4ab86c78253e Mon Sep 17 00:00:00 2001 From: Zakhar Bessarab Date: Tue, 9 Jun 2026 21:28:35 +0400 Subject: [PATCH 15/31] vmanomaly: fix marshalling of verify_tls (#2266) * vmanomaly: fix marshalling of verify_tls Sync behaviour of "verify_tls" with vmanomaly itself: accept 3 values: true / false / path to CA to be used for verification. * vmanomaly: add tests for boolean TLS insecure skip verify --- docs/CHANGELOG.md | 1 + .../factory/vmanomaly/config/config.go | 22 +- .../factory/vmanomaly/config/config_test.go | 306 +++++++++++++++++- 3 files changed, 322 insertions(+), 7 deletions(-) diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 5e9e04356..7ba841df2 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -21,6 +21,7 @@ aliases: * BUGFIX: [vmoperator](https://docs.victoriametrics.com/operator/): update status currentRevision and currentReplicas for StatefulSet with OnDelete update strategy. See [#1242](https://github.com/VictoriaMetrics/operator/issues/1242). * BUGFIX: [config-reloader](https://docs.victoriametrics.com/operator/): fix `configreloader_last_reload_success_timestamp_seconds` metric to report time in seconds instead of milliseconds. * BUGFIX: [vmoperator](https://docs.victoriametrics.com/operator/): ignore `NotFound` errors, that may occur during attempt to update status on a missing resource. +* BUGFIX: [vmanomaly](https://docs.victoriametrics.com/operator/resources/vmanomaly/): pass the configured TLS CA bundle to the reader, writer and monitoring clients. Previously the CA was mounted as a volume but dropped during config generation, so a `tlsConfig` with only a CA produced no `verify_tls` reference to it; `insecureSkipVerify` is now also propagated correctly. ## [v0.68.5](https://github.com/VictoriaMetrics/operator/releases/tag/v0.68.5) **Release date:** 27 May 2026 diff --git a/internal/controller/operator/factory/vmanomaly/config/config.go b/internal/controller/operator/factory/vmanomaly/config/config.go index 7f9e88d60..f85d2383b 100644 --- a/internal/controller/operator/factory/vmanomaly/config/config.go +++ b/internal/controller/operator/factory/vmanomaly/config/config.go @@ -285,9 +285,15 @@ type clientConfig struct { Password string `yaml:"password,omitempty"` BearerToken string `yaml:"bearer_token,omitempty"` BearerTokenFile string `yaml:"bearer_token_file,omitempty"` - VerifyTLS bool `yaml:"verify_tls,omitempty"` - TLSCertFile string `yaml:"tls_cert_file,omitempty"` - TLSKeyFile string `yaml:"tls_key_file,omitempty"` + // VerifyTLS mirrors vmanomaly's `verify_tls` option, which is overloaded: + // false disables verification, true uses the system CA store and a string + // is treated as a path to the CA bundle to verify against. + // See: + // https://docs.victoriametrics.com/anomaly-detection/components/writer/#config-parameters + // https://docs.victoriametrics.com/anomaly-detection/components/reader/#config-parameters + VerifyTLS any `yaml:"verify_tls,omitempty"` + TLSCertFile string `yaml:"tls_cert_file,omitempty"` + TLSKeyFile string `yaml:"tls_key_file,omitempty"` } func (c *clientConfig) override(cr *vmv1.VMAnomaly, cfg *vmv1.VMAnomalyHTTPClientSpec, ac *build.AssetsCache) error { @@ -298,7 +304,15 @@ func (c *clientConfig) override(cr *vmv1.VMAnomaly, cfg *vmv1.VMAnomalyHTTPClien } c.TLSCertFile = creds.CertFile c.TLSKeyFile = creds.KeyFile - c.VerifyTLS = !cfg.TLSConfig.InsecureSkipVerify + switch { + case cfg.TLSConfig.InsecureSkipVerify: + c.VerifyTLS = false + case creds.CAFile != "": + // vmanomaly expects the CA bundle path to be passed via `verify_tls`. + c.VerifyTLS = creds.CAFile + default: + c.VerifyTLS = true + } } if cfg.BasicAuth != nil { creds, err := ac.BuildBasicAuthCreds(cr.Namespace, cfg.BasicAuth) diff --git a/internal/controller/operator/factory/vmanomaly/config/config_test.go b/internal/controller/operator/factory/vmanomaly/config/config_test.go index 366009e58..d3157ca58 100644 --- a/internal/controller/operator/factory/vmanomaly/config/config_test.go +++ b/internal/controller/operator/factory/vmanomaly/config/config_test.go @@ -279,7 +279,7 @@ reader: - "0" - inf tenant_id: "0:1" - verify_tls: true + verify_tls: /test/monitoring_tls_remote-ca tls_cert_file: /test/monitoring_tls_remote-cert tls_key_file: /test/monitoring_tls_remote-key writer: @@ -291,7 +291,7 @@ writer: label1: value1 label2: value2 tenant_id: "0:2" - verify_tls: true + verify_tls: /test/monitoring_tls_remote-ca tls_cert_file: /test/monitoring_tls_remote-cert tls_key_file: /test/monitoring_tls_remote-key monitoring: @@ -300,7 +300,7 @@ monitoring: push: url: http://monitoring tenant_id: "0:3" - verify_tls: true + verify_tls: /test/monitoring_tls_remote-ca tls_cert_file: /test/monitoring_tls_remote-cert tls_key_file: /test/monitoring_tls_remote-key push_frequency: 20s @@ -308,6 +308,306 @@ monitoring: label1: value1 settings: restore_state: true +server: + port: "8490" +`, + }) + + // TLS without a CA bundle and InsecureSkipVerify=false => verify_tls: true + f(opts{ + cr: &vmv1.VMAnomaly{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-anomaly", + Namespace: "monitoring", + }, + Spec: vmv1.VMAnomalySpec{ + License: &vmv1beta1.License{ + Key: ptr.To("test"), + }, + ConfigRawYaml: ` +models: + model_zscore: + class: 'zscore' + z_threshold: 2.5 + queries: ['test_query'] +schedulers: + scheduler_1m: + class: "scheduler.periodic.PeriodicScheduler" + infer_every: 1m + fit_every: 2m + fit_window: 3h +reader: + queries: + test_query: + expr: vm_metric +writer: + datasource_url: "http://test.com" +`, + Reader: &vmv1.VMAnomalyReadersSpec{ + DatasourceURL: "http://reader.test", + SamplingPeriod: "30s", + VMAnomalyHTTPClientSpec: vmv1.VMAnomalyHTTPClientSpec{ + TLSConfig: &vmv1beta1.TLSConfig{ + Cert: vmv1beta1.SecretOrConfigMap{ + Secret: &corev1.SecretKeySelector{ + LocalObjectReference: corev1.LocalObjectReference{Name: "tls"}, + Key: "cert", + }, + }, + KeySecret: &corev1.SecretKeySelector{ + LocalObjectReference: corev1.LocalObjectReference{Name: "tls"}, + Key: "key", + }, + }, + }, + }, + Writer: &vmv1.VMAnomalyWritersSpec{ + DatasourceURL: "http://writer.test", + }, + }, + }, + predefinedObjects: []runtime.Object{ + &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: "tls", + Namespace: "monitoring", + }, + Data: map[string][]byte{ + "cert": []byte("cert"), + "key": []byte("key"), + }, + }, + }, + expected: ` +models: + model_zscore: + class: zscore + queries: + - test_query + z_threshold: 2.5 +schedulers: + scheduler_1m: + class: scheduler.periodic.PeriodicScheduler + fit_every: 2m + fit_window: 3h + infer_every: 1m +reader: + class: vm + datasource_url: http://reader.test + sampling_period: 30s + queries: + test_query: + expr: vm_metric + verify_tls: true + tls_cert_file: /test/monitoring_tls_cert + tls_key_file: /test/monitoring_tls_key +writer: + class: vm + datasource_url: http://writer.test +monitoring: + pull: + port: "8080" +server: + port: "8490" +`, + }) + + // InsecureSkipVerify=true takes precedence over a provided CA => verify_tls: false + f(opts{ + cr: &vmv1.VMAnomaly{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-anomaly", + Namespace: "monitoring", + }, + Spec: vmv1.VMAnomalySpec{ + License: &vmv1beta1.License{ + Key: ptr.To("test"), + }, + ConfigRawYaml: ` +models: + model_zscore: + class: 'zscore' + z_threshold: 2.5 + queries: ['test_query'] +schedulers: + scheduler_1m: + class: "scheduler.periodic.PeriodicScheduler" + infer_every: 1m + fit_every: 2m + fit_window: 3h +reader: + queries: + test_query: + expr: vm_metric +writer: + datasource_url: "http://test.com" +`, + Reader: &vmv1.VMAnomalyReadersSpec{ + DatasourceURL: "http://reader.test", + SamplingPeriod: "30s", + VMAnomalyHTTPClientSpec: vmv1.VMAnomalyHTTPClientSpec{ + TLSConfig: &vmv1beta1.TLSConfig{ + InsecureSkipVerify: true, + CA: vmv1beta1.SecretOrConfigMap{ + Secret: &corev1.SecretKeySelector{ + LocalObjectReference: corev1.LocalObjectReference{Name: "tls"}, + Key: "ca", + }, + }, + Cert: vmv1beta1.SecretOrConfigMap{ + Secret: &corev1.SecretKeySelector{ + LocalObjectReference: corev1.LocalObjectReference{Name: "tls"}, + Key: "cert", + }, + }, + KeySecret: &corev1.SecretKeySelector{ + LocalObjectReference: corev1.LocalObjectReference{Name: "tls"}, + Key: "key", + }, + }, + }, + }, + Writer: &vmv1.VMAnomalyWritersSpec{ + DatasourceURL: "http://writer.test", + }, + }, + }, + predefinedObjects: []runtime.Object{ + &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: "tls", + Namespace: "monitoring", + }, + Data: map[string][]byte{ + "ca": []byte("ca"), + "cert": []byte("cert"), + "key": []byte("key"), + }, + }, + }, + expected: ` +models: + model_zscore: + class: zscore + queries: + - test_query + z_threshold: 2.5 +schedulers: + scheduler_1m: + class: scheduler.periodic.PeriodicScheduler + fit_every: 2m + fit_window: 3h + infer_every: 1m +reader: + class: vm + datasource_url: http://reader.test + sampling_period: 30s + queries: + test_query: + expr: vm_metric + verify_tls: false + tls_cert_file: /test/monitoring_tls_cert + tls_key_file: /test/monitoring_tls_key +writer: + class: vm + datasource_url: http://writer.test +monitoring: + pull: + port: "8080" +server: + port: "8490" +`, + }) + + // with settings including retention + f(opts{ + cr: &vmv1.VMAnomaly{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-anomaly", + Namespace: "monitoring", + }, + Spec: vmv1.VMAnomalySpec{ + License: &vmv1beta1.License{ + Key: ptr.To("test"), + }, + ConfigRawYaml: ` +models: + model_zscore: + class: 'zscore' + z_threshold: 2.5 + queries: ['test_query'] +schedulers: + scheduler_backtesting: + class: "backtesting" + fit_window: 3h + fit_every: 1h + from_s: 1000 + to_s: 2000 + exact: true + infer_every: 5m +reader: + queries: + test_query: + expr: vm_metric +writer: + datasource_url: "http://test.com" +settings: + restore_state: true + retention: + ttl: 24h + check_interval: 30m + logger_levels: + root: DEBUG +`, + Reader: &vmv1.VMAnomalyReadersSpec{ + DatasourceURL: "http://reader.test", + SamplingPeriod: "30s", + }, + Writer: &vmv1.VMAnomalyWritersSpec{ + DatasourceURL: "http://writer.test", + }, + }, + }, + expected: ` +models: + model_zscore: + class: zscore + queries: + - test_query + z_threshold: 2.5 +schedulers: + scheduler_backtesting: + class: backtesting + fit_window: 3h + from_iso: 0001-01-01T00:00:00Z + from_s: 1000 + to_iso: 0001-01-01T00:00:00Z + to_s: 2000 + fit_every: 1h + exact: true + infer_every: 5m +reader: + class: vm + datasource_url: http://reader.test + sampling_period: 30s + queries: + test_query: + expr: vm_metric +writer: + class: vm + datasource_url: http://writer.test +monitoring: + pull: + port: "8080" +settings: + restore_state: true + retention: + ttl: 24h + check_interval: 30m + logger_levels: + root: DEBUG +server: + port: "8490" `, }) From 9cdedccaff321449bb7fb96b30da85cfe6f87c94 Mon Sep 17 00:00:00 2001 From: Immanuel Tikhonov <122638311+immanuwell@users.noreply.github.com> Date: Sat, 13 Jun 2026 00:10:37 +0400 Subject: [PATCH 16/31] fix: allow watched files with '..' in their names (#2253) Thanks for contribution! --- cmd/config-reloader/file_watch.go | 9 ++- cmd/config-reloader/file_watch_test.go | 81 ++++++++++++++++++++++++++ docs/CHANGELOG.md | 1 + 3 files changed, 86 insertions(+), 5 deletions(-) create mode 100644 cmd/config-reloader/file_watch_test.go diff --git a/cmd/config-reloader/file_watch.go b/cmd/config-reloader/file_watch.go index 915ae1436..35e4577ce 100644 --- a/cmd/config-reloader/file_watch.go +++ b/cmd/config-reloader/file_watch.go @@ -153,11 +153,10 @@ func (dw *dirWatcher) start(ctx context.Context, updates chan struct{}) { } err = filepath.WalkDir(walkDir, func(path string, d fs.DirEntry, err error) error { - // hack for kubernetes configmaps and secrets. - // it uses ..YEAR_MONTH_DAY_HOUR.MIN.S directory for content updates - // and links it as a symlink - // just skip it, stat for the file will be evaluated with os.Stat below - if strings.Contains(path, "..") { + // Kubernetes projected volumes expose hidden ..* entries such as + // ..data and timestamped symlink targets. Skip only those synthetic + // path elements, not regular files like "rules..yaml". + if strings.HasPrefix(filepath.Base(path), "..") { return nil } diff --git a/cmd/config-reloader/file_watch_test.go b/cmd/config-reloader/file_watch_test.go new file mode 100644 index 000000000..7b9c1735f --- /dev/null +++ b/cmd/config-reloader/file_watch_test.go @@ -0,0 +1,81 @@ +package main + +import ( + "context" + "os" + "path/filepath" + "testing" + "time" +) + +func TestDirWatcherProcessesRegularFilesWithDoubleDotsInName(t *testing.T) { + dir := t.TempDir() + file := filepath.Join(dir, "rules..yaml") + if err := os.WriteFile(file, []byte("groups: []\n"), 0o644); err != nil { + t.Fatalf("failed to write initial file: %v", err) + } + + dw, err := newDirWatchers([]string{dir}) + if err != nil { + t.Fatalf("failed to create dir watcher: %v", err) + } + + updates := make(chan struct{}, 10) + ctx, cancel := context.WithCancel(context.Background()) + dw.start(ctx, updates) + defer dw.close() + defer cancel() + + if err := os.WriteFile(file, []byte("groups:\n- name: test\n"), 0o644); err != nil { + t.Fatalf("failed to update file: %v", err) + } + + select { + case <-updates: + case <-time.After(2 * time.Second): + t.Fatal("expected update after modifying a regular file containing '..' in its name") + } +} + +func TestDirWatcherSkipsKubernetesHiddenEntries(t *testing.T) { + dir := t.TempDir() + visibleFile := filepath.Join(dir, "rules.yaml") + if err := os.WriteFile(visibleFile, []byte("groups: []\n"), 0o644); err != nil { + t.Fatalf("failed to write visible file: %v", err) + } + hiddenFile := filepath.Join(dir, "..data") + if err := os.WriteFile(hiddenFile, []byte("v1\n"), 0o644); err != nil { + t.Fatalf("failed to write hidden file: %v", err) + } + + dw, err := newDirWatchers([]string{dir}) + if err != nil { + t.Fatalf("failed to create dir watcher: %v", err) + } + + updates := make(chan struct{}, 10) + ctx, cancel := context.WithCancel(context.Background()) + dw.start(ctx, updates) + defer dw.close() + defer cancel() + + if err := os.WriteFile(hiddenFile, []byte("v2\n"), 0o644); err != nil { + t.Fatalf("failed to update hidden file: %v", err) + } + + select { + case <-updates: + t.Fatal("did not expect update after modifying kubernetes hidden entry") + case <-time.After(300 * time.Millisecond): + } + + if err := os.WriteFile(visibleFile, []byte("groups:\n- name: test\n"), 0o644); err != nil { + t.Fatalf("failed to update visible file: %v", err) + } + + select { + case <-updates: + case <-time.After(2 * time.Second): + t.Fatal("expected update after modifying visible file") + } +} diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 7ba841df2..5dc9eec90 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -22,6 +22,7 @@ aliases: * BUGFIX: [config-reloader](https://docs.victoriametrics.com/operator/): fix `configreloader_last_reload_success_timestamp_seconds` metric to report time in seconds instead of milliseconds. * BUGFIX: [vmoperator](https://docs.victoriametrics.com/operator/): ignore `NotFound` errors, that may occur during attempt to update status on a missing resource. * BUGFIX: [vmanomaly](https://docs.victoriametrics.com/operator/resources/vmanomaly/): pass the configured TLS CA bundle to the reader, writer and monitoring clients. Previously the CA was mounted as a volume but dropped during config generation, so a `tlsConfig` with only a CA produced no `verify_tls` reference to it; `insecureSkipVerify` is now also propagated correctly. +* BUGFIX: [config-reloader](https://docs.victoriametrics.com/operator/): fix missed reload for watched files whose names contain `..` (e.g. `rules..yaml`). Previously any path containing `..` was silently skipped; now only Kubernetes synthetic entries whose basename starts with `..` (e.g. `..data`) are ignored. See [#2253](https://github.com/VictoriaMetrics/operator/pull/2253). ## [v0.68.5](https://github.com/VictoriaMetrics/operator/releases/tag/v0.68.5) **Release date:** 27 May 2026 From f708b62d8cd35f6e1ffae27b31b4f096e7952c50 Mon Sep 17 00:00:00 2001 From: Vadim Rutkovsky Date: Mon, 15 Jun 2026 11:58:55 +0200 Subject: [PATCH 17/31] Several fixes around config-reloader's blocking I/O (#2295) * fix: drop events when channel overflows to avoid blocking it * fix: don't block on sleep when ctx is cancelled * fix: add informer goroutine to the waitgroup * fix: don't bump contentUpdateErrorsTotal metric when request cancelled Also fixes a typo --- cmd/config-reloader/file_watch.go | 4 ++-- cmd/config-reloader/k8s_watch.go | 39 +++++++++++++++++++++++-------- cmd/config-reloader/main.go | 12 +++++----- 3 files changed, 37 insertions(+), 18 deletions(-) diff --git a/cmd/config-reloader/file_watch.go b/cmd/config-reloader/file_watch.go index 35e4577ce..4e7ea85ae 100644 --- a/cmd/config-reloader/file_watch.go +++ b/cmd/config-reloader/file_watch.go @@ -87,7 +87,7 @@ func (fw *fileWatcher) start(ctx context.Context, updates chan struct{}) { case <-t.C: if err := update(*configFileName); err != nil { logger.Errorf("cannot update file at force resync :%s", err) - contentUpdateErrosTotal.Inc() + contentUpdateErrorsTotal.Inc() continue } case event := <-fw.w.Events: @@ -98,7 +98,7 @@ func (fw *fileWatcher) start(ctx context.Context, updates chan struct{}) { logger.Infof("changed: %s, %s", event.Name, event.Op.String()) if err := update(*configFileName); err != nil { logger.Errorf("cannot update file :%s", err) - contentUpdateErrosTotal.Inc() + contentUpdateErrorsTotal.Inc() continue } } diff --git a/cmd/config-reloader/k8s_watch.go b/cmd/config-reloader/k8s_watch.go index ea4b5f3e4..1dfb159f1 100644 --- a/cmd/config-reloader/k8s_watch.go +++ b/cmd/config-reloader/k8s_watch.go @@ -73,15 +73,27 @@ func newKubernetesWatcher(ctx context.Context, secretName, namespace string) (*k if _, err := inf.AddEventHandler(cache.ResourceEventHandlerFuncs{ AddFunc: func(obj interface{}) { s := obj.(*corev1.Secret) - syncChan <- syncEvent{op: "create", obj: s} + select { + case syncChan <- syncEvent{op: "create", obj: s}: + default: + logger.Infof("syncChan full, dropping create event for secret: %s", s.Name) + } }, UpdateFunc: func(oldObj, newObj interface{}) { s := newObj.(*corev1.Secret) - syncChan <- syncEvent{op: "update", obj: s} + select { + case syncChan <- syncEvent{op: "update", obj: s}: + default: + logger.Infof("syncChan full, dropping update event for secret: %s", s.Name) + } }, DeleteFunc: func(obj interface{}) { s := obj.(*corev1.Secret) - syncChan <- syncEvent{op: "delete", obj: s} + select { + case syncChan <- syncEvent{op: "delete", obj: s}: + default: + logger.Infof("syncChan full, dropping delete event for secret: %s", s.Name) + } }, }); err != nil { return nil, fmt.Errorf("cannot build eventHandler: %w", err) @@ -124,11 +136,14 @@ func (k *k8sWatcher) start(ctx context.Context, updates chan struct{}) { return fmt.Errorf("cannot write file content to disk: %w", err) } prevContent = newData - time.Sleep(time.Second) + select { + case <-time.After(time.Second): + case <-ctx.Done(): + return ctx.Err() + } select { case updates <- struct{}{}: default: - } return nil } @@ -141,7 +156,11 @@ func (k *k8sWatcher) start(ctx context.Context, updates chan struct{}) { logger.Errorf("cannot update secret: %s", err) } - go k.inf.Run(ctx.Done()) + k.wg.Add(1) + go func() { + defer k.wg.Done() + k.inf.Run(ctx.Done()) + }() k.wg.Add(1) go func() { defer k.wg.Done() @@ -154,10 +173,10 @@ func (k *k8sWatcher) start(ctx context.Context, updates chan struct{}) { select { case <-t.C: if err := updateSecret(&lastSecret); err != nil { - if errors.Is(err, errNotModified) { + if errors.Is(err, errNotModified) || errors.Is(err, context.Canceled) { continue } - contentUpdateErrosTotal.Inc() + contentUpdateErrorsTotal.Inc() logger.Errorf("cannot force sync secret content: %s", err) } case item := <-k.events: @@ -166,10 +185,10 @@ func (k *k8sWatcher) start(ctx context.Context, updates chan struct{}) { logger.Infof("get k8s sync event type: %s, for secret: %s", item.op, item.obj.Name) if err := updateSecret(s); err != nil { - if errors.Is(err, errNotModified) { + if errors.Is(err, errNotModified) || errors.Is(err, context.Canceled) { continue } - contentUpdateErrosTotal.Inc() + contentUpdateErrorsTotal.Inc() logger.Errorf("cannot sync secret content: %s", err) } case <-ctx.Done(): diff --git a/cmd/config-reloader/main.go b/cmd/config-reloader/main.go index 6b5ad5d0f..83f08e099 100644 --- a/cmd/config-reloader/main.go +++ b/cmd/config-reloader/main.go @@ -73,12 +73,12 @@ var ( ) var ( - configLastOkReloadTime = metrics.NewCounter(`configreloader_last_reload_success_timestamp_seconds`) - configLastReloadSuccess = metrics.NewCounter(`configreloader_last_reload_successful`) - configReloadErrorsTotal = metrics.NewCounter(`configreloader_last_reload_errors_total`) - configReloadsTotal = metrics.NewCounter(`configreloader_config_last_reload_total`) - k8sAPIWatchErrorsTotal = metrics.NewCounter(`configreloader_k8s_watch_errors_total`) - contentUpdateErrosTotal = metrics.NewCounter(`configreloader_secret_content_update_errors_total`) + configLastOkReloadTime = metrics.NewCounter(`configreloader_last_reload_success_timestamp_seconds`) + configLastReloadSuccess = metrics.NewCounter(`configreloader_last_reload_successful`) + configReloadErrorsTotal = metrics.NewCounter(`configreloader_last_reload_errors_total`) + configReloadsTotal = metrics.NewCounter(`configreloader_config_last_reload_total`) + k8sAPIWatchErrorsTotal = metrics.NewCounter(`configreloader_k8s_watch_errors_total`) + contentUpdateErrorsTotal = metrics.NewCounter(`configreloader_secret_content_update_errors_total`) ) func main() { From c852f013327a8cd0e22cffd3615fd053f08a27f4 Mon Sep 17 00:00:00 2001 From: Vadim Rutkovsky Date: Mon, 15 Jun 2026 14:23:41 +0200 Subject: [PATCH 18/31] victoriametrics_app=true metric label for all resources (#2296) * feat: ensure that all operator-controlled resources have victoriametrics_app=true metric label * docs: update 0.71.0 release details * Apply suggestion from @AndrewChubatiuk Signed-off-by: Andrii Chubatiuk --------- Signed-off-by: Andrii Chubatiuk Co-authored-by: Andrii Chubatiuk --- docs/CHANGELOG.md | 2 + .../operator/factory/build/vmscrape.go | 30 +++- .../operator/factory/build/vmscrape_test.go | 133 +++++++++++++++++- .../operator/factory/vmagent/nodescrape.go | 1 - 4 files changed, 159 insertions(+), 7 deletions(-) diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 5dc9eec90..e335100bf 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -15,6 +15,8 @@ aliases: * Dependency: [vmoperator](https://docs.victoriametrics.com/operator/): Updated default versions for VM apps to [v1.145.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.145.0) version +* FEATURE: [vmoperator](https://docs.victoriametrics.com/operator/): add `victoriametrics_app=true` label to all metrics scraped by the operator. See [#2261](https://github.com/VictoriaMetrics/operator/issues/2261). + * BUGFIX: [vmoperator](https://docs.victoriametrics.com/operator/): update status currentRevision and currentReplicas for StatefulSet with OnDelete update strategy. See [#1242](https://github.com/VictoriaMetrics/operator/issues/1242). * BUGFIX: [vmoperator](https://docs.victoriametrics.com/operator/): retry reconcile errors, that may lead to expanding state, before resource could hang in expanding state. * BUGFIX: [vmcluster](https://docs.victoriametrics.com/operator/resources/vmcluster/), [vlcluster](https://docs.victoriametrics.com/operator/resources/vlcluster/) and [vtcluster](https://docs.victoriametrics.com/operator/resources/vtcluster/): when storage HPA was enabled, generated `-storageNode` flags could become incorrect after scaling, which could break expected routing to storage nodes; now the operator derives storage node count from the current StatefulSet state so generated flags stay correct during HPA-driven scaling. See [#2117](https://github.com/VictoriaMetrics/operator/issues/2117). diff --git a/internal/controller/operator/factory/build/vmscrape.go b/internal/controller/operator/factory/build/vmscrape.go index b9ef3aac7..b24066ea7 100644 --- a/internal/controller/operator/factory/build/vmscrape.go +++ b/internal/controller/operator/factory/build/vmscrape.go @@ -127,6 +127,9 @@ func VMServiceScrape(service *corev1.Service, b scrapeBuilder, additionalPortNam }, } } + for i := range scrape.Spec.Endpoints { + addVictoriaMetricsAppRelabelConfig(&scrape.Spec.Endpoints[i].EndpointRelabelings) + } return scrape } @@ -155,7 +158,6 @@ func VMPodScrape(b podScrapeBuilder, portName string) *vmv1beta1.VMPodScrape { "authKey": {authKey}, } } - selectorLabels := b.SelectorLabels() scrape := &vmv1beta1.VMPodScrape{ ObjectMeta: metav1.ObjectMeta{ @@ -174,10 +176,9 @@ func VMPodScrape(b podScrapeBuilder, portName string) *vmv1beta1.VMPodScrape { serviceScrapeSpec := b.GetServiceScrape() if serviceScrapeSpec != nil { for _, e := range serviceScrapeSpec.Endpoints { - if e.Port == *endpoint.Port { - endpoint.EndpointAuth = e.EndpointAuth - endpoint.EndpointScrapeParams = e.EndpointScrapeParams - endpoint.EndpointRelabelings = e.EndpointRelabelings + if e.Port == *scrape.Spec.PodMetricsEndpoints[0].Port { + scrape.Spec.PodMetricsEndpoints[0].EndpointScrapeParams = e.EndpointScrapeParams + scrape.Spec.PodMetricsEndpoints[0].EndpointRelabelings = e.EndpointRelabelings continue } scrape.Spec.PodMetricsEndpoints = append(scrape.Spec.PodMetricsEndpoints, vmv1beta1.PodMetricsEndpoint{ @@ -192,5 +193,24 @@ func VMPodScrape(b podScrapeBuilder, portName string) *vmv1beta1.VMPodScrape { scrape.Spec.SeriesLimit = serviceScrapeSpec.SeriesLimit scrape.Spec.AttachMetadata = serviceScrapeSpec.AttachMetadata } + for i := range scrape.Spec.PodMetricsEndpoints { + addVictoriaMetricsAppRelabelConfig(&scrape.Spec.PodMetricsEndpoints[i].EndpointRelabelings) + } return scrape } + +func addVictoriaMetricsAppRelabelConfig(relabelings *vmv1beta1.EndpointRelabelings) { + for _, rc := range relabelings.RelabelConfigs { + if rc != nil && (rc.TargetLabel == "victoriametrics_app" || rc.UnderScoreTargetLabel == "victoriametrics_app") { + return + } + } + relabelings.RelabelConfigs = append(relabelings.RelabelConfigs, victoriaMetricsAppRelabelConfig()) +} + +func victoriaMetricsAppRelabelConfig() *vmv1beta1.RelabelConfig { + return &vmv1beta1.RelabelConfig{ + TargetLabel: "victoriametrics_app", + Replacement: ptr.To("true"), + } +} diff --git a/internal/controller/operator/factory/build/vmscrape_test.go b/internal/controller/operator/factory/build/vmscrape_test.go index 092f8777c..3e77f1603 100644 --- a/internal/controller/operator/factory/build/vmscrape_test.go +++ b/internal/controller/operator/factory/build/vmscrape_test.go @@ -8,6 +8,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/utils/ptr" + vmv1 "github.com/VictoriaMetrics/operator/api/operator/v1" vmv1beta1 "github.com/VictoriaMetrics/operator/api/operator/v1beta1" ) @@ -32,7 +33,24 @@ func (tb *testScrapeObject) GetExtraArgs() map[string]string { return tb.extraArgs } +func (tb *testScrapeObject) GetNamespace() string { + return "default" +} + +func (tb *testScrapeObject) PrefixedName() string { + return "test" +} + +func (tb *testScrapeObject) SelectorLabels() map[string]string { + return map[string]string{"app": "test"} +} + +func (tb *testScrapeObject) AsOwner() metav1.OwnerReference { + return metav1.OwnerReference{Name: "test"} +} + func TestVMServiceScrapeForServiceWithSpec(t *testing.T) { + vmAppRelabel := []*vmv1beta1.RelabelConfig{victoriaMetricsAppRelabelConfig()} type opts struct { spec testScrapeObject service *corev1.Service @@ -67,6 +85,9 @@ func TestVMServiceScrapeForServiceWithSpec(t *testing.T) { }, wantServiceScrapeSpec: vmv1beta1.VMServiceScrapeSpec{ Endpoints: []vmv1beta1.Endpoint{{ + EndpointRelabelings: vmv1beta1.EndpointRelabelings{ + RelabelConfigs: vmAppRelabel, + }, EndpointScrapeParams: vmv1beta1.EndpointScrapeParams{ Path: "/metrics", }, @@ -101,6 +122,9 @@ func TestVMServiceScrapeForServiceWithSpec(t *testing.T) { spec: testScrapeObject{}, wantServiceScrapeSpec: vmv1beta1.VMServiceScrapeSpec{ Endpoints: []vmv1beta1.Endpoint{{ + EndpointRelabelings: vmv1beta1.EndpointRelabelings{ + RelabelConfigs: vmAppRelabel, + }, EndpointScrapeParams: vmv1beta1.EndpointScrapeParams{ Path: "/metrics", }, @@ -137,6 +161,9 @@ func TestVMServiceScrapeForServiceWithSpec(t *testing.T) { wantServiceScrapeSpec: vmv1beta1.VMServiceScrapeSpec{ Endpoints: []vmv1beta1.Endpoint{ { + EndpointRelabelings: vmv1beta1.EndpointRelabelings{ + RelabelConfigs: vmAppRelabel, + }, EndpointScrapeParams: vmv1beta1.EndpointScrapeParams{ Path: "/metrics", }, @@ -153,7 +180,7 @@ func TestVMServiceScrapeForServiceWithSpec(t *testing.T) { TargetLabel: "job", Regex: vmv1beta1.StringOrArray{"(.+)"}, Replacement: ptr.To("${1}-vmbackup"), - }}, + }, victoriaMetricsAppRelabelConfig()}, }, }, }, @@ -188,6 +215,9 @@ func TestVMServiceScrapeForServiceWithSpec(t *testing.T) { }, wantServiceScrapeSpec: vmv1beta1.VMServiceScrapeSpec{ Endpoints: []vmv1beta1.Endpoint{{ + EndpointRelabelings: vmv1beta1.EndpointRelabelings{ + RelabelConfigs: vmAppRelabel, + }, EndpointScrapeParams: vmv1beta1.EndpointScrapeParams{ Path: "/metrics", }, @@ -245,12 +275,18 @@ func TestVMServiceScrapeForServiceWithSpec(t *testing.T) { wantServiceScrapeSpec: vmv1beta1.VMServiceScrapeSpec{ Endpoints: []vmv1beta1.Endpoint{ { + EndpointRelabelings: vmv1beta1.EndpointRelabelings{ + RelabelConfigs: vmAppRelabel, + }, EndpointScrapeParams: vmv1beta1.EndpointScrapeParams{ Path: "/metrics", }, Port: "sidecar", }, { + EndpointRelabelings: vmv1beta1.EndpointRelabelings{ + RelabelConfigs: vmAppRelabel, + }, EndpointScrapeParams: vmv1beta1.EndpointScrapeParams{ Path: "/metrics", ScrapeInterval: "30s", @@ -295,6 +331,9 @@ func TestVMServiceScrapeForServiceWithSpec(t *testing.T) { }, wantServiceScrapeSpec: vmv1beta1.VMServiceScrapeSpec{ Endpoints: []vmv1beta1.Endpoint{{ + EndpointRelabelings: vmv1beta1.EndpointRelabelings{ + RelabelConfigs: vmAppRelabel, + }, EndpointScrapeParams: vmv1beta1.EndpointScrapeParams{ Path: "/metrics", Params: map[string][]string{"authKey": {"some-access-key"}}, @@ -317,3 +356,95 @@ func TestVMServiceScrapeForServiceWithSpec(t *testing.T) { }, }) } + +func TestVMServiceScrapeAddsVictoriaMetricsAppLabel(t *testing.T) { + service := &corev1.Service{ + ObjectMeta: metav1.ObjectMeta{Name: "test"}, + Spec: corev1.ServiceSpec{Ports: []corev1.ServicePort{ + {Name: "http"}, + {Name: "extra"}, + }}, + } + spec := testScrapeObject{serviceScrapeSpecTemplate: &vmv1beta1.VMServiceScrapeSpec{ + Endpoints: []vmv1beta1.Endpoint{ + {Port: "http"}, + {Port: "custom"}, + }, + }} + + scrape := VMServiceScrape(service, &spec, "extra") + + assert.Len(t, scrape.Spec.Endpoints, 3) + for i := range scrape.Spec.Endpoints { + assert.Contains(t, scrape.Spec.Endpoints[i].RelabelConfigs, victoriaMetricsAppRelabelConfig()) + } + +} + +func TestVMPodScrapeAddsVictoriaMetricsAppLabel(t *testing.T) { + spec := testScrapeObject{serviceScrapeSpecTemplate: &vmv1beta1.VMServiceScrapeSpec{ + Endpoints: []vmv1beta1.Endpoint{ + { + Port: "http", + EndpointScrapeParams: vmv1beta1.EndpointScrapeParams{ + Path: "/custom", + }, + }, + {Port: "extra"}, + }, + }} + + podScrape := VMPodScrape(&spec, "http") + + assert.Len(t, podScrape.Spec.PodMetricsEndpoints, 2) + assert.Equal(t, "/custom", podScrape.Spec.PodMetricsEndpoints[0].Path) + for i := range podScrape.Spec.PodMetricsEndpoints { + assert.Contains(t, podScrape.Spec.PodMetricsEndpoints[i].RelabelConfigs, victoriaMetricsAppRelabelConfig()) + } +} + +func TestVMServiceScrapeObjectsAddVictoriaMetricsAppLabel(t *testing.T) { + objectMeta := metav1.ObjectMeta{Name: "test", Namespace: "default"} + + f := func(name string, builder scrapeBuilder) { + service := &corev1.Service{ + ObjectMeta: metav1.ObjectMeta{Name: name}, + Spec: corev1.ServiceSpec{Ports: []corev1.ServicePort{ + {Name: "http"}, + }}, + } + + scrape := VMServiceScrape(service, builder) + + assert.Len(t, scrape.Spec.Endpoints, 1) + assert.Contains(t, scrape.Spec.Endpoints[0].RelabelConfigs, victoriaMetricsAppRelabelConfig()) + } + f("VMSingle", &vmv1beta1.VMSingle{ObjectMeta: objectMeta}) + f("VMAlert", &vmv1beta1.VMAlert{ObjectMeta: objectMeta}) + f("VMAuth", &vmv1beta1.VMAuth{ObjectMeta: objectMeta}) + f("VMSelect", &vmv1beta1.VMSelect{}) + f("VMInsert", &vmv1beta1.VMInsert{}) + f("VMStorage", &vmv1beta1.VMStorage{}) + f("VLSingle", &vmv1.VLSingle{ObjectMeta: objectMeta}) + f("VLSelect", &vmv1.VLSelect{}) + f("VLInsert", &vmv1.VLInsert{}) + f("VLStorage", &vmv1.VLStorage{}) + f("VTSingle", &vmv1.VTSingle{ObjectMeta: objectMeta}) + f("VTSelect", &vmv1.VTSelect{}) + f("VTInsert", &vmv1.VTInsert{}) + f("VTStorage", &vmv1.VTStorage{}) +} + +func TestVMPodScrapeObjectsAddVictoriaMetricsAppLabel(t *testing.T) { + objectMeta := metav1.ObjectMeta{Name: "test", Namespace: "default"} + + f := func(builder podScrapeBuilder, port string) { + scrape := VMPodScrape(builder, port) + + assert.Len(t, scrape.Spec.PodMetricsEndpoints, 1) + assert.Contains(t, scrape.Spec.PodMetricsEndpoints[0].RelabelConfigs, victoriaMetricsAppRelabelConfig()) + } + f(&vmv1beta1.VMAgent{ObjectMeta: objectMeta}, "http") + f(&vmv1.VLAgent{ObjectMeta: objectMeta}, "http") + f(&vmv1.VMAnomaly{ObjectMeta: objectMeta}, "monitoring-http") +} diff --git a/internal/controller/operator/factory/vmagent/nodescrape.go b/internal/controller/operator/factory/vmagent/nodescrape.go index f73444e5b..6cdf5c690 100644 --- a/internal/controller/operator/factory/vmagent/nodescrape.go +++ b/internal/controller/operator/factory/vmagent/nodescrape.go @@ -105,7 +105,6 @@ func generateNodeScrapeConfig( for _, trc := range sp.NodeScrapeRelabelTemplate { relabelings = append(relabelings, generateRelabelConfig(trc)) } - // Because of security risks, whenever enforcedNamespaceLabel is set, we want to append it to the // relabel_configs as the last relabeling, to ensure it overrides any other relabelings. relabelings = enforceNamespaceLabel(relabelings, sc.Namespace, se.EnforcedNamespaceLabel) From eabbd6efdb39f844a60e9bdfa19e948908f5720d Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Wed, 17 Jun 2026 07:32:36 +0200 Subject: [PATCH 19/31] Update VictoriaLogs Docker image tag from v1.50.0 to v1.51.0 (#2301) See https://github.com/VictoriaMetrics/VictoriaLogs/releases/tag/v1.51.0 --- docs/CHANGELOG.md | 1 + docs/env.md | 14 +++++++------- docs/resources/vlagent.md | 4 ++-- internal/config/config.go | 2 +- .../operator/factory/vlagent/vlagent_test.go | 4 ++-- 5 files changed, 13 insertions(+), 12 deletions(-) diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index e335100bf..21ada87dd 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -14,6 +14,7 @@ aliases: ## tip * Dependency: [vmoperator](https://docs.victoriametrics.com/operator/): Updated default versions for VM apps to [v1.145.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.145.0) version +* Dependency: [vmoperator](https://docs.victoriametrics.com/operator/): Updated default versions for VL apps to [v1.51.0](https://github.com/VictoriaMetrics/VictoriaLogs/releases/tag/v1.51.0). * FEATURE: [vmoperator](https://docs.victoriametrics.com/operator/): add `victoriametrics_app=true` label to all metrics scraped by the operator. See [#2261](https://github.com/VictoriaMetrics/operator/issues/2261). diff --git a/docs/env.md b/docs/env.md index 2091a9719..ccb4ccdf1 100644 --- a/docs/env.md +++ b/docs/env.md @@ -1,12 +1,12 @@ | Environment variables | | --- | -| VM_METRICS_VERSION: `v1.145.0` # | -| VM_LOGS_VERSION: `v1.50.0` # | -| VM_ANOMALY_VERSION: `v1.29.3` # | -| VM_TRACES_VERSION: `v0.7.0` # | -| VM_OPERATOR_VERSION: `v0.68.3` # | -| VM_GATEWAY_API_ENABLED: `false` # | -| VM_VPA_API_ENABLED: `false` # | +| VM_METRICS_VERSION: `v1.145.0` #
Defines default image version for VictoriaMetrics components: VMSingle, VMCluster (vmselect/vminsert/vmstorage), VMAgent, VMAlert, VMAuth, VMBackup. Used as the image tag when no explicit version is set in the CR spec. | +| VM_LOGS_VERSION: `v1.51.0` #
Defines default image version for VictoriaLogs components: VLogs, VLAgent, VLSingle, VLCluster (vlselect/vlinsert/vlstorage). Used as the image tag when no explicit version is set in the CR spec. | +| VM_ANOMALY_VERSION: `v1.29.3` #
Defines default image version for VMAnomaly. Used as the image tag when no explicit version is set in the CR spec. | +| VM_TRACES_VERSION: `v0.7.0` #
Defines default image version for VictoriaTraces components: VTSingle, VTCluster (vtselect/vtinsert/vtstorage). Used as the image tag when no explicit version is set in the CR spec. | +| VM_OPERATOR_VERSION: `v0.72.0` #
Defines the operator's own version. Used for config-reloader image tag interpolation. | +| VM_GATEWAY_API_ENABLED: `false` #
Enables support for Kubernetes Gateway API. When enabled, operator manages HTTPRoute resources for VMAuth ingress configuration. | +| VM_VPA_API_ENABLED: `false` #
Enables support for VerticalPodAutoscaler API. When enabled, operator can create and manage VPA objects for VM components. | | WATCH_NAMESPACE: `-` #
Defines a list of namespaces to be watched by operator. Operator don't perform any cluster wide API calls if namespaces not empty. In case of empty list it performs only clusterwide api calls. | | VM_CONTAINERREGISTRY: `-` #
container registry name prefix, e.g. docker.io | | VM_CUSTOMCONFIGRELOADERIMAGE: `-` #
Deprecated: use VM_CONFIG_RELOADER_IMAGE instead | diff --git a/docs/resources/vlagent.md b/docs/resources/vlagent.md index 2fb681a98..675de34f4 100644 --- a/docs/resources/vlagent.md +++ b/docs/resources/vlagent.md @@ -77,7 +77,7 @@ metadata: spec: image: repository: victoriametrics/vlagent - tag: v1.47.0 + tag: v1.51.0 pullPolicy: Always ``` @@ -91,7 +91,7 @@ metadata: spec: image: repository: victoriametrics/vlagent - tag: v1.47.0 + tag: v1.51.0 pullPolicy: Always imagePullSecrets: - name: my-repo-secret diff --git a/internal/config/config.go b/internal/config/config.go index 40816f9d0..bae3042e2 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -36,7 +36,7 @@ var ( defaultEnvs = map[string]string{ "VM_METRICS_VERSION": "v1.145.0", - "VM_LOGS_VERSION": "v1.50.0", + "VM_LOGS_VERSION": "v1.51.0", "VM_ANOMALY_VERSION": "v1.29.3", "VM_TRACES_VERSION": "v0.7.0", "VM_OPERATOR_VERSION": getVersion("v0.68.3"), diff --git a/internal/controller/operator/factory/vlagent/vlagent_test.go b/internal/controller/operator/factory/vlagent/vlagent_test.go index 2bacd3917..b9bffbb23 100644 --- a/internal/controller/operator/factory/vlagent/vlagent_test.go +++ b/internal/controller/operator/factory/vlagent/vlagent_test.go @@ -949,7 +949,7 @@ serviceaccountname: vlagent-agent Spec: vmv1.VLAgentSpec{ CommonAppsParams: vmv1beta1.CommonAppsParams{ Image: vmv1beta1.Image{ - Tag: "v1.48.0", + Tag: "v1.51.0", }, UseDefaultResources: ptr.To(false), Port: "9425", @@ -975,7 +975,7 @@ serviceaccountname: vlagent-agent }, []runtime.Object{}, ` containers: - name: vlagent - image: victoriametrics/vlagent:v1.48.0 + image: victoriametrics/vlagent:v1.51.0 args: - -httpListenAddr=:9425 - -kubernetesCollector From 6089fd76948c1146fb2bf60ebb71e683a400c887 Mon Sep 17 00:00:00 2001 From: Immanuel Tikhonov <122638311+immanuwell@users.noreply.github.com> Date: Thu, 18 Jun 2026 12:49:51 +0400 Subject: [PATCH 20/31] fix: handle config-reloader dir watch removals (#2306) --- cmd/config-reloader/file_watch.go | 3 -- cmd/config-reloader/file_watch_test.go | 68 ++++++++++++++++++++++++++ 2 files changed, 68 insertions(+), 3 deletions(-) diff --git a/cmd/config-reloader/file_watch.go b/cmd/config-reloader/file_watch.go index 4e7ea85ae..0074c2606 100644 --- a/cmd/config-reloader/file_watch.go +++ b/cmd/config-reloader/file_watch.go @@ -221,9 +221,6 @@ func (dw *dirWatcher) start(ctx context.Context, updates chan struct{}) { case <-ctx.Done(): return case event := <-dw.w.Events: - if event.Op == fsnotify.Remove { - continue - } baseDir := filepath.Dir(event.Name) logger.Infof("dir update: base dir: %s", baseDir) reloadNeeded, err := updateCache(baseDir) diff --git a/cmd/config-reloader/file_watch_test.go b/cmd/config-reloader/file_watch_test.go index 7b9c1735f..02ddd60ad 100644 --- a/cmd/config-reloader/file_watch_test.go +++ b/cmd/config-reloader/file_watch_test.go @@ -79,3 +79,71 @@ func TestDirWatcherSkipsKubernetesHiddenEntries(t *testing.T) { t.Fatal("expected update after modifying visible file") } } + +func TestDirWatcherTriggersUpdateWhenFileRemoved(t *testing.T) { + dir := t.TempDir() + file := filepath.Join(dir, "rules.yaml") + if err := os.WriteFile(file, []byte("groups: []\n"), 0o644); err != nil { + t.Fatalf("failed to write initial file: %v", err) + } + + dw, err := newDirWatchers([]string{dir}, nil) + if err != nil { + t.Fatalf("failed to create dir watcher: %v", err) + } + + updates := make(chan struct{}, 10) + ctx, cancel := context.WithCancel(context.Background()) + dw.start(ctx, updates) + defer dw.close() + defer cancel() + + if err := os.Remove(file); err != nil { + t.Fatalf("failed to remove watched file: %v", err) + } + + select { + case <-updates: + case <-time.After(2 * time.Second): + t.Fatal("expected update after removing watched file") + } +} + +func TestDirWatcherSyncRemovesDeletedFilesFromTargetDir(t *testing.T) { + srcDir := t.TempDir() + targetDir := t.TempDir() + file := filepath.Join(srcDir, "rules.yaml") + targetFile := filepath.Join(targetDir, "rules.yaml") + if err := os.WriteFile(file, []byte("groups: []\n"), 0o644); err != nil { + t.Fatalf("failed to write initial source file: %v", err) + } + + dw, err := newDirWatchers([]string{srcDir}, []string{targetDir}) + if err != nil { + t.Fatalf("failed to create dir watcher: %v", err) + } + + updates := make(chan struct{}, 10) + ctx, cancel := context.WithCancel(context.Background()) + dw.start(ctx, updates) + defer dw.close() + defer cancel() + + if _, err := os.Stat(targetFile); err != nil { + t.Fatalf("expected file to exist in target dir after initial sync: %v", err) + } + + if err := os.Remove(file); err != nil { + t.Fatalf("failed to remove source file: %v", err) + } + + select { + case <-updates: + case <-time.After(2 * time.Second): + t.Fatal("expected update after removing source file") + } + + if _, err := os.Stat(targetFile); !os.IsNotExist(err) { + t.Fatalf("expected target file to be removed after source deletion, got err=%v", err) + } +} From 28d1c8e2f3425eabd5d138e620d1bc5ccc4f97d6 Mon Sep 17 00:00:00 2001 From: Zhu Jiekun Date: Thu, 18 Jun 2026 11:28:31 +0800 Subject: [PATCH 21/31] dependency: update VictoriaTraces version to v0.9.3 (#2304) --- docs/CHANGELOG.md | 1 + docs/env.md | 2 +- internal/config/config.go | 4 ++-- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 21ada87dd..7f04f17ba 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -15,6 +15,7 @@ aliases: * Dependency: [vmoperator](https://docs.victoriametrics.com/operator/): Updated default versions for VM apps to [v1.145.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.145.0) version * Dependency: [vmoperator](https://docs.victoriametrics.com/operator/): Updated default versions for VL apps to [v1.51.0](https://github.com/VictoriaMetrics/VictoriaLogs/releases/tag/v1.51.0). +* Dependency: [vmoperator](https://docs.victoriametrics.com/operator/): Updated default versions for VT apps to [v0.9.3](https://github.com/VictoriaMetrics/VictoriaTraces/releases/tag/v0.9.3) version. * FEATURE: [vmoperator](https://docs.victoriametrics.com/operator/): add `victoriametrics_app=true` label to all metrics scraped by the operator. See [#2261](https://github.com/VictoriaMetrics/operator/issues/2261). diff --git a/docs/env.md b/docs/env.md index ccb4ccdf1..810698195 100644 --- a/docs/env.md +++ b/docs/env.md @@ -3,7 +3,7 @@ | VM_METRICS_VERSION: `v1.145.0` #
Defines default image version for VictoriaMetrics components: VMSingle, VMCluster (vmselect/vminsert/vmstorage), VMAgent, VMAlert, VMAuth, VMBackup. Used as the image tag when no explicit version is set in the CR spec. | | VM_LOGS_VERSION: `v1.51.0` #
Defines default image version for VictoriaLogs components: VLogs, VLAgent, VLSingle, VLCluster (vlselect/vlinsert/vlstorage). Used as the image tag when no explicit version is set in the CR spec. | | VM_ANOMALY_VERSION: `v1.29.3` #
Defines default image version for VMAnomaly. Used as the image tag when no explicit version is set in the CR spec. | -| VM_TRACES_VERSION: `v0.7.0` #
Defines default image version for VictoriaTraces components: VTSingle, VTCluster (vtselect/vtinsert/vtstorage). Used as the image tag when no explicit version is set in the CR spec. | +| VM_TRACES_VERSION: `v0.9.3` #
Defines default image version for VictoriaTraces components: VTSingle, VTCluster (vtselect/vtinsert/vtstorage). Used as the image tag when no explicit version is set in the CR spec. | | VM_OPERATOR_VERSION: `v0.72.0` #
Defines the operator's own version. Used for config-reloader image tag interpolation. | | VM_GATEWAY_API_ENABLED: `false` #
Enables support for Kubernetes Gateway API. When enabled, operator manages HTTPRoute resources for VMAuth ingress configuration. | | VM_VPA_API_ENABLED: `false` #
Enables support for VerticalPodAutoscaler API. When enabled, operator can create and manage VPA objects for VM components. | diff --git a/internal/config/config.go b/internal/config/config.go index bae3042e2..b0efd79e4 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -37,8 +37,8 @@ var ( defaultEnvs = map[string]string{ "VM_METRICS_VERSION": "v1.145.0", "VM_LOGS_VERSION": "v1.51.0", - "VM_ANOMALY_VERSION": "v1.29.3", - "VM_TRACES_VERSION": "v0.7.0", + "VM_ANOMALY_VERSION": "v1.29.6", + "VM_TRACES_VERSION": "v0.9.3", "VM_OPERATOR_VERSION": getVersion("v0.68.3"), } ) From dc7b4f83d3dc27c59dddd42538d32d30bbbc6572 Mon Sep 17 00:00:00 2001 From: Immanuel Tikhonov <122638311+immanuwell@users.noreply.github.com> Date: Wed, 17 Jun 2026 23:06:33 +0400 Subject: [PATCH 22/31] fix: handle config-reloader secret watch tombstones (#2303) * fix: handle config-reloader secret watch tombstones Signed-off-by: immanuwell * added changelog --------- Signed-off-by: immanuwell Co-authored-by: Andrii Chubatiuk --- cmd/config-reloader/file_watch_test.go | 41 +--------------------- cmd/config-reloader/k8s_watch.go | 33 ++++++++++++++++-- cmd/config-reloader/k8s_watch_test.go | 48 ++++++++++++++++++++++++++ docs/CHANGELOG.md | 1 + docs/env.md | 14 ++++---- 5 files changed, 87 insertions(+), 50 deletions(-) create mode 100644 cmd/config-reloader/k8s_watch_test.go diff --git a/cmd/config-reloader/file_watch_test.go b/cmd/config-reloader/file_watch_test.go index 02ddd60ad..d38957558 100644 --- a/cmd/config-reloader/file_watch_test.go +++ b/cmd/config-reloader/file_watch_test.go @@ -87,7 +87,7 @@ func TestDirWatcherTriggersUpdateWhenFileRemoved(t *testing.T) { t.Fatalf("failed to write initial file: %v", err) } - dw, err := newDirWatchers([]string{dir}, nil) + dw, err := newDirWatchers([]string{dir}) if err != nil { t.Fatalf("failed to create dir watcher: %v", err) } @@ -108,42 +108,3 @@ func TestDirWatcherTriggersUpdateWhenFileRemoved(t *testing.T) { t.Fatal("expected update after removing watched file") } } - -func TestDirWatcherSyncRemovesDeletedFilesFromTargetDir(t *testing.T) { - srcDir := t.TempDir() - targetDir := t.TempDir() - file := filepath.Join(srcDir, "rules.yaml") - targetFile := filepath.Join(targetDir, "rules.yaml") - if err := os.WriteFile(file, []byte("groups: []\n"), 0o644); err != nil { - t.Fatalf("failed to write initial source file: %v", err) - } - - dw, err := newDirWatchers([]string{srcDir}, []string{targetDir}) - if err != nil { - t.Fatalf("failed to create dir watcher: %v", err) - } - - updates := make(chan struct{}, 10) - ctx, cancel := context.WithCancel(context.Background()) - dw.start(ctx, updates) - defer dw.close() - defer cancel() - - if _, err := os.Stat(targetFile); err != nil { - t.Fatalf("expected file to exist in target dir after initial sync: %v", err) - } - - if err := os.Remove(file); err != nil { - t.Fatalf("failed to remove source file: %v", err) - } - - select { - case <-updates: - case <-time.After(2 * time.Second): - t.Fatal("expected update after removing source file") - } - - if _, err := os.Stat(targetFile); !os.IsNotExist(err) { - t.Fatalf("expected target file to be removed after source deletion, got err=%v", err) - } -} diff --git a/cmd/config-reloader/k8s_watch.go b/cmd/config-reloader/k8s_watch.go index 1dfb159f1..fc5401ef7 100644 --- a/cmd/config-reloader/k8s_watch.go +++ b/cmd/config-reloader/k8s_watch.go @@ -72,7 +72,11 @@ func newKubernetesWatcher(ctx context.Context, secretName, namespace string) (*k syncChan := make(chan syncEvent, 10) if _, err := inf.AddEventHandler(cache.ResourceEventHandlerFuncs{ AddFunc: func(obj interface{}) { - s := obj.(*corev1.Secret) + s, ok := secretFromEvent(obj) + if !ok { + logger.Errorf("cannot process create event for unexpected object type %T", obj) + return + } select { case syncChan <- syncEvent{op: "create", obj: s}: default: @@ -80,7 +84,11 @@ func newKubernetesWatcher(ctx context.Context, secretName, namespace string) (*k } }, UpdateFunc: func(oldObj, newObj interface{}) { - s := newObj.(*corev1.Secret) + s, ok := secretFromEvent(newObj) + if !ok { + logger.Errorf("cannot process update event for unexpected object type %T", newObj) + return + } select { case syncChan <- syncEvent{op: "update", obj: s}: default: @@ -88,7 +96,11 @@ func newKubernetesWatcher(ctx context.Context, secretName, namespace string) (*k } }, DeleteFunc: func(obj interface{}) { - s := obj.(*corev1.Secret) + s, ok := secretFromEvent(obj) + if !ok { + logger.Errorf("cannot process delete event for unexpected object type %T", obj) + return + } select { case syncChan <- syncEvent{op: "delete", obj: s}: default: @@ -102,6 +114,21 @@ func newKubernetesWatcher(ctx context.Context, secretName, namespace string) (*k return &k8sWatcher{inf: inf, c: c, events: syncChan, namespace: namespace, secretName: secretName}, nil } +func secretFromEvent(obj interface{}) (*corev1.Secret, bool) { + switch s := obj.(type) { + case *corev1.Secret: + return s, true + case cache.DeletedFinalStateUnknown: + secret, ok := s.Obj.(*corev1.Secret) + return secret, ok + case *cache.DeletedFinalStateUnknown: + secret, ok := s.Obj.(*corev1.Secret) + return secret, ok + default: + return nil, false + } +} + var errNotModified = fmt.Errorf("file content not modified") func (k *k8sWatcher) load(ctx context.Context) error { diff --git a/cmd/config-reloader/k8s_watch_test.go b/cmd/config-reloader/k8s_watch_test.go new file mode 100644 index 000000000..f06a741e1 --- /dev/null +++ b/cmd/config-reloader/k8s_watch_test.go @@ -0,0 +1,48 @@ +package main + +import ( + "testing" + + corev1 "k8s.io/api/core/v1" + "k8s.io/client-go/tools/cache" +) + +func TestSecretFromEvent(t *testing.T) { + secret := &corev1.Secret{} + + t.Run("secret object", func(t *testing.T) { + got, ok := secretFromEvent(secret) + if !ok { + t.Fatal("expected secret object to be accepted") + } + if got != secret { + t.Fatal("expected original secret pointer to be returned") + } + }) + + t.Run("deleted final state unknown value", func(t *testing.T) { + got, ok := secretFromEvent(cache.DeletedFinalStateUnknown{Obj: secret}) + if !ok { + t.Fatal("expected tombstone value to be accepted") + } + if got != secret { + t.Fatal("expected tombstone secret pointer to be returned") + } + }) + + t.Run("deleted final state unknown pointer", func(t *testing.T) { + got, ok := secretFromEvent(&cache.DeletedFinalStateUnknown{Obj: secret}) + if !ok { + t.Fatal("expected tombstone pointer to be accepted") + } + if got != secret { + t.Fatal("expected tombstone secret pointer to be returned") + } + }) + + t.Run("unexpected type", func(t *testing.T) { + if _, ok := secretFromEvent("not-a-secret"); ok { + t.Fatal("expected unexpected object type to be rejected") + } + }) +} diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 7f04f17ba..d02b9f8d3 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -19,6 +19,7 @@ aliases: * FEATURE: [vmoperator](https://docs.victoriametrics.com/operator/): add `victoriametrics_app=true` label to all metrics scraped by the operator. See [#2261](https://github.com/VictoriaMetrics/operator/issues/2261). +* BUGFIX: [config-reloader](https://docs.victoriametrics.com/operator/): fix possible panic on Secret watch events when the informer's local cache fell out of sync and Kubernetes delivered a stale tombstone entry instead of the Secret object. The config-reloader now unwraps tombstones correctly and logs an error for any other unexpected types. * BUGFIX: [vmoperator](https://docs.victoriametrics.com/operator/): update status currentRevision and currentReplicas for StatefulSet with OnDelete update strategy. See [#1242](https://github.com/VictoriaMetrics/operator/issues/1242). * BUGFIX: [vmoperator](https://docs.victoriametrics.com/operator/): retry reconcile errors, that may lead to expanding state, before resource could hang in expanding state. * BUGFIX: [vmcluster](https://docs.victoriametrics.com/operator/resources/vmcluster/), [vlcluster](https://docs.victoriametrics.com/operator/resources/vlcluster/) and [vtcluster](https://docs.victoriametrics.com/operator/resources/vtcluster/): when storage HPA was enabled, generated `-storageNode` flags could become incorrect after scaling, which could break expected routing to storage nodes; now the operator derives storage node count from the current StatefulSet state so generated flags stay correct during HPA-driven scaling. See [#2117](https://github.com/VictoriaMetrics/operator/issues/2117). diff --git a/docs/env.md b/docs/env.md index 810698195..a29b4fa95 100644 --- a/docs/env.md +++ b/docs/env.md @@ -1,12 +1,12 @@ | Environment variables | | --- | -| VM_METRICS_VERSION: `v1.145.0` #
Defines default image version for VictoriaMetrics components: VMSingle, VMCluster (vmselect/vminsert/vmstorage), VMAgent, VMAlert, VMAuth, VMBackup. Used as the image tag when no explicit version is set in the CR spec. | -| VM_LOGS_VERSION: `v1.51.0` #
Defines default image version for VictoriaLogs components: VLogs, VLAgent, VLSingle, VLCluster (vlselect/vlinsert/vlstorage). Used as the image tag when no explicit version is set in the CR spec. | -| VM_ANOMALY_VERSION: `v1.29.3` #
Defines default image version for VMAnomaly. Used as the image tag when no explicit version is set in the CR spec. | -| VM_TRACES_VERSION: `v0.9.3` #
Defines default image version for VictoriaTraces components: VTSingle, VTCluster (vtselect/vtinsert/vtstorage). Used as the image tag when no explicit version is set in the CR spec. | -| VM_OPERATOR_VERSION: `v0.72.0` #
Defines the operator's own version. Used for config-reloader image tag interpolation. | -| VM_GATEWAY_API_ENABLED: `false` #
Enables support for Kubernetes Gateway API. When enabled, operator manages HTTPRoute resources for VMAuth ingress configuration. | -| VM_VPA_API_ENABLED: `false` #
Enables support for VerticalPodAutoscaler API. When enabled, operator can create and manage VPA objects for VM components. | +| VM_METRICS_VERSION: `v1.145.0` # | +| VM_LOGS_VERSION: `v1.51.0` # | +| VM_ANOMALY_VERSION: `v1.29.6` # | +| VM_TRACES_VERSION: `v0.9.3` # | +| VM_OPERATOR_VERSION: `v0.68.3` # | +| VM_GATEWAY_API_ENABLED: `false` # | +| VM_VPA_API_ENABLED: `false` # | | WATCH_NAMESPACE: `-` #
Defines a list of namespaces to be watched by operator. Operator don't perform any cluster wide API calls if namespaces not empty. In case of empty list it performs only clusterwide api calls. | | VM_CONTAINERREGISTRY: `-` #
container registry name prefix, e.g. docker.io | | VM_CUSTOMCONFIGRELOADERIMAGE: `-` #
Deprecated: use VM_CONFIG_RELOADER_IMAGE instead | From d246f0eb73df40c857112b8c79b9d26d24ddbc05 Mon Sep 17 00:00:00 2001 From: Andrii Chubatiuk Date: Sat, 20 Jun 2026 16:05:53 +0300 Subject: [PATCH 23/31] ported anomaly changes --- api/operator/v1/vmanomaly_types.go | 47 ++- config/crd/overlay/crd.descriptionless.yaml | 8 +- config/crd/overlay/crd.yaml | 21 +- docs/api.md | 7 +- .../operator/factory/build/defaults.go | 18 +- .../operator/factory/vmanomaly/config.go | 2 +- .../factory/vmanomaly/config/config.go | 68 +++- .../factory/vmanomaly/config/config_test.go | 325 +++++++++++++++++- .../factory/vmanomaly/config/models.go | 140 +++++--- .../factory/vmanomaly/config/readers.go | 41 +-- .../factory/vmanomaly/config/schedulers.go | 103 ++++-- .../factory/vmanomaly/config/writers.go | 9 +- .../operator/factory/vmanomaly/pod.go | 16 +- .../operator/factory/vmanomaly/statefulset.go | 28 +- .../factory/vmanomaly/statefulset_test.go | 10 + .../vmanomaly/vmanomaly_reconcile_test.go | 70 ++++ 16 files changed, 729 insertions(+), 184 deletions(-) diff --git a/api/operator/v1/vmanomaly_types.go b/api/operator/v1/vmanomaly_types.go index 5dc3a09db..bfe29e9a8 100644 --- a/api/operator/v1/vmanomaly_types.go +++ b/api/operator/v1/vmanomaly_types.go @@ -115,6 +115,10 @@ type VMAnomalyWritersSpec struct { // Metrics to save the output (in metric names or labels) // +optional MetricFormat VMAnomalyVMWriterMetricFormatSpec `json:"metricFormat,omitempty" yaml:"metric_format,omitempty"` + // ConnectionRetryAttempts defines the number of attempts to retry the connection in case of failure + // +optional + // +kubebuilder:validation:Minimum=1 + ConnectionRetryAttempts int `json:"connectionRetryAttempts,omitempty" yaml:"connection_retry_attempts,omitempty"` // +optional VMAnomalyHTTPClientSpec `json:",inline,omitempty" yaml:",inline,omitempty"` } @@ -161,9 +165,12 @@ type VMAnomalyReadersSpec struct { QueryFromLastSeenTimestamp bool `json:"queryFromLastSeenTimestamp,omitempty" yaml:"query_from_last_seen_timestamp,omitempty"` // It allows overriding the default -search.latencyOffsetflag of VictoriaMetrics LatencyOffset string `json:"latencyOffset,omitempty" yaml:"latency_offset,omitempty"` + // Offset adds a time shift to the query window for all queries, e.g. to account for delayed data ingestion + // +optional + Offset string `json:"offset,omitempty" yaml:"offset,omitempty"` // Optional argoverrides how search.maxPointsPerTimeseries flagimpacts vmanomaly on splitting long fitWindow queries into smaller sub-intervals MaxPointsPerQuery int `json:"maxPointsPerQuery,omitempty" yaml:"max_points_per_query,omitempty"` - // Optional argumentspecifies the IANA timezone to account for local shifts, like DST, in models sensitive to seasonal patterns + // Optional argument specifies the IANA timezone to account for local shifts, like DST, in models sensitive to seasonal patterns Timezone string `json:"tz,omitempty" yaml:"tz,omitempty"` // Optional argumentallows defining valid data ranges for input of all the queries in queries DataRange []string `json:"dataRange,omitempty" yaml:"data_range,omitempty"` @@ -180,6 +187,11 @@ type VMAnomalyStatus struct { ParsingSpecError string `json:"-" yaml:"-"` } +// SetLastSpec implements objectWithLastAppliedState interface +func (cr *VMAnomaly) SetLastSpec(prevSpec VMAnomalySpec) { + cr.ParsedLastAppliedSpec = &prevSpec +} + // GetStatusMetadata returns metadata for object status func (cr *VMAnomaly) GetStatusMetadata() *vmv1beta1.StatusMetadata { return &cr.Status.StatusMetadata @@ -205,9 +217,8 @@ type VMAnomaly struct { Spec VMAnomalySpec `json:"spec,omitempty"` // ParsedLastAppliedSpec contains last-applied configuration spec - ParsedLastAppliedSpec *VMAnomalySpec `json:"-" yaml:"-"` - - Status VMAnomalyStatus `json:"status,omitempty"` + ParsedLastAppliedSpec *VMAnomalySpec `json:"-" yaml:"-"` + Status VMAnomalyStatus `json:"status,omitempty"` } // VMAnomalyMonitoringSpec defines configuration for VMAnomaly monitoring @@ -218,7 +229,7 @@ type VMAnomalyMonitoringSpec struct { } // VMAnomalyMonitoringPullSpec defines pull monitoring configuration -// which is enabled by default and served at POD_IP:8490/metrics +// which is enabled by default and served at POD_IP:8080/metrics type VMAnomalyMonitoringPullSpec struct { // Port defines a port for metrics scrape Port string `json:"port"` @@ -253,16 +264,14 @@ type VMAnomalyServerSpec struct { // MaxConcurrentTasks defines maximum number of concurrent anomaly detection tasks // +optional // +kubebuilder:validation:Minimum=1 - // +kubebuilder:validation:Maximum=20 MaxConcurrentTasks int `json:"maxConcurrentTasks,omitempty" yaml:"max_concurrent_tasks,omitempty"` // UIDefaultState defines default query state for anomaly UI // +optional UIDefaultState string `json:"uiDefaultState,omitempty" yaml:"ui_default_state,omitempty"` -} - -// SetLastSpec implements objectWithLastAppliedState interface -func (cr *VMAnomaly) SetLastSpec(prevSpec VMAnomalySpec) { - cr.ParsedLastAppliedSpec = &prevSpec + // UseReaderConnectionSettings when set to true, anomaly UI reuses connection settings + // (credentials, TLS, etc.) from the reader configuration to connect to datasources + // +optional + UseReaderConnectionSettings bool `json:"useReaderConnectionSettings,omitempty" yaml:"use_reader_connection_settings,omitempty"` } // AsOwner returns owner references with current object as owner @@ -304,11 +313,7 @@ func (cr *VMAnomaly) GetStatus() *VMAnomalyStatus { // DefaultStatusFields implements reconcile.ObjectWithDeepCopyAndStatus interface func (cr *VMAnomaly) DefaultStatusFields(vs *VMAnomalyStatus) { - var shardCnt int32 - if cr.IsSharded() { - shardCnt = *cr.Spec.ShardCount - } - vs.Shards = shardCnt + vs.Shards = cr.GetShardCount() } // UnmarshalJSON implements json.Unmarshaler interface @@ -407,7 +412,10 @@ func (cr *VMAnomaly) GetServiceScrape() *vmv1beta1.VMServiceScrapeSpec { // Port returns port for accessing anomaly UI func (cr *VMAnomaly) Port() string { - return cr.Spec.Port + if cr == nil || cr.Spec.Server == nil || len(cr.Spec.Server.Port) == 0 { + return "8490" + } + return cr.Spec.Server.Port } // GetVolumeName returns volume name for persistent storage @@ -438,7 +446,10 @@ func (cr *VMAnomaly) ProbeScheme() string { // ProbePort implements build.probeCRD interface func (cr *VMAnomaly) ProbePort() string { - return cr.Port() + if cr == nil || cr.Spec.Monitoring == nil || cr.Spec.Monitoring.Pull == nil || len(cr.Spec.Monitoring.Pull.Port) == 0 { + return "8080" + } + return cr.Spec.Monitoring.Pull.Port } // ProbeNeedLiveness implements build.probeCRD interface diff --git a/config/crd/overlay/crd.descriptionless.yaml b/config/crd/overlay/crd.descriptionless.yaml index e7d2a3a6b..6a3d2fb90 100644 --- a/config/crd/overlay/crd.descriptionless.yaml +++ b/config/crd/overlay/crd.descriptionless.yaml @@ -12148,6 +12148,8 @@ spec: type: string maxPointsPerQuery: type: integer + offset: + type: string queryFromLastSeenTimestamp: type: boolean queryRangePath: @@ -12317,7 +12319,6 @@ spec: addr: type: string maxConcurrentTasks: - maximum: 20 minimum: 1 type: integer pathPrefix: @@ -12326,6 +12327,8 @@ spec: type: string uiDefaultState: type: string + useReaderConnectionSettings: + type: boolean type: object serviceAccountName: type: string @@ -12641,6 +12644,9 @@ spec: type: object x-kubernetes-map-type: atomic type: object + connectionRetryAttempts: + minimum: 1 + type: integer datasourceURL: type: string healthPath: diff --git a/config/crd/overlay/crd.yaml b/config/crd/overlay/crd.yaml index e098ad8f5..f4f00fd4e 100644 --- a/config/crd/overlay/crd.yaml +++ b/config/crd/overlay/crd.yaml @@ -23287,7 +23287,7 @@ spec: pull: description: |- VMAnomalyMonitoringPullSpec defines pull monitoring configuration - which is enabled by default and served at POD_IP:8490/metrics + which is enabled by default and served at POD_IP:8080/metrics properties: port: description: Port defines a port for metrics scrape @@ -23816,6 +23816,10 @@ spec: flagimpacts vmanomaly on splitting long fitWindow queries into smaller sub-intervals type: integer + offset: + description: Offset adds a time shift to the query window for + all queries, e.g. to account for delayed data ingestion + type: string queryFromLastSeenTimestamp: description: If True, then query will be performed from the last seen timestamp for a given series. @@ -23988,8 +23992,8 @@ spec: type: string type: object tz: - description: Optional argumentspecifies the IANA timezone to account - for local shifts, like DST, in models sensitive to seasonal + description: Optional argument specifies the IANA timezone to + account for local shifts, like DST, in models sensitive to seasonal patterns type: string required: @@ -24122,7 +24126,6 @@ spec: maxConcurrentTasks: description: MaxConcurrentTasks defines maximum number of concurrent anomaly detection tasks - maximum: 20 minimum: 1 type: integer pathPrefix: @@ -24137,6 +24140,11 @@ spec: description: UIDefaultState defines default query state for anomaly UI type: string + useReaderConnectionSettings: + description: |- + UseReaderConnectionSettings when set to true, anomaly UI reuses connection settings + (credentials, TLS, etc.) from the reader configuration to connect to datasources + type: boolean type: object serviceAccountName: description: ServiceAccountName is the name of the ServiceAccount @@ -24872,6 +24880,11 @@ spec: type: object x-kubernetes-map-type: atomic type: object + connectionRetryAttempts: + description: ConnectionRetryAttempts defines the number of attempts + to retry the connection in case of failure + minimum: 1 + type: integer datasourceURL: description: |- DatasourceURL defines remote write url for write requests diff --git a/docs/api.md b/docs/api.md index 967d79047..2d63f04c6 100644 --- a/docs/api.md +++ b/docs/api.md @@ -599,7 +599,7 @@ Appears in: [VMAnomalyMonitoringPushSpec](#vmanomalymonitoringpushspec), [VMAnom #### VMAnomalyMonitoringPullSpec VMAnomalyMonitoringPullSpec defines pull monitoring configuration -which is enabled by default and served at POD_IP:8490/metrics +which is enabled by default and served at POD_IP:8080/metrics Appears in: [VMAnomalyMonitoringSpec](#vmanomalymonitoringspec) @@ -655,13 +655,14 @@ Appears in: [VMAnomalySpec](#vmanomalyspec) | healthPath#
_string_ | _(Required)_
HealthPath defines absolute or relative URL address where to check availability of the remote webserver | | latencyOffset#
_string_ | _(Required)_
It allows overriding the default -search.latencyOffsetflag of VictoriaMetrics | | maxPointsPerQuery#
_integer_ | _(Required)_
Optional argoverrides how search.maxPointsPerTimeseries flagimpacts vmanomaly on splitting long fitWindow queries into smaller sub-intervals | +| offset#
_string_ | _(Optional)_
Offset adds a time shift to the query window for all queries, e.g. to account for delayed data ingestion | | queryFromLastSeenTimestamp#
_boolean_ | _(Required)_
If True, then query will be performed from the last seen timestamp for a given series. | | queryRangePath#
_string_ | _(Required)_
Performs PromQL/MetricsQL range query | | samplingPeriod#
_string_ | _(Required)_
Frequency of the points returned | | tenantID#
_string_ | _(Required)_
TenantID defines for VictoriaMetrics Cluster version only, tenants are identified by accountID, accountID:projectID or multitenant. | | timeout#
_string_ | _(Required)_
Timeout for the requests, passed as a string | | tlsConfig#
_[TLSConfig](#tlsconfig)_ | _(Required)_
TLSConfig defines tls connection configuration | -| tz#
_string_ | _(Required)_
Optional argumentspecifies the IANA timezone to account for local shifts, like DST, in models sensitive to seasonal patterns | +| tz#
_string_ | _(Required)_
Optional argument specifies the IANA timezone to account for local shifts, like DST, in models sensitive to seasonal patterns | #### VMAnomalyServerSpec @@ -677,6 +678,7 @@ Appears in: [VMAnomalySpec](#vmanomalyspec) | pathPrefix#
_string_ | _(Optional)_
PathPrefix defines optional URL path prefix for all HTTP routes
If set to 'my-app' or '/my-app', routes will be served under '/my-app/...' | | port#
_string_ | _(Optional)_
Port defines port to listen on | | uiDefaultState#
_string_ | _(Optional)_
UIDefaultState defines default query state for anomaly UI | +| useReaderConnectionSettings#
_boolean_ | _(Optional)_
UseReaderConnectionSettings when set to true, anomaly UI reuses connection settings
(credentials, TLS, etc.) from the reader configuration to connect to datasources | #### VMAnomalySpec @@ -766,6 +768,7 @@ Appears in: [VMAnomalySpec](#vmanomalyspec) | --- | --- | | basicAuth#
_[BasicAuth](#basicauth)_ | _(Required)_
Basic auth defines basic authorization configuration | | bearer#
_[BearerAuth](#bearerauth)_ | _(Required)_
BearerAuth defines authorization with Authorization: Bearer header | +| connectionRetryAttempts#
_integer_ | _(Optional)_
ConnectionRetryAttempts defines the number of attempts to retry the connection in case of failure | | datasourceURL#
_string_ | _(Required)_
DatasourceURL defines remote write url for write requests
provided endpoint must serve /api/v1/import path
vmanomaly joins datasourceURL + "/api/v1/import" | | healthPath#
_string_ | _(Required)_
HealthPath defines absolute or relative URL address where to check availability of the remote webserver | | metricFormat#
_[VMAnomalyVMWriterMetricFormatSpec](#vmanomalyvmwritermetricformatspec)_ | _(Optional)_
Metrics to save the output (in metric names or labels) | diff --git a/internal/controller/operator/factory/build/defaults.go b/internal/controller/operator/factory/build/defaults.go index 8b21e2a97..667f8f82a 100644 --- a/internal/controller/operator/factory/build/defaults.go +++ b/internal/controller/operator/factory/build/defaults.go @@ -322,11 +322,19 @@ func addVMAnomalyDefaults(objI any) { } addDefaultsToCommonParams(&cr.Spec.CommonAppsParams, &cp, &cv) if cr.Spec.Monitoring == nil { - cr.Spec.Monitoring = &vmv1.VMAnomalyMonitoringSpec{ - Pull: &vmv1.VMAnomalyMonitoringPullSpec{ - Port: "8080", - }, - } + cr.Spec.Monitoring = &vmv1.VMAnomalyMonitoringSpec{} + } + if cr.Spec.Monitoring.Pull == nil { + cr.Spec.Monitoring.Pull = &vmv1.VMAnomalyMonitoringPullSpec{} + } + if len(cr.Spec.Monitoring.Pull.Port) == 0 { + cr.Spec.Monitoring.Pull.Port = "8080" + } + if cr.Spec.Server == nil { + cr.Spec.Server = &vmv1.VMAnomalyServerSpec{} + } + if len(cr.Spec.Server.Port) == 0 { + cr.Spec.Server.Port = cv.Port } } diff --git a/internal/controller/operator/factory/vmanomaly/config.go b/internal/controller/operator/factory/vmanomaly/config.go index 5e66e4369..e7a065cda 100644 --- a/internal/controller/operator/factory/vmanomaly/config.go +++ b/internal/controller/operator/factory/vmanomaly/config.go @@ -25,7 +25,7 @@ func createOrUpdateConfig(ctx context.Context, rclient client.Client, cr, prevCR newSecretConfig := &corev1.Secret{ ObjectMeta: build.ResourceMeta(build.SecretConfigResourceKind, cr), Data: map[string][]byte{ - secretConfigKey: data, + configEnvsubstFilename: data, }, } owner := cr.AsOwner() diff --git a/internal/controller/operator/factory/vmanomaly/config/config.go b/internal/controller/operator/factory/vmanomaly/config/config.go index f85d2383b..38c1149c5 100644 --- a/internal/controller/operator/factory/vmanomaly/config/config.go +++ b/internal/controller/operator/factory/vmanomaly/config/config.go @@ -21,6 +21,32 @@ type validatable interface { validate() error } +type PartialConfig struct { + Schedulers map[string]*scheduler `yaml:"schedulers,omitempty"` + Models map[string]*model `yaml:"models,omitempty"` + Queries map[string]*query `yaml:"queries,omitempty"` +} + +func (pc *PartialConfig) Validate() error { + for name, s := range pc.Schedulers { + if s == nil { + return fmt.Errorf("scheduler=%q is nil", name) + } + if err := s.validate(); err != nil { + return fmt.Errorf("failed to validate scheduler=%q: %w", name, err) + } + } + for name, m := range pc.Models { + if m == nil { + return fmt.Errorf("model=%q is nil", name) + } + if err := m.validate(); err != nil { + return fmt.Errorf("failed to validate model=%q: %w", name, err) + } + } + return nil +} + type config struct { Schedulers map[string]*scheduler `yaml:"schedulers,omitempty"` Models map[string]*model `yaml:"models,omitempty"` @@ -33,30 +59,39 @@ type config struct { } type server struct { - Addr string `yaml:"addr,omitempty"` - Port string `yaml:"port,omitempty"` - PathPrefix string `yaml:"path_prefix,omitempty"` - MaxConcurrentTasks int `yaml:"max_concurrent_tasks,omitempty"` - UIDefaultState string `yaml:"ui_default_state,omitempty"` + Addr string `yaml:"addr,omitempty"` + Port string `yaml:"port,omitempty"` + PathPrefix string `yaml:"path_prefix,omitempty"` + MaxConcurrentTasks int `yaml:"max_concurrent_tasks,omitempty"` + UIDefaultState string `yaml:"ui_default_state,omitempty"` + UseReaderConnectionSettings bool `yaml:"use_reader_connection_settings,omitempty"` } func (s *server) validate() error { if s == nil { return nil } - if s.MaxConcurrentTasks != 0 && (s.MaxConcurrentTasks < 1 || s.MaxConcurrentTasks > 20) { - return fmt.Errorf("max_concurrent_tasks must be between 1 and 20, got %d", s.MaxConcurrentTasks) + if s.MaxConcurrentTasks < 0 { + return fmt.Errorf("max_concurrent_tasks must be a positive integer, got %d", s.MaxConcurrentTasks) } return nil } +type retention struct { + TTL duration `yaml:"ttl,omitempty"` + CheckInterval duration `yaml:"check_interval,omitempty"` +} + type settings struct { - Workers int `yaml:"n_workers,omitempty"` - ScoreOutsideRange float64 `yaml:"anomaly_score_outside_data_range,omitempty"` - RestoreState bool `yaml:"restore_state,omitempty"` + Workers int `yaml:"n_workers,omitempty"` + // ScoreOutsideRange is a pointer so an explicit 0.0 survives marshalling. + ScoreOutsideRange *float64 `yaml:"anomaly_score_outside_data_range,omitempty"` + RestoreState bool `yaml:"restore_state,omitempty"` + Retention *retention `yaml:"retention,omitempty"` + LoggerLevels map[string]string `yaml:"logger_levels,omitempty"` } -func (c *config) override(cr *vmv1.VMAnomaly, ac *build.AssetsCache) error { +func (c *config) build(cr *vmv1.VMAnomaly, ac *build.AssetsCache) error { crCanonicalName := strings.Join([]string{cr.Namespace, cr.Name}, "/") if cr.Spec.Server != nil { srv := cr.Spec.Server @@ -72,6 +107,8 @@ func (c *config) override(cr *vmv1.VMAnomaly, ac *build.AssetsCache) error { } c.Preset = strings.ToLower(c.Preset) if strings.HasPrefix(c.Preset, "ui") { + s := new(noopScheduler) + s.setClass("noop") c.Reader = &reader{ Class: "noop", } @@ -80,9 +117,7 @@ func (c *config) override(cr *vmv1.VMAnomaly, ac *build.AssetsCache) error { } c.Schedulers = map[string]*scheduler{ "noop": { - validatable: &noopScheduler{ - Class: "noop", - }, + anomalyScheduler: s, }, } c.Models = map[string]*model{ @@ -98,7 +133,7 @@ func (c *config) override(cr *vmv1.VMAnomaly, ac *build.AssetsCache) error { c.Monitoring = &monitoring{ Pull: &endpoint{ Addr: "0.0.0.0", - Port: cr.Spec.Monitoring.Pull.Port, + Port: cr.ProbePort(), }, } return nil @@ -166,6 +201,7 @@ func (c *config) override(cr *vmv1.VMAnomaly, ac *build.AssetsCache) error { } c.Monitoring = &m } + return nil } @@ -353,7 +389,7 @@ func Load(cr *vmv1.VMAnomaly, ac *build.AssetsCache) ([]byte, error) { if err != nil { return nil, fmt.Errorf("failed to unmarshal anomaly configuration, name=%q: %w", cr.Name, err) } - if err = c.override(cr, ac); err != nil { + if err = c.build(cr, ac); err != nil { return nil, fmt.Errorf("failed to update secret values with values from anomaly instance, name=%q: %w", cr.Name, err) } if err = c.validate(); err != nil { diff --git a/internal/controller/operator/factory/vmanomaly/config/config_test.go b/internal/controller/operator/factory/vmanomaly/config/config_test.go index d3157ca58..2fff9f8d5 100644 --- a/internal/controller/operator/factory/vmanomaly/config/config_test.go +++ b/internal/controller/operator/factory/vmanomaly/config/config_test.go @@ -179,6 +179,7 @@ settings: "label2": "value2", }, }, + ConnectionRetryAttempts: 3, VMAnomalyHTTPClientSpec: vmv1.VMAnomalyHTTPClientSpec{ TenantID: "0:2", TLSConfig: &vmv1beta1.TLSConfig{ @@ -211,6 +212,7 @@ settings: DatasourceURL: "http://custom.ds", QueryRangePath: "/api/v1/query_range", SamplingPeriod: "10s", + Offset: "5m", VMAnomalyHTTPClientSpec: vmv1.VMAnomalyHTTPClientSpec{ TenantID: "0:1", TLSConfig: &vmv1beta1.TLSConfig{ @@ -272,6 +274,7 @@ reader: datasource_url: http://custom.ds sampling_period: 10s query_range_path: /api/v1/query_range + offset: 5m queries: test: expr: vm_metric @@ -290,6 +293,7 @@ writer: for: custom_$QUERY_KEY label1: value1 label2: value2 + connection_retry_attempts: 3 tenant_id: "0:2" verify_tls: /test/monitoring_tls_remote-ca tls_cert_file: /test/monitoring_tls_remote-cert @@ -649,10 +653,11 @@ writer: DatasourceURL: "http://writer.test", }, Server: &vmv1.VMAnomalyServerSpec{ - Addr: "127.0.0.1", - Port: "9090", - PathPrefix: "my-anomaly", - MaxConcurrentTasks: 10, + Addr: "127.0.0.1", + Port: "9090", + PathPrefix: "my-anomaly", + MaxConcurrentTasks: 10, + UseReaderConnectionSettings: true, }, }, }, @@ -687,6 +692,7 @@ server: port: "9090" path_prefix: my-anomaly max_concurrent_tasks: 10 + use_reader_connection_settings: true `, }) @@ -763,6 +769,8 @@ writer: monitoring: pull: port: "8080" +server: + port: "8490" `, }) @@ -785,6 +793,7 @@ models: scale: [0.5, 1.5] min_subseason: hourly decay: 0.5 + global_smoothing: 0.5 schedulers: scheduler_1m: class: "scheduler.periodic.PeriodicScheduler" @@ -795,6 +804,7 @@ reader: queries: test_query: expr: vm_metric + offset: 1m writer: datasource_url: "http://test.com" `, @@ -818,6 +828,7 @@ models: - 1.5 decay: 0.5 min_subseason: hourly + global_smoothing: 0.5 schedulers: scheduler_1m: class: scheduler.periodic.PeriodicScheduler @@ -831,16 +842,109 @@ reader: queries: test_query: expr: vm_metric + offset: 1m writer: class: vm datasource_url: http://writer.test monitoring: pull: port: "8080" +server: + port: "8490" +`, + }) + + // ui preset with nil monitoring - must not panic + f(opts{ + cr: &vmv1.VMAnomaly{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-anomaly-ui", + Namespace: "monitoring", + }, + Spec: vmv1.VMAnomalySpec{ + License: &vmv1beta1.License{ + Key: ptr.To("test"), + }, + ConfigRawYaml: `preset: ui`, + Server: &vmv1.VMAnomalyServerSpec{ + PathPrefix: "/", + }, + // Monitoring intentionally nil to reproduce the panic + }, + }, + expected: ` +models: + placeholder: + class: zscore + schedulers: + - noop +schedulers: + noop: + class: noop +reader: + class: noop + datasource_url: "" + sampling_period: null +writer: + class: noop + datasource_url: "" +monitoring: + pull: + addr: 0.0.0.0 + port: "8080" +server: + port: "8490" + path_prefix: / +preset: ui +`, + }) + + // ui preset with explicit monitoring pull port + f(opts{ + cr: &vmv1.VMAnomaly{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-anomaly-ui-monitoring", + Namespace: "monitoring", + }, + Spec: vmv1.VMAnomalySpec{ + License: &vmv1beta1.License{ + Key: ptr.To("test"), + }, + ConfigRawYaml: `preset: ui`, + Monitoring: &vmv1.VMAnomalyMonitoringSpec{ + Pull: &vmv1.VMAnomalyMonitoringPullSpec{ + Port: "9999", + }, + }, + }, + }, + expected: ` +models: + placeholder: + class: zscore + schedulers: + - noop +schedulers: + noop: + class: noop +reader: + class: noop + datasource_url: "" + sampling_period: null +writer: + class: noop + datasource_url: "" +monitoring: + pull: + addr: 0.0.0.0 + port: "9999" +server: + port: "8490" +preset: ui `, }) - // server section validation error - maxConcurrentTasks out of range + // server section validation error - maxConcurrentTasks must be a positive integer f(opts{ cr: &vmv1.VMAnomaly{ ObjectMeta: metav1.ObjectMeta{ @@ -878,10 +982,219 @@ writer: DatasourceURL: "http://writer.test", }, Server: &vmv1.VMAnomalyServerSpec{ - MaxConcurrentTasks: 25, // out of range (1-20) + MaxConcurrentTasks: -1, // negative is invalid; vmanomaly imposes no upper bound }, }, }, wantErr: true, }) + + // tz is serialized as a string (reader/query/scheduler), an explicit zero + // anomaly_score_outside_data_range survives marshalling, and an unset decay is omitted + f(opts{ + cr: &vmv1.VMAnomaly{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-anomaly", + Namespace: "monitoring", + }, + Spec: vmv1.VMAnomalySpec{ + License: &vmv1beta1.License{Key: ptr.To("test")}, + ConfigRawYaml: ` +settings: + anomaly_score_outside_data_range: 0 +models: + m_online: + class: zscore_online + queries: ['q1'] +schedulers: + s1: + class: periodic + infer_every: 1m + fit_window: 1h + tz: "Europe/Kyiv" +reader: + queries: + q1: + expr: up + tz: "America/New_York" +writer: {} +`, + Reader: &vmv1.VMAnomalyReadersSpec{ + DatasourceURL: "http://reader.test", + SamplingPeriod: "30s", + Timezone: "UTC", + }, + Writer: &vmv1.VMAnomalyWritersSpec{ + DatasourceURL: "http://writer.test", + }, + }, + }, + expected: ` +models: + m_online: + class: zscore_online + queries: + - q1 +schedulers: + s1: + class: periodic + fit_window: 1h + infer_every: 1m + tz: Europe/Kyiv +reader: + class: vm + datasource_url: http://reader.test + sampling_period: 30s + tz: UTC + queries: + q1: + expr: up + tz: America/New_York +writer: + class: vm + datasource_url: http://writer.test +monitoring: + pull: + port: "8080" +settings: + anomaly_score_outside_data_range: 0 +server: + port: "8490" +`, + }) + + // contamination accepts a float + f(opts{ + cr: &vmv1.VMAnomaly{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-anomaly", + Namespace: "monitoring", + }, + Spec: vmv1.VMAnomalySpec{ + License: &vmv1beta1.License{Key: ptr.To("test")}, + ConfigRawYaml: ` +models: + m_iforest: + class: isolation_forest + queries: ['q1'] + contamination: 0.05 +schedulers: + s1: + class: periodic + infer_every: 1m + fit_window: 1h +reader: + queries: + q1: + expr: up +writer: {} +`, + Reader: &vmv1.VMAnomalyReadersSpec{ + DatasourceURL: "http://reader.test", + SamplingPeriod: "30s", + }, + Writer: &vmv1.VMAnomalyWritersSpec{ + DatasourceURL: "http://writer.test", + }, + Server: &vmv1.VMAnomalyServerSpec{ + MaxConcurrentTasks: 50, // no upper bound + }, + }, + }, + expected: ` +models: + m_iforest: + class: isolation_forest + queries: + - q1 + contamination: 0.05 +schedulers: + s1: + class: periodic + fit_window: 1h + infer_every: 1m +reader: + class: vm + datasource_url: http://reader.test + sampling_period: 30s + queries: + q1: + expr: up +writer: + class: vm + datasource_url: http://writer.test +monitoring: + pull: + port: "8080" +server: + port: "8490" + max_concurrent_tasks: 50 +`, + }) + + // contamination accepts the string "auto" + f(opts{ + cr: &vmv1.VMAnomaly{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-anomaly", + Namespace: "monitoring", + }, + Spec: vmv1.VMAnomalySpec{ + License: &vmv1beta1.License{Key: ptr.To("test")}, + ConfigRawYaml: ` +models: + m_iforest: + class: isolation_forest + queries: ['q1'] + contamination: auto +schedulers: + s1: + class: periodic + infer_every: 1m + fit_window: 1h +reader: + queries: + q1: + expr: up +writer: {} +`, + Reader: &vmv1.VMAnomalyReadersSpec{ + DatasourceURL: "http://reader.test", + SamplingPeriod: "30s", + }, + Writer: &vmv1.VMAnomalyWritersSpec{ + DatasourceURL: "http://writer.test", + }, + }, + }, + expected: ` +models: + m_iforest: + class: isolation_forest + queries: + - q1 + contamination: auto +schedulers: + s1: + class: periodic + fit_window: 1h + infer_every: 1m +reader: + class: vm + datasource_url: http://reader.test + sampling_period: 30s + queries: + q1: + expr: up +writer: + class: vm + datasource_url: http://writer.test +monitoring: + pull: + port: "8080" +server: + port: "8490" +`, + }) + } diff --git a/internal/controller/operator/factory/vmanomaly/config/models.go b/internal/controller/operator/factory/vmanomaly/config/models.go index 177e34718..8d49c9362 100644 --- a/internal/controller/operator/factory/vmanomaly/config/models.go +++ b/internal/controller/operator/factory/vmanomaly/config/models.go @@ -16,67 +16,55 @@ const ( ) type commonModelParams struct { - Class string `yaml:"class"` - Queries []string `yaml:"queries,omitempty"` - Schedulers []string `yaml:"schedulers,omitempty"` - ProvideSeries []string `yaml:"provide_series,omitempty"` - DetectionDirection modelDetectionDirection `yaml:"detection_direction,omitempty"` - MinDevFromExpected float64 `yaml:"min_dev_from_expected,omitempty"` - GroupBy []string `yaml:"groupby,omitempty"` - Scale []float64 `yaml:"scale,omitempty"` - ClipPredictions bool `yaml:"clip_predictions,omitempty"` - ScoreOutsideDataRange float64 `yaml:"anomaly_score_outside_data_range,omitempty"` + Class string `yaml:"class"` + Queries []string `yaml:"queries,omitempty"` + Schedulers []string `yaml:"schedulers,omitempty"` + ProvideSeries []string `yaml:"provide_series,omitempty"` + DetectionDirection modelDetectionDirection `yaml:"detection_direction,omitempty"` + MinDevFromExpected float64 `yaml:"min_dev_from_expected,omitempty"` + GroupBy []string `yaml:"groupby,omitempty"` + Scale []float64 `yaml:"scale,omitempty"` + ClipPredictions bool `yaml:"clip_predictions,omitempty"` + // ScoreOutsideDataRange is a pointer so an explicit 0.0 survives marshalling. + ScoreOutsideDataRange *float64 `yaml:"anomaly_score_outside_data_range,omitempty"` } func (p commonModelParams) queries() []string { return p.Queries } +func (p commonModelParams) addPrefix(prefix string) { + for i := range p.Schedulers { + p.Schedulers[i] = fmt.Sprintf("%s-%s", prefix, p.Schedulers[i]) + } + for i := range p.Queries { + p.Queries[i] = fmt.Sprintf("%s-%s", prefix, p.Queries[i]) + } +} + func (p commonModelParams) schedulers() []string { return p.Schedulers } +func (p *commonModelParams) setClass(class string) { + p.Class = class +} + type anomalyModel interface { validatable + setClass(string) schedulers() []string queries() []string + addPrefix(string) } type model struct { anomalyModel } -var ( - _ yaml.Marshaler = (*model)(nil) - _ yaml.Unmarshaler = (*model)(nil) -) - -// MarshalYAML implements yaml.Marshaller interface -func (m *model) MarshalYAML() (any, error) { - return m.anomalyModel, nil -} - -type onlineModel struct { - Decay float64 `yaml:"decay,omitempty"` -} - -func (m *onlineModel) validate() error { - // See https://docs.victoriametrics.com/anomaly-detection/components/models/#decay - // Valid values are in the range [0, 1]. - if m.Decay < 0 || m.Decay > 1 { - return fmt.Errorf("decay must be in range [0, 1], got %f", m.Decay) - } - return nil -} - -// UnmarshalYAML implements yaml.Unmarshaler interface -func (m *model) UnmarshalYAML(unmarshal func(any) error) error { - var h header - if err := unmarshal(&h); err != nil { - return err - } +func (m *model) init(class string) error { var mdl anomalyModel - switch h.Class { + switch class { case "model.auto.AutoTunedModel", "auto": mdl = new(autoTunedModel) case "model.prophet.ProphetModel", "prophet": @@ -102,12 +90,57 @@ func (m *model) UnmarshalYAML(unmarshal func(any) error) error { case "model.isolation_forest.IsolationForestMultivariateModel", "isolation_forest_multivariate": mdl = new(isolationForestMultivariateModel) default: - return fmt.Errorf("model class=%q is not supported", h.Class) + return fmt.Errorf("model class=%q is not supported", class) } - if err := unmarshal(mdl); err != nil { + m.anomalyModel = mdl + return nil +} + +var ( + _ yaml.Marshaler = (*model)(nil) + _ yaml.Unmarshaler = (*model)(nil) +) + +// Validate validates raw config +func (m *model) Validate(data []byte) error { + if err := yaml.Unmarshal(data, m); err != nil { + return err + } + return m.validate() +} + +// MarshalYAML implements yaml.Marshaller interface +func (m *model) MarshalYAML() (any, error) { + return m.anomalyModel, nil +} + +// UnmarshalYAML implements yaml.Unmarshaler interface +func (m *model) UnmarshalYAML(unmarshal func(any) error) error { + var h header + if err := unmarshal(&h); err != nil { + return err + } + if err := m.init(h.Class); err != nil { + return err + } + if err := unmarshal(m.anomalyModel); err != nil { return err } - m.anomalyModel = mdl + return nil +} + +type onlineModel struct { + // Decay is a pointer to distinguish "unset" (omitted, vmanomaly applies its default) + // from an explicit value, which must be in the range (0, 1]. + Decay *float64 `yaml:"decay,omitempty"` +} + +func (m *onlineModel) validate() error { + // See https://docs.victoriametrics.com/anomaly-detection/components/models/#decay + // Valid values are in the range (0, 1]; unset is allowed and defaulted by vmanomaly. + if m.Decay != nil && (*m.Decay <= 0 || *m.Decay > 1) { + return fmt.Errorf("decay must be in range (0, 1], got %f", *m.Decay) + } return nil } @@ -126,7 +159,7 @@ type autoTunedOptimizationParams struct { OptimizedBusinessParams []string `yaml:"optimized_business_params,omitempty"` Seed int `yaml:"seed,omitempty"` Splits int `yaml:"n_splits,omitempty"` - Trails int `yaml:"n_trails,omitempty"` + Trials int `yaml:"n_trials,omitempty"` Timeout *duration `yaml:"timeout,omitempty"` } @@ -144,9 +177,10 @@ func (m *holtWintersModel) validate() error { type isolationForestModel struct { commonModelParams `yaml:",inline"` - Contamination string `yaml:"contamination,omitempty"` - SeasonalFeatures []string `yaml:"seasonal_features,omitempty"` - Args map[string]any `yaml:"args,omitempty"` + // Contamination is a float (e.g. 0.01) or the string "auto"; vmanomaly accepts both. + Contamination any `yaml:"contamination,omitempty"` + SeasonalFeatures []string `yaml:"seasonal_features,omitempty"` + Args map[string]any `yaml:"args,omitempty"` } func (m *isolationForestModel) validate() error { @@ -155,9 +189,10 @@ func (m *isolationForestModel) validate() error { type isolationForestMultivariateModel struct { commonModelParams `yaml:",inline"` - Contamination string `yaml:"contamination,omitempty"` - SeasonalFeatures []string `yaml:"seasonal_features,omitempty"` - Args map[string]any `yaml:"args,omitempty"` + // Contamination is a float (e.g. 0.01) or the string "auto"; vmanomaly accepts both. + Contamination any `yaml:"contamination,omitempty"` + SeasonalFeatures []string `yaml:"seasonal_features,omitempty"` + Args map[string]any `yaml:"args,omitempty"` } func (m *isolationForestMultivariateModel) validate() error { @@ -193,12 +228,13 @@ type onlineQuantileModel struct { onlineModel `yaml:",inline"` Quantiles []float64 `yaml:"quantiles,omitempty"` SeasonalInterval *duration `yaml:"seasonal_interval,omitempty"` - MinSubseason string `yaml:"min_subseason"` + MinSubseason string `yaml:"min_subseason,omitempty"` UseTransform bool `yaml:"use_transform,omitempty"` - GlobalSmoothing float64 `yaml:"global_smooth,omitempty"` + GlobalSmoothing float64 `yaml:"global_smoothing,omitempty"` SeasonStartsFrom time.Time `yaml:"season_starts_from,omitempty"` MinSamplesSeen int `yaml:"min_n_samples_seen,omitempty"` Compression int `yaml:"compression,omitempty"` + IqrThreshold float64 `yaml:"iqr_threshold,omitempty"` } func (m *onlineQuantileModel) validate() error { diff --git a/internal/controller/operator/factory/vmanomaly/config/readers.go b/internal/controller/operator/factory/vmanomaly/config/readers.go index 068c047d1..9d103c225 100644 --- a/internal/controller/operator/factory/vmanomaly/config/readers.go +++ b/internal/controller/operator/factory/vmanomaly/config/readers.go @@ -5,22 +5,22 @@ import ( "slices" "strconv" "strings" - "time" ) type reader struct { - Class string `yaml:"class"` - DatasourceURL string `yaml:"datasource_url"` - SamplingPeriod *duration `yaml:"sampling_period"` - QueryRangePath string `yaml:"query_range_path,omitempty"` - ExtraFilters []string `yaml:"extra_filters,omitempty"` - QueryFromLastSeenTimestamp bool `yaml:"query_from_last_seen_timestamp,omitempty"` - LatencyOffset *duration `yaml:"latency_offset,omitempty"` - MaxPointsPerQuery int `yaml:"max_points_per_query,omitempty"` - Timezone time.Location `yaml:"tz,omitempty"` - DataRange []string `yaml:"data_range,omitempty"` - Queries map[string]readerQuery `yaml:"queries,omitempty"` - ClientConfig clientConfig `yaml:",inline"` + Class string `yaml:"class"` + DatasourceURL string `yaml:"datasource_url"` + SamplingPeriod *duration `yaml:"sampling_period"` + QueryRangePath string `yaml:"query_range_path,omitempty"` + ExtraFilters []string `yaml:"extra_filters,omitempty"` + QueryFromLastSeenTimestamp bool `yaml:"query_from_last_seen_timestamp,omitempty"` + LatencyOffset *duration `yaml:"latency_offset,omitempty"` + Offset *duration `yaml:"offset,omitempty"` + MaxPointsPerQuery int `yaml:"max_points_per_query,omitempty"` + Timezone string `yaml:"tz,omitempty"` + DataRange []string `yaml:"data_range,omitempty"` + Queries map[string]*query `yaml:"queries,omitempty"` + ClientConfig clientConfig `yaml:",inline"` } func (r *reader) validate() error { @@ -56,11 +56,12 @@ func (r *reader) validate() error { return nil } -type readerQuery struct { - Expr string `yaml:"expr"` - Step *duration `yaml:"step,omitempty"` - DataRange []string `yaml:"data_range,omitempty"` - MaxPointsPerQuery int `yaml:"max_points_per_query,omitempty"` - TZ time.Location `yaml:"tz,omitempty"` - TenantID string `yaml:"tenant_id,omitempty"` +type query struct { + Expr string `yaml:"expr"` + Step *duration `yaml:"step,omitempty"` + DataRange []string `yaml:"data_range,omitempty"` + MaxPointsPerQuery int `yaml:"max_points_per_query,omitempty"` + TZ string `yaml:"tz,omitempty"` + TenantID string `yaml:"tenant_id,omitempty"` + Offset *duration `yaml:"offset,omitempty"` } diff --git a/internal/controller/operator/factory/vmanomaly/config/schedulers.go b/internal/controller/operator/factory/vmanomaly/config/schedulers.go index e425bd2e9..d2b7a5a03 100644 --- a/internal/controller/operator/factory/vmanomaly/config/schedulers.go +++ b/internal/controller/operator/factory/vmanomaly/config/schedulers.go @@ -7,8 +7,13 @@ import ( "gopkg.in/yaml.v2" ) -type scheduler struct { +type anomalyScheduler interface { validatable + setClass(string) +} + +type scheduler struct { + anomalyScheduler } var ( @@ -16,14 +21,32 @@ var ( _ yaml.Unmarshaler = (*scheduler)(nil) ) -// UnmarshalYAML implements yaml.Unmarshaller interface +// Validate validates raw config +func (s *scheduler) Validate(data []byte) error { + if err := yaml.Unmarshal(data, s); err != nil { + return err + } + return s.validate() +} + +// UnmarshalYAML implements yaml.Unmarshaler interface func (s *scheduler) UnmarshalYAML(unmarshal func(any) error) error { var h header if err := unmarshal(&h); err != nil { return err } - var sch validatable - switch h.Class { + if err := s.init(h.Class); err != nil { + return err + } + if err := unmarshal(s.anomalyScheduler); err != nil { + return err + } + return nil +} + +func (s *scheduler) init(class string) error { + var sch anomalyScheduler + switch class { case "scheduler.periodic.PeriodicScheduler", "periodic": sch = new(periodicScheduler) case "scheduler.oneoff.OneoffScheduler", "oneoff": @@ -31,51 +54,58 @@ func (s *scheduler) UnmarshalYAML(unmarshal func(any) error) error { case "scheduler.backtesting.BacktestingScheduler", "backtesting": sch = new(backtestingScheduler) default: - return fmt.Errorf("anomaly scheduler class=%q is not supported", h.Class) + return fmt.Errorf("anomaly scheduler class=%q is not supported", class) } - if err := unmarshal(sch); err != nil { - return err - } - s.validatable = sch + s.anomalyScheduler = sch return nil } // MarshalYAML implements yaml.Marshaler interface func (s *scheduler) MarshalYAML() (any, error) { - return s.validatable, nil + return s.anomalyScheduler, nil } -type noopScheduler struct { +type commonSchedulerParams struct { Class string `yaml:"class"` } +func (p *commonSchedulerParams) setClass(class string) { + p.Class = class +} + +type noopScheduler struct { + commonSchedulerParams `yaml:",inline"` +} + func (s *noopScheduler) validate() error { return nil } +// Docs: https://docs.victoriametrics.com/anomaly-detection/components/scheduler/#parameters-1 type periodicScheduler struct { - Class string `yaml:"class"` - FitEvery *duration `yaml:"fit_every,omitempty"` - FitWindow *duration `yaml:"fit_window"` - InferEvery *duration `yaml:"infer_every"` - StartFrom time.Time `yaml:"start_from,omitempty"` - Timezone time.Location `yaml:"tz,omitempty"` + commonSchedulerParams `yaml:",inline"` + FitEvery *duration `yaml:"fit_every,omitempty"` + FitWindow *duration `yaml:"fit_window"` + InferEvery *duration `yaml:"infer_every"` + StartFrom time.Time `yaml:"start_from,omitempty"` + Timezone string `yaml:"tz,omitempty"` } func (s *periodicScheduler) validate() error { return nil } +// Docs: https://docs.victoriametrics.com/anomaly-detection/components/scheduler/#parameters-2 type oneoffScheduler struct { - Class string `yaml:"class"` - InferStartISO time.Time `yaml:"infer_start_iso,omitempty"` - InferStartS int64 `yaml:"infer_start_s,omitempty"` - InferEndISO time.Time `yaml:"infer_end_iso,omitempty"` - InferEndS int64 `yaml:"infer_end_s,omitempty"` - FitStartISO time.Time `yaml:"fit_start_iso"` - FitStartS int64 `yaml:"fit_start_s"` - FitEndISO time.Time `yaml:"fit_end_iso"` - FitEndS int64 `yaml:"fit_end_s"` + commonSchedulerParams `yaml:",inline"` + InferStartISO time.Time `yaml:"infer_start_iso,omitempty"` + InferStartS int64 `yaml:"infer_start_s,omitempty"` + InferEndISO time.Time `yaml:"infer_end_iso,omitempty"` + InferEndS int64 `yaml:"infer_end_s,omitempty"` + FitStartISO time.Time `yaml:"fit_start_iso"` + FitStartS int64 `yaml:"fit_start_s"` + FitEndISO time.Time `yaml:"fit_end_iso"` + FitEndS int64 `yaml:"fit_end_s"` } func (s *oneoffScheduler) validate() error { @@ -124,16 +154,19 @@ func (s *oneoffScheduler) validate() error { return nil } +// Docs: https://docs.victoriametrics.com/anomaly-detection/components/scheduler/#parameters-3 type backtestingScheduler struct { - Class string `yaml:"class"` - FitWindow *duration `yaml:"fit_window"` - FromISO time.Time `yaml:"from_iso"` - FromS int64 `yaml:"from_s"` - ToISO time.Time `yaml:"to_iso"` - ToS int64 `yaml:"to_s"` - FitEvery *duration `yaml:"fit_every"` - Jobs int `yaml:"n_jobs,omitempty"` - InferenceOnly bool `yaml:"inference_only,omitempty"` + commonSchedulerParams `yaml:",inline"` + FitWindow *duration `yaml:"fit_window"` + FromISO time.Time `yaml:"from_iso"` + FromS int64 `yaml:"from_s"` + ToISO time.Time `yaml:"to_iso"` + ToS int64 `yaml:"to_s"` + FitEvery *duration `yaml:"fit_every"` + Jobs int `yaml:"n_jobs,omitempty"` + InferenceOnly bool `yaml:"inference_only,omitempty"` + Exact bool `yaml:"exact,omitempty"` + InferEvery *duration `yaml:"infer_every,omitempty"` } func (s *backtestingScheduler) validate() error { diff --git a/internal/controller/operator/factory/vmanomaly/config/writers.go b/internal/controller/operator/factory/vmanomaly/config/writers.go index e77a72850..214e71f27 100644 --- a/internal/controller/operator/factory/vmanomaly/config/writers.go +++ b/internal/controller/operator/factory/vmanomaly/config/writers.go @@ -8,10 +8,11 @@ import ( // Ref: https://docs.victoriametrics.com/anomaly-detection/components/writer/#vm-writer type writer struct { - Class string `yaml:"class"` - DatasourceURL string `yaml:"datasource_url"` - MetricFormat *writerMetricFormat `yaml:"metric_format,omitempty"` - ClientConfig clientConfig `yaml:",inline"` + Class string `yaml:"class"` + DatasourceURL string `yaml:"datasource_url"` + MetricFormat *writerMetricFormat `yaml:"metric_format,omitempty"` + ConnectionRetryAttempts int `yaml:"connection_retry_attempts,omitempty"` + ClientConfig clientConfig `yaml:",inline"` } func (w *writer) validate() error { diff --git a/internal/controller/operator/factory/vmanomaly/pod.go b/internal/controller/operator/factory/vmanomaly/pod.go index 7e2d0e547..689aea242 100644 --- a/internal/controller/operator/factory/vmanomaly/pod.go +++ b/internal/controller/operator/factory/vmanomaly/pod.go @@ -18,13 +18,12 @@ import ( ) const ( - secretConfigKey = "vmanomaly.yaml" - anomalyDir = "/etc/vmanomaly" - confDir = anomalyDir + "/config" - confFile = confDir + "/vmanomaly.yaml" - tlsAssetsDir = anomalyDir + "/tls" - storageDir = "/storage" - configVolumeName = "config-volume" + anomalyDir = "/etc/vmanomaly" + confDir = anomalyDir + "/config" + tlsAssetsDir = anomalyDir + "/tls" + storageDir = "/storage" + configVolumeName = "config" + configEnvsubstFilename = "vmanomaly.env.yaml" ) func newPodSpec(cr *vmv1.VMAnomaly, ac *build.AssetsCache) (*corev1.PodSpec, error) { @@ -163,9 +162,8 @@ func newPodSpec(cr *vmv1.VMAnomaly, ac *build.AssetsCache) (*corev1.PodSpec, err } } } - // vmanomaly accepts configuration file as a last element of args - args = append(args, confFile) + args = append(args, path.Join(confDir, configEnvsubstFilename)) container := corev1.Container{ Args: args, diff --git a/internal/controller/operator/factory/vmanomaly/statefulset.go b/internal/controller/operator/factory/vmanomaly/statefulset.go index f279158b3..f07bdc9d4 100644 --- a/internal/controller/operator/factory/vmanomaly/statefulset.go +++ b/internal/controller/operator/factory/vmanomaly/statefulset.go @@ -31,8 +31,11 @@ func buildScrape(cr *vmv1.VMAnomaly) *vmv1beta1.VMPodScrape { return build.VMPodScrape(cr, "monitoring-http") } -// CreateOrUpdate creates vmanomalyand and builds config for it +// CreateOrUpdate creates vmanomaly and builds config for it func CreateOrUpdate(ctx context.Context, cr *vmv1.VMAnomaly, rclient client.Client) error { + if cr.Paused() { + return nil + } var prevCR *vmv1.VMAnomaly if cr.ParsedLastAppliedSpec != nil { prevCR = cr.DeepCopy() @@ -60,13 +63,7 @@ func CreateOrUpdate(ctx context.Context, cr *vmv1.VMAnomaly, rclient client.Clie } } - rcfg := map[build.ResourceKind]*build.ResourceCfg{ - build.TLSAssetsResourceKind: { - MountDir: tlsAssetsDir, - SecretName: build.ResourceName(build.TLSAssetsResourceKind, cr), - }, - } - ac := build.NewAssetsCache(ctx, rclient, rcfg) + ac := getAssetsCache(ctx, rclient, cr) configHash, err := createOrUpdateConfig(ctx, rclient, cr, prevCR, ac) if err != nil { return err @@ -127,13 +124,12 @@ func newK8sApp(cr *vmv1.VMAnomaly, configHash string, ac *build.AssetsCache) (*a "checksum/config": configHash, }) } - app := &appsv1.StatefulSet{ ObjectMeta: metav1.ObjectMeta{ Name: build.ShardName(cr), Namespace: cr.GetNamespace(), Labels: cr.FinalLabels(), - Annotations: cr.FinalAnnotations(), + Annotations: podAnnotations, OwnerReferences: []metav1.OwnerReference{cr.AsOwner()}, }, Spec: appsv1.StatefulSetSpec{ @@ -147,7 +143,7 @@ func newK8sApp(cr *vmv1.VMAnomaly, configHash string, ac *build.AssetsCache) (*a Template: corev1.PodTemplateSpec{ ObjectMeta: metav1.ObjectMeta{ Labels: build.ShardPodLabels(cr), - Annotations: podAnnotations, + Annotations: cr.PodAnnotations(), }, Spec: *podSpec, }, @@ -289,3 +285,13 @@ func getShard(cr *vmv1.VMAnomaly, appTpl *appsv1.StatefulSet, num int32) (*appsv patchShardContainers(app.Spec.Template.Spec.Containers, num, cr.GetShardCount()) return app, nil } + +func getAssetsCache(ctx context.Context, rclient client.Client, cr *vmv1.VMAnomaly) *build.AssetsCache { + cfg := map[build.ResourceKind]*build.ResourceCfg{ + build.TLSAssetsResourceKind: { + MountDir: tlsAssetsDir, + SecretName: build.ResourceName(build.TLSAssetsResourceKind, cr), + }, + } + return build.NewAssetsCache(ctx, rclient, cfg) +} diff --git a/internal/controller/operator/factory/vmanomaly/statefulset_test.go b/internal/controller/operator/factory/vmanomaly/statefulset_test.go index 981b69623..451ceb1f3 100644 --- a/internal/controller/operator/factory/vmanomaly/statefulset_test.go +++ b/internal/controller/operator/factory/vmanomaly/statefulset_test.go @@ -18,6 +18,7 @@ import ( vmv1 "github.com/VictoriaMetrics/operator/api/operator/v1" vmv1beta1 "github.com/VictoriaMetrics/operator/api/operator/v1beta1" + "github.com/VictoriaMetrics/operator/internal/config" "github.com/VictoriaMetrics/operator/internal/controller/operator/factory/build" "github.com/VictoriaMetrics/operator/internal/controller/operator/factory/k8stools" ) @@ -25,6 +26,7 @@ import ( func TestCreateOrUpdate(t *testing.T) { type opts struct { cr *vmv1.VMAnomaly + cfgMutator func(*config.BaseOperatorConf) validate func(sts *appsv1.StatefulSet, idx int) wantErr bool predefinedObjects []runtime.Object @@ -35,6 +37,14 @@ func TestCreateOrUpdate(t *testing.T) { fclient := k8stools.GetTestClientWithObjects(o.predefinedObjects) build.AddDefaults(fclient.Scheme()) fclient.Scheme().Default(o.cr) + cfg := config.MustGetBaseConfig() + if o.cfgMutator != nil { + defaultCfg := *cfg + o.cfgMutator(cfg) + defer func() { + *config.MustGetBaseConfig() = defaultCfg + }() + } err := CreateOrUpdate(ctx, o.cr, fclient) if o.wantErr { assert.Error(t, err) diff --git a/internal/controller/operator/factory/vmanomaly/vmanomaly_reconcile_test.go b/internal/controller/operator/factory/vmanomaly/vmanomaly_reconcile_test.go index 34c2d3862..50fc4c3df 100644 --- a/internal/controller/operator/factory/vmanomaly/vmanomaly_reconcile_test.go +++ b/internal/controller/operator/factory/vmanomaly/vmanomaly_reconcile_test.go @@ -5,12 +5,16 @@ import ( "testing" "github.com/stretchr/testify/assert" + appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" + k8serrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" "k8s.io/utils/ptr" vmv1 "github.com/VictoriaMetrics/operator/api/operator/v1" + vmv1beta1 "github.com/VictoriaMetrics/operator/api/operator/v1beta1" "github.com/VictoriaMetrics/operator/internal/controller/operator/factory/build" "github.com/VictoriaMetrics/operator/internal/controller/operator/factory/k8stools" ) @@ -253,3 +257,69 @@ schedulers: }, }) } + +func TestCreateOrUpdate_Paused(t *testing.T) { + cr := &vmv1.VMAnomaly{ + ObjectMeta: metav1.ObjectMeta{ + Name: "example-anomaly", + Namespace: "default", + }, + Spec: vmv1.VMAnomalySpec{ + ConfigRawYaml: ` +models: + M1: + class: "zscore" + z_threshold: 2.5 + queries: ["q1"] + schedulers: ["S1"] +reader: + queries: + q1: + expr: "sum(up)" +schedulers: + S1: + class: "periodic" + infer_every: "1m" +`, + Reader: &vmv1.VMAnomalyReadersSpec{ + DatasourceURL: "http://reader-url", + SamplingPeriod: "1m", + }, + Writer: &vmv1.VMAnomalyWritersSpec{ + DatasourceURL: "http://writer-url", + }, + CommonAppsParams: vmv1beta1.CommonAppsParams{ + ReplicaCount: ptr.To(int32(1)), + Paused: true, + }, + }, + } + nsn := types.NamespacedName{Namespace: cr.Namespace, Name: cr.PrefixedName()} + fclient := k8stools.GetTestClientWithObjects([]runtime.Object{cr}) + ctx := context.TODO() + build.AddDefaults(fclient.Scheme()) + fclient.Scheme().Default(cr) + + assert.NoError(t, CreateOrUpdate(ctx, cr, fclient)) + + var sts appsv1.StatefulSet + err := fclient.Get(ctx, nsn, &sts) + assert.Error(t, err) + assert.True(t, k8serrors.IsNotFound(err)) + + // unpause and verify reconciliation + cr.Spec.Paused = false + assert.NoError(t, CreateOrUpdate(ctx, cr, fclient)) + err = fclient.Get(ctx, nsn, &sts) + assert.NoError(t, err) + + // pause and update replica count + cr.Spec.Paused = true + cr.Spec.ReplicaCount = ptr.To(int32(2)) + assert.NoError(t, CreateOrUpdate(ctx, cr, fclient)) + + // check that replicas count is not updated + err = fclient.Get(ctx, nsn, &sts) + assert.NoError(t, err) + assert.Equal(t, int32(1), *sts.Spec.Replicas) +} From 15983354474bc7dceaff9852938e5eb432d8b8b7 Mon Sep 17 00:00:00 2001 From: Vadim Rutkovsky Date: Mon, 22 Jun 2026 12:07:08 +0200 Subject: [PATCH 24/31] test: pass operator env vars in upgrade tests --- test/e2e/upgrade/utils.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/test/e2e/upgrade/utils.go b/test/e2e/upgrade/utils.go index 22654ea6b..89b5c6b95 100644 --- a/test/e2e/upgrade/utils.go +++ b/test/e2e/upgrade/utils.go @@ -101,7 +101,9 @@ func updateOperator(ctx context.Context, k8sClient client.Client, operatorImage, var dep appsv1.Deployment if err := k8sClient.Get(ctx, nsn, &dep); err == nil { - dep.Spec.Template.Spec.Containers[0].Image = operatorImage + container := &dep.Spec.Template.Spec.Containers[0] + container.Image = operatorImage + container.Env = operatorEnvVars(watchNamespace, envs) Expect(k8sClient.Update(ctx, &dep)).ToNot(HaveOccurred()) } else { By("creating ServiceAccount for operator") From 1c44958747958b141e2cb04d10ac63e00c280fe6 Mon Sep 17 00:00:00 2001 From: Vadim Rutkovsky Date: Fri, 12 Jun 2026 18:12:30 +0200 Subject: [PATCH 25/31] test: trim overlapping vmdistributed and vmanomaly tests (#2262) --- Makefile | 18 ++++++-- test/e2e/upgrade/upgrade_test.go | 65 +++++++++++++++++++++------ test/e2e/upgrade/utils.go | 16 ++++++- test/e2e/vmdistributed_test.go | 75 -------------------------------- 4 files changed, 81 insertions(+), 93 deletions(-) diff --git a/Makefile b/Makefile index d2a0cd282..6fb9ba378 100644 --- a/Makefile +++ b/Makefile @@ -9,6 +9,7 @@ TAG ?= $(shell echo $$(git describe --long --all | tr '/' '-')$$( \ git diff-index --quiet HEAD -- || echo '-dirty-'$$( \ git diff-index -u HEAD -- ':!config' ':!docs' | openssl sha1 | cut -d' ' -f2 | cut -c 1-8))) OPERATOR_IMAGE ?= $(REGISTRY)/$(ORG)/$(REPO):$(TAG) +CONFIG_RELOADER_IMAGE ?= $(REGISTRY)/$(ORG)/$(REPO):config-reloader-$(TAG) VERSION ?= $(if $(findstring $(TAG),$(TAG:v%=%)),0.0.0,$(TAG:v%=%)) DATEINFO_TAG ?= $(shell date -u +'%Y%m%d-%H%M%S') NAMESPACE ?= vm @@ -162,7 +163,7 @@ test: manifests generate fmt vet envtest ## Run tests. # Utilize Kind or modify the e2e tests to load the image locally, enabling compatibility with other vendors. .PHONY: test-e2e # Run the e2e tests against a Kind k8s instance that is spun up. test-e2e: load-kind ginkgo crust-gather mirrord - env CGO_ENABLED=1 OPERATOR_IMAGE=$(OPERATOR_IMAGE) REPORTS_DIR=$(shell pwd) CRUST_GATHER_BIN=$(CRUST_GATHER_BIN) $(MIRRORD_BIN) exec -f ./mirrord.json -- $(GINKGO_BIN) \ + env CGO_ENABLED=1 OPERATOR_IMAGE=$(OPERATOR_IMAGE) CONFIG_RELOADER_IMAGE=$(CONFIG_RELOADER_IMAGE) REPORTS_DIR=$(shell pwd) CRUST_GATHER_BIN=$(CRUST_GATHER_BIN) $(MIRRORD_BIN) exec -f ./mirrord.json -- $(GINKGO_BIN) \ -ldflags="-linkmode=external" \ --output-interceptor-mode=none \ -procs=$(E2E_TESTS_CONCURRENCY) \ @@ -210,6 +211,10 @@ docker-build: ## Build docker image with the manager. ${DOCKER_BUILD_ARGS} \ -t $(REGISTRY)/$(ORG)/$(REPO):$(TAG) . +.PHONY: docker-build-config-reloader +docker-build-config-reloader: ## Build docker image with config-reloader. + TAG=config-reloader-$(TAG) COMPONENT=config-reloader ROOT=./cmd/config-reloader $(MAKE) docker-build + build-operator: ROOT=./cmd build-operator: build @@ -318,13 +323,18 @@ undeploy: kustomize ## Undeploy controller from the K8s cluster specified in ~/. $(KUSTOMIZE) build $(OVERLAY) | $(KUBECTL) delete $(if $(NAMESPACE),-n $(NAMESPACE),) --ignore-not-found=$(ignore-not-found) -f - # builds image and loads it into kind. -load-kind: docker-build kind +ensure-kind-cluster: kind if [ "`$(KIND) get clusters`" != "kind" ]; then \ $(KIND) create cluster --config=./kind.yaml; \ else \ $(KUBECTL) cluster-info --context kind-kind; \ - fi; \ - $(KIND) load docker-image $(REGISTRY)/$(ORG)/$(REPO):$(TAG); \ + fi + +load-kind: docker-build docker-build-config-reloader ensure-kind-cluster + if [ "$(CONTAINER_TOOL)" != "podman" ]; then \ + $(KIND) load docker-image $(REGISTRY)/$(ORG)/$(REPO):$(TAG); \ + $(KIND) load docker-image $(CONFIG_RELOADER_IMAGE); \ + fi deploy-kind: OVERLAY=config/base-with-webhook deploy-kind: load-kind deploy diff --git a/test/e2e/upgrade/upgrade_test.go b/test/e2e/upgrade/upgrade_test.go index 46b560438..b8f05339c 100644 --- a/test/e2e/upgrade/upgrade_test.go +++ b/test/e2e/upgrade/upgrade_test.go @@ -59,7 +59,7 @@ var ( {URL: "http://localhost:8428/api/v1/write"}, }, CommonConfigReloaderParams: vmv1beta1.CommonConfigReloaderParams{ - ConfigReloaderImage: "quay.io/victoriametrics/operator:config-reloader-v0.65.0", + ConfigReloaderImage: configReloaderImage(), }, CommonAppsParams: vmv1beta1.CommonAppsParams{ ReplicaCount: ptr.To[int32](1), @@ -101,7 +101,7 @@ var ( vmauth = &vmv1beta1.VMAuth{ Spec: vmv1beta1.VMAuthSpec{ CommonConfigReloaderParams: vmv1beta1.CommonConfigReloaderParams{ - ConfigReloaderImage: "quay.io/victoriametrics/operator:config-reloader-v0.65.0", + ConfigReloaderImage: configReloaderImage(), }, CommonAppsParams: vmv1beta1.CommonAppsParams{ ReplicaCount: ptr.To[int32](1), @@ -193,7 +193,7 @@ var ( vmalert = &vmv1beta1.VMAlert{ Spec: vmv1beta1.VMAlertSpec{ CommonConfigReloaderParams: vmv1beta1.CommonConfigReloaderParams{ - ConfigReloaderImage: "quay.io/victoriametrics/operator:config-reloader-v0.65.0", + ConfigReloaderImage: configReloaderImage(), }, CommonAppsParams: vmv1beta1.CommonAppsParams{ ReplicaCount: ptr.To[int32](1), @@ -309,7 +309,7 @@ var ( vmalertmanager = &vmv1beta1.VMAlertmanager{ Spec: vmv1beta1.VMAlertmanagerSpec{ CommonConfigReloaderParams: vmv1beta1.CommonConfigReloaderParams{ - ConfigReloaderImage: "quay.io/victoriametrics/operator:config-reloader-v0.65.0", + ConfigReloaderImage: configReloaderImage(), }, CommonAppsParams: vmv1beta1.CommonAppsParams{ ReplicaCount: ptr.To[int32](1), @@ -652,10 +652,16 @@ var _ = Describe("operator upgrade", Label("upgrade"), func() { cr.Spec.StatefulMode = true })}, {version: "v0.68.4", cr: with(vlagent)}, - {version: "v0.68.4", cr: with(vlagent, func(cr *vmv1.VLAgent) { - cr.Spec.K8sCollector.Enabled = true - cr.Spec.ServiceAccountName = "vlagent-collector" + {version: "v0.68.4", cr: vlagentK8sCollector}, + {version: "v0.68.5", cr: with(vmagent)}, + {version: "v0.68.5", cr: with(vmagent, func(cr *vmv1beta1.VMAgent) { + cr.Spec.DaemonSetMode = true + })}, + {version: "v0.68.5", cr: with(vmagent, func(cr *vmv1beta1.VMAgent) { + cr.Spec.StatefulMode = true })}, + {version: "v0.68.5", cr: with(vlagent)}, + {version: "v0.68.5", cr: vlagentK8sCollector}, }, }, // nolint:dupl @@ -689,7 +695,9 @@ var _ = Describe("operator upgrade", Label("upgrade"), func() { {version: "v0.68.4", cr: with(vmalert)}, {version: "v0.68.4", cr: with(vmauth)}, {version: "v0.68.4", cr: with(vmalertmanager)}, - {version: "v0.68.4", cr: with(vmanomaly)}, + {version: "v0.68.5", cr: with(vmalert)}, + {version: "v0.68.5", cr: with(vmauth)}, + {version: "v0.68.5", cr: with(vmalertmanager)}, }, }, // nolint:dupl @@ -717,6 +725,9 @@ var _ = Describe("operator upgrade", Label("upgrade"), func() { {version: "v0.68.4", cr: with(vmsingle)}, {version: "v0.68.4", cr: with(vtsingle)}, {version: "v0.68.4", cr: with(vlsingle)}, + {version: "v0.68.5", cr: with(vmsingle)}, + {version: "v0.68.5", cr: with(vtsingle)}, + {version: "v0.68.5", cr: with(vlsingle)}, }, }, // nolint:dupl @@ -751,6 +762,10 @@ var _ = Describe("operator upgrade", Label("upgrade"), func() { {version: "v0.68.4", cr: with(vlcluster, func(cr *vmv1.VLCluster) { cr.Spec.RequestsLoadBalancer.Enabled = true })}, + {version: "v0.68.5", cr: with(vlcluster)}, + {version: "v0.68.5", cr: with(vlcluster, func(cr *vmv1.VLCluster) { + cr.Spec.RequestsLoadBalancer.Enabled = true + })}, }, }, // nolint:dupl @@ -785,6 +800,10 @@ var _ = Describe("operator upgrade", Label("upgrade"), func() { {version: "v0.68.4", cr: with(vtcluster, func(cr *vmv1.VTCluster) { cr.Spec.RequestsLoadBalancer.Enabled = true })}, + {version: "v0.68.5", cr: with(vtcluster)}, + {version: "v0.68.5", cr: with(vtcluster, func(cr *vmv1.VTCluster) { + cr.Spec.RequestsLoadBalancer.Enabled = true + })}, }, }, // nolint:dupl @@ -798,6 +817,7 @@ var _ = Describe("operator upgrade", Label("upgrade"), func() { {version: "v0.68.2", cr: with(vmcluster)}, {version: "v0.68.3", cr: with(vmcluster)}, {version: "v0.68.4", cr: with(vmcluster)}, + {version: "v0.68.5", cr: with(vmcluster)}, }, }, // nolint:dupl @@ -896,6 +916,29 @@ var _ = Describe("operator upgrade", Label("upgrade"), func() { }, } })}, + {version: "v0.68.5", isEnterprise: true, cr: with(vmcluster, func(cr *vmv1beta1.VMCluster) { + cr.Spec.RequestsLoadBalancer.Enabled = true + cr.Spec.VMStorage.Image.Tag = "v1.136.0-enterprise-cluster" + cr.Spec.VMSelect.Image.Tag = "v1.136.0-enterprise-cluster" + cr.Spec.VMInsert.Image.Tag = "v1.136.0-enterprise-cluster" + cr.Spec.RequestsLoadBalancer.Spec.Image.Tag = "v1.136.0-enterprise" + cr.Spec.VMStorage.VMBackup = &vmv1beta1.VMBackup{ + Destination: "fs:///tmp", + DestinationDisableSuffixAdd: true, + Image: vmv1beta1.Image{ + Tag: "v1.136.0-enterprise", + }, + AcceptEULA: true, + } + cr.Spec.License = &vmv1beta1.License{ + KeyRef: &corev1.SecretKeySelector{ + LocalObjectReference: corev1.LocalObjectReference{ + Name: "license", + }, + Key: "key", + }, + } + })}, }, envs: map[string]string{ "VM_LOOPBACK": "localhost", @@ -905,11 +948,7 @@ var _ = Describe("operator upgrade", Label("upgrade"), func() { { name: "VMDistributed", pairs: []crVersionPair{ - {version: "v0.68.0", cr: with(vmdistributed)}, - {version: "v0.68.1", cr: with(vmdistributed)}, - {version: "v0.68.2", cr: with(vmdistributed)}, - {version: "v0.68.3", cr: with(vmdistributed)}, - {version: "v0.68.4", cr: with(vmdistributed)}, + {version: "v0.68.5", cr: with(vmdistributed)}, }, }, })) diff --git a/test/e2e/upgrade/utils.go b/test/e2e/upgrade/utils.go index 89b5c6b95..3d17cc2fc 100644 --- a/test/e2e/upgrade/utils.go +++ b/test/e2e/upgrade/utils.go @@ -4,7 +4,9 @@ import ( "context" "fmt" "maps" + "os" "os/exec" + "strings" "time" "github.com/google/go-cmp/cmp" //nolint:staticcheck @@ -31,6 +33,18 @@ const ( operatorImageBase = "victoriametrics/operator" ) +func configReloaderImage() string { + if image := os.Getenv("CONFIG_RELOADER_IMAGE"); image != "" { + return image + } + image := os.Getenv("OPERATOR_IMAGE") + tagIdx := strings.LastIndex(image, ":") + if tagIdx > strings.LastIndex(image, "/") { + return image[:tagIdx+1] + "config-reloader-" + image[tagIdx+1:] + } + return "" +} + // operatorEnvVars builds the env var list for the operator pod, // TODO[vrutkovs]: do we need to copy it? func operatorEnvVars(watchNamespace string, extraEnvs map[string]string) []corev1.EnvVar { @@ -41,7 +55,7 @@ func operatorEnvVars(watchNamespace string, extraEnvs map[string]string) []corev "VM_PODWAITREADYTIMEOUT": "20s", "VM_PODWAITREADYINTERVALCHECK": "1s", "VM_APPREADYTIMEOUT": "50s", - "VM_CONFIG_RELOADER_IMAGE": "quay.io/victoriametrics/operator:config-reloader-v0.68.3", + "VM_CONFIG_RELOADER_IMAGE": configReloaderImage(), "WATCH_NAMESPACE": watchNamespace, "VM_CONTAINERREGISTRY": "quay.io", } diff --git a/test/e2e/vmdistributed_test.go b/test/e2e/vmdistributed_test.go index 6ad75f1c2..5acaa7811 100644 --- a/test/e2e/vmdistributed_test.go +++ b/test/e2e/vmdistributed_test.go @@ -404,81 +404,6 @@ var _ = Describe("e2e VMDistributed", Label("vm", "vmdistributed"), func() { Expect(ownerRef.Name).To(Equal(cr.Name)) }) - It("should successfully create a VMDistributed with VMCluster references and override spec", func() { - By("creating an initial VMClusters") - nsn.Name = "vmd-override-clusters" - zonesCount := 2 - zs := make([]vmv1alpha1.VMDistributedZone, zonesCount) - vmClusters := make([]*vmv1beta1.VMCluster, zonesCount) - vmAgents := make([]*vmv1beta1.VMAgent, zonesCount) - vmClusterFn := []func(*vmv1beta1.VMClusterSpec){ - func(s *vmv1beta1.VMClusterSpec) { - s.RetentionPeriod = "1w" - }, - func(s *vmv1beta1.VMClusterSpec) { - s.RetentionPeriod = "2w" - }, - } - for i := range zs { - objMeta := metav1.ObjectMeta{ - Namespace: namespace, - Name: fmt.Sprintf("%s-%d", nsn.Name, i+1), - } - zs[i].Name = objMeta.Name - zs[i].VMCluster.Spec = genVMClusterSpec(vmClusterFn[i]) - zs[i].VMCluster.Name = objMeta.Name - zs[i].VMAgent.Name = objMeta.Name - vmClusters[i] = &vmv1beta1.VMCluster{ - ObjectMeta: objMeta, - Spec: genVMClusterSpec(), - } - vmAgents[i] = &vmv1beta1.VMAgent{ - ObjectMeta: objMeta, - Spec: genVMAgentSpec(), - } - } - - var wg sync.WaitGroup - createVMClusters(ctx, &wg, k8sClient, vmClusters...) - createVMAgents(ctx, &wg, k8sClient, vmAgents...) - createVMAuth(ctx, &wg, k8sClient, nsn.Name, namespace) - wg.Wait() - - By("creating a VMDistributed referencing the existing VMCluster with an override spec") - cr := &vmv1alpha1.VMDistributed{ - ObjectMeta: metav1.ObjectMeta{ - Namespace: namespace, - Name: nsn.Name, - }, - Spec: vmv1alpha1.VMDistributedSpec{ - ZoneCommon: vmv1alpha1.VMDistributedZoneCommon{ - ReadyTimeout: &metav1.Duration{Duration: 2 * time.Minute}, - UpdatePause: &metav1.Duration{Duration: 1 * time.Second}, - }, - VMAuth: vmv1alpha1.VMDistributedAuth{Name: nsn.Name}, - Zones: zs, - }, - } - DeferCleanup(func() { - Expect(finalize.SafeDelete(ctx, k8sClient, cr)).ToNot(HaveOccurred()) - }) - Expect(k8sClient.Create(ctx, cr)).ToNot(HaveOccurred()) - - By("waiting for VMDistributed to become operational") - Eventually(func() error { - return expectObjectStatusOperational(ctx, k8sClient, &vmv1alpha1.VMDistributed{}, nsn) - }, eventualDistributedExpandingTimeout).WithContext(ctx).ShouldNot(HaveOccurred()) - verifyOwnerReferences(ctx, cr, vmClusters, namespace) - - By("verifying that the referenced VMClusters have the override applied") - var updatedCluster1, updatedCluster2 vmv1beta1.VMCluster - Expect(k8sClient.Get(ctx, types.NamespacedName{Name: vmClusters[0].Name, Namespace: namespace}, &updatedCluster1)).ToNot(HaveOccurred()) - Expect(updatedCluster1.Spec.RetentionPeriod).To(Equal("1w")) - - Expect(k8sClient.Get(ctx, types.NamespacedName{Name: vmClusters[1].Name, Namespace: namespace}, &updatedCluster2)).ToNot(HaveOccurred()) - Expect(updatedCluster2.Spec.RetentionPeriod).To(Equal("2w")) - }) - It("should apply global overrides before cluster-specific overrides", func() { By("creating initial VMClusters") nsn.Name = "vmd-global-override" From 67250521b6100e69f185c1f7d74208233c1f23af Mon Sep 17 00:00:00 2001 From: Vadim Rutkovsky Date: Fri, 10 Apr 2026 15:54:41 +0200 Subject: [PATCH 26/31] test: skip enterprise tests when license is not available (#2054) --- test/e2e/upgrade/upgrade_test.go | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/test/e2e/upgrade/upgrade_test.go b/test/e2e/upgrade/upgrade_test.go index b8f05339c..63d78e641 100644 --- a/test/e2e/upgrade/upgrade_test.go +++ b/test/e2e/upgrade/upgrade_test.go @@ -21,6 +21,7 @@ import ( vmv1 "github.com/VictoriaMetrics/operator/api/operator/v1" vmv1alpha1 "github.com/VictoriaMetrics/operator/api/operator/v1alpha1" vmv1beta1 "github.com/VictoriaMetrics/operator/api/operator/v1beta1" + "github.com/VictoriaMetrics/operator/test/e2e" "github.com/VictoriaMetrics/operator/test/e2e/suite" ) @@ -424,8 +425,9 @@ var ( ) type crVersionPair struct { - version string - cr client.Object + version string + cr func(string) client.Object + isEnterprise bool } type entry struct { @@ -439,14 +441,18 @@ func entries(es []entry) []TableEntry { var result []TableEntry for _, e := range es { for _, p := range e.pairs { - obj := p.cr.DeepCopyObject().(client.Object) - result = append(result, Entry(fmt.Sprintf("from %s: %s (%T)", p.version, e.name, obj), p.version, e.genDeps, []client.Object{obj}, e.envs)) + obj := p.cr(p.version) + result = append(result, Entry(fmt.Sprintf("from %s: %s (%T)", p.version, e.name, obj), p.version, e.genDeps, []client.Object{obj}, e.envs, p.isEnterprise)) } } return result } -func ensureNoPodRollout(version string, genDeps func(string) []client.Object, objs []client.Object, envs map[string]string) { +func ensureNoPodRollout(version string, genDeps func(string) []client.Object, objs []client.Object, envs map[string]string, isEnterprise bool) { + if isEnterprise && e2e.LICENSE_KEY == "" { + Skip("skipping enterprise test: LICENSE_KEY is not set") + } + namespace := createRandomNamespace(ctx, k8sClient) previousOperatorImage := fmt.Sprintf("quay.io/%s:%s", operatorImageBase, version) @@ -679,25 +685,27 @@ var _ = Describe("operator upgrade", Label("upgrade"), func() { {version: "v0.68.0", cr: with(vmalert)}, {version: "v0.68.0", cr: with(vmauth)}, {version: "v0.68.0", cr: with(vmalertmanager)}, - {version: "v0.68.0", cr: with(vmanomaly)}, + {version: "v0.68.0", cr: with(vmanomaly), isEnterprise: true}, {version: "v0.68.1", cr: with(vmalert)}, {version: "v0.68.1", cr: with(vmauth)}, {version: "v0.68.1", cr: with(vmalertmanager)}, - {version: "v0.68.1", cr: with(vmanomaly)}, + {version: "v0.68.1", cr: with(vmanomaly), isEnterprise: true}, {version: "v0.68.2", cr: with(vmalert)}, {version: "v0.68.2", cr: with(vmauth)}, {version: "v0.68.2", cr: with(vmalertmanager)}, - {version: "v0.68.2", cr: with(vmanomaly)}, + {version: "v0.68.2", cr: with(vmanomaly), isEnterprise: true}, {version: "v0.68.3", cr: with(vmalert)}, {version: "v0.68.3", cr: with(vmauth)}, {version: "v0.68.3", cr: with(vmalertmanager)}, - {version: "v0.68.3", cr: with(vmanomaly)}, + {version: "v0.68.3", cr: with(vmanomaly), isEnterprise: true}, {version: "v0.68.4", cr: with(vmalert)}, {version: "v0.68.4", cr: with(vmauth)}, {version: "v0.68.4", cr: with(vmalertmanager)}, + {version: "v0.68.4", cr: with(vmanomaly), isEnterprise: true}, {version: "v0.68.5", cr: with(vmalert)}, {version: "v0.68.5", cr: with(vmauth)}, {version: "v0.68.5", cr: with(vmalertmanager)}, + {version: "v0.68.5", cr: with(vmanomaly), isEnterprise: true}, }, }, // nolint:dupl @@ -824,7 +832,7 @@ var _ = Describe("operator upgrade", Label("upgrade"), func() { { name: "VMCluster with VMBackup", pairs: []crVersionPair{ - {version: "v0.65.0", cr: with(vmcluster, func(cr *vmv1beta1.VMCluster) { + {version: "v0.65.0", isEnterprise: true, cr: with(vmcluster, func(cr *vmv1beta1.VMCluster) { cr.Spec.RequestsLoadBalancer.Enabled = true cr.Spec.VMStorage.Image.Tag = "v1.136.0-enterprise-cluster" cr.Spec.VMSelect.Image.Tag = "v1.136.0-enterprise-cluster" @@ -847,7 +855,7 @@ var _ = Describe("operator upgrade", Label("upgrade"), func() { }, } })}, - {version: "v0.68.3", cr: with(vmcluster, func(cr *vmv1beta1.VMCluster) { + {version: "v0.68.3", isEnterprise: true, cr: with(vmcluster, func(cr *vmv1beta1.VMCluster) { cr.Spec.RequestsLoadBalancer.Enabled = true cr.Spec.VMStorage.Image.Tag = "v1.136.0-enterprise-cluster" cr.Spec.VMSelect.Image.Tag = "v1.136.0-enterprise-cluster" @@ -870,7 +878,7 @@ var _ = Describe("operator upgrade", Label("upgrade"), func() { }, } })}, - {version: "v0.68.4", cr: with(vmcluster, func(cr *vmv1beta1.VMCluster) { + {version: "v0.68.4", isEnterprise: true, cr: with(vmcluster, func(cr *vmv1beta1.VMCluster) { cr.Spec.RequestsLoadBalancer.Enabled = true cr.Spec.VMStorage.Image.Tag = "v1.136.0-enterprise-cluster" cr.Spec.VMSelect.Image.Tag = "v1.136.0-enterprise-cluster" From 15aeaa14109a68992462fc5884af64fe84d2d5ac Mon Sep 17 00:00:00 2001 From: Vadim Rutkovsky Date: Tue, 23 Jun 2026 11:43:17 +0200 Subject: [PATCH 27/31] test: set default resources for upgrade tests --- test/e2e/upgrade/utils.go | 1 + 1 file changed, 1 insertion(+) diff --git a/test/e2e/upgrade/utils.go b/test/e2e/upgrade/utils.go index 3d17cc2fc..a7c6304b4 100644 --- a/test/e2e/upgrade/utils.go +++ b/test/e2e/upgrade/utils.go @@ -83,6 +83,7 @@ func operatorEnvVars(watchNamespace string, extraEnvs map[string]string) []corev "MEM": "20Mi", } for _, prefix := range resourceEnvsPrefixes { + envs[fmt.Sprintf("VM_%s_USEDEFAULTRESOURCES", prefix)] = "true" for _, t := range []string{"LIMIT", "REQUEST"} { for rn, rv := range resources { envName := fmt.Sprintf("VM_%s_RESOURCE_%s_%s", prefix, t, rn) From 0176394a58bd65285ec52315907257b2ab57c9dc Mon Sep 17 00:00:00 2001 From: Vadim Rutkovsky Date: Tue, 23 Jun 2026 12:18:56 +0200 Subject: [PATCH 28/31] test: remove vmanomaly upgrade tests --- test/e2e/upgrade/upgrade_test.go | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/test/e2e/upgrade/upgrade_test.go b/test/e2e/upgrade/upgrade_test.go index 63d78e641..e40e42114 100644 --- a/test/e2e/upgrade/upgrade_test.go +++ b/test/e2e/upgrade/upgrade_test.go @@ -26,7 +26,7 @@ import ( ) var ( - vmanomaly = &vmv1.VMAnomaly{ + _ = &vmv1.VMAnomaly{ Spec: vmv1.VMAnomalySpec{ Reader: &vmv1.VMAnomalyReadersSpec{ DatasourceURL: "http://vmsingle-anomaly.svc:8428", @@ -685,27 +685,21 @@ var _ = Describe("operator upgrade", Label("upgrade"), func() { {version: "v0.68.0", cr: with(vmalert)}, {version: "v0.68.0", cr: with(vmauth)}, {version: "v0.68.0", cr: with(vmalertmanager)}, - {version: "v0.68.0", cr: with(vmanomaly), isEnterprise: true}, {version: "v0.68.1", cr: with(vmalert)}, {version: "v0.68.1", cr: with(vmauth)}, {version: "v0.68.1", cr: with(vmalertmanager)}, - {version: "v0.68.1", cr: with(vmanomaly), isEnterprise: true}, {version: "v0.68.2", cr: with(vmalert)}, {version: "v0.68.2", cr: with(vmauth)}, {version: "v0.68.2", cr: with(vmalertmanager)}, - {version: "v0.68.2", cr: with(vmanomaly), isEnterprise: true}, {version: "v0.68.3", cr: with(vmalert)}, {version: "v0.68.3", cr: with(vmauth)}, {version: "v0.68.3", cr: with(vmalertmanager)}, - {version: "v0.68.3", cr: with(vmanomaly), isEnterprise: true}, {version: "v0.68.4", cr: with(vmalert)}, {version: "v0.68.4", cr: with(vmauth)}, {version: "v0.68.4", cr: with(vmalertmanager)}, - {version: "v0.68.4", cr: with(vmanomaly), isEnterprise: true}, {version: "v0.68.5", cr: with(vmalert)}, {version: "v0.68.5", cr: with(vmauth)}, {version: "v0.68.5", cr: with(vmalertmanager)}, - {version: "v0.68.5", cr: with(vmanomaly), isEnterprise: true}, }, }, // nolint:dupl From 39492eb85bc05c5ca02e44b6d506814bb6a1b6ba Mon Sep 17 00:00:00 2001 From: Nikolay Date: Mon, 22 Jun 2026 13:45:11 +0200 Subject: [PATCH 29/31] internal/config: update VM_METRICS_VERSION to v1.146.0 (#2314) Changelog [v1.146.0](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/docs/victoriametrics/changelog/CHANGELOG.md#v11460) Signed-off-by: f41gh7 --- docs/CHANGELOG.md | 2 +- docs/env.md | 2 +- internal/config/config.go | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index d02b9f8d3..9ff316bf2 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -13,7 +13,7 @@ aliases: ## tip -* Dependency: [vmoperator](https://docs.victoriametrics.com/operator/): Updated default versions for VM apps to [v1.145.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.145.0) version +* Dependency: [vmoperator](https://docs.victoriametrics.com/operator/): Updated default versions for VM apps to [v1.146.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.146.0) version * Dependency: [vmoperator](https://docs.victoriametrics.com/operator/): Updated default versions for VL apps to [v1.51.0](https://github.com/VictoriaMetrics/VictoriaLogs/releases/tag/v1.51.0). * Dependency: [vmoperator](https://docs.victoriametrics.com/operator/): Updated default versions for VT apps to [v0.9.3](https://github.com/VictoriaMetrics/VictoriaTraces/releases/tag/v0.9.3) version. diff --git a/docs/env.md b/docs/env.md index a29b4fa95..1db604d37 100644 --- a/docs/env.md +++ b/docs/env.md @@ -1,6 +1,6 @@ | Environment variables | | --- | -| VM_METRICS_VERSION: `v1.145.0` # | +| VM_METRICS_VERSION: `v1.146.0` # | | VM_LOGS_VERSION: `v1.51.0` # | | VM_ANOMALY_VERSION: `v1.29.6` # | | VM_TRACES_VERSION: `v0.9.3` # | diff --git a/internal/config/config.go b/internal/config/config.go index b0efd79e4..c9e0bc484 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -35,7 +35,7 @@ var ( initConf sync.Once defaultEnvs = map[string]string{ - "VM_METRICS_VERSION": "v1.145.0", + "VM_METRICS_VERSION": "v1.146.0", "VM_LOGS_VERSION": "v1.51.0", "VM_ANOMALY_VERSION": "v1.29.6", "VM_TRACES_VERSION": "v0.9.3", From 02e98c2d0960d7fbfd475bfc38030786a9b3fa2e Mon Sep 17 00:00:00 2001 From: Vadim Rutkovsky Date: Wed, 24 Jun 2026 13:54:44 +0200 Subject: [PATCH 30/31] fix: set UnderScoreTargetLabel in victoriaMetricsAppRelabelConfig to prevent infinite reconcile loop --- internal/controller/operator/factory/build/vmscrape.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/internal/controller/operator/factory/build/vmscrape.go b/internal/controller/operator/factory/build/vmscrape.go index b24066ea7..d6be44d93 100644 --- a/internal/controller/operator/factory/build/vmscrape.go +++ b/internal/controller/operator/factory/build/vmscrape.go @@ -210,7 +210,8 @@ func addVictoriaMetricsAppRelabelConfig(relabelings *vmv1beta1.EndpointRelabelin func victoriaMetricsAppRelabelConfig() *vmv1beta1.RelabelConfig { return &vmv1beta1.RelabelConfig{ - TargetLabel: "victoriametrics_app", - Replacement: ptr.To("true"), + TargetLabel: "victoriametrics_app", + UnderScoreTargetLabel: "victoriametrics_app", + Replacement: ptr.To("true"), } } From d190123775634a5bd726e181ba9863091fee0827 Mon Sep 17 00:00:00 2001 From: Vadim Rutkovsky Date: Wed, 24 Jun 2026 19:52:08 +0200 Subject: [PATCH 31/31] fix: vlagent path collision in upgrade tests --- test/e2e/upgrade/upgrade_test.go | 54 ++++++++++++++++++++++++-------- 1 file changed, 41 insertions(+), 13 deletions(-) diff --git a/test/e2e/upgrade/upgrade_test.go b/test/e2e/upgrade/upgrade_test.go index e40e42114..5be5a63ec 100644 --- a/test/e2e/upgrade/upgrade_test.go +++ b/test/e2e/upgrade/upgrade_test.go @@ -95,11 +95,27 @@ var ( corev1.ResourceMemory: resource.MustParse("128Mi"), }, }, - TerminationGracePeriodSeconds: ptr.To(int64(1)), - }, + TerminationGracePeriodSeconds: ptr.To(int64(1)), }, - } - vmauth = &vmv1beta1.VMAuth{ + }, +} +vlagentK8sCollector = withVersion(vlagent, func(cr *vmv1.VLAgent, version string) { + cr.Spec.K8sCollector.Enabled = true + cr.Spec.ServiceAccountName = "vlagent-collector" + tmpPath := fmt.Sprintf("/var/lib/vlagent-data-%s", version) + cr.Spec.TmpDataPath = ptr.To(tmpPath) + cr.Spec.Volumes = append(cr.Spec.Volumes, corev1.Volume{ + Name: "tmp-data", + VolumeSource: corev1.VolumeSource{ + HostPath: &corev1.HostPathVolumeSource{Path: tmpPath}, + }, + }) + cr.Spec.VolumeMounts = append(cr.Spec.VolumeMounts, corev1.VolumeMount{ + Name: "tmp-data", + MountPath: tmpPath, + }) +}) +vmauth = &vmv1beta1.VMAuth{ Spec: vmv1beta1.VMAuthSpec{ CommonConfigReloaderParams: vmv1beta1.CommonConfigReloaderParams{ ConfigReloaderImage: configReloaderImage(), @@ -410,12 +426,24 @@ type object[T any] interface { DeepCopy() T } -func with[T object[T]](cr T, opts ...func(T)) T { - obj := cr.DeepCopy() - for _, o := range opts { - o(obj) +func with[T object[T]](cr T, opts ...func(T)) func(string) client.Object { + return func(_ string) client.Object { + obj := cr.DeepCopy() + for _, o := range opts { + o(obj) + } + return any(obj).(client.Object) + } +} + +func withVersion[T object[T]](cr T, opts ...func(T, string)) func(string) client.Object { + return func(version string) client.Object { + obj := cr.DeepCopy() + for _, o := range opts { + o(obj, version) + } + return any(obj).(client.Object) } - return obj } var ( @@ -675,10 +703,10 @@ var _ = Describe("operator upgrade", Label("upgrade"), func() { name: "VMAlert/VMAuth/VMAlertmanager/VMAnomaly", genDeps: func(ns string) []client.Object { return []client.Object{ - with(vmsingle, func(cr *vmv1beta1.VMSingle) { - cr.Name = "anomaly" - cr.Namespace = ns - }), + with(vmsingle, func(cr *vmv1beta1.VMSingle) { + cr.Name = "anomaly" + cr.Namespace = ns + })("latest"), } }, pairs: []crVersionPair{