diff --git a/helm/docs/monitoring-infrastructure.md b/helm/docs/monitoring-infrastructure.md
index d346d131..df683b9f 100644
--- a/helm/docs/monitoring-infrastructure.md
+++ b/helm/docs/monitoring-infrastructure.md
@@ -8,14 +8,20 @@ how they were validated.
The chart provides two categories of monitoring integration:
-1. **Prometheus `prometheus.io/*` annotations** on all Services (always enabled).
- These allow standard Prometheus installations using `kubernetes_sd_configs`
- to auto-discover and scrape CloudZero Agent metrics without any CRDs.
+1. **Prometheus `prometheus.io/*` annotations** on all Services (enabled by
+ default, controlled by `components.monitoring.scrapeAnnotations`). These
+ allow standard Prometheus installations using `kubernetes_sd_configs` to
+ auto-discover and scrape CloudZero Agent metrics without any CRDs.
2. **Prometheus Operator CRDs** (opt-in via `components.monitoring.enabled`).
When enabled, the chart creates `ServiceMonitor` and `PrometheusRule`
resources that the Prometheus Operator automatically picks up.
+When both are active simultaneously, Prometheus deployments that honor both
+annotation-based discovery and ServiceMonitors may scrape each target twice.
+Set `components.monitoring.scrapeAnnotations: false` to disable the annotations
+when using ServiceMonitors.
+
These resources are designed to be useful regardless of the customer's
monitoring stack. The `ServiceMonitor` and `PrometheusRule` CRDs are the
standard interoperability format understood by the Prometheus Operator, but
@@ -32,6 +38,10 @@ components:
# false = never install CRDs (default while feature is being validated)
enabled: false
+ # true (default) = keep prometheus.io/* annotations on Services
+ # false = remove redundant annotations from Services
+ scrapeAnnotations: true
+
# Override namespace for CRDs (default: same as agent namespace)
namespace: ""
@@ -288,11 +298,15 @@ Validated using multiple test scenarios on the `bach` cluster:
Tested via `helm template` with all three modes:
-| `components.monitoring.enabled` | ServiceMonitors | PrometheusRules | `prometheus.io/*` annotations |
-| ------------------------------- | --------------- | --------------- | ----------------------------- |
-| `null` (no CRDs in cluster) | 0 | 0 | 3 (always) |
-| `true` | 4 | 1 | 3 (always) |
-| `false` | 0 | 0 | 3 (always) |
+| `components.monitoring.enabled` | ServiceMonitors | PrometheusRules | `prometheus.io/*` annotations† |
+| ------------------------------- | --------------- | --------------- | ----------------------------------------- |
+| `null` (no CRDs in cluster) | 0 | 0 | 3 |
+| `true` | 4 | 1 | 3 |
+| `false` | 0 | 0 | 3 |
+
+† Annotation count assumes `components.monitoring.scrapeAnnotations: true`
+(default). Set it to `false` to omit the annotations and avoid duplicate
+scraping, e.g. when `enabled` is `true` or `"auto"`.
### Test Suite
diff --git a/helm/templates/agent-service.yaml b/helm/templates/agent-service.yaml
index eff24e69..ed3037ad 100644
--- a/helm/templates/agent-service.yaml
+++ b/helm/templates/agent-service.yaml
@@ -11,11 +11,15 @@ metadata:
.Values.commonMetaLabels
)
) | nindent 2 }}
+ {{- $promAnnotations := dict -}}
+ {{- if not (eq .Values.components.monitoring.scrapeAnnotations false) -}}
+ {{- $promAnnotations = dict "prometheus.io/scrape" "true" "prometheus.io/port" "9090" "prometheus.io/path" "/metrics" -}}
+ {{- end -}}
{{- include "cloudzero-agent.generateAnnotations" (dict
"root" .
"annotations" (list
.Values.defaults.annotations
- (dict "prometheus.io/scrape" "true" "prometheus.io/port" "9090" "prometheus.io/path" "/metrics")
+ $promAnnotations
)
) | nindent 2 }}
spec:
diff --git a/helm/templates/aggregator-service.yaml b/helm/templates/aggregator-service.yaml
index be32b42b..8fac4514 100644
--- a/helm/templates/aggregator-service.yaml
+++ b/helm/templates/aggregator-service.yaml
@@ -12,12 +12,16 @@ metadata:
.Values.components.aggregator.labels
)
) | nindent 2 }}
+ {{- $promAnnotations := dict -}}
+ {{- if not (eq .Values.components.monitoring.scrapeAnnotations false) -}}
+ {{- $promAnnotations = dict "prometheus.io/scrape" "true" "prometheus.io/port" (.Values.aggregator.collector.port | quote) "prometheus.io/path" "/metrics" -}}
+ {{- end -}}
{{- include "cloudzero-agent.generateAnnotations" (dict
"root" .
"annotations" (list
.Values.defaults.annotations
.Values.components.aggregator.annotations
- (dict "prometheus.io/scrape" "true" "prometheus.io/port" (.Values.aggregator.collector.port | quote) "prometheus.io/path" "/metrics")
+ $promAnnotations
)
) | nindent 2 }}
spec:
diff --git a/helm/templates/webhook-service.yaml b/helm/templates/webhook-service.yaml
index b3703de4..c208a36d 100644
--- a/helm/templates/webhook-service.yaml
+++ b/helm/templates/webhook-service.yaml
@@ -11,13 +11,17 @@ metadata:
.Values.components.webhookServer.labels
)
) | nindent 2 }}
+ {{- $promAnnotations := dict -}}
+ {{- if not (eq .Values.components.monitoring.scrapeAnnotations false) -}}
+ {{- $promAnnotations = dict "prometheus.io/scrape" "true" "prometheus.io/port" "8443" "prometheus.io/path" "/metrics" "prometheus.io/scheme" "https" -}}
+ {{- end -}}
{{- include "cloudzero-agent.generateAnnotations" (dict
"root" .
"annotations" (list
.Values.defaults.annotations
.Values.components.webhookServer.annotations
(dict "nginx.ingress.kubernetes.io/ssl-redirect" "false")
- (dict "prometheus.io/scrape" "true" "prometheus.io/port" "8443" "prometheus.io/path" "/metrics" "prometheus.io/scheme" "https")
+ $promAnnotations
)
) | nindent 2 }}
namespace: {{ .Release.Namespace }}
diff --git a/helm/tests/defaults_service_test.yaml b/helm/tests/defaults_service_test.yaml
index 9aa083d5..7e7f4a59 100644
--- a/helm/tests/defaults_service_test.yaml
+++ b/helm/tests/defaults_service_test.yaml
@@ -2,15 +2,18 @@
#
# This test validates that Service resources properly inherit
# defaults.labels and defaults.annotations from the chart's defaults section.
+# Also tests monitoring.scrapeAnnotations controls prometheus.io/* annotations.
#
# Services only support metadata-level defaults (labels and annotations).
# PodSpec defaults (affinity, tolerations, etc.) do not apply to Services.
#
# Templates tested:
+# - agent-service.yaml
# - aggregator-service.yaml
# - webhook-service.yaml
suite: defaults.* properties apply to Service resources
templates:
+ - agent-service.yaml
- aggregator-service.yaml
- webhook-service.yaml
tests:
@@ -91,3 +94,54 @@ tests:
- equal:
path: metadata.annotations.test-defaults-annotation
value: sentinel-value-annotation
+
+ # ============================================================================
+ # monitoring.scrapeAnnotations tests
+ # ============================================================================
+ - it: should include prometheus.io annotations on agent-service by default
+ template: agent-service.yaml
+ asserts:
+ - equal:
+ path: metadata.annotations["prometheus.io/scrape"]
+ value: "true"
+
+ - it: should omit prometheus.io annotations on agent-service when scrapeAnnotations is false
+ template: agent-service.yaml
+ set:
+ components.monitoring.scrapeAnnotations: false
+ asserts:
+ - isNull:
+ path: metadata.annotations["prometheus.io/scrape"]
+
+ - it: should include prometheus.io annotations on aggregator-service by default
+ template: aggregator-service.yaml
+ asserts:
+ - equal:
+ path: metadata.annotations["prometheus.io/scrape"]
+ value: "true"
+
+ - it: should omit prometheus.io annotations on aggregator-service when scrapeAnnotations is false
+ template: aggregator-service.yaml
+ set:
+ components.monitoring.scrapeAnnotations: false
+ asserts:
+ - isNull:
+ path: metadata.annotations["prometheus.io/scrape"]
+
+ - it: should include prometheus.io annotations on webhook-service by default
+ template: webhook-service.yaml
+ set:
+ insightsController.enabled: true
+ asserts:
+ - equal:
+ path: metadata.annotations["prometheus.io/scrape"]
+ value: "true"
+
+ - it: should omit prometheus.io annotations on webhook-service when scrapeAnnotations is false
+ template: webhook-service.yaml
+ set:
+ insightsController.enabled: true
+ components.monitoring.scrapeAnnotations: false
+ asserts:
+ - isNull:
+ path: metadata.annotations["prometheus.io/scrape"]
diff --git a/helm/values.schema.json b/helm/values.schema.json
index 0f8afc47..b23a586e 100644
--- a/helm/values.schema.json
+++ b/helm/values.schema.json
@@ -6309,6 +6309,10 @@
}
]
},
+ "scrapeAnnotations": {
+ "default": true,
+ "type": "boolean"
+ },
"sharedSecret": {
"default": false,
"type": "boolean"
diff --git a/helm/values.yaml b/helm/values.yaml
index 47e1ad8b..3d75bf4c 100644
--- a/helm/values.yaml
+++ b/helm/values.yaml
@@ -859,11 +859,27 @@ components:
#
# To opt in now, set to "auto" or true.
#
- # Regardless of this setting, prometheus.io/* annotations are always added to
- # Services for customers using standard Prometheus service discovery.
+ # By default, prometheus.io/* annotations are added to Services for customers
+ # using standard Prometheus service discovery. Set monitoring.scrapeAnnotations:
+ # false to disable them when using Prometheus Operator ServiceMonitors to avoid
+ # Prometheus scraping each target twice.
monitoring:
enabled: null
+ # Controls whether prometheus.io/* annotations are added to Services.
+ #
+ # Background: When monitoring.enabled is true, the chart creates
+ # ServiceMonitor CRDs that instruct the Prometheus Operator to scrape
+ # CloudZero Agent metrics. In that setup, the prometheus.io/* annotations on
+ # Services become redundant; in clusters where both annotation-based and
+ # CRD-based discovery are active, the same metrics could be scraped twice.
+ #
+ # - true (default): Keep the prometheus.io/* annotations set on Services.
+ # This value ensures backward compatibility.
+ #
+ # - false: Remove the redundant prometheus.io/* annotations from Services.
+ scrapeAnnotations: true
+
# Namespace override for PrometheusRule and ServiceMonitor CRDs.
# null (default) = same namespace as the agent installation.
# Some Prometheus Operator deployments require CRDs to be in a specific
diff --git a/tests/helm/template/alloy.yaml b/tests/helm/template/alloy.yaml
index 8ed6edc7..0778c99c 100644
--- a/tests/helm/template/alloy.yaml
+++ b/tests/helm/template/alloy.yaml
@@ -1104,6 +1104,7 @@ data:
enabled: null
labels: {}
namespace: null
+ scrapeAnnotations: true
sharedSecret: false
prometheus:
image: