Skip to content

Commit 3f2e32f

Browse files
committed
Warn about conflicts with ksm or node exporter service monitors (#2211)
Signed-off-by: Pete Wall <[email protected]>
1 parent 86bf6be commit 3f2e32f

File tree

7 files changed: +116 additions, -19 deletions

charts/k8s-monitoring/CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2,8 +2,11 @@
22

33
## 3.7.0
44

5+
* Add an integration for PostgreSQL, including support for Database Observability (@petewall)
56
* Add the ability to set otel_annotations flag for the k8sattributes processor (@petewall)
67
* Add more options to the secretFilter component in the pod logs features (@petewall)
8+
* Check for the presence of kube-state-metrics or Node Exporter ServiceMonitors if clusterMetrics and
9+
prometheusOperatorObjects features are enabled (@petewall)
710

811
## 3.6.2
912

charts/k8s-monitoring/charts/feature-cluster-metrics/templates/_notes.tpl

Lines changed: 35 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -17,9 +17,42 @@
1717
Scrape Kubernetes Cluster metrics
1818
{{- end }}
1919

20-
{{- define "feature.clusterMetrics.notes.actions" }}{{- end }}
2120

22-
{{- define "feature.clusterMetrics.summary" -}}
21+
{{- define "feature.clusterMetrics.notes.actions" }}
22+
{{- $serviceMonitorScrapingEnabled := and .Values.prometheusOperatorObjects.enabled (dig "serviceMonitors" "enabled" true .Values.prometheusOperatorObjects)}}
23+
{{- if $serviceMonitorScrapingEnabled }}
24+
{{- $values := .Values.clusterMetrics }}
25+
{{- if (.Capabilities.APIVersions.Has "monitoring.coreos.com/v1/ServiceMonitor") }}
26+
{{- $namespaces := list }}
27+
{{- range $serviceMonitor := (lookup "monitoring.coreos.com/v1" "ServiceMonitor" "" "").items }}
28+
{{- if contains "kube-state-metrics" $serviceMonitor.metadata.name }}
29+
{{- if (index $values "kube-state-metrics").enabled }}
30+
{{- $namespaces = append $namespaces $serviceMonitor.metadata.namespace }}
31+
⚠️ Detected a ServiceMonitor named {{ $serviceMonitor.metadata.name }} in namespace {{ $serviceMonitor.metadata.namespace }}, but this chart has already enabled the Cluster Metrics feature. This might result in duplicated metrics from kube-state-metrics.
32+
{{- end }}
33+
{{- end }}
34+
{{- if contains "node-exporter" $serviceMonitor.metadata.name }}
35+
{{- if (index $values "node-exporter").enabled }}
36+
{{- $namespaces = append $namespaces $serviceMonitor.metadata.namespace }}
37+
⚠️ Detected a ServiceMonitor named {{ $serviceMonitor.metadata.name }} in namespace {{ $serviceMonitor.metadata.namespace }}, but this chart has already enabled the Cluster Metrics feature. This might result in duplicated metrics from Node Exporter.
38+
{{- end }}
39+
{{- end }}
40+
{{- end }}
41+
{{- if $namespaces }}
42+
To prevent duplicate metrics, either delete the ServiceMonitor, or filter them out with:
43+
prometheusOperatorObjects:
44+
serviceMonitors:
45+
excludeNamespaces:{{ $namespaces | uniq | toYaml | nindent 6 }}
46+
OR
47+
prometheusOperatorObjects:
48+
serviceMonitors:
49+
labelExpressions: <label expression to exclude the ServiceMonitor>
50+
{{- end }}
51+
{{- end }}
52+
{{- end }}
53+
{{- end }}
54+
55+
{{- define "feature.clusterMetrics.summary" }}
2356
{{- $sources := list }}
2457
{{- if .Values.controlPlane.enabled }}{{- $sources = append $sources "controlPlane" }}{{ end }}
2558
{{- if .Values.kubelet.enabled }}{{- $sources = append $sources "kubelet" }}{{ end }}

charts/k8s-monitoring/templates/NOTES.txt

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -24,9 +24,9 @@ It will:
2424
{{- $collectorName := (index $.Values $feature).collector }}
2525
{{- $Collector := dict "Namespace" $.Release.Namespace }}
2626
{{- if $collectorName }}
27-
{{- $Collector = merge $Collector (dict "ServiceName" (include "collector.alloy.fullname" (merge $ (dict "collectorName" $collectorName)))) }}
27+
{{- $Collector = merge $Collector (dict "ServiceName" (include "collector.alloy.fullname" (merge (dict "collectorName" $collectorName) $))) }}
2828
{{- end }}
29-
{{- include (printf "feature.%s.notes.actions" $feature) (dict "Values" $.Values "Collector" $Collector) | indent 0 }}
29+
{{- include (printf "feature.%s.notes.actions" $feature) (merge (dict "Collector" $Collector) $) | indent 0 }}
3030
{{- end }}
3131
{{- if $isTranslating }}
3232

charts/k8s-monitoring/templates/features/_feature_cluster_metrics.tpl

Lines changed: 19 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -40,15 +40,23 @@ cluster_metrics "feature" {
4040
{{- define "features.clusterMetrics.collector.values" }}{{- end -}}
4141

4242
{{- define "features.clusterMetrics.validate" }}
43-
{{- if .Values.clusterMetrics.enabled -}}
44-
{{- $featureName := "Kubernetes Cluster metrics" }}
45-
{{- $destinations := include "features.clusterMetrics.destinations" . | fromYamlArray }}
46-
{{- include "destinations.validate_destination_list" (dict "destinations" $destinations "type" "metrics" "ecosystem" "prometheus" "feature" $featureName) }}
47-
{{- range $collector := include "features.clusterMetrics.collectors" . | fromYamlArray }}
48-
{{- include "collectors.require_collector" (dict "Values" $.Values "name" $collector "feature" $featureName) }}
49-
{{- end -}}
43+
{{- if .Values.clusterMetrics.enabled }}
44+
{{- $featureName := "Kubernetes Cluster metrics" }}
45+
{{- $destinations := include "features.clusterMetrics.destinations" . | fromYamlArray }}
46+
{{- include "destinations.validate_destination_list" (dict "destinations" $destinations "type" "metrics" "ecosystem" "prometheus" "feature" $featureName) }}
5047

48+
{{- range $collector := include "features.clusterMetrics.collectors" . | fromYamlArray }}
49+
{{- include "collectors.require_collector" (dict "Values" $.Values "name" $collector "feature" $featureName) }}
50+
{{- end }}
51+
52+
{{- include "features.clusterMetrics.validate.opencost" . }}
53+
{{- include "feature.clusterMetrics.validate" (dict "Values" $.Values.clusterMetrics) }}
54+
{{- end }}
55+
{{- end }}
56+
57+
{{- define "features.clusterMetrics.validate.opencost" }}
5158
{{- if .Values.clusterMetrics.opencost.enabled}}
59+
{{- $destinations := include "features.clusterMetrics.destinations" . | fromYamlArray }}
5260
{{- if ne .Values.cluster.name .Values.clusterMetrics.opencost.opencost.exporter.defaultClusterId }}
5361
{{- $msg := list "" "The OpenCost default cluster id should match the cluster name." }}
5462
{{- $msg = append $msg "Please set:" }}
@@ -73,7 +81,7 @@ cluster_metrics "feature" {
7381
{{- $msg = append $msg (printf "Where <metrics destination name> is one of %s" (include "english_list_or" $destinations)) }}
7482
{{- end }}
7583
{{- fail (join "\n" $msg) }}
76-
{{- end -}}
84+
{{- end }}
7785

7886
{{- $destinationFound := false }}
7987
{{- range $index, $destinationName := $destinations }}
@@ -178,7 +186,7 @@ cluster_metrics "feature" {
178186
{{- fail (join "\n" $msg) }}
179187
{{- end }}
180188
{{- end }}
181-
{{- end -}}
189+
{{- end }}
182190

183191
{{- if eq $destinationFound false }}
184192
{{- $msg := list "" (printf "The destination \"%s\" is not a Prometheus data source." $.Values.clusterMetrics.opencost.metricsSource) }}
@@ -190,9 +198,7 @@ cluster_metrics "feature" {
190198
{{- $msg = append $msg " metricsSource: <metrics destination name>" }}
191199
{{- $msg = append $msg (printf "Where <metrics destination name> is one of %s" (include "english_list_or" $destinations)) }}
192200
{{- fail (join "\n" $msg) }}
193-
{{- end -}}
194-
{{- end -}}
195-
{{- end -}}
196-
{{- include "feature.clusterMetrics.validate" (dict "Values" $.Values.clusterMetrics) }}
201+
{{- end }}
202+
{{- end }}
197203
{{- end -}}
198204
{{- end -}}

charts/k8s-monitoring/tests/__snapshot__/feature_notes_test.yaml.snap

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -13,4 +13,3 @@ print the warning about ecosystem translation if the destinations don't match:
1313
* One or more features will have their data translated to a different storage ecosystem. This may result in an imperfect
1414
translation of labels and attributes. For more information, consult:
1515
https://github.com/grafana/k8s-monitoring-helm/tree/main/charts/k8s-monitoring/docs/destinations#ecosystem-translation
16-

charts/k8s-monitoring/tests/feature_notes_test.yaml

Lines changed: 42 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -18,3 +18,45 @@ tests:
1818
enabled: true
1919
asserts:
2020
- matchSnapshotRaw: {}
21+
22+
# - it: print the warning about ServiceMonitors if they exist on the system
23+
# capabilities:
24+
# apiVersions: [monitoring.coreos.com/v1/ServiceMonitor]
25+
# kubernetesProvider:
26+
# scheme:
27+
# "monitoring.coreos.com/v1/ServiceMonitor":
28+
# gvr:
29+
# version: "monitoring.coreos.com/v1"
30+
# resource: "servicemonitors"
31+
# namespaced: true
32+
# objects:
33+
# - apiVersion: "monitoring.coreos.com/v1"
34+
# kind: ServiceMonitor
35+
# metadata:
36+
# name: my-kube-state-metrics-service-monitor
37+
# namespace: monitoring
38+
# - apiVersion: "monitoring.coreos.com/v1"
39+
# kind: ServiceMonitor
40+
# metadata:
41+
# name: prometheus-node-exporter
42+
# namespace: monitoring
43+
# - apiVersion: "monitoring.coreos.com/v1"
44+
# kind: ServiceMonitor
45+
# metadata:
46+
# name: redis
47+
# namespace: monitoring
48+
# set:
49+
# cluster:
50+
# name: ci-test-cluster
51+
# destinations:
52+
# - name: test
53+
# type: prometheus
54+
# url: https://prometheus.example.com
55+
# clusterMetrics:
56+
# enabled: true
57+
# prometheusOperatorObjects:
58+
# enabled: true
59+
# alloy-metrics:
60+
# enabled: true
61+
# asserts:
62+
# - matchSnapshotRaw: {}

charts/k8s-monitoring/tests/platform_validations_openshift_test.yaml

Lines changed: 15 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,8 @@ templates:
55
tests:
66
- it: asks you to set the platform to OpenShift
77
set:
8-
cluster: {name: openshift-cluster}
8+
cluster:
9+
name: openshift-cluster
910
capabilities:
1011
apiVersions:
1112
- security.openshift.io/v1/SecurityContextConstraints
@@ -15,3 +16,16 @@ tests:
1516
This Kubernetes cluster appears to be OpenShift. Please set the platform to enable compatibility:
1617
global:
1718
platform: openshift
19+
20+
- it: does not ask when the platform is set
21+
set:
22+
global:
23+
platform: openshift
24+
cluster:
25+
name: openshift-cluster
26+
alloy-metrics: {enabled: true, extraConfig: "// Enabled"}
27+
capabilities:
28+
apiVersions:
29+
- security.openshift.io/v1/SecurityContextConstraints
30+
asserts:
31+
- notFailedTemplate: {}

0 commit comments

Comments
 (0)