Skip to content

Commit

Permalink
Merge pull request #774 from zeeke/metrics-exporter-drop-labels
Browse files Browse the repository at this point in the history
[metrics 5/x] Add node label to sriov_* metrics
  • Loading branch information
SchSeba authored Oct 7, 2024
2 parents 31175eb + 3ff1b85 commit aecf473
Show file tree
Hide file tree
Showing 5 changed files with 113 additions and 33 deletions.
16 changes: 8 additions & 8 deletions bindata/manifests/metrics-exporter/metrics-prometheus-rule.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,28 +11,28 @@ spec:
interval: 30s
rules:
- expr: |
sriov_vf_tx_packets * on (pciAddr) group_left(pod,namespace,dev_type) sriov_kubepoddevice
sriov_vf_tx_packets * on (pciAddr,node) group_left(pod,namespace,dev_type) sriov_kubepoddevice
record: network:sriov_vf_tx_packets
- expr: |
sriov_vf_rx_packets * on (pciAddr) group_left(pod,namespace,dev_type) sriov_kubepoddevice
sriov_vf_rx_packets * on (pciAddr,node) group_left(pod,namespace,dev_type) sriov_kubepoddevice
record: network:sriov_vf_rx_packets
- expr: |
sriov_vf_tx_bytes * on (pciAddr) group_left(pod,namespace,dev_type) sriov_kubepoddevice
sriov_vf_tx_bytes * on (pciAddr,node) group_left(pod,namespace,dev_type) sriov_kubepoddevice
record: network:sriov_vf_tx_bytes
- expr: |
sriov_vf_rx_bytes * on (pciAddr) group_left(pod,namespace,dev_type) sriov_kubepoddevice
sriov_vf_rx_bytes * on (pciAddr,node) group_left(pod,namespace,dev_type) sriov_kubepoddevice
record: network:sriov_vf_rx_bytes
- expr: |
sriov_vf_tx_dropped * on (pciAddr) group_left(pod,namespace,dev_type) sriov_kubepoddevice
sriov_vf_tx_dropped * on (pciAddr,node) group_left(pod,namespace,dev_type) sriov_kubepoddevice
record: network:sriov_vf_tx_dropped
- expr: |
sriov_vf_rx_dropped * on (pciAddr) group_left(pod,namespace,dev_type) sriov_kubepoddevice
sriov_vf_rx_dropped * on (pciAddr,node) group_left(pod,namespace,dev_type) sriov_kubepoddevice
record: network:sriov_vf_rx_dropped
- expr: |
sriov_vf_rx_broadcast * on (pciAddr) group_left(pod,namespace,dev_type) sriov_kubepoddevice
sriov_vf_rx_broadcast * on (pciAddr,node) group_left(pod,namespace,dev_type) sriov_kubepoddevice
record: network:sriov_vf_rx_broadcast
- expr: |
sriov_vf_rx_multicast * on (pciAddr) group_left(pod,namespace,dev_type) sriov_kubepoddevice
sriov_vf_rx_multicast * on (pciAddr,node) group_left(pod,namespace,dev_type) sriov_kubepoddevice
record: network:sriov_vf_rx_multicast
{{ end }}

11 changes: 11 additions & 0 deletions bindata/manifests/metrics-exporter/metrics-prometheus.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,17 @@ spec:
bearerTokenFile: "/var/run/secrets/kubernetes.io/serviceaccount/token"
scheme: "https"
honorLabels: true
relabelings:
- action: replace
sourceLabels:
- __meta_kubernetes_endpoint_node_name
targetLabel: node
- action: labeldrop
regex: pod
- action: labeldrop
regex: container
- action: labeldrop
regex: namespace
tlsConfig:
serverName: sriov-network-metrics-exporter-service.{{.Namespace}}.svc
caFile: /etc/prometheus/configmaps/serving-certs-ca-bundle/service-ca.crt
Expand Down
4 changes: 2 additions & 2 deletions hack/run-e2e-conformance-virtual-ocp.sh
Original file line number Diff line number Diff line change
Expand Up @@ -191,9 +191,9 @@ export DEV_MODE=TRUE
export CLUSTER_HAS_EMULATED_PF=TRUE
export OPERATOR_LEADER_ELECTION_ENABLE=true
export METRICS_EXPORTER_PROMETHEUS_OPERATOR_ENABLED=true
export METRICS_EXPORTER_PROMETHEUS_DEPLOY_RULE=true
export METRICS_EXPORTER_PROMETHEUS_DEPLOY_RULES=true
export METRICS_EXPORTER_PROMETHEUS_OPERATOR_SERVICE_ACCOUNT=${METRICS_EXPORTER_PROMETHEUS_OPERATOR_SERVICE_ACCOUNT:-"prometheus-k8s"}
export METRICS_EXPORTER_PROMETHEUS_OPERATOR_NAMESPACE=${METRICS_EXPORTER_PROMETHEUS_OPERATOR_NAMESPACE:-"openshfit-monitoring"}
export METRICS_EXPORTER_PROMETHEUS_OPERATOR_NAMESPACE=${METRICS_EXPORTER_PROMETHEUS_OPERATOR_NAMESPACE:-"openshift-monitoring"}

export SRIOV_NETWORK_OPERATOR_IMAGE="$registry/$NAMESPACE/sriov-network-operator:latest"
export SRIOV_NETWORK_CONFIG_DAEMON_IMAGE="$registry/$NAMESPACE/sriov-network-config-daemon:latest"
Expand Down
95 changes: 72 additions & 23 deletions test/conformance/tests/test_exporter_metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,21 +19,18 @@ import (
"github.com/prometheus/common/model"

corev1 "k8s.io/api/core/v1"
k8serrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)

var _ = Describe("[sriov] Metrics Exporter", Ordered, func() {
var _ = Describe("[sriov] Metrics Exporter", Ordered, ContinueOnFailure, func() {
var node string
var nic *sriovv1.InterfaceExt

BeforeAll(func() {
if cluster.VirtualCluster() {
Skip("IGB driver does not support VF statistics")
}

err := namespaces.Create(namespaces.Test, clients)
Expect(err).ToNot(HaveOccurred())

Expand Down Expand Up @@ -73,6 +70,9 @@ var _ = Describe("[sriov] Metrics Exporter", Ordered, func() {
})

It("collects metrics regarding receiving traffic via VF", func() {
if cluster.VirtualCluster() {
Skip("IGB driver does not support VF statistics")
}

pod := createTestPod(node, []string{"test-me-network"})
DeferCleanup(namespaces.CleanPods, namespaces.Test, clients)
Expand All @@ -98,27 +98,76 @@ var _ = Describe("[sriov] Metrics Exporter", Ordered, func() {
Expect(finalRxPackets).Should(BeNumerically(">", initialRxPackets))
})

It("PrometheusRule should provide namespaced metrics", func() {
pod := createTestPod(node, []string{"test-me-network"})
DeferCleanup(namespaces.CleanPods, namespaces.Test, clients)
Context("When Prometheus operator is available", func() {
BeforeEach(func() {
_, err := clients.ServiceMonitors(operatorNamespace).List(context.Background(), metav1.ListOptions{})
if k8serrors.IsNotFound(err) {
Skip("Prometheus operator not available in the cluster")
}
})

namespacedMetricNames := []string{
"network:sriov_vf_rx_bytes",
"network:sriov_vf_tx_bytes",
"network:sriov_vf_rx_packets",
"network:sriov_vf_tx_packets",
"network:sriov_vf_rx_dropped",
"network:sriov_vf_tx_dropped",
"network:sriov_vf_rx_broadcast",
"network:sriov_vf_rx_multicast",
}
It("PrometheusRule should provide namespaced metrics", func() {
pod := createTestPod(node, []string{"test-me-network"})
DeferCleanup(namespaces.CleanPods, namespaces.Test, clients)

namespacedMetricNames := []string{
"network:sriov_vf_rx_bytes",
"network:sriov_vf_tx_bytes",
"network:sriov_vf_rx_packets",
"network:sriov_vf_tx_packets",
"network:sriov_vf_rx_dropped",
"network:sriov_vf_tx_dropped",
"network:sriov_vf_rx_broadcast",
"network:sriov_vf_rx_multicast",
}

Eventually(func(g Gomega) {
for _, metricName := range namespacedMetricNames {
values := runPromQLQuery(fmt.Sprintf(`%s{namespace="%s",pod="%s"}`, metricName, pod.Namespace, pod.Name))
g.Expect(values).ToNot(BeEmpty(), "no value for metric %s", metricName)
Eventually(func(g Gomega) {
for _, metricName := range namespacedMetricNames {
values := runPromQLQuery(fmt.Sprintf(`%s{namespace="%s",pod="%s"}`, metricName, pod.Namespace, pod.Name))
g.Expect(values).ToNot(BeEmpty(), "no value for metric %s", metricName)
}
}, "90s", "1s").Should(Succeed())
})

It("Metrics should have the correct labels", func() {
pod := createTestPod(node, []string{"test-me-network"})
DeferCleanup(namespaces.CleanPods, namespaces.Test, clients)

metricsName := []string{
"sriov_vf_rx_bytes",
"sriov_vf_tx_bytes",
"sriov_vf_rx_packets",
"sriov_vf_tx_packets",
"sriov_vf_rx_dropped",
"sriov_vf_tx_dropped",
"sriov_vf_rx_broadcast",
"sriov_vf_rx_multicast",
}
}, "40s", "1s").Should(Succeed())

Eventually(func(g Gomega) {
for _, metricName := range metricsName {
samples := runPromQLQuery(metricName)
g.Expect(samples).ToNot(BeEmpty(), "no value for metric %s", metricName)
g.Expect(samples[0].Metric).To(And(
HaveKey(model.LabelName("pciAddr")),
HaveKey(model.LabelName("node")),
HaveKey(model.LabelName("pf")),
HaveKey(model.LabelName("vf")),
))
}
}, "90s", "1s").Should(Succeed())

// sriov_kubepoddevice has a different set of labels than the statistics metrics
samples := runPromQLQuery(fmt.Sprintf(`sriov_kubepoddevice{namespace="%s",pod="%s"}`, pod.Namespace, pod.Name))
Expect(samples).ToNot(BeEmpty(), "no value for metric sriov_kubepoddevice")
Expect(samples[0].Metric).To(And(
HaveKey(model.LabelName("pciAddr")),
HaveKeyWithValue(model.LabelName("node"), model.LabelValue(pod.Spec.NodeName)),
HaveKeyWithValue(model.LabelName("dev_type"), model.LabelValue("openshift.io/metricsResource")),
HaveKeyWithValue(model.LabelName("namespace"), model.LabelValue(pod.Namespace)),
HaveKeyWithValue(model.LabelName("pod"), model.LabelValue(pod.Name)),
))
})
})
})

Expand Down
20 changes: 20 additions & 0 deletions test/util/k8sreporter/reporter.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@ import (

sriovv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1"
"github.com/k8snetworkplumbingwg/sriov-network-operator/test/util/namespaces"

monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
rbacv1 "k8s.io/api/rbac/v1"
)

func New(reportPath string) (*kniK8sReporter.KubernetesReporter, error) {
Expand All @@ -18,6 +21,17 @@ func New(reportPath string) (*kniK8sReporter.KubernetesReporter, error) {
if err != nil {
return err
}

err = monitoringv1.AddToScheme(s)
if err != nil {
return err
}

err = rbacv1.AddToScheme(s)
if err != nil {
return err
}

return nil
}

Expand All @@ -38,6 +52,8 @@ func New(reportPath string) (*kniK8sReporter.KubernetesReporter, error) {
return true
case multusNamespace != "" && ns == multusNamespace:
return true
case ns == "openshift-monitoring":
return true
}
return false
}
Expand All @@ -47,6 +63,10 @@ func New(reportPath string) (*kniK8sReporter.KubernetesReporter, error) {
{Cr: &sriovv1.SriovNetworkNodePolicyList{}},
{Cr: &sriovv1.SriovNetworkList{}},
{Cr: &sriovv1.SriovOperatorConfigList{}},
{Cr: &monitoringv1.ServiceMonitorList{}, Namespace: &operatorNamespace},
{Cr: &monitoringv1.PrometheusRuleList{}, Namespace: &operatorNamespace},
{Cr: &rbacv1.RoleList{}, Namespace: &operatorNamespace},
{Cr: &rbacv1.RoleBindingList{}, Namespace: &operatorNamespace},
}

err := os.Mkdir(reportPath, 0755)
Expand Down

0 comments on commit aecf473

Please sign in to comment.