Merge branch 'main' into dev-es-removal
vishnuchalla authored Oct 7, 2024
2 parents f5bb7d0 + fd4c2d7 commit 4b782a9
Showing 19 changed files with 57 additions and 69 deletions.

cluster-density.go (4 changes: 3 additions & 1 deletion)

@@ -33,6 +33,7 @@ func NewClusterDensity(wh *workloads.WorkloadHelper, variant string) *cobra.Comm
     var churnDelay, churnDuration time.Duration
     var churnDeletionStrategy string
     var podReadyThreshold time.Duration
+    var metricsProfiles []string
     cmd := &cobra.Command{
         Use: variant,
         Short: fmt.Sprintf("Runs %v workload", variant),
@@ -60,7 +61,7 @@ func NewClusterDensity(wh *workloads.WorkloadHelper, variant string) *cobra.Comm
                     log.Errorf("image-registry deployment is not deployed")
                 }
             }
-            setMetrics(cmd, "metrics-aggregated.yml")
+            setMetrics(cmd, metricsProfiles)
             wh.Run(cmd.Name())
         },
     }
@@ -73,6 +74,7 @@ func NewClusterDensity(wh *workloads.WorkloadHelper, variant string) *cobra.Comm
     cmd.Flags().IntVar(&churnPercent, "churn-percent", 10, "Percentage of job iterations that kube-burner will churn each round")
     cmd.Flags().StringVar(&churnDeletionStrategy, "churn-deletion-strategy", "default", "Churn deletion strategy to use")
     cmd.Flags().BoolVar(&svcLatency, "service-latency", false, "Enable service latency measurement")
+    cmd.Flags().StringSliceVar(&metricsProfiles, "metrics-profile", []string{"metrics-aggregated.yml"}, "Comma separated list of metrics profiles to use")
     cmd.MarkFlagRequired("iterations")
     return cmd
 }
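
The same pattern repeats across the workload files below: each command declares a metricsProfiles slice, registers it as a --metrics-profile string-slice flag whose default is the profile that command previously hard-coded, and passes the slice to setMetrics. A minimal, self-contained sketch of that wiring follows; setMetricsStub and the example command name are illustrative stand-ins, not part of this repository.

package main

import (
	"fmt"
	"os"
	"strings"

	"github.com/spf13/cobra"
)

// setMetricsStub stands in for the repository's setMetrics helper: it only
// exports the chosen profiles, so the flag wiring can be seen end to end.
func setMetricsStub(metricsProfiles []string) {
	os.Setenv("METRICS", strings.Join(metricsProfiles, ","))
}

func newExampleWorkload() *cobra.Command {
	var metricsProfiles []string
	cmd := &cobra.Command{
		Use: "example-workload",
		Run: func(cmd *cobra.Command, args []string) {
			setMetricsStub(metricsProfiles)
			fmt.Println("METRICS =", os.Getenv("METRICS"))
		},
	}
	// Same shape as the flag added to every workload in this commit; the
	// default keeps the profile each workload used before.
	cmd.Flags().StringSliceVar(&metricsProfiles, "metrics-profile", []string{"metrics-aggregated.yml"}, "Comma separated list of metrics profiles to use")
	return cmd
}

func main() {
	cmd := newExampleWorkload()
	cmd.SetArgs([]string{"--metrics-profile", "metrics-aggregated.yml,custom-metrics.yml"})
	if err := cmd.Execute(); err != nil {
		fmt.Fprintln(os.Stderr, err)
	}
}

Running the example prints METRICS=metrics-aggregated.yml,custom-metrics.yml, which mirrors how common.go exports the chosen profiles through the METRICS environment variable.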

cmd/config/metrics-aggregated.yml (8 changes: 0 additions & 8 deletions)

@@ -12,14 +12,6 @@
 - query: sum(irate(apiserver_request_total{apiserver="kube-apiserver",verb!="WATCH"}[2m])) by (verb,resource,code) > 0
   metricName: APIRequestRate
 
-# Kubeproxy and OVN service sync latency
-
-- query: histogram_quantile(0.99, sum(rate(kubeproxy_network_programming_duration_seconds_bucket[2m])) by (le)) > 0
-  metricName: serviceSyncLatency
-
-- query: histogram_quantile(0.99, sum(rate(ovnkube_master_network_programming_duration_seconds_bucket{kind="service"}[2m])) by (le))
-  metricName: serviceSyncLatency
-
 # Containers & pod metrics
 
 - query: (sum(irate(container_cpu_usage_seconds_total{name!="",container!="POD",namespace=~"openshift-(etcd|oauth-apiserver|sdn|ovn-kubernetes|network-node-identity|multus|.*apiserver|authentication|.*controller-manager|.*scheduler|image-registry|operator-lifecycle-manager)|cilium|stackrox|calico.*|tigera.*"}[2m]) * 100) by (container, pod, namespace, node) and on (node) kube_node_role{role="master"}) > 0

cmd/config/metrics-egressip.yml (8 changes: 0 additions & 8 deletions)

@@ -24,14 +24,6 @@
 - query: sum(irate(apiserver_request_total{apiserver="kube-apiserver",verb!="WATCH"}[2m])) by (verb,resource,code) > 0
   metricName: APIRequestRate
 
-# Kubeproxy and OVN service sync latency
-
-- query: histogram_quantile(0.99, sum(rate(kubeproxy_network_programming_duration_seconds_bucket[2m])) by (le)) > 0
-  metricName: serviceSyncLatency
-
-- query: histogram_quantile(0.99, sum(rate(ovnkube_master_network_programming_duration_seconds_bucket{kind="service"}[2m])) by (le))
-  metricName: serviceSyncLatency
-
 # Containers & pod metrics
 
 - query: (sum(irate(container_cpu_usage_seconds_total{name!="",container!="POD",namespace=~"openshift-(etcd|oauth-apiserver|sdn|ovn-kubernetes|network-node-identity|multus|.*apiserver|authentication|.*controller-manager|.*scheduler|image-registry|operator-lifecycle-manager)|cilium|stackrox|calico.*|tigera.*"}[2m]) * 100) by (container, pod, namespace, node) and on (node) kube_node_role{role="master"}) > 0

cmd/config/metrics.yml (8 changes: 0 additions & 8 deletions)

@@ -12,14 +12,6 @@
 - query: sum(irate(apiserver_request_total{apiserver="kube-apiserver",verb!="WATCH"}[2m])) by (verb,resource,code) > 0
   metricName: APIRequestRate
 
-# Kubeproxy and OVN service sync latency
-
-- query: histogram_quantile(0.99, sum(rate(kubeproxy_network_programming_duration_seconds_bucket[2m])) by (le)) > 0
-  metricName: serviceSyncLatency
-
-- query: histogram_quantile(0.99, sum(rate(ovnkube_master_network_programming_duration_seconds_bucket{kind="service"}[2m])) by (le))
-  metricName: serviceSyncLatency
-
 # Containers & pod metrics
 
 - query: (sum(irate(container_cpu_usage_seconds_total{name!="",container!~"POD|",namespace=~"openshift-(etcd|.*apiserver|monitoring|user-workload-monitoring|ovn-kubernetes|network-node-identity|multus|sdn|ingress|.*controller-manager|.*scheduler)|cilium|stackrox|calico.*|tigera.*"}[2m]) * 100) by (container, pod, namespace, node)) > 0

common.go (7 changes: 2 additions & 5 deletions)

@@ -28,16 +28,13 @@ import (
 
 var clusterMetadata ocpmetadata.ClusterMetadata
 
-func setMetrics(cmd *cobra.Command, metricsProfile string) {
-    var metricsProfiles []string
+func setMetrics(cmd *cobra.Command, metricsProfiles []string) {
     profileType, _ := cmd.Root().PersistentFlags().GetString("profile-type")
     switch ProfileType(profileType) {
     case Reporting:
         metricsProfiles = []string{"metrics-report.yml"}
-    case Regular:
-        metricsProfiles = []string{metricsProfile}
     case Both:
-        metricsProfiles = []string{"metrics-report.yml", metricsProfile}
+        metricsProfiles = append(metricsProfiles, "metrics-report.yml")
     }
     os.Setenv("METRICS", strings.Join(metricsProfiles, ","))
 }
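
For readability, here is the behaviour of the updated helper in isolation: a reporting-only run replaces whatever profiles were passed in with metrics-report.yml, a regular run uses the flag values untouched, and the combined mode appends metrics-report.yml to them. The sketch below compiles on its own; the ProfileType values are illustrative stand-ins for the constants defined elsewhere in the repository, and the cobra flag lookup is replaced by an explicit argument.

package main

import (
	"fmt"
	"os"
	"strings"
)

// ProfileType mirrors the repository's type; the constant values are
// illustrative stand-ins, not the real declarations.
type ProfileType string

const (
	Regular   ProfileType = "regular"
	Reporting ProfileType = "reporting"
	Both      ProfileType = "both"
)

// setMetrics follows the logic of the updated helper in common.go, with the
// profile type passed in directly instead of read from a cobra flag.
func setMetrics(profileType ProfileType, metricsProfiles []string) {
	switch profileType {
	case Reporting:
		metricsProfiles = []string{"metrics-report.yml"}
	case Both:
		metricsProfiles = append(metricsProfiles, "metrics-report.yml")
	}
	os.Setenv("METRICS", strings.Join(metricsProfiles, ","))
}

func main() {
	for _, pt := range []ProfileType{Regular, Reporting, Both} {
		setMetrics(pt, []string{"metrics.yml", "custom-metrics.yml"})
		fmt.Printf("%-10s -> METRICS=%s\n", pt, os.Getenv("METRICS"))
	}
}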

crd-scale.go (4 changes: 3 additions & 1 deletion)

@@ -25,6 +25,7 @@ import (
 // NewCrdScale holds the crd-scale workload
 func NewCrdScale(wh *workloads.WorkloadHelper) *cobra.Command {
     var iterations int
+    var metricsProfiles []string
     cmd := &cobra.Command{
         Use: "crd-scale",
         Short: "Runs crd-scale workload",
@@ -33,11 +34,12 @@ func NewCrdScale(wh *workloads.WorkloadHelper) *cobra.Command {
             os.Setenv("JOB_ITERATIONS", fmt.Sprint(iterations))
         },
         Run: func(cmd *cobra.Command, args []string) {
-            setMetrics(cmd, "metrics-aggregated.yml")
+            setMetrics(cmd, metricsProfiles)
             wh.Run(cmd.Name())
         },
     }
     cmd.Flags().IntVar(&iterations, "iterations", 0, "Number of CRDs to create")
+    cmd.Flags().StringSliceVar(&metricsProfiles, "metrics-profile", []string{"metrics-aggregated.yml"}, "Comma separated list of metrics profiles to use")
     cmd.MarkFlagRequired("iterations")
     return cmd
 }

egressip.go (4 changes: 3 additions & 1 deletion)

@@ -149,6 +149,7 @@ func NewEgressIP(wh *workloads.WorkloadHelper, variant string) *cobra.Command {
     var iterations, addressesPerIteration int
     var externalServerIP string
     var podReadyThreshold time.Duration
+    var metricsProfiles []string
     cmd := &cobra.Command{
         Use: variant,
         Short: fmt.Sprintf("Runs %v workload", variant),
@@ -160,14 +161,15 @@ func NewEgressIP(wh *workloads.WorkloadHelper, variant string) *cobra.Command {
             generateEgressIPs(iterations, addressesPerIteration, externalServerIP)
         },
         Run: func(cmd *cobra.Command, args []string) {
-            setMetrics(cmd, "metrics-egressip.yml")
+            setMetrics(cmd, metricsProfiles)
             wh.Run(cmd.Name())
         },
     }
     cmd.Flags().DurationVar(&podReadyThreshold, "pod-ready-threshold", 2*time.Minute, "Pod ready timeout threshold")
     cmd.Flags().IntVar(&iterations, "iterations", 0, fmt.Sprintf("%v iterations", variant))
     cmd.Flags().StringVar(&externalServerIP, "external-server-ip", "", "External server IP address")
     cmd.Flags().IntVar(&addressesPerIteration, "addresses-per-iteration", 1, fmt.Sprintf("%v iterations", variant))
+    cmd.Flags().StringSliceVar(&metricsProfiles, "metrics-profile", []string{"metrics-egressip.yml"}, "Comma separated list of metrics profiles to use")
     cmd.MarkFlagRequired("iterations")
     cmd.MarkFlagRequired("external-server-ip")
     return cmd

index.go (9 changes: 3 additions & 6 deletions)

@@ -18,7 +18,6 @@ import (
     "encoding/json"
     "fmt"
     "os"
-    "strings"
     "time"
 
     "github.com/cloud-bulldozer/go-commons/indexers"
@@ -35,7 +34,8 @@ import (
 
 // NewIndex orchestrates indexing for ocp wrapper
 func NewIndex(metricsEndpoint *string, ocpMetaAgent *ocpmetadata.Metadata) *cobra.Command {
-    var metricsProfile, jobName string
+    var jobName string
+    var metricsProfiles []string
     var start, end int64
     var userMetadata, metricsDirectory string
     var prometheusStep time.Duration
@@ -70,9 +70,6 @@ func NewIndex(metricsEndpoint *string, ocpMetaAgent *ocpmetadata.Metadata) *cobr
                     log.Fatal("Error obtaining prometheus information from cluster: ", err.Error())
                 }
             }
-            metricsProfiles := strings.FieldsFunc(metricsProfile, func(r rune) bool {
-                return r == ',' || r == ' '
-            })
             indexer = config.MetricsEndpoint{
                 Endpoint: prometheusURL,
                 Token: prometheusToken,
@@ -148,7 +145,7 @@ func NewIndex(metricsEndpoint *string, ocpMetaAgent *ocpmetadata.Metadata) *cobr
             burner.IndexJobSummary([]burner.JobSummary{jobSummary}, indexerValue)
         },
     }
-    cmd.Flags().StringVarP(&metricsProfile, "metrics-profile", "m", "metrics.yml", "comma-separated list of metric profiles")
+    cmd.Flags().StringSliceVarP(&metricsProfiles, "metrics-profile", "m", []string{"metrics.yml"}, "Comma separated list of metrics profiles to use")
     cmd.Flags().StringVar(&metricsDirectory, "metrics-directory", "collected-metrics", "Directory to dump the metrics files in, when using default local indexing")
     cmd.Flags().DurationVar(&prometheusStep, "step", 30*time.Second, "Prometheus step size")
     cmd.Flags().Int64Var(&start, "start", time.Now().Unix()-3600, "Epoch start time")
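
One behavioural nuance of this index.go change: the removed strings.FieldsFunc call split the old --metrics-profile value on commas or spaces, while a cobra/pflag string-slice flag splits on commas only (and can also be repeated). A small comparison, assuming standard pflag semantics:

package main

import (
	"fmt"
	"strings"

	"github.com/spf13/pflag"
)

func main() {
	// Old behaviour: split the single string flag on commas or spaces.
	old := strings.FieldsFunc("metrics.yml custom-metrics.yml,extra.yml", func(r rune) bool {
		return r == ',' || r == ' '
	})
	fmt.Println(old) // [metrics.yml custom-metrics.yml extra.yml]

	// New behaviour: pflag's StringSlice splits on commas only, so
	// space-separated input would now arrive as a single element.
	fs := pflag.NewFlagSet("index", pflag.ContinueOnError)
	profiles := fs.StringSliceP("metrics-profile", "m", []string{"metrics.yml"}, "Comma separated list of metrics profiles to use")
	_ = fs.Parse([]string{"--metrics-profile", "metrics.yml,custom-metrics.yml"})
	fmt.Println(*profiles) // [metrics.yml custom-metrics.yml]
}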

networkpolicy.go (4 changes: 3 additions & 1 deletion)

@@ -29,6 +29,7 @@ func NewNetworkPolicy(wh *workloads.WorkloadHelper, variant string) *cobra.Comma
     var churn bool
     var churnDelay, churnDuration time.Duration
     var churnDeletionStrategy string
+    var metricsProfiles []string
     cmd := &cobra.Command{
         Use: variant,
         Short: fmt.Sprintf("Runs %v workload", variant),
@@ -42,7 +43,7 @@ func NewNetworkPolicy(wh *workloads.WorkloadHelper, variant string) *cobra.Comma
             os.Setenv("CHURN_DELETION_STRATEGY", churnDeletionStrategy)
         },
         Run: func(cmd *cobra.Command, args []string) {
-            setMetrics(cmd, "metrics.yml")
+            setMetrics(cmd, metricsProfiles)
             wh.Run(cmd.Name())
         },
     }
@@ -53,6 +54,7 @@ func NewNetworkPolicy(wh *workloads.WorkloadHelper, variant string) *cobra.Comma
     cmd.Flags().DurationVar(&churnDelay, "churn-delay", 2*time.Minute, "Time to wait between each churn")
     cmd.Flags().IntVar(&churnPercent, "churn-percent", 10, "Percentage of job iterations that kube-burner will churn each round")
     cmd.Flags().StringVar(&churnDeletionStrategy, "churn-deletion-strategy", "default", "Churn deletion strategy to use")
+    cmd.Flags().StringSliceVar(&metricsProfiles, "metrics-profile", []string{"metrics.yml"}, "Comma separated list of metrics profiles to use")
     cmd.MarkFlagRequired("iterations")
     return cmd
 }

node-density-cni.go (4 changes: 3 additions & 1 deletion)

@@ -32,6 +32,7 @@ func NewNodeDensityCNI(wh *workloads.WorkloadHelper) *cobra.Command {
     var namespacedIterations, svcLatency bool
     var podReadyThreshold time.Duration
     var iterationsPerNamespace int
+    var metricsProfiles []string
     cmd := &cobra.Command{
         Use: "node-density-cni",
         Short: "Runs node-density-cni workload",
@@ -49,7 +50,7 @@ func NewNodeDensityCNI(wh *workloads.WorkloadHelper) *cobra.Command {
             os.Setenv("SVC_LATENCY", strconv.FormatBool(svcLatency))
         },
         Run: func(cmd *cobra.Command, args []string) {
-            setMetrics(cmd, "metrics.yml")
+            setMetrics(cmd, metricsProfiles)
             wh.Run(cmd.Name())
         },
     }
@@ -58,5 +59,6 @@ func NewNodeDensityCNI(wh *workloads.WorkloadHelper) *cobra.Command {
     cmd.Flags().BoolVar(&namespacedIterations, "namespaced-iterations", true, "Namespaced iterations")
     cmd.Flags().IntVar(&iterationsPerNamespace, "iterations-per-namespace", 1000, "Iterations per namespace")
     cmd.Flags().BoolVar(&svcLatency, "service-latency", false, "Enable service latency measurement")
+    cmd.Flags().StringSliceVar(&metricsProfiles, "metrics-profile", []string{"metrics.yml"}, "Comma separated list of metrics profiles to use")
     return cmd
 }

node-density-heavy.go (4 changes: 3 additions & 1 deletion)

@@ -31,6 +31,7 @@ func NewNodeDensityHeavy(wh *workloads.WorkloadHelper) *cobra.Command {
     var podReadyThreshold, probesPeriod time.Duration
     var namespacedIterations bool
     var iterationsPerNamespace int
+    var metricsProfiles []string
     cmd := &cobra.Command{
         Use: "node-density-heavy",
         Short: "Runs node-density-heavy workload",
@@ -49,7 +50,7 @@ func NewNodeDensityHeavy(wh *workloads.WorkloadHelper) *cobra.Command {
             os.Setenv("ITERATIONS_PER_NAMESPACE", fmt.Sprint(iterationsPerNamespace))
         },
         Run: func(cmd *cobra.Command, args []string) {
-            setMetrics(cmd, "metrics.yml")
+            setMetrics(cmd, metricsProfiles)
             wh.Run(cmd.Name())
         },
     }
@@ -58,5 +59,6 @@ func NewNodeDensityHeavy(wh *workloads.WorkloadHelper) *cobra.Command {
     cmd.Flags().IntVar(&podsPerNode, "pods-per-node", 245, "Pods per node")
     cmd.Flags().BoolVar(&namespacedIterations, "namespaced-iterations", true, "Namespaced iterations")
     cmd.Flags().IntVar(&iterationsPerNamespace, "iterations-per-namespace", 1000, "Iterations per namespace")
+    cmd.Flags().StringSliceVar(&metricsProfiles, "metrics-profile", []string{"metrics.yml"}, "Comma separated list of metrics profiles to use")
     return cmd
 }

node-density.go (4 changes: 3 additions & 1 deletion)

@@ -30,6 +30,7 @@ func NewNodeDensity(wh *workloads.WorkloadHelper) *cobra.Command {
     var podsPerNode int
     var podReadyThreshold time.Duration
     var containerImage string
+    var metricsProfiles []string
     cmd := &cobra.Command{
         Use: "node-density",
         Short: "Runs node-density workload",
@@ -45,12 +46,13 @@ func NewNodeDensity(wh *workloads.WorkloadHelper) *cobra.Command {
             os.Setenv("CONTAINER_IMAGE", containerImage)
         },
         Run: func(cmd *cobra.Command, args []string) {
-            setMetrics(cmd, "metrics.yml")
+            setMetrics(cmd, metricsProfiles)
             wh.Run(cmd.Name())
         },
     }
     cmd.Flags().IntVar(&podsPerNode, "pods-per-node", 245, "Pods per node")
     cmd.Flags().DurationVar(&podReadyThreshold, "pod-ready-threshold", 15*time.Second, "Pod ready timeout threshold")
     cmd.Flags().StringVar(&containerImage, "container-image", "gcr.io/google_containers/pause:3.1", "Container image")
+    cmd.Flags().StringSliceVar(&metricsProfiles, "metrics-profile", []string{"metrics.yml"}, "Comma separated list of metrics profiles to use")
     return cmd
 }

pvc-density.go (6 changes: 3 additions & 3 deletions)

@@ -39,7 +39,7 @@ var dynamicStorageProvisioners = map[string]string{
 func NewPVCDensity(wh *workloads.WorkloadHelper) *cobra.Command {
 
     var iterations int
-    var storageProvisioners []string
+    var storageProvisioners, metricsProfiles []string
     var claimSize string
     var containerImage string
     provisioner := "aws"
@@ -64,7 +64,7 @@ func NewPVCDensity(wh *workloads.WorkloadHelper) *cobra.Command {
             os.Setenv("STORAGE_PROVISIONER", fmt.Sprint(dynamicStorageProvisioners[provisioner]))
         },
         Run: func(cmd *cobra.Command, args []string) {
-            setMetrics(cmd, "metrics.yml")
+            setMetrics(cmd, metricsProfiles)
             wh.Run(cmd.Name())
         },
     }
@@ -73,6 +73,6 @@ func NewPVCDensity(wh *workloads.WorkloadHelper) *cobra.Command {
     cmd.Flags().StringVar(&provisioner, "provisioner", provisioner, fmt.Sprintf("[%s]", strings.Join(storageProvisioners, " ")))
     cmd.Flags().StringVar(&claimSize, "claim-size", "256Mi", "claim-size=256Mi")
     cmd.Flags().StringVar(&containerImage, "container-image", "gcr.io/google_containers/pause:3.1", "Container image")
-
+    cmd.Flags().StringSliceVar(&metricsProfiles, "metrics-profile", []string{"metrics.yml"}, "Comma separated list of metrics profiles to use")
     return cmd
 }

test/ocp/custom-metrics.yml (9 changes: 9 additions & 0 deletions)

@@ -0,0 +1,9 @@
+- query: process_resident_memory_bytes{job="prometheus-k8s"}
+  metricName: prometheusRSS
+
+- query: irate(process_cpu_seconds_total{job="prometheus-k8s"}[2m]) and on (job) topk(2,avg_over_time(process_cpu_seconds_total{job="prometheus-k8s"}[{{.elapsed}}:]))
+  metricName: top2PrometheusCPU
+
+- query: prometheus_build_info
+  metricName: prometheusBuildInfo
+  instant: true

test/ocp/metrics-endpoints.yaml (4 changes: 2 additions & 2 deletions)

@@ -1,6 +1,6 @@
 - endpoint: http://localhost:9090
   metrics:
-    - metrics.yml
+    - custom-metrics.yml
   alerts:
     - alerts.yml
   indexer:
@@ -10,7 +10,7 @@
     type: opensearch
 - endpoint: http://localhost:9090
   metrics:
-    - metrics.yml
+    - custom-metrics.yml
   indexer:
     esServers: ["{{.ES_SERVER}}"]
     insecureSkipVerify: true

test/ocp/metrics.yml (9 changes: 0 additions & 9 deletions)

This file was deleted.