Larger results using sidecar logs

Prior to this, we were extracting results from tasks via the termination messages, which had a limit of only 4 KB per pod. If users had many results then each result would need to become smaller to obey the upper limit of 4 KB. We now run a dedicated sidecar that has access to the results of all the steps. This sidecar prints out each result and its content to stdout. The logs of the sidecar are parsed by the taskrun controller, and the results are updated from those logs instead of from the termination messages. We set an upper limit of 1 KB on each result, but users can have as many such results as needed.
tektoncd · Nov 3, 2022 · 2c61ea8 · 2c61ea8
1 parent 2d38f5f
commit 2c61ea8
Show file tree

Hide file tree

Showing 27 changed files with 807 additions and 32 deletions.
diff --git a/cmd/controller/main.go b/cmd/controller/main.go
@@ -52,6 +52,7 @@ func main() {
 
 	opts := &pipeline.Options{}
 	flag.StringVar(&opts.Images.EntrypointImage, "entrypoint-image", "", "The container image containing our entrypoint binary.")
+	flag.StringVar(&opts.Images.SidecarLogResultsImage, "sidecarlogresults-image", "", "The container image containing the binary for accessing results.")
 	flag.StringVar(&opts.Images.NopImage, "nop-image", "", "The container image used to stop sidecars")
 	flag.StringVar(&opts.Images.GitImage, "git-image", "", "The container image containing our Git binary.")
 	flag.StringVar(&opts.Images.KubeconfigWriterImage, "kubeconfig-writer-image", "", "The container image containing our kubeconfig writer binary.")

diff --git a/cmd/entrypoint/main.go b/cmd/entrypoint/main.go
@@ -52,9 +52,10 @@ var (
 	breakpointOnFailure = flag.Bool("breakpoint_on_failure", false, "If specified, expect steps to not skip on failure")
 	onError             = flag.String("on_error", "", "Set to \"continue\" to ignore an error and continue when a container terminates with a non-zero exit code."+
 		" Set to \"stopAndFail\" to declare a failure with a step error and stop executing the rest of the steps.")
-	stepMetadataDir = flag.String("step_metadata_dir", "", "If specified, create directory to store the step metadata e.g. /tekton/steps/<step-name>/")
-	enableSpire     = flag.Bool("enable_spire", false, "If specified by configmap, this enables spire signing and verification")
-	socketPath      = flag.String("spire_socket_path", "unix:///spiffe-workload-api/spire-agent.sock", "Experimental: The SPIRE agent socket for SPIFFE workload API.")
+	stepMetadataDir                  = flag.String("step_metadata_dir", "", "If specified, create directory to store the step metadata e.g. /tekton/steps/<step-name>/")
+	enableSpire                      = flag.Bool("enable_spire", false, "If specified by configmap, this enables spire signing and verification")
+	socketPath                       = flag.String("spire_socket_path", "unix:///spiffe-workload-api/spire-agent.sock", "Experimental: The SPIRE agent socket for SPIFFE workload API.")
+	dontSendResultsToTerminationPath = flag.Bool("dont_send_results_to_termination_path", false, "If specified, dont send results to the termination path.")
 )
 
 const (
@@ -154,13 +155,14 @@ func main() {
 			stdoutPath: *stdoutPath,
 			stderrPath: *stderrPath,
 		},
-		PostWriter:          &realPostWriter{},
-		Results:             strings.Split(*results, ","),
-		Timeout:             timeout,
-		BreakpointOnFailure: *breakpointOnFailure,
-		OnError:             *onError,
-		StepMetadataDir:     *stepMetadataDir,
-		SpireWorkloadAPI:    spireWorkloadAPI,
+		PostWriter:                       &realPostWriter{},
+		Results:                          strings.Split(*results, ","),
+		Timeout:                          timeout,
+		BreakpointOnFailure:              *breakpointOnFailure,
+		OnError:                          *onError,
+		StepMetadataDir:                  *stepMetadataDir,
+		SpireWorkloadAPI:                 spireWorkloadAPI,
+		DontSendResultsToTerminationPath: *dontSendResultsToTerminationPath,
 	}
 
 	// Copy any creds injected by the controller into the $HOME directory of the current

diff --git a/cmd/sidecarlogresults/main.go b/cmd/sidecarlogresults/main.go
@@ -0,0 +1,40 @@
+/*
+Copyright 2019 The Tekton Authors
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package main
+
+import (
+	"flag"
+	"log"
+
+	"github.com/tektoncd/pipeline/pkg/apis/pipeline"
+	"github.com/tektoncd/pipeline/pkg/sidecarlogresults"
+)
+
+func main() { // main reads the two flags below and hands their values to sidecarlogresults.LookForResults.
+	var resultsDir string
+	var resultNames string
+	flag.StringVar(&resultsDir, "results-dir", pipeline.DefaultResultPath, "Path to the results directory. Default is /tekton/results")
+	flag.StringVar(&resultNames, "result-names", "", "comma separated result names to expect from the steps running in the pod. eg. foo,bar,baz")
+	flag.Parse()
+	if resultNames == "" { // result-names has no usable default, so an empty value is treated as a fatal usage error.
+		log.Fatal("result-names were not provided")
+	}
+	err := sidecarlogresults.LookForResults(resultsDir, resultNames) // resultNames is passed through as the raw comma separated string.
+	if err != nil { // any error from the lookup is logged and terminates the process via log.Fatal.
+		log.Fatal(err)
+	}
+}
diff --git a/config/controller.yaml b/config/controller.yaml
@@ -68,6 +68,7 @@ spec:
           "-kubeconfig-writer-image", "ko://github.com/tektoncd/pipeline/cmd/kubeconfigwriter",
           "-git-image", "ko://github.com/tektoncd/pipeline/cmd/git-init",
           "-entrypoint-image", "ko://github.com/tektoncd/pipeline/cmd/entrypoint",
+          "-sidecarlogresults-image", "ko://github.com/tektoncd/pipeline/cmd/sidecarlogresults",
           "-nop-image", "ko://github.com/tektoncd/pipeline/cmd/nop",
           "-imagedigest-exporter-image", "ko://github.com/tektoncd/pipeline/cmd/imagedigestexporter",
           "-pr-image", "ko://github.com/tektoncd/pipeline/cmd/pullrequest-init",

diff --git a/config/enable-log-access-to-controller/clusterrole.yaml b/config/enable-log-access-to-controller/clusterrole.yaml
@@ -0,0 +1,13 @@
+kind: ClusterRole
+apiVersion: rbac.authorization.k8s.io/v1
+metadata:
+  name: tekton-pipelines-controller-pod-log-access
+  labels:
+    app.kubernetes.io/component: controller
+    app.kubernetes.io/instance: default
+    app.kubernetes.io/part-of: tekton-pipelines
+rules:
+  - apiGroups: [""]
+    # Controller needs to get the logs of the results sidecar created by TaskRuns to extract results.
+    resources: ["pods/log"]
+    verbs: ["get"]
diff --git a/config/enable-log-access-to-controller/clusterrolebinding.yaml b/config/enable-log-access-to-controller/clusterrolebinding.yaml
@@ -0,0 +1,16 @@
+kind: ClusterRoleBinding
+apiVersion: rbac.authorization.k8s.io/v1
+metadata:
+  name: tekton-pipelines-controller-pod-log-access
+  labels:
+    app.kubernetes.io/component: controller
+    app.kubernetes.io/instance: default
+    app.kubernetes.io/part-of: tekton-pipelines
+subjects:
+  - kind: ServiceAccount
+    name: tekton-pipelines-controller
+    namespace: tekton-pipelines
+roleRef:
+  kind: ClusterRole
+  name: tekton-pipelines-controller-pod-log-access
+  apiGroup: rbac.authorization.k8s.io
diff --git a/docs/install.md b/docs/install.md
@@ -24,6 +24,7 @@ This guide explains how to install Tekton Pipelines. It covers the following top
     - [Customizing the Pipelines Controller behavior](#customizing-the-pipelines-controller-behavior)
     - [Alpha Features](#alpha-features)
     - [Beta Features](#beta-features)
+- [Enabling larger results using sidecar logs](#enabling-larger-results-using-sidecar-logs)
 - [Configuring High Availability](#configuring-high-availability)
 - [Configuring tekton pipeline controller performance](#configuring-tekton-pipeline-controller-performance)
 - [Creating a custom release of Tekton Pipelines](#creating-a-custom-release-of-tekton-pipelines)
@@ -421,6 +422,7 @@ features](#alpha-features) to be used.
   do both. For more information, see [Configuring usage of `TaskRun` and `Run` embedded statuses](pipelineruns.md#configuring-usage-of-taskrun-and-run-embedded-statuses).
 
 - `resource-verification-mode`: Setting this flag to "enforce" will enforce verification of tasks/pipeline. Failing to verify will fail the taskrun/pipelinerun. "warn" will only log the err message and "skip" will skip the whole verification.
+- `enable-sidecar-logs-results`: Set this flag to "true" to enable the use of a results sidecar whose logs are used to extract results larger than the size of the termination message. While the termination message restricts the combined size of results to 4K per pod, enabling this feature allows 1K per result (with as many results as required).
 
 For example:
 
@@ -470,6 +472,55 @@ the `feature-flags` ConfigMap alongside your Tekton Pipelines deployment via
 
 For beta versions of Tekton CRDs, setting `enable-api-fields` to "beta" is the same as setting it to "stable".
 
+## Enabling larger results using sidecar logs
+
+**Note**: The maximum size of a Task's results is limited by the container termination message feature of Kubernetes, as results are passed back to the controller via this mechanism. At present, the limit is “4096 bytes”.
+
+To exceed this limit of 4096 bytes, you can enable larger results using sidecar logs. By enabling this feature, you will have a limit of 1024 bytes per result with no restriction on the number of results.
+
+**Note**: to enable this feature, you need to grant `get` access to all `pods/log` to the `Tekton pipeline controller`. This means that the tekton pipeline controller has the ability to access the pod logs.
+
+1. Create a cluster role by applying the following spec. 
+
+```yaml
+kind: ClusterRole
+apiVersion: rbac.authorization.k8s.io/v1
+metadata:
+  name: tekton-pipelines-controller-pod-log-access
+  labels:
+    app.kubernetes.io/component: controller
+    app.kubernetes.io/instance: default
+    app.kubernetes.io/part-of: tekton-pipelines
+rules:
+  - apiGroups: [""]
+    # Controller needs to get the logs of the results sidecar created by TaskRuns to extract results.
+    resources: ["pods/log"]
+    verbs: ["get"]
+```
+
+2. Create a cluster role binding by applying the following spec.
+
+```yaml
+kind: ClusterRoleBinding
+apiVersion: rbac.authorization.k8s.io/v1
+metadata:
+  name: tekton-pipelines-controller-pod-log-access
+  labels:
+    app.kubernetes.io/component: controller
+    app.kubernetes.io/instance: default
+    app.kubernetes.io/part-of: tekton-pipelines
+subjects:
+  - kind: ServiceAccount
+    name: tekton-pipelines-controller
+    namespace: tekton-pipelines
+roleRef:
+  kind: ClusterRole
+  name: tekton-pipelines-controller-pod-log-access
+  apiGroup: rbac.authorization.k8s.io
+```
+
+3. Enable the feature flag to use sidecar logs by setting `enable-sidecar-logs-results: "true"` in the [configMap](#customizing-the-pipelines-controller-behavior).
+
 ## Configuring High Availability
 
 If you want to run Tekton Pipelines in a way so that webhooks are resilient against failures and support

diff --git a/docs/tasks.md b/docs/tasks.md
@@ -23,6 +23,7 @@ weight: 200
   - [Specifying `Resources`](#specifying-resources)
   - [Specifying `Workspaces`](#specifying-workspaces)
   - [Emitting `Results`](#emitting-results)
+    - [Larger `Results` using sidecar logs](#larger-results-using-sidecar-logs)
   - [Specifying `Volumes`](#specifying-volumes)
   - [Specifying a `Step` template](#specifying-a-step-template)
   - [Specifying `Sidecars`](#specifying-sidecars)
@@ -835,7 +836,7 @@ This also means that the number of Steps in a Task affects the maximum size of a
 as each Step is implemented as a container in the TaskRun's pod.
 The more containers we have in our pod, *the smaller the allowed size of each container's
 message*, meaning that the **more steps you have in a Task, the smaller the result for each step can be**. 
-For example, if you have 10 steps, the size of each step's Result will have a maximum of less than 1KB*.
+For example, if you have 10 steps, the size of each step's Result will have a maximum of less than 1KB.
 
 If your `Task` writes a large number of small results, you can work around this limitation
 by writing each result from a separate `Step` so that each `Step` has its own termination message.
@@ -847,6 +848,15 @@ available size will less than 4096 bytes.
 As a general rule-of-thumb, if a result needs to be larger than a kilobyte, you should likely use a
 [`Workspace`](#specifying-workspaces) to store and pass it between `Tasks` within a `Pipeline`.
 
+#### Larger `Results` using sidecar logs
+
+This is an experimental feature. The `enable-sidecar-logs-results` feature flag [must be set to `"true"`](./install.md#enabling-larger-results-using-sidecar-logs).
+
+Instead of using termination messages to store results, the taskrun controller injects a sidecar container which monitors the results of all the steps. The sidecar mounts the volume where results of all the steps are stored. As soon as it finds a new result, it logs it to stdout. The controller has access to the logs of the sidecar container (Caution: you need to enable access to [kubernetes pod/logs](./install.md#enabling-larger-results-using-sidecar-logs)).
+
+**Note**: This feature allows users to store up to `1 KB per result`. Because we are not limited by the size of the termination messages, users can have as many results as they require where each result can be up to 1 KB in size. If the size of a result exceeds 1KB, then the TaskRun will be placed into a failed state with the following message: `Result exceeded the maximum allowed limit of 1024 bytes.` 
+
+
 ### Specifying `Volumes`
 
 Specifies one or more [`Volumes`](https://kubernetes.io/docs/concepts/storage/volumes/) that the `Steps` in your

diff --git a/examples/v1beta1/pipelineruns/4808-regression.yaml b/examples/v1beta1/pipelineruns/4808-regression.yaml
@@ -92,4 +92,4 @@ spec:
     name: result-test
   params:
   - name: RESULT_STRING_LENGTH
-    value: "3000"
+    value: "1000"
diff --git a/examples/v1beta1/pipelineruns/alpha/pipelinerun-large-results.yaml b/examples/v1beta1/pipelineruns/alpha/pipelinerun-large-results.yaml
@@ -0,0 +1,41 @@
+apiVersion: tekton.dev/v1beta1
+kind: Task
+metadata:
+  name: large-result-task
+spec:
+  results:
+    - name: result1
+    - name: result2
+    - name: result3
+    - name: result4
+    - name: result5
+  steps:
+    - name: step1
+      image: alpine
+      script: |
+        cat /dev/urandom | head -c 750 | base64 | tee $(results.result1.path);
+        cat /dev/urandom | head -c 750 | base64 | tee $(results.result2.path);
+        cat /dev/urandom | head -c 750 | base64 | tee $(results.result3.path);
+        cat /dev/urandom | head -c 750 | base64 | tee $(results.result4.path);
+        cat /dev/urandom | head -c 750 | base64 | tee $(results.result5.path);
+---
+apiVersion: tekton.dev/v1beta1
+kind: Pipeline
+metadata:
+  name: large-result-pipeline
+spec:
+  tasks:
+    - name: large-task
+      taskRef:
+        name: large-result-task
+  results:
+    - name: large-result
+      value: $(tasks.large-task.results.result1)
+---
+apiVersion: tekton.dev/v1beta1
+kind: PipelineRun
+metadata:
+  name: large-result-pipeline-run
+spec:
+  pipelineRef:
+    name: large-result-pipeline
diff --git a/examples/v1beta1/taskruns/alpha/large-task-result.yaml b/examples/v1beta1/taskruns/alpha/large-task-result.yaml
@@ -0,0 +1,28 @@
+apiVersion: tekton.dev/v1beta1
+kind: TaskRun
+metadata:
+  generateName: larger-results-
+spec:
+  taskSpec:
+    description: |
+      A task that creates results > termination message limit of 4K per pod!
+    results:
+      - name: result1
+      - name: result2
+      - name: result3
+      - name: result4
+      - name: result5
+    steps:
+      - name: step1
+        image: bash:latest
+        script: |
+          #!/usr/bin/env bash
+          cat /dev/urandom | head -c 750 | base64 | tee /tekton/results/result1 #about 1 K result
+          cat /dev/urandom | head -c 750 | base64 | tee /tekton/results/result2 #about 1 K result
+      - name: step2
+        image: bash:latest
+        script: |
+          #!/usr/bin/env bash
+          cat /dev/urandom | head -c 750 | base64 | tee /tekton/results/result3 #about 1 K result
+          cat /dev/urandom | head -c 750 | base64 | tee /tekton/results/result4 #about 1 K result
+          cat /dev/urandom | head -c 750 | base64 | tee /tekton/results/result5 #about 1 K result
diff --git a/pkg/apis/config/feature_flags.go b/pkg/apis/config/feature_flags.go
@@ -74,6 +74,8 @@ const (
 	DefaultEnableSpire = false
 	// DefaultResourceVerificationMode is the default value for "resource-verification-mode".
 	DefaultResourceVerificationMode = SkipResourceVerificationMode
+	// DefaultSidecarLogsResults is the default value for "enable-sidecar-logs-results".
+	DefaultSidecarLogsResults = false
 
 	disableAffinityAssistantKey         = "disable-affinity-assistant"
 	disableCredsInitKey                 = "disable-creds-init"
@@ -87,6 +89,7 @@ const (
 	embeddedStatus                      = "embedded-status"
 	enableSpire                         = "enable-spire"
 	verificationMode                    = "resource-verification-mode"
+	enableSidecarLogsResults            = "enable-sidecar-logs-results"
 )
 
 // FeatureFlags holds the features configurations
@@ -105,6 +108,7 @@ type FeatureFlags struct {
 	EmbeddedStatus                   string
 	EnableSpire                      bool
 	ResourceVerificationMode         string
+	EnableSidecarLogsResults         bool
 }
 
 // GetFeatureFlagsConfigName returns the name of the configmap containing all
@@ -159,6 +163,9 @@ func NewFeatureFlagsFromMap(cfgMap map[string]string) (*FeatureFlags, error) {
 	if err := setResourceVerificationMode(cfgMap, DefaultResourceVerificationMode, &tc.ResourceVerificationMode); err != nil {
 		return nil, err
 	}
+	if err := setFeature(enableSidecarLogsResults, DefaultSidecarLogsResults, &tc.EnableSidecarLogsResults); err != nil {
+		return nil, err
+	}
 
 	// Given that they are alpha features, Tekton Bundles and Custom Tasks should be switched on if
 	// enable-api-fields is "alpha". If enable-api-fields is not "alpha" then fall back to the value of

diff --git a/pkg/apis/pipeline/images.go b/pkg/apis/pipeline/images.go
@@ -26,6 +26,8 @@ import (
 type Images struct {
 	// EntrypointImage is container image containing our entrypoint binary.
 	EntrypointImage string
+	// SidecarLogResultsImage is container image containing the binary that fetches results from the steps and logs it to stdout.
+	SidecarLogResultsImage string
 	// NopImage is the container image used to kill sidecars.
 	NopImage string
 	// GitImage is the container image with Git that we use to implement the Git source step.
@@ -55,6 +57,7 @@ func (i Images) Validate() error {
 		v, name string
 	}{
 		{i.EntrypointImage, "entrypoint-image"},
+		{i.SidecarLogResultsImage, "sidecarlogresults-image"},
 		{i.NopImage, "nop-image"},
 		{i.GitImage, "git-image"},
 		{i.KubeconfigWriterImage, "kubeconfig-writer-image"},

diff --git a/pkg/apis/pipeline/images_test.go b/pkg/apis/pipeline/images_test.go
@@ -9,6 +9,7 @@ import (
 func TestValidate(t *testing.T) {
 	valid := pipeline.Images{
 		EntrypointImage:          "set",
+		SidecarLogResultsImage:   "set",
 		NopImage:                 "set",
 		GitImage:                 "set",
 		KubeconfigWriterImage:    "set",
@@ -25,6 +26,7 @@ func TestValidate(t *testing.T) {
 
 	invalid := pipeline.Images{
 		EntrypointImage:          "set",
+		SidecarLogResultsImage:   "set",
 		NopImage:                 "set",
 		GitImage:                 "", // unset!
 		KubeconfigWriterImage:    "set",

diff --git a/pkg/apis/pipeline/v1beta1/taskrun_types.go b/pkg/apis/pipeline/v1beta1/taskrun_types.go
@@ -181,6 +181,8 @@ const (
 	TaskRunReasonsResultsVerificationFailed TaskRunReason = "TaskRunResultsVerificationFailed"
 	// AwaitingTaskRunResults is the reason set when waiting upon `TaskRun` results and signatures to verify
 	AwaitingTaskRunResults TaskRunReason = "AwaitingTaskRunResults"
+	// TaskRunReasonResultLargerThanAllowedLimit is the reason set when one of the results exceeds its maximum allowed limit of 1 KB
+	TaskRunReasonResultLargerThanAllowedLimit TaskRunReason = "TaskRunResultLargerThanAllowedLimit"
 )
 
 func (t TaskRunReason) String() string {