From c0173a8b527322d812d940454bea459b5587342e Mon Sep 17 00:00:00 2001
From: Chitrang Patel <chitrang@google.com>
Date: Fri, 3 Jun 2022 13:52:07 -0400
Subject: [PATCH] TEP 0127: Larger results using sidecar logs: validation and
 example tests

Prior to this, we were extracting results from tasks via the termination messages which had a limit of only 4 KB per pod. If users had many results then the results would need to become smaller to obey the upper limit of 4 KB.

We now run a dedicated sidecar that has access to the results of all the steps. This sidecar prints out the result and its content to stdout. The logs of the sidecar are parsed by the taskrun controller and the results updated instead of termination logs. We set an upper limit on the results to 4KB by default (configurable) and users can have as many such results as needed.
---
 cmd/sidecarlogresults/main.go                 |  5 +-
 docs/install.md                               | 26 +++++++
 docs/tasks.md                                 | 13 +++-
 .../alpha/pipelinerun-large-results.yaml      | 76 +++++++++++++++++++
 .../taskruns/alpha/large-task-result.yaml     | 28 +++++++
 pkg/apis/pipeline/v1/task_validation.go       | 14 ++++
 pkg/apis/pipeline/v1/task_validation_test.go  | 48 ++++++++++++
 pkg/apis/pipeline/v1beta1/task_validation.go  | 14 ++++
 .../pipeline/v1beta1/task_validation_test.go  | 48 ++++++++++++
 test/e2e-tests-kind-prow-alpha.env            |  1 +
 test/e2e-tests.sh                             | 14 ++++
 11 files changed, 282 insertions(+), 5 deletions(-)
 create mode 100644 examples/v1beta1/pipelineruns/alpha/pipelinerun-large-results.yaml
 create mode 100644 examples/v1beta1/taskruns/alpha/large-task-result.yaml

diff --git a/cmd/sidecarlogresults/main.go b/cmd/sidecarlogresults/main.go
index e163ee98df4..70d8a7e9d76 100644
--- a/cmd/sidecarlogresults/main.go
+++ b/cmd/sidecarlogresults/main.go
@@ -36,10 +36,7 @@ func main() {
 	if resultNames == "" {
 		log.Fatal("result-names were not provided")
 	}
-	expectedResults := []string{}
-	for _, s := range strings.Split(resultNames, ",") {
-		expectedResults = append(expectedResults, s)
-	}
+	expectedResults := strings.Split(resultNames, ",")
 	err := sidecarlogresults.LookForResults(os.Stdout, pod.RunDir, resultsDir, expectedResults)
 	if err != nil {
 		log.Fatal(err)
diff --git a/docs/install.md b/docs/install.md
index f34cf7ba9ee..6d8dd6fa103 100644
--- a/docs/install.md
+++ b/docs/install.md
@@ -484,6 +484,32 @@ the `feature-flags` ConfigMap alongside your Tekton Pipelines deployment via
 
 For beta versions of Tekton CRDs, setting `enable-api-fields` to "beta" is the same as setting it to "stable".
 
+## Enabling larger results using sidecar logs
+
+**Note**: The maximum size of a Task's results is limited by the container termination message feature of Kubernetes, as results are passed back to the controller via this mechanism. At present, the limit is per task is “4096 bytes”. All results produced by the task share this upper limit.
+
+To exceed this limit of 4096 bytes, you can enable larger results using sidecar logs. By enabling this feature, you will have a configurable limit (with a default of 4096 bytes) per result with no restriction on the number of results. The results are still stored in the taskrun crd so they should not exceed the 1.5MB CRD size limit.
+
+**Note**: to enable this feature, you need to grant `get` access to all `pods/log` to the `Tekton pipeline controller`. This means that the tekton pipeline controller has the ability to access the pod logs.
+
+1. Create a cluster role and rolebinding by applying the following spec to provide log access to `tekton-pipelines-controller`. 
+
+```
+kubectl apply -f optional_config/enable-log-access-to-controller/
+```
+
+2. Set the `results-from` feature flag to use sidecar logs by setting `results-from: sidecar-logs` in the [configMap](#customizing-the-pipelines-controller-behavior).
+
+```
+kubectl patch cm feature-flags -n tekton-pipelines -p '{"data":{"results-from":"sidecar-logs"}}'
+```
+
+3. If you want the size per result to be something other than 4096 bytes, you can set the `max-result-size` feature flag in bytes by setting `max-result-size: 8192(whatever you need here)`. **Note:** The value you can set here cannot exceed the size of the CRD limit of 1.5 MB.
+ 
+```
+kubectl patch cm feature-flags -n tekton-pipelines -p '{"data":{"max-result-size":"<VALUE-IN-BYTES>"}}'
+```
+
 ## Configuring High Availability
 
 If you want to run Tekton Pipelines in a way so that webhooks are resiliant against failures and support
diff --git a/docs/tasks.md b/docs/tasks.md
index 3782ed7cc67..a5669fa4e4a 100644
--- a/docs/tasks.md
+++ b/docs/tasks.md
@@ -23,6 +23,7 @@ weight: 200
   - [Specifying `Resources`](#specifying-resources)
   - [Specifying `Workspaces`](#specifying-workspaces)
   - [Emitting `Results`](#emitting-results)
+    - [Larger `Results` using sidecar logs](#larger-results-using-sidecar-logs)
   - [Specifying `Volumes`](#specifying-volumes)
   - [Specifying a `Step` template](#specifying-a-step-template)
   - [Specifying `Sidecars`](#specifying-sidecars)
@@ -835,7 +836,7 @@ This also means that the number of Steps in a Task affects the maximum size of a
 as each Step is implemented as a container in the TaskRun's pod.
 The more containers we have in our pod, *the smaller the allowed size of each container's
 message*, meaning that the **more steps you have in a Task, the smaller the result for each step can be**. 
-For example, if you have 10 steps, the size of each step's Result will have a maximum of less than 1KB*.
+For example, if you have 10 steps, the size of each step's Result will have a maximum of less than 1KB.
 
 If your `Task` writes a large number of small results, you can work around this limitation
 by writing each result from a separate `Step` so that each `Step` has its own termination message.
@@ -847,6 +848,16 @@ available size will less than 4096 bytes.
 As a general rule-of-thumb, if a result needs to be larger than a kilobyte, you should likely use a
 [`Workspace`](#specifying-workspaces) to store and pass it between `Tasks` within a `Pipeline`.
 
+#### Larger `Results` using sidecar logs
+
+This is an experimental feature. The `results-from` feature flag must be set to `"sidecar-logs"`](./install.md#enabling-larger-results-using-sidecar-logs).
+
+Instead of using termination messages to store results, the taskrun controller injects a sidecar container which monitors the results of all the steps. The sidecar mounts the volume where results of all the steps are stored. As soon as it finds a new result, it logs it to std out. The controller has access to the logs of the sidecar container (Caution: we need you to enable access to [kubernetes pod/logs](./install.md#enabling-larger-results-using-sidecar-logs). 
+
+This feature allows users to store up to 4 KB per result by default. Because we are not limited by the size of the termination messages, users can have as many results as they require (or until the CRD reaches its limit). If the size of a result exceeds this limit, then the TaskRun will be placed into a failed state with the following message: `Result exceeded the maximum allowed limit.`
+
+**Note**: If you require even larger results, you can specify a different upper limit per result by setting `max-result-size` feature flag to your desired size in bytes ([see instructions](./install.md#enabling-larger-results-using-sidecar-logs)). **CAUTION**: the larger you make the size, more likely will the CRD reach its max limit enforced by the `etcd` server leading to bad user experience.
+
 ### Specifying `Volumes`
 
 Specifies one or more [`Volumes`](https://kubernetes.io/docs/concepts/storage/volumes/) that the `Steps` in your
diff --git a/examples/v1beta1/pipelineruns/alpha/pipelinerun-large-results.yaml b/examples/v1beta1/pipelineruns/alpha/pipelinerun-large-results.yaml
new file mode 100644
index 00000000000..3138fcddc85
--- /dev/null
+++ b/examples/v1beta1/pipelineruns/alpha/pipelinerun-large-results.yaml
@@ -0,0 +1,76 @@
+apiVersion: tekton.dev/v1beta1
+kind: Task
+metadata:
+  name: large-result-task
+spec:
+  results:
+    - name: result1
+    - name: result2
+    - name: result3
+    - name: result4
+    - name: result5
+  steps:
+    - name: step1
+      image: alpine
+      script: |
+        cat /dev/urandom | head -c 2500 | base64 | tee $(results.result1.path);
+        cat /dev/urandom | head -c 2500 | base64 | tee $(results.result2.path);
+        cat /dev/urandom | head -c 2500 | base64 | tee $(results.result3.path);
+        cat /dev/urandom | head -c 2500 | base64 | tee $(results.result4.path);
+        cat /dev/urandom | head -c 2500 | base64 | tee $(results.result5.path);
+---
+apiVersion: tekton.dev/v1beta1
+kind: Task
+metadata:
+  name: print-large-results
+spec:
+  params:
+    - name: param1
+    - name: param2
+    - name: param3
+    - name: param4
+    - name: param5
+  steps:
+    - name: step1
+      image: alpine
+      script: |
+        echo "$(params.param1)";
+        echo "$(params.param2)";
+        echo "$(params.param3)";
+        echo "$(params.param4)";
+        echo "$(params.param5)";
+---
+apiVersion: tekton.dev/v1beta1
+kind: Pipeline
+metadata:
+  name: large-result-pipeline
+spec:
+  tasks:
+    - name: large-task
+      taskRef:
+        name: large-result-task
+    - name: print-results
+      params:
+        - name: param1
+          value: "$(tasks.large-task.results.result1)"
+        - name: param2
+          value: "$(tasks.large-task.results.result2)"
+        - name: param3
+          value: "$(tasks.large-task.results.result3)"
+        - name: param4
+          value: "$(tasks.large-task.results.result4)"
+        - name: param5
+          value: "$(tasks.large-task.results.result5)"
+      taskRef:
+        name: print-large-results
+  results:
+    - name: large-result
+      value: $(tasks.large-task.results.result1)
+---
+apiVersion: tekton.dev/v1beta1
+kind: PipelineRun
+metadata:
+  name: large-result-pipeline-run
+spec:
+  pipelineRef:
+    name: large-result-pipeline
diff --git a/examples/v1beta1/taskruns/alpha/large-task-result.yaml b/examples/v1beta1/taskruns/alpha/large-task-result.yaml
new file mode 100644
index 00000000000..35b70fa67e5
--- /dev/null
+++ b/examples/v1beta1/taskruns/alpha/large-task-result.yaml
@@ -0,0 +1,28 @@
+apiVersion: tekton.dev/v1beta1
+kind: TaskRun
+metadata:
+  generateName: larger-results-
+spec:
+  taskSpec:
+    description: |
+      A task that creates results > termination message limit of 4K per pod!
+    results:
+      - name: result1
+      - name: result2
+      - name: result3
+      - name: result4
+      - name: result5
+    steps:
+      - name: step1
+        image: bash:latest
+        script: |
+          #!/usr/bin/env bash
+          cat /dev/urandom | head -c 2500 | base64 | tee /tekton/results/result1 #about 1 K result
+          cat /dev/urandom | head -c 2500 | base64 | tee /tekton/results/result2 #about 4 K result
+      - name: step2
+        image: bash:latest
+        script: |
+          #!/usr/bin/env bash
+          cat /dev/urandom | head -c 2500 | base64 | tee /tekton/results/result3 #about 1 K result
+          cat /dev/urandom | head -c 2500 | base64 | tee /tekton/results/result4 #about 4 K result
+          cat /dev/urandom | head -c 2500 | base64 | tee /tekton/results/result5 #about 4 K result
diff --git a/pkg/apis/pipeline/v1/task_validation.go b/pkg/apis/pipeline/v1/task_validation.go
index d2ce8de157a..d2fd7d483c7 100644
--- a/pkg/apis/pipeline/v1/task_validation.go
+++ b/pkg/apis/pipeline/v1/task_validation.go
@@ -25,6 +25,7 @@ import (
 	"time"
 
 	"github.com/tektoncd/pipeline/pkg/apis/config"
+	"github.com/tektoncd/pipeline/pkg/apis/pipeline"
 	"github.com/tektoncd/pipeline/pkg/apis/validate"
 	"github.com/tektoncd/pipeline/pkg/apis/version"
 	"github.com/tektoncd/pipeline/pkg/substitution"
@@ -92,6 +93,7 @@ func (ts *TaskSpec) Validate(ctx context.Context) (errs *apis.FieldError) {
 	}
 
 	errs = errs.Also(validateSteps(ctx, mergedSteps).ViaField("steps"))
+	errs = errs.Also(validateSidecarNames(ctx, ts.Sidecars))
 	errs = errs.Also(ValidateParameterTypes(ctx, ts.Params).ViaField("params"))
 	errs = errs.Also(ValidateParameterVariables(ctx, ts.Steps, ts.Params))
 	errs = errs.Also(validateTaskContextVariables(ctx, ts.Steps))
@@ -99,6 +101,18 @@ func (ts *TaskSpec) Validate(ctx context.Context) (errs *apis.FieldError) {
 	return errs
 }
 
+func validateSidecarNames(ctx context.Context, sidecars []Sidecar) (errs *apis.FieldError) {
+	for _, sc := range sidecars {
+		if config.FromContextOrDefaults(ctx).FeatureFlags.ResultExtractionMethod == config.ResultExtractionMethodSidecarLogs && sc.Name == pipeline.ReservedResultsSidecarName {
+			errs = errs.Also(&apis.FieldError{
+				Message: fmt.Sprintf("Invalid sidecar name %v. This is reserved by the controller because the results-from feature flag has been set to %v", sc.Name, config.ResultExtractionMethodSidecarLogs),
+				Paths:   []string{"sidecars"},
+			})
+		}
+	}
+	return errs
+}
+
 func validateResults(ctx context.Context, results []TaskResult) (errs *apis.FieldError) {
 	for index, result := range results {
 		errs = errs.Also(result.Validate(ctx).ViaIndex(index))
diff --git a/pkg/apis/pipeline/v1/task_validation_test.go b/pkg/apis/pipeline/v1/task_validation_test.go
index 193e43fb2be..2fcc766a756 100644
--- a/pkg/apis/pipeline/v1/task_validation_test.go
+++ b/pkg/apis/pipeline/v1/task_validation_test.go
@@ -1298,6 +1298,54 @@ func TestTaskSpecValidateError(t *testing.T) {
 	}
 }
 
+func TestTaskSpecValidateErrorSidecarName(t *testing.T) {
+	tests := []struct {
+		name                   string
+		sidecars               []v1.Sidecar
+		resultExtractionMethod string
+		expectedError          apis.FieldError
+	}{{
+		name: "cannot use reserved sidecar name",
+		sidecars: []v1.Sidecar{{
+			Name:  "tekton-log-results",
+			Image: "my-image",
+		}},
+		resultExtractionMethod: "sidecar-logs",
+		expectedError: apis.FieldError{
+			// ": sidecars\nmissing field(s): step"
+			//  - 	" \nmissing field(s): step: sidecar",
+			Message: fmt.Sprintf("Invalid sidecar name tekton-log-results. This is reserved by the controller because the results-from feature flag has been set to %v", config.ResultExtractionMethodSidecarLogs),
+			Paths:   []string{"sidecars"},
+		},
+	}}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			ctx := context.Background()
+			if tt.resultExtractionMethod != "" {
+				ctx = config.ToContext(ctx, &config.Config{
+					FeatureFlags: &config.FeatureFlags{
+						ResultExtractionMethod: tt.resultExtractionMethod,
+					},
+				})
+			}
+			ts := &v1.TaskSpec{
+				Steps: []v1.Step{{
+					Name:  "does-not-matter",
+					Image: "does-not-matter",
+				}},
+				Sidecars: tt.sidecars,
+			}
+			err := ts.Validate(ctx)
+			if err == nil {
+				t.Fatalf("Expected an error, got nothing for %v", ts)
+			}
+			if d := cmp.Diff(tt.expectedError.Error(), err.Error(), cmpopts.IgnoreUnexported(apis.FieldError{})); d != "" {
+				t.Errorf("TaskSpec.Validate() errors diff %s", diff.PrintWantGot(d))
+			}
+		})
+	}
+}
+
 func TestStepAndSidecarWorkspaces(t *testing.T) {
 	type fields struct {
 		Steps      []v1.Step
diff --git a/pkg/apis/pipeline/v1beta1/task_validation.go b/pkg/apis/pipeline/v1beta1/task_validation.go
index a64d5bdb81a..00949a387ab 100644
--- a/pkg/apis/pipeline/v1beta1/task_validation.go
+++ b/pkg/apis/pipeline/v1beta1/task_validation.go
@@ -25,6 +25,7 @@ import (
 	"time"
 
 	"github.com/tektoncd/pipeline/pkg/apis/config"
+	"github.com/tektoncd/pipeline/pkg/apis/pipeline"
 	"github.com/tektoncd/pipeline/pkg/apis/validate"
 	"github.com/tektoncd/pipeline/pkg/apis/version"
 	"github.com/tektoncd/pipeline/pkg/substitution"
@@ -92,6 +93,7 @@ func (ts *TaskSpec) Validate(ctx context.Context) (errs *apis.FieldError) {
 	}
 
 	errs = errs.Also(validateSteps(ctx, mergedSteps).ViaField("steps"))
+	errs = errs.Also(validateSidecarNames(ctx, ts.Sidecars))
 	errs = errs.Also(ts.Resources.Validate(ctx).ViaField("resources"))
 	errs = errs.Also(ValidateParameterTypes(ctx, ts.Params).ViaField("params"))
 	errs = errs.Also(ValidateParameterVariables(ctx, ts.Steps, ts.Params))
@@ -101,6 +103,18 @@ func (ts *TaskSpec) Validate(ctx context.Context) (errs *apis.FieldError) {
 	return errs
 }
 
+func validateSidecarNames(ctx context.Context, sidecars []Sidecar) (errs *apis.FieldError) {
+	for _, sc := range sidecars {
+		if config.FromContextOrDefaults(ctx).FeatureFlags.ResultExtractionMethod == config.ResultExtractionMethodSidecarLogs && sc.Name == pipeline.ReservedResultsSidecarName {
+			errs = errs.Also(&apis.FieldError{
+				Message: fmt.Sprintf("Invalid sidecar name %v. This is reserved by the controller because the results-from feature flag has been set to %v", sc.Name, config.ResultExtractionMethodSidecarLogs),
+				Paths:   []string{"sidecars"},
+			})
+		}
+	}
+	return errs
+}
+
 func validateResults(ctx context.Context, results []TaskResult) (errs *apis.FieldError) {
 	for index, result := range results {
 		errs = errs.Also(result.Validate(ctx).ViaIndex(index))
diff --git a/pkg/apis/pipeline/v1beta1/task_validation_test.go b/pkg/apis/pipeline/v1beta1/task_validation_test.go
index 391cea3eb8b..bec255cf2d8 100644
--- a/pkg/apis/pipeline/v1beta1/task_validation_test.go
+++ b/pkg/apis/pipeline/v1beta1/task_validation_test.go
@@ -1411,6 +1411,54 @@ func TestTaskSpecValidateError(t *testing.T) {
 	}
 }
 
+func TestTaskSpecValidateErrorSidecarName(t *testing.T) {
+	tests := []struct {
+		name                   string
+		sidecars               []v1beta1.Sidecar
+		resultExtractionMethod string
+		expectedError          apis.FieldError
+	}{{
+		name: "cannot use reserved sidecar name",
+		sidecars: []v1beta1.Sidecar{{
+			Name:  "tekton-log-results",
+			Image: "my-image",
+		}},
+		resultExtractionMethod: "sidecar-logs",
+		expectedError: apis.FieldError{
+			// ": sidecars\nmissing field(s): step"
+			//  - 	" \nmissing field(s): step: sidecar",
+			Message: fmt.Sprintf("Invalid sidecar name tekton-log-results. This is reserved by the controller because the results-from feature flag has been set to %v", config.ResultExtractionMethodSidecarLogs),
+			Paths:   []string{"sidecars"},
+		},
+	}}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			ctx := context.Background()
+			if tt.resultExtractionMethod != "" {
+				ctx = config.ToContext(ctx, &config.Config{
+					FeatureFlags: &config.FeatureFlags{
+						ResultExtractionMethod: tt.resultExtractionMethod,
+					},
+				})
+			}
+			ts := &v1beta1.TaskSpec{
+				Steps: []v1beta1.Step{{
+					Name:  "does-not-matter",
+					Image: "does-not-matter",
+				}},
+				Sidecars: tt.sidecars,
+			}
+			err := ts.Validate(ctx)
+			if err == nil {
+				t.Fatalf("Expected an error, got nothing for %v", ts)
+			}
+			if d := cmp.Diff(tt.expectedError.Error(), err.Error(), cmpopts.IgnoreUnexported(apis.FieldError{})); d != "" {
+				t.Errorf("TaskSpec.Validate() errors diff %s", diff.PrintWantGot(d))
+			}
+		})
+	}
+}
+
 func TestStepAndSidecarWorkspaces(t *testing.T) {
 	type fields struct {
 		Steps      []v1beta1.Step
diff --git a/test/e2e-tests-kind-prow-alpha.env b/test/e2e-tests-kind-prow-alpha.env
index eacb0ef5eb6..b1db6ff3546 100644
--- a/test/e2e-tests-kind-prow-alpha.env
+++ b/test/e2e-tests-kind-prow-alpha.env
@@ -4,3 +4,4 @@ EMBEDDED_STATUS_GATE=minimal
 RUN_YAML_TESTS=true
 KO_DOCKER_REPO=registry.local:5000
 E2E_GO_TEST_TIMEOUT=40m
+RESULTS_FROM=sidecar-logs
diff --git a/test/e2e-tests.sh b/test/e2e-tests.sh
index b553452f46c..459cd7ecbd4 100755
--- a/test/e2e-tests.sh
+++ b/test/e2e-tests.sh
@@ -27,6 +27,7 @@ SKIP_INITIALIZE=${SKIP_INITIALIZE:="false"}
 RUN_YAML_TESTS=${RUN_YAML_TESTS:="true"}
 SKIP_GO_E2E_TESTS=${SKIP_GO_E2E_TESTS:="false"}
 E2E_GO_TEST_TIMEOUT=${E2E_GO_TEST_TIMEOUT:="20m"}
+RESULTS_FROM=${RESULTS_FROM:-termination-message}
 failed=0
 
 # Script entry point.
@@ -75,6 +76,18 @@ function set_embedded_status() {
   kubectl patch configmap feature-flags -n tekton-pipelines -p "$jsonpatch"
 }
 
+function set_result_extraction_method() {
+  local method="$1"
+  if [ "$method" != "termination-message" ] && [ "$method" != "sidecar-logs" ]; then
+    printf "Invalid value for results-from %s\n" ${method}
+    exit 255
+  fi
+  printf "Setting results-from to %s\n", ${method}
+  jsonpatch=$(printf "{\"data\": {\"results-from\": \"%s\"}}" $1)
+  echo "feature-flags ConfigMap patch: ${jsonpatch}"
+  kubectl patch configmap feature-flags -n tekton-pipelines -p "$jsonpatch"
+}
+
 function run_e2e() {
   # Run the integration tests
   header "Running Go e2e tests"
@@ -93,6 +106,7 @@ function run_e2e() {
 
 set_feature_gate "$PIPELINE_FEATURE_GATE"
 set_embedded_status "$EMBEDDED_STATUS_GATE"
+set_result_extraction_method "$RESULTS_FROM"
 run_e2e
 
 (( failed )) && fail_test