Skip to content

Commit

Permalink
Larger results using sidecar logs
Browse files Browse the repository at this point in the history
Prior to this, we were extracting results from tasks via the termination messages which had a limit of only 4 KB per pod. If users had many results then the results would need to become smaller to obey the upper limit of 4 KB.

We now run a dedicated sidecar that has access to the results of all the steps. This sidecar prints out the result and its content to stdout. The logs of the sidecar are parsed by the taskrun controller and the results updated instead of termination logs. We set an upper limit on the results to 1KB but users can have as many such results as needed.
  • Loading branch information
chitrangpatel committed Nov 3, 2022
1 parent 2d38f5f commit 2c61ea8
Show file tree
Hide file tree
Showing 27 changed files with 807 additions and 32 deletions.
1 change: 1 addition & 0 deletions cmd/controller/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ func main() {

opts := &pipeline.Options{}
flag.StringVar(&opts.Images.EntrypointImage, "entrypoint-image", "", "The container image containing our entrypoint binary.")
flag.StringVar(&opts.Images.SidecarLogResultsImage, "sidecarlogresults-image", "", "The container image containing the binary for accessing results.")
flag.StringVar(&opts.Images.NopImage, "nop-image", "", "The container image used to stop sidecars")
flag.StringVar(&opts.Images.GitImage, "git-image", "", "The container image containing our Git binary.")
flag.StringVar(&opts.Images.KubeconfigWriterImage, "kubeconfig-writer-image", "", "The container image containing our kubeconfig writer binary.")
Expand Down
22 changes: 12 additions & 10 deletions cmd/entrypoint/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,10 @@ var (
breakpointOnFailure = flag.Bool("breakpoint_on_failure", false, "If specified, expect steps to not skip on failure")
onError = flag.String("on_error", "", "Set to \"continue\" to ignore an error and continue when a container terminates with a non-zero exit code."+
" Set to \"stopAndFail\" to declare a failure with a step error and stop executing the rest of the steps.")
stepMetadataDir = flag.String("step_metadata_dir", "", "If specified, create directory to store the step metadata e.g. /tekton/steps/<step-name>/")
enableSpire = flag.Bool("enable_spire", false, "If specified by configmap, this enables spire signing and verification")
socketPath = flag.String("spire_socket_path", "unix:///spiffe-workload-api/spire-agent.sock", "Experimental: The SPIRE agent socket for SPIFFE workload API.")
stepMetadataDir = flag.String("step_metadata_dir", "", "If specified, create directory to store the step metadata e.g. /tekton/steps/<step-name>/")
enableSpire = flag.Bool("enable_spire", false, "If specified by configmap, this enables spire signing and verification")
socketPath = flag.String("spire_socket_path", "unix:///spiffe-workload-api/spire-agent.sock", "Experimental: The SPIRE agent socket for SPIFFE workload API.")
dontSendResultsToTerminationPath = flag.Bool("dont_send_results_to_termination_path", false, "If specified, dont send results to the termination path.")
)

const (
Expand Down Expand Up @@ -154,13 +155,14 @@ func main() {
stdoutPath: *stdoutPath,
stderrPath: *stderrPath,
},
PostWriter: &realPostWriter{},
Results: strings.Split(*results, ","),
Timeout: timeout,
BreakpointOnFailure: *breakpointOnFailure,
OnError: *onError,
StepMetadataDir: *stepMetadataDir,
SpireWorkloadAPI: spireWorkloadAPI,
PostWriter: &realPostWriter{},
Results: strings.Split(*results, ","),
Timeout: timeout,
BreakpointOnFailure: *breakpointOnFailure,
OnError: *onError,
StepMetadataDir: *stepMetadataDir,
SpireWorkloadAPI: spireWorkloadAPI,
DontSendResultsToTerminationPath: *dontSendResultsToTerminationPath,
}

// Copy any creds injected by the controller into the $HOME directory of the current
Expand Down
40 changes: 40 additions & 0 deletions cmd/sidecarlogresults/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
/*
Copyright 2019 The Tekton Authors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package main

import (
"flag"
"log"

"github.com/tektoncd/pipeline/pkg/apis/pipeline"
"github.com/tektoncd/pipeline/pkg/sidecarlogresults"
)

// main is the entry point of the results sidecar binary. It reads the results
// directory and the comma separated list of expected result names from the
// command line and hands both to sidecarlogresults.LookForResults. The process
// exits via log.Fatal when no result names were supplied or when the lookup
// returns an error.
func main() {
	dir := flag.String("results-dir", pipeline.DefaultResultPath, "Path to the results directory. Default is /tekton/results")
	names := flag.String("result-names", "", "comma separated result names to expect from the steps running in the pod. eg. foo,bar,baz")
	flag.Parse()

	// The list of expected result names is mandatory.
	if len(*names) == 0 {
		log.Fatal("result-names were not provided")
	}

	if err := sidecarlogresults.LookForResults(*dir, *names); err != nil {
		log.Fatal(err)
	}
}
1 change: 1 addition & 0 deletions config/controller.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ spec:
"-kubeconfig-writer-image", "ko://github.com/tektoncd/pipeline/cmd/kubeconfigwriter",
"-git-image", "ko://github.com/tektoncd/pipeline/cmd/git-init",
"-entrypoint-image", "ko://github.com/tektoncd/pipeline/cmd/entrypoint",
"-sidecarlogresults-image", "ko://github.com/tektoncd/pipeline/cmd/sidecarlogresults",
"-nop-image", "ko://github.com/tektoncd/pipeline/cmd/nop",
"-imagedigest-exporter-image", "ko://github.com/tektoncd/pipeline/cmd/imagedigestexporter",
"-pr-image", "ko://github.com/tektoncd/pipeline/cmd/pullrequest-init",
Expand Down
13 changes: 13 additions & 0 deletions config/enable-log-access-to-controller/clusterrole.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
name: tekton-pipelines-controller-pod-log-access
labels:
app.kubernetes.io/component: controller
app.kubernetes.io/instance: default
app.kubernetes.io/part-of: tekton-pipelines
rules:
- apiGroups: [""]
# Controller needs to get the logs of the results sidecar created by TaskRuns to extract results.
resources: ["pods/log"]
verbs: ["get"]
16 changes: 16 additions & 0 deletions config/enable-log-access-to-controller/clusterrolebinding.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
name: tekton-pipelines-controller-pod-log-access
labels:
app.kubernetes.io/component: controller
app.kubernetes.io/instance: default
app.kubernetes.io/part-of: tekton-pipelines
subjects:
- kind: ServiceAccount
name: tekton-pipelines-controller
namespace: tekton-pipelines
roleRef:
kind: ClusterRole
name: tekton-pipelines-controller-pod-log-access
apiGroup: rbac.authorization.k8s.io
51 changes: 51 additions & 0 deletions docs/install.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ This guide explains how to install Tekton Pipelines. It covers the following top
- [Customizing the Pipelines Controller behavior](#customizing-the-pipelines-controller-behavior)
- [Alpha Features](#alpha-features)
- [Beta Features](#beta-features)
- [Enabling larger results using sidecar logs](#enabling-larger-results-using-sidecar-logs)
- [Configuring High Availability](#configuring-high-availability)
- [Configuring tekton pipeline controller performance](#configuring-tekton-pipeline-controller-performance)
- [Creating a custom release of Tekton Pipelines](#creating-a-custom-release-of-tekton-pipelines)
Expand Down Expand Up @@ -421,6 +422,7 @@ features](#alpha-features) to be used.
do both. For more information, see [Configuring usage of `TaskRun` and `Run` embedded statuses](pipelineruns.md#configuring-usage-of-taskrun-and-run-embedded-statuses).

- `resource-verification-mode`: Setting this flag to "enforce" will enforce verification of tasks/pipeline. Failing to verify will fail the taskrun/pipelinerun. "warn" will only log the err message and "skip" will skip the whole verification.
- `enable-sidecar-logs-results`: Set this flag to "true" to use the logs of a results sidecar to extract results larger than the size of the termination message. While the termination message restricts the combined size of results to 4K per pod, enabling this feature allows 1K per result (with as many results as required).

For example:

Expand Down Expand Up @@ -470,6 +472,55 @@ the `feature-flags` ConfigMap alongside your Tekton Pipelines deployment via

For beta versions of Tekton CRDs, setting `enable-api-fields` to "beta" is the same as setting it to "stable".

## Enabling larger results using sidecar logs

**Note**: The maximum size of a Task's results is limited by the container termination message feature of Kubernetes, as results are passed back to the controller via this mechanism. At present, the limit is “4096 bytes”.
To exceed this limit of 4096 bytes, you can enable larger results using sidecar logs. By enabling this feature, you will have a limit of 1024 bytes per result with no restriction on the number of results.
**Note**: to enable this feature, you need to grant `get` access to all `pods/log` to the `Tekton pipeline controller`. This means that the tekton pipeline controller has the ability to access the pod logs.
1. Create a cluster role by applying the following spec.
```yaml
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
name: tekton-pipelines-controller-pod-log-access
labels:
app.kubernetes.io/component: controller
app.kubernetes.io/instance: default
app.kubernetes.io/part-of: tekton-pipelines
rules:
- apiGroups: [""]
# Controller needs to get the logs of the results sidecar created by TaskRuns to extract results.
resources: ["pods/log"]
verbs: ["get"]
```
2. Create a cluster role binding by applying the following spec.
```yaml
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
name: tekton-pipelines-controller-pod-log-access
labels:
app.kubernetes.io/component: controller
app.kubernetes.io/instance: default
app.kubernetes.io/part-of: tekton-pipelines
subjects:
- kind: ServiceAccount
name: tekton-pipelines-controller
namespace: tekton-pipelines
roleRef:
kind: ClusterRole
name: tekton-pipelines-controller-pod-log-access
apiGroup: rbac.authorization.k8s.io
```
3. Enable the feature flag to use sidecar logs by setting `enable-sidecar-logs-results: "true"` in the [configMap](#customizing-the-pipelines-controller-behavior).
## Configuring High Availability
If you want to run Tekton Pipelines in a way so that webhooks are resilient against failures and support
Expand Down
12 changes: 11 additions & 1 deletion docs/tasks.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ weight: 200
- [Specifying `Resources`](#specifying-resources)
- [Specifying `Workspaces`](#specifying-workspaces)
- [Emitting `Results`](#emitting-results)
- [Larger `Results` using sidecar logs](#larger-results-using-sidecar-logs)
- [Specifying `Volumes`](#specifying-volumes)
- [Specifying a `Step` template](#specifying-a-step-template)
- [Specifying `Sidecars`](#specifying-sidecars)
Expand Down Expand Up @@ -835,7 +836,7 @@ This also means that the number of Steps in a Task affects the maximum size of a
as each Step is implemented as a container in the TaskRun's pod.
The more containers we have in our pod, *the smaller the allowed size of each container's
message*, meaning that the **more steps you have in a Task, the smaller the result for each step can be**.
For example, if you have 10 steps, the size of each step's Result will have a maximum of less than 1KB*.
For example, if you have 10 steps, the size of each step's Result will have a maximum of less than 1KB.

If your `Task` writes a large number of small results, you can work around this limitation
by writing each result from a separate `Step` so that each `Step` has its own termination message.
Expand All @@ -847,6 +848,15 @@ available size will less than 4096 bytes.
As a general rule-of-thumb, if a result needs to be larger than a kilobyte, you should likely use a
[`Workspace`](#specifying-workspaces) to store and pass it between `Tasks` within a `Pipeline`.

#### Larger `Results` using sidecar logs

This is an experimental feature. The `enable-sidecar-logs-results` feature flag [must be set to `"true"`](./install.md#enabling-larger-results-using-sidecar-logs).

Instead of using termination messages to store results, the taskrun controller injects a sidecar container which monitors the results of all the steps. The sidecar mounts the volume where results of all the steps are stored. As soon as it finds a new result, it logs it to stdout. The controller has access to the logs of the sidecar container (Caution: we need you to enable access to [kubernetes pod/logs](./install.md#enabling-larger-results-using-sidecar-logs)).

**Note**: This feature allows users to store up to `1 KB per result`. Because we are not limited by the size of the termination messages, users can have as many results as they require where each result can be up to 1 KB in size. If the size of a result exceeds 1KB, then the TaskRun will be placed into a failed state with the following message: `Result exceeded the maximum allowed limit of 1024 bytes.`


### Specifying `Volumes`

Specifies one or more [`Volumes`](https://kubernetes.io/docs/concepts/storage/volumes/) that the `Steps` in your
Expand Down
2 changes: 1 addition & 1 deletion examples/v1beta1/pipelineruns/4808-regression.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -92,4 +92,4 @@ spec:
name: result-test
params:
- name: RESULT_STRING_LENGTH
value: "3000"
value: "1000"
41 changes: 41 additions & 0 deletions examples/v1beta1/pipelineruns/alpha/pipelinerun-large-results.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
apiVersion: tekton.dev/v1beta1
kind: Task
metadata:
name: large-result-task
spec:
results:
- name: result1
- name: result2
- name: result3
- name: result4
- name: result5
steps:
- name: step1
image: alpine
script: |
cat /dev/urandom | head -c 750 | base64 | tee $(results.result1.path);
cat /dev/urandom | head -c 750 | base64 | tee $(results.result2.path);
cat /dev/urandom | head -c 750 | base64 | tee $(results.result3.path);
cat /dev/urandom | head -c 750 | base64 | tee $(results.result4.path);
cat /dev/urandom | head -c 750 | base64 | tee $(results.result5.path);
---
apiVersion: tekton.dev/v1beta1
kind: Pipeline
metadata:
name: large-result-pipeline
spec:
tasks:
- name: large-task
taskRef:
name: large-result-task
results:
- name: large-result
value: $(tasks.large-task.results.result1)
---
apiVersion: tekton.dev/v1beta1
kind: PipelineRun
metadata:
name: large-result-pipeline-run
spec:
pipelineRef:
name: large-result-pipeline
28 changes: 28 additions & 0 deletions examples/v1beta1/taskruns/alpha/large-task-result.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
apiVersion: tekton.dev/v1beta1
kind: TaskRun
metadata:
generateName: larger-results-
spec:
taskSpec:
description: |
A task that creates results > termination message limit of 4K per pod!
results:
- name: result1
- name: result2
- name: result3
- name: result4
- name: result5
steps:
- name: step1
image: bash:latest
script: |
#!/usr/bin/env bash
cat /dev/urandom | head -c 750 | base64 | tee /tekton/results/result1 #about 1 K result
cat /dev/urandom | head -c 750 | base64 | tee /tekton/results/result2 #about 1 K result
- name: step2
image: bash:latest
script: |
#!/usr/bin/env bash
cat /dev/urandom | head -c 750 | base64 | tee /tekton/results/result3 #about 1 K result
cat /dev/urandom | head -c 750 | base64 | tee /tekton/results/result4 #about 1 K result
cat /dev/urandom | head -c 750 | base64 | tee /tekton/results/result5 #about 1 K result
7 changes: 7 additions & 0 deletions pkg/apis/config/feature_flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,8 @@ const (
DefaultEnableSpire = false
// DefaultResourceVerificationMode is the default value for "resource-verification-mode".
DefaultResourceVerificationMode = SkipResourceVerificationMode
// DefaultSidecarLogsResults is the default value for "enable-sidecar-logs-results".
DefaultSidecarLogsResults = false

disableAffinityAssistantKey = "disable-affinity-assistant"
disableCredsInitKey = "disable-creds-init"
Expand All @@ -87,6 +89,7 @@ const (
embeddedStatus = "embedded-status"
enableSpire = "enable-spire"
verificationMode = "resource-verification-mode"
enableSidecarLogsResults = "enable-sidecar-logs-results"
)

// FeatureFlags holds the features configurations
Expand All @@ -105,6 +108,7 @@ type FeatureFlags struct {
EmbeddedStatus string
EnableSpire bool
ResourceVerificationMode string
EnableSidecarLogsResults bool
}

// GetFeatureFlagsConfigName returns the name of the configmap containing all
Expand Down Expand Up @@ -159,6 +163,9 @@ func NewFeatureFlagsFromMap(cfgMap map[string]string) (*FeatureFlags, error) {
if err := setResourceVerificationMode(cfgMap, DefaultResourceVerificationMode, &tc.ResourceVerificationMode); err != nil {
return nil, err
}
if err := setFeature(enableSidecarLogsResults, DefaultSidecarLogsResults, &tc.EnableSidecarLogsResults); err != nil {
return nil, err
}

// Given that they are alpha features, Tekton Bundles and Custom Tasks should be switched on if
// enable-api-fields is "alpha". If enable-api-fields is not "alpha" then fall back to the value of
Expand Down
3 changes: 3 additions & 0 deletions pkg/apis/pipeline/images.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ import (
type Images struct {
// EntrypointImage is container image containing our entrypoint binary.
EntrypointImage string
// SidecarLogResultsImage is container image containing the binary that fetches results from the steps and logs it to stdout.
SidecarLogResultsImage string
// NopImage is the container image used to kill sidecars.
NopImage string
// GitImage is the container image with Git that we use to implement the Git source step.
Expand Down Expand Up @@ -55,6 +57,7 @@ func (i Images) Validate() error {
v, name string
}{
{i.EntrypointImage, "entrypoint-image"},
{i.SidecarLogResultsImage, "sidecarlogresults-image"},
{i.NopImage, "nop-image"},
{i.GitImage, "git-image"},
{i.KubeconfigWriterImage, "kubeconfig-writer-image"},
Expand Down
2 changes: 2 additions & 0 deletions pkg/apis/pipeline/images_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
func TestValidate(t *testing.T) {
valid := pipeline.Images{
EntrypointImage: "set",
SidecarLogResultsImage: "set",
NopImage: "set",
GitImage: "set",
KubeconfigWriterImage: "set",
Expand All @@ -25,6 +26,7 @@ func TestValidate(t *testing.T) {

invalid := pipeline.Images{
EntrypointImage: "set",
SidecarLogResultsImage: "set",
NopImage: "set",
GitImage: "", // unset!
KubeconfigWriterImage: "set",
Expand Down
2 changes: 2 additions & 0 deletions pkg/apis/pipeline/v1beta1/taskrun_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,8 @@ const (
TaskRunReasonsResultsVerificationFailed TaskRunReason = "TaskRunResultsVerificationFailed"
// AwaitingTaskRunResults is the reason set when waiting upon `TaskRun` results and signatures to verify
AwaitingTaskRunResults TaskRunReason = "AwaitingTaskRunResults"
// TaskRunReasonResultLargerThanAllowedLimit is the reason set when one of the results exceeds its maximum allowed limit of 1 KB
TaskRunReasonResultLargerThanAllowedLimit TaskRunReason = "TaskRunResultLargerThanAllowedLimit"
)

func (t TaskRunReason) String() string {
Expand Down
Loading

0 comments on commit 2c61ea8

Please sign in to comment.