Skip to content

Commit

Permalink
Update sources.sh to use bashbrew 0.1.10's ArchGitChecksum
Browse files Browse the repository at this point in the history
This makes a *very* dramatic difference in the speed of `sources.sh`.

Before:

```console
$ time .scripts/sources.sh $(cat subset.txt) > sources.json
...
real	6m13.444s
user	2m51.334s
sys	3m29.256s
```

After:

```console
$ time .scripts/sources.sh $(cat subset.txt) > sources.json
...
real	0m31.238s
user	0m32.769s
sys	0m1.375s
```
  • Loading branch information
tianon committed Jan 22, 2024
1 parent 19ca057 commit 59b029c
Show file tree
Hide file tree
Showing 6 changed files with 48 additions and 153 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@ jobs:
run: |
# not doing "uses: docker-library/bashbrew@xxx" because it'll build which is slow and we don't need more than just bashbrew here
mkdir .bin
wget -O .bin/bashbrew 'https://github.com/docker-library/bashbrew/releases/download/v0.1.9/bashbrew-amd64'
echo '8cdd7adc707b972040577006f7a05b8e9d4dd362be5069f862fd1885f2eb107a *.bin/bashbrew' | sha256sum --strict --check -
wget -O .bin/bashbrew 'https://github.com/docker-library/bashbrew/releases/download/v0.1.11/bashbrew-amd64'
echo '6203635644d0efef2886f8ea9c487995a7abc4166db7a4773e94f89c943a4b04 *.bin/bashbrew' | sha256sum --strict --check -
chmod +x .bin/bashbrew
.bin/bashbrew --version
echo "$PWD/.bin" >> "$GITHUB_PATH"
Expand Down
1 change: 0 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1 @@
builds
tar-scrubber
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ go 1.21

require (
github.com/containerd/containerd v1.6.19
github.com/docker-library/bashbrew v0.1.9
github.com/docker-library/bashbrew v0.1.11
github.com/opencontainers/image-spec v1.1.0-rc2.0.20221013174636-8159c8264e2e
github.com/sirupsen/logrus v1.9.0
golang.org/x/time v0.5.0
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@ github.com/containerd/containerd v1.6.19/go.mod h1:HZCDMn4v/Xl2579/MvtOC2M206i+J
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/docker-library/bashbrew v0.1.9 h1:mDRel5sLJzpWbbnacZ2Y0RL6pRsCD/DI57vwEIX0uHQ=
github.com/docker-library/bashbrew v0.1.9/go.mod h1:fp+ljAv22z5OK3k7gutU/1eZz0lIRnM6SSvfJd9ABEE=
github.com/docker-library/bashbrew v0.1.11 h1:9S6jYFu0+RaqEAfvS2lh7jcaDkcvFi2maB2aU3yb0TM=
github.com/docker-library/bashbrew v0.1.11/go.mod h1:6fyRRSm4vgBAgTw87EsfOT7wXKsc4JA9I5cdQJmwOm8=
github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk=
github.com/golang/protobuf v1.5.2 h1:ROPKBNFfQgOUMifHyP+KYbvpjbdoFNs+aK7DXlji0Tw=
github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY=
Expand Down
138 changes: 43 additions & 95 deletions sources.sh
Original file line number Diff line number Diff line change
Expand Up @@ -24,18 +24,6 @@ defaultArchNamespaces='
: "${BASHBREW_ARCH_NAMESPACES=$defaultArchNamespaces}"
export BASHBREW_ARCH_NAMESPACES

# resolve the directory this script lives in to a canonical path ("readlink -e" fails if it does not exist)
dir="$(dirname "$BASH_SOURCE")"
dir="$(readlink -ve "$dir")"
# (re)build the "tar-scrubber" helper when its Go source or the Go environment wrapper is newer than the existing binary
if [ "$dir/tar-scrubber.go" -nt "$dir/tar-scrubber" ] || [ "$dir/.go-env.sh" -nt "$dir/tar-scrubber" ]; then
# everything in this group is progress output, so it is sent to stderr to keep stdout clean
{
echo "building '$dir/tar-scrubber' from 'tar-scrubber.go'"
# NOTE(review): the build input/output paths here are relative to the current directory while the checks above and below use "$dir" -- confirm ".go-env.sh" changes into "$dir" before running the command
"$dir/.go-env.sh" go build -v -o tar-scrubber tar-scrubber.go
ls -l "$dir/tar-scrubber"
} >&2
fi
# sanity check that the helper exists and is executable (only fatal if errexit is enabled earlier in the script -- not visible here, TODO confirm)
[ -x "$dir/tar-scrubber" ]
# exported so the jq program further down can read the path via "env.tarScrubber"
export tarScrubber="$dir/tar-scrubber"

# let's resolve all the external pins so we can inject those too
libraryDir="${BASHBREW_LIBRARY:-"$HOME/docker/official-images/library"}"
libraryDir="$(readlink -ve "$libraryDir")"
Expand All @@ -51,99 +39,58 @@ for tag in $externalPins; do
externalPinsJson="$(jq <<<"$externalPinsJson" -c --arg tag "${tag#library/}" --arg digest "$digest" '.[$tag] = $digest')"
done

# print only the hex SHA-256 digest column of "sha256sum" (one line per input), dropping the file name that follows it
# arguments are passed straight through to "sha256sum", so with no arguments it hashes stdin
_sha256() {
	sha256sum "$@" \
		| cut -f 1 -d ' '
}

json="$(
bashbrew cat --build-order --format '
{{- range $e := .SortedEntries false -}}
{{- range $a := $e.Architectures -}}
{{- $archNs := archNamespace $a -}}
{{- with $e -}}
{
"repo": {{ $.RepoName | json }},
"arch": {{ $a | json }},
"platformString": {{ (ociPlatform $a).String | json }},
"platform": {{ ociPlatform $a | json }},
"gitCache": {{ gitCache | json }},
"tags": {{ $.Tags namespace false . | json }},
"archTags": {{ if $archNs -}} {{ $.Tags $archNs false . | json }} {{- else -}} [] {{- end }},
bashbrew cat --build-order --format '
{{- range $e := .SortedEntries false -}}
{{- range $a := $e.Architectures -}}
{{- $archNs := archNamespace $a -}}
{{- with $e -}}
{{- $sum := $.ArchGitChecksum $a . -}}
{{- $file := .ArchFile $a -}}
{{- $builder := .ArchBuilder $a -}}
{
"sourceId": {{ join "\n" $sum $file $builder "" | sha256sum | json }},
"reproducibleGitChecksum": {{ $sum | json }},
"allTags": [],
"entry": {
"GitRepo": {{ .ArchGitRepo $a | json }},
"GitFetch": {{ .ArchGitFetch $a | json }},
"GitCommit": {{ .ArchGitCommit $a | json }},
"Directory": {{ .ArchDirectory $a | json }},
"File": {{ .ArchFile $a | json }},
"Builder": {{ .ArchBuilder $a | json }},
"froms": {{ $.ArchDockerFroms $a . | json }}
"File": {{ $file | json }},
"Builder": {{ $builder | json }},
"SOURCE_DATE_EPOCH": {{ ($.ArchGitTime $a .).Unix | json }}
},
"arches": {
{{ $a | json }}: {
"tags": {{ $.Tags namespace false . | json }},
"archTags": {{ if $archNs -}} {{ $.Tags $archNs false . | json }} {{- else -}} [] {{- end }},
"froms": {{ $.ArchDockerFroms $a . | json }},
"platformString": {{ (ociPlatform $a).String | json }},
"platform": {{ ociPlatform $a | json }},
"parents": { }
}
}
{{- end -}}
}
{{- end -}}
{{- end -}}
' "$@"
)"

shell="$(
jq <<<"$json" -r '
. as $e
| {
reproducibleGitChecksum: (
[
# TODO do this inside bashbrew? (could then use go-git to make an even more deterministic tarball instead of munging Git afterwards, and could even do things like munge the Dockerfile to remove no-rebuild variance like comments and non-COPY-ed files)
"git -C \(.gitCache | @sh) archive --format=tar \(.GitCommit + ":" + (.Directory | if . == "." then "" else . + "/" end) | @sh)",
"\(env.tarScrubber | @sh) --sha256",
empty
] | join(" | ")
),
sourceId: "printf \("%s\\n" | @sh) \"$reproducibleGitChecksum\" \(.File | @sh) \(.Builder | @sh) | _sha256", # the combination of things that might cause a rebuild # TODO consider making this a compressed JSON object like buildId
SOURCE_DATE_EPOCH: "git -C \(.gitCache | @sh) show --no-patch --format=format:%ct \(.GitCommit | @sh)",
}
| to_entries
| [
"printf >&2 \("%s (%s): " | @sh) \($e.tags[0]) \($e.arch)",
empty
]
+ map(.key + "=\"$(" + .value + ")\"")
+ [
"export \(map(.key) | join(" "))",
"printf >&2 \("%s\\n" | @sh) \"$sourceId\"",
(
$e
| {
allTags: (.tags + .archTags),
entry: {
GitRepo: .GitRepo,
GitFetch: .GitFetch,
GitCommit: .GitCommit,
Directory: .Directory,
File: .File,
Builder: .Builder,
},
arches: {
(.arch): {
tags: .tags,
archTags: .archTags,
froms: .froms,
platformString: .platformString,
platform: .platform,
},
},
} as $obj
| "jq <<<\($obj | tojson | @sh) -c \("{ sourceId: env.sourceId, reproducibleGitChecksum: env.reproducibleGitChecksum } + . | .entry.SOURCE_DATE_EPOCH = (env.SOURCE_DATE_EPOCH | tonumber)" | @sh)"
),
empty
]
| join("\n")
'
)"
json="$(set -Eeuo pipefail; eval "$shell")"
jq <<<"$json" -s --argjson pins "$externalPinsJson" '
{{- end -}}
' "$@" | jq 3>&1 1>&2 2>&3- -r '
# https://github.com/jqlang/jq/issues/2063 - "stderr" cannot functionally output a string correctly until jq 1.7+ (which is very very recent), so we hack around it to get some progress output by using Bash to swap stdout and stderr so we can output our objects to stderr and our progress text to stdout and "fix it in post"
# TODO balk / error at multiple arches entries
first(.arches[].tags[], .arches[].archTags[]) as $tag
| first(.arches | keys_unsorted[]) as $arch
| stderr
| "\($tag) (\($arch)): \(.sourceId)"
# TODO if we could get jq 1.7+ for sure, we can drop this entire "jq" invocation and instead have the reduce loop of the following invocation print status strings directly to "stderr"
' | jq -n --argjson pins "$externalPinsJson" '
def unique_unsorted:
# de-duplicate the input array, keeping the first occurrence of each element in its original position (the builtin "unique" would sort the result instead)
# each element is appended to the accumulator only if it is not already in it
# https://unix.stackexchange.com/a/738744/153467
reduce .[] as $a ([]; if IN(.[]; $a) then . else . += [$a] end)
;
reduce .[] as $in ({};
.[$in.sourceId] |= (
reduce inputs as $in ({};
# TODO if we can get support for joining two arrays directly in bashbrew (sprig?) then we can avoid this .arches[] nonsense
($in | .allTags = ([ $in.arches[].tags[], $in.arches[].archTags[] ] | unique_unsorted)) as $in
| .[$in.sourceId] |=
if . == null then
$in
else
Expand All @@ -167,8 +114,9 @@ jq <<<"$json" -s --argjson pins "$externalPinsJson" '
.entry = $in.entry
else . end
end
)
)
# TODO a lot of this could be removed/parsed during the above reduce, since it has to parse things in build order anyhow
# TODO actually, instead, this bit should be a totally separate script so the use case of "combine sources.json files together" works better 👀
| (
reduce to_entries[] as $e ({};
$e.key as $sourceId
Expand Down
52 changes: 0 additions & 52 deletions tar-scrubber.go

This file was deleted.

0 comments on commit 59b029c

Please sign in to comment.