Skip to content

Commit

Permalink
Compute the layer size from tar-split for zstd:chunked layers
Browse files Browse the repository at this point in the history
Signed-off-by: Miloslav Trmač <[email protected]>
  • Loading branch information
mtrmac committed Oct 11, 2024
1 parent de3e71f commit 92824f9
Show file tree
Hide file tree
Showing 3 changed files with 112 additions and 26 deletions.
30 changes: 30 additions & 0 deletions pkg/chunked/compression_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,36 @@ func ensureTOCMatchesTarSplit(toc *internal.TOC, tarSplit []byte) error {
return nil
}

// tarSizeFromTarSplit returns the total size, in bytes, of the tarball described by tarSplit,
// using only the tar-split metadata (the file contents themselves are not consulted).
//
// On failure, it returns -1 and an error.
func tarSizeFromTarSplit(tarSplit []byte) (int64, error) {
	var total int64

	entries := storage.NewJSONUnpacker(bytes.NewReader(tarSplit))
	for {
		e, err := entries.Next()
		if err == io.EOF {
			// All entries consumed; the accumulated sum is the tarball size.
			return total, nil
		}
		if err != nil {
			return -1, fmt.Errorf("reading tar-split entries: %w", err)
		}

		switch e.Type {
		case storage.FileType:
			// e.Size is the “logical size”, which might differ from the physical size for sparse entries.
			// However, given how tar-split/tar/asm.WriteOutputTarStream combines FileType entries with the
			// returned file contents, sparse files are not supported there anyway.
			// Also https://github.com/opencontainers/image-spec/blob/main/layer.md says
			// > Sparse files SHOULD NOT be used because they lack consistent support across tar implementations.
			total += e.Size
		case storage.SegmentType:
			// Raw tar stream bytes are stored inline in the entry payload.
			total += int64(len(e.Payload))
		default:
			return -1, fmt.Errorf("unexpected tar-split entry type %q", e.Type)
		}
	}
}

// ensureTimePointersMatch ensures that a and b are equal
func ensureTimePointersMatch(a, b *time.Time) error {
// We didn’t always use “timeIfNotZero” when creating the TOC, so treat time.IsZero the same as nil.
Expand Down
45 changes: 45 additions & 0 deletions pkg/chunked/compression_linux_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
package chunked

import (
"bytes"
"io"
"testing"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/vbatts/tar-split/archive/tar"
"github.com/vbatts/tar-split/tar/asm"
"github.com/vbatts/tar-split/tar/storage"
)

// TestTarSizeFromTarSplit verifies that tarSizeFromTarSplit, given only the tar-split
// metadata of a tarball, reports exactly the byte length of that tarball.
func TestTarSizeFromTarSplit(t *testing.T) {
	// Build an in-memory tarball from the shared someFiles fixture, using zero-filled contents.
	var tarball bytes.Buffer
	tarWriter := tar.NewWriter(&tarball)
	for _, e := range someFiles {
		tf, err := typeToTarType(e.Type)
		require.NoError(t, err)
		err = tarWriter.WriteHeader(&tar.Header{
			Typeflag: tf,
			Name:     e.Name,
			Size:     e.Size,
			Mode:     e.Mode,
		})
		require.NoError(t, err)
		data := make([]byte, e.Size)
		_, err = tarWriter.Write(data)
		require.NoError(t, err)
	}
	err := tarWriter.Close()
	require.NoError(t, err)
	// Record the size before the buffer is read below.
	// The conversion to int64 matters: tarSizeFromTarSplit returns int64, and assert.Equal
	// treats values of different types (int vs. int64) as unequal even if numerically identical.
	expectedTarSize := int64(tarball.Len())

	// Generate the tar-split metadata for the tarball; file contents are discarded.
	var tarSplit bytes.Buffer
	tsReader, err := asm.NewInputTarStream(&tarball, storage.NewJSONPacker(&tarSplit), storage.NewDiscardFilePutter())
	require.NoError(t, err)
	_, err = io.Copy(io.Discard, tsReader)
	require.NoError(t, err)

	res, err := tarSizeFromTarSplit(tarSplit.Bytes())
	require.NoError(t, err)
	assert.Equal(t, expectedTarSize, res)
}
63 changes: 37 additions & 26 deletions pkg/chunked/storage_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,8 @@ type chunkedDiffer struct {
// is no TOC referenced by the manifest.
blobDigest digest.Digest

blobSize int64
blobSize int64
uncompressedTarSize int64 // -1 if unknown

pullOptions map[string]string

Expand Down Expand Up @@ -216,6 +217,7 @@ func makeConvertFromRawDiffer(store storage.Store, blobDigest digest.Digest, blo
fsVerityDigests: make(map[string]string),
blobDigest: blobDigest,
blobSize: blobSize,
uncompressedTarSize: -1, // Will be computed later
convertToZstdChunked: true,
copyBuffer: makeCopyBuffer(),
layersCache: layersCache,
Expand All @@ -229,24 +231,33 @@ func makeZstdChunkedDiffer(store storage.Store, blobSize int64, tocDigest digest
if err != nil {
return nil, fmt.Errorf("read zstd:chunked manifest: %w", err)
}
// The uncompressed tarball size can only be computed up front when tar-split metadata
// is available; otherwise it stays at -1 (unknown).
var uncompressedTarSize int64 = -1
if tarSplit != nil {
	uncompressedTarSize, err = tarSizeFromTarSplit(tarSplit)
	if err != nil {
		// Wrap the underlying error so callers can see why the computation failed.
		return nil, fmt.Errorf("computing size from tar-split: %w", err)
	}
}

layersCache, err := getLayersCache(store)
if err != nil {
return nil, err
}

return &chunkedDiffer{
fsVerityDigests: make(map[string]string),
blobSize: blobSize,
tocDigest: tocDigest,
copyBuffer: makeCopyBuffer(),
fileType: fileTypeZstdChunked,
layersCache: layersCache,
manifest: manifest,
toc: toc,
pullOptions: pullOptions,
stream: iss,
tarSplit: tarSplit,
tocOffset: tocOffset,
fsVerityDigests: make(map[string]string),
blobSize: blobSize,
uncompressedTarSize: uncompressedTarSize,
tocDigest: tocDigest,
copyBuffer: makeCopyBuffer(),
fileType: fileTypeZstdChunked,
layersCache: layersCache,
manifest: manifest,
toc: toc,
pullOptions: pullOptions,
stream: iss,
tarSplit: tarSplit,
tocOffset: tocOffset,
}, nil
}

Expand All @@ -261,16 +272,17 @@ func makeEstargzChunkedDiffer(store storage.Store, blobSize int64, tocDigest dig
}

return &chunkedDiffer{
fsVerityDigests: make(map[string]string),
blobSize: blobSize,
tocDigest: tocDigest,
copyBuffer: makeCopyBuffer(),
fileType: fileTypeEstargz,
layersCache: layersCache,
manifest: manifest,
pullOptions: pullOptions,
stream: iss,
tocOffset: tocOffset,
fsVerityDigests: make(map[string]string),
blobSize: blobSize,
uncompressedTarSize: -1, // We would have to read and decompress the whole layer
tocDigest: tocDigest,
copyBuffer: makeCopyBuffer(),
fileType: fileTypeEstargz,
layersCache: layersCache,
manifest: manifest,
pullOptions: pullOptions,
stream: iss,
tocOffset: tocOffset,
}, nil
}

Expand Down Expand Up @@ -1153,7 +1165,6 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions, diff

var compressedDigest digest.Digest
var uncompressedDigest digest.Digest
var uncompressedSize int64 = -1

if c.convertToZstdChunked {
fd, err := unix.Open(dest, unix.O_TMPFILE|unix.O_RDWR|unix.O_CLOEXEC, 0o600)
Expand Down Expand Up @@ -1185,7 +1196,7 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions, diff
if err != nil {
return graphdriver.DriverWithDifferOutput{}, err
}
uncompressedSize = tarSize
c.uncompressedTarSize = tarSize
// fileSource is a O_TMPFILE file descriptor, so we
// need to keep it open until the entire file is processed.
defer fileSource.Close()
Expand Down Expand Up @@ -1255,7 +1266,7 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions, diff
TOCDigest: c.tocDigest,
UncompressedDigest: uncompressedDigest,
CompressedDigest: compressedDigest,
Size: uncompressedSize,
Size: c.uncompressedTarSize,
}

// When the hard links deduplication is used, file attributes are ignored because setting them
Expand Down

0 comments on commit 92824f9

Please sign in to comment.