Correctly compute UncompressedSize on zstd:chunked pull, don’t set it on estargz #2130
@@ -139,7 +139,7 @@ func readEstargzChunkedManifest(blobStream ImageSourceSeekable, blobSize int64,
 }
 
 // readZstdChunkedManifest reads the zstd:chunked manifest from the seekable stream blobStream.
-// Returns (manifest blob, parsed manifest, tar-split blob, manifest offset).
+// Returns (manifest blob, parsed manifest, tar-split blob or nil, manifest offset).
 func readZstdChunkedManifest(blobStream ImageSourceSeekable, tocDigest digest.Digest, annotations map[string]string) ([]byte, *internal.TOC, []byte, int64, error) {
 	offsetMetadata := annotations[internal.ManifestInfoKey]
 	if offsetMetadata == "" {
@@ -214,7 +214,7 @@ func readZstdChunkedManifest(blobStream ImageSourceSeekable, tocDigest digest.Di
 		return nil, nil, nil, 0, fmt.Errorf("unmarshaling TOC: %w", err)
 	}
 
-	decodedTarSplit := []byte{}
+	var decodedTarSplit []byte = nil
 	if toc.TarSplitDigest != "" {
 		if tarSplitChunk.Offset <= 0 {
 			return nil, nil, nil, 0, fmt.Errorf("TOC requires a tar-split, but the %s annotation does not describe a position", internal.TarSplitInfoKey)
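The change above replaces an always-non-nil empty slice with an explicit nil, so that callers of readZstdChunkedManifest can distinguish "no tar-split present" from "tar-split present but empty"; makeZstdChunkedDiffer further down relies on exactly that with its tarSplit != nil check. A minimal standalone sketch of the nil-versus-empty distinction (plain Go, not code from this repository):

    package main

    import "fmt"

    func main() {
    	var absent []byte   // nil slice: the "no tar-split" case
    	present := []byte{} // non-nil but empty

    	fmt.Println(absent == nil, len(absent))   // prints: true 0
    	fmt.Println(present == nil, len(present)) // prints: false 0

    	// len() alone cannot tell the two cases apart; the nil comparison can.
    }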
@@ -288,6 +288,36 @@ func ensureTOCMatchesTarSplit(toc *internal.TOC, tarSplit []byte) error {
 	return nil
 }
 
+// tarSizeFromTarSplit computes the total tarball size, using only the tarSplit metadata
+func tarSizeFromTarSplit(tarSplit []byte) (int64, error) {
+	var res int64 = 0
+
+	unpacker := storage.NewJSONUnpacker(bytes.NewReader(tarSplit))
+	for {
+		entry, err := unpacker.Next()
+		if err != nil {
+			if err == io.EOF {
+				break
+			}
+			return -1, fmt.Errorf("reading tar-split entries: %w", err)
+		}
+		switch entry.Type {
+		case storage.SegmentType:
+			res += int64(len(entry.Payload))
+		case storage.FileType:
+			// entry.Size is the “logical size”, which might not be the physical size for sparse entries;
+			// but the way tar-split/tar/asm.WriteOutputTarStream combines FileType entries and returned file contents,
+			// sparse files are not supported.
+			// Also https://github.com/opencontainers/image-spec/blob/main/layer.md says
+			// > Sparse files SHOULD NOT be used because they lack consistent support across tar implementations.
+			res += entry.Size
+		default:
+			return -1, fmt.Errorf("unexpected tar-split entry type %q", entry.Type)
+		}
+	}
+	return res, nil
+}
+
 // ensureTimePointersMatch ensures that a and b are equal
 func ensureTimePointersMatch(a, b *time.Time) error {
 	// We didn’t always use “timeIfNotZero” when creating the TOC, so treat time.IsZero the same as nil.

Review thread on the “sparse files are not supported” comment:

Review comment: But we don’t error out today?

Reply: If we should be erroring out, that’s something that should happen at tar-split construction. The physical size outright isn’t available in the tar-split format, so here this consumer is only documenting the impact of that assumption. (As is, sparse files are barely supported by archive/tar or the tar-split fork: they are indicated either by a special file type, or as a regular file with a special PAX record; and there is no API to expose the data / hole segments. Also,
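To make concrete what tarSizeFromTarSplit walks over, here is a small self-contained sketch, separate from the PR and from the unit test that follows, which builds a one-file tarball, records its tar-split with the same asm and storage packages, and then checks that summing SegmentType payload lengths plus FileType sizes reproduces the original tarball length. The file name and contents are arbitrary:

    package main

    import (
    	"archive/tar"
    	"bytes"
    	"fmt"
    	"io"

    	"github.com/vbatts/tar-split/tar/asm"
    	"github.com/vbatts/tar-split/tar/storage"
    )

    func main() {
    	// Build a tiny tarball with a single 5-byte regular file.
    	var tarball bytes.Buffer
    	tw := tar.NewWriter(&tarball)
    	if err := tw.WriteHeader(&tar.Header{Name: "hello.txt", Mode: 0o644, Size: 5}); err != nil {
    		panic(err)
    	}
    	if _, err := tw.Write([]byte("hello")); err != nil {
    		panic(err)
    	}
    	if err := tw.Close(); err != nil {
    		panic(err)
    	}

    	// Record the tar-split metadata while discarding the file payloads themselves.
    	var tarSplit bytes.Buffer
    	r, err := asm.NewInputTarStream(bytes.NewReader(tarball.Bytes()),
    		storage.NewJSONPacker(&tarSplit), storage.NewDiscardFilePutter())
    	if err != nil {
    		panic(err)
    	}
    	if _, err := io.Copy(io.Discard, r); err != nil {
    		panic(err)
    	}

    	// Walk the recorded entries: segments carry raw header/padding bytes,
    	// file entries carry only the logical payload size.
    	var total int64
    	unpacker := storage.NewJSONUnpacker(bytes.NewReader(tarSplit.Bytes()))
    	for {
    		entry, err := unpacker.Next()
    		if err == io.EOF {
    			break
    		}
    		if err != nil {
    			panic(err)
    		}
    		switch entry.Type {
    		case storage.SegmentType:
    			total += int64(len(entry.Payload))
    		case storage.FileType:
    			total += entry.Size
    		}
    	}
    	fmt.Println(total == int64(tarball.Len())) // expected output: true
    }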
@@ -0,0 +1,45 @@
+package chunked
+
+import (
+	"bytes"
+	"io"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+	"github.com/vbatts/tar-split/archive/tar"
+	"github.com/vbatts/tar-split/tar/asm"
+	"github.com/vbatts/tar-split/tar/storage"
+)
+
+func TestTarSizeFromTarSplit(t *testing.T) {
+	var tarball bytes.Buffer
+	tarWriter := tar.NewWriter(&tarball)
+	for _, e := range someFiles {
+		tf, err := typeToTarType(e.Type)
+		require.NoError(t, err)
+		err = tarWriter.WriteHeader(&tar.Header{
+			Typeflag: tf,
+			Name:     e.Name,
+			Size:     e.Size,
+			Mode:     e.Mode,
+		})
+		require.NoError(t, err)
+		data := make([]byte, e.Size)
+		_, err = tarWriter.Write(data)
+		require.NoError(t, err)
+	}
+	err := tarWriter.Close()
+	require.NoError(t, err)
+	expectedTarSize := int64(tarball.Len())
+
+	var tarSplit bytes.Buffer
+	tsReader, err := asm.NewInputTarStream(&tarball, storage.NewJSONPacker(&tarSplit), storage.NewDiscardFilePutter())
+	require.NoError(t, err)
+	_, err = io.Copy(io.Discard, tsReader)
+	require.NoError(t, err)
+
+	res, err := tarSizeFromTarSplit(tarSplit.Bytes())
+	require.NoError(t, err)
+	assert.Equal(t, expectedTarSize, res)
+}
@@ -89,7 +89,8 @@ type chunkedDiffer struct {
 	// is no TOC referenced by the manifest.
 	blobDigest digest.Digest
 
-	blobSize int64
+	blobSize            int64
+	uncompressedTarSize int64 // -1 if unknown
 
 	pullOptions map[string]string
@@ -216,6 +217,7 @@ func makeConvertFromRawDiffer(store storage.Store, blobDigest digest.Digest, blo
 		fsVerityDigests:      make(map[string]string),
 		blobDigest:           blobDigest,
 		blobSize:             blobSize,
+		uncompressedTarSize:  -1, // Will be computed later
 		convertToZstdChunked: true,
 		copyBuffer:           makeCopyBuffer(),
 		layersCache:          layersCache,
@@ -229,24 +231,33 @@ func makeZstdChunkedDiffer(store storage.Store, blobSize int64, tocDigest digest
 	if err != nil {
 		return nil, fmt.Errorf("read zstd:chunked manifest: %w", err)
 	}
+	var uncompressedTarSize int64 = -1
+	if tarSplit != nil {
+		uncompressedTarSize, err = tarSizeFromTarSplit(tarSplit)
+		if err != nil {
+			return nil, fmt.Errorf("computing size from tar-split")
+		}
+	}
 
 	layersCache, err := getLayersCache(store)
 	if err != nil {
 		return nil, err
 	}
 
 	return &chunkedDiffer{
-		fsVerityDigests: make(map[string]string),
-		blobSize:        blobSize,
-		tocDigest:       tocDigest,
-		copyBuffer:      makeCopyBuffer(),
-		fileType:        fileTypeZstdChunked,
-		layersCache:     layersCache,
-		manifest:        manifest,
-		toc:             toc,
-		pullOptions:     pullOptions,
-		stream:          iss,
-		tarSplit:        tarSplit,
-		tocOffset:       tocOffset,
+		fsVerityDigests:     make(map[string]string),
+		blobSize:            blobSize,
+		uncompressedTarSize: uncompressedTarSize,
+		tocDigest:           tocDigest,
+		copyBuffer:          makeCopyBuffer(),
+		fileType:            fileTypeZstdChunked,
+		layersCache:         layersCache,
+		manifest:            manifest,
+		toc:                 toc,
+		pullOptions:         pullOptions,
+		stream:              iss,
+		tarSplit:            tarSplit,
+		tocOffset:           tocOffset,
 	}, nil
 }

Review thread on the fmt.Errorf("computing size from tar-split") line: a reviewer left a nit here; the author replied “Thanks! Fixed in #2136.”
@@ -261,16 +272,17 @@ func makeEstargzChunkedDiffer(store storage.Store, blobSize int64, tocDigest dig
 	}
 
 	return &chunkedDiffer{
-		fsVerityDigests: make(map[string]string),
-		blobSize:        blobSize,
-		tocDigest:       tocDigest,
-		copyBuffer:      makeCopyBuffer(),
-		fileType:        fileTypeEstargz,
-		layersCache:     layersCache,
-		manifest:        manifest,
-		pullOptions:     pullOptions,
-		stream:          iss,
-		tocOffset:       tocOffset,
+		fsVerityDigests:     make(map[string]string),
+		blobSize:            blobSize,
+		uncompressedTarSize: -1, // We would have to read and decompress the whole layer
+		tocDigest:           tocDigest,
+		copyBuffer:          makeCopyBuffer(),
+		fileType:            fileTypeEstargz,
+		layersCache:         layersCache,
+		manifest:            manifest,
+		pullOptions:         pullOptions,
+		stream:              iss,
+		tocOffset:           tocOffset,
 	}, nil
 }
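The -1 with the “We would have to read and decompress the whole layer” comment reflects a property of gzip-based estargz layers: the gzip trailer records the uncompressed size only modulo 2^32, so learning the exact size in general means streaming the whole blob through a decompressor. A rough standard-library illustration of that cost, reading a gzip stream from stdin (not code from this repository):

    package main

    import (
    	"compress/gzip"
    	"fmt"
    	"io"
    	"log"
    	"os"
    )

    func main() {
    	zr, err := gzip.NewReader(os.Stdin)
    	if err != nil {
    		log.Fatal(err)
    	}
    	defer zr.Close()

    	// Counting the uncompressed bytes requires decompressing everything.
    	n, err := io.Copy(io.Discard, zr)
    	if err != nil {
    		log.Fatal(err)
    	}
    	fmt.Println("uncompressed size:", n)
    }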
@@ -1153,7 +1165,6 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions, diff
 	var compressedDigest digest.Digest
 	var uncompressedDigest digest.Digest
-	var convertedBlobSize int64
 
 	if c.convertToZstdChunked {
 		fd, err := unix.Open(dest, unix.O_TMPFILE|unix.O_RDWR|unix.O_CLOEXEC, 0o600)
@@ -1185,7 +1196,7 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions, diff
 		if err != nil {
 			return graphdriver.DriverWithDifferOutput{}, err
 		}
-		convertedBlobSize = tarSize
+		c.uncompressedTarSize = tarSize
 		// fileSource is a O_TMPFILE file descriptor, so we
 		// need to keep it open until the entire file is processed.
 		defer fileSource.Close()
@@ -1255,6 +1266,7 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions, diff
 		TOCDigest:          c.tocDigest,
 		UncompressedDigest: uncompressedDigest,
 		CompressedDigest:   compressedDigest,
+		Size:               c.uncompressedTarSize,
 	}
 
 	// When the hard links deduplication is used, file attributes are ignored because setting them
@@ -1268,19 +1280,12 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions, diff
 	var missingParts []missingPart
 
-	mergedEntries, totalSizeFromTOC, err := c.mergeTocEntries(c.fileType, toc.Entries)
+	mergedEntries, err := c.mergeTocEntries(c.fileType, toc.Entries)
 	if err != nil {
 		return output, err
 	}
 
 	output.UIDs, output.GIDs = collectIDs(mergedEntries)
-	if convertedBlobSize > 0 {
-		// if the image was converted, store the original tar size, so that
-		// it can be recreated correctly.
-		output.Size = convertedBlobSize
-	} else {
-		output.Size = totalSizeFromTOC
-	}
 
 	if err := maybeDoIDRemap(mergedEntries, options); err != nil {
 		return output, err
@@ -1597,9 +1602,7 @@ func mustSkipFile(fileType compressedFileType, e internal.FileMetadata) bool {
 	return false
 }
 
-func (c *chunkedDiffer) mergeTocEntries(fileType compressedFileType, entries []internal.FileMetadata) ([]fileMetadata, int64, error) {
-	var totalFilesSize int64
-
+func (c *chunkedDiffer) mergeTocEntries(fileType compressedFileType, entries []internal.FileMetadata) ([]fileMetadata, error) {
 	countNextChunks := func(start int) int {
 		count := 0
 		for _, e := range entries[start:] {
@@ -1629,10 +1632,8 @@ func (c *chunkedDiffer) mergeTocEntries(fileType compressedFileType, entries []i
 			continue
 		}
 
-		totalFilesSize += e.Size
-
 		if e.Type == TypeChunk {
-			return nil, -1, fmt.Errorf("chunk type without a regular file")
+			return nil, fmt.Errorf("chunk type without a regular file")
 		}
 
 		if e.Type == TypeReg {
@@ -1668,7 +1669,7 @@ func (c *chunkedDiffer) mergeTocEntries(fileType compressedFileType, entries []i
 			lastChunkOffset = mergedEntries[i].chunks[j].Offset
 		}
 	}
-	return mergedEntries, totalFilesSize, nil
+	return mergedEntries, nil
 }
 
 // validateChunkChecksum checks if the file at $root/$path[offset:chunk.ChunkSize] has the
Review comment: You need indentation for list items (same as for code blocks, i.e. an extra space).

Reply: Yeah, gofmt didn’t want to help with this (and struct field comments are formatted in HTML just as an uninterpreted code block), so I punted. You’re right, this is the right thing to do. Fixed in #2136.
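To make the reviewer’s point concrete: in Go doc comments, the extra leading space before each item is what marks the lines as list or preformatted content for Go tooling rather than free-flowing comment text. A hypothetical field comment written that way (the package name and the wording of the items are invented for illustration, not taken from the PR):

    package chunkedexample // hypothetical package name

    type differ struct {
    	// uncompressedTarSize is the size of the uncompressed tarball, or -1 if unknown;
    	// possible sources:
    	//  - computed from the tar-split metadata (zstd:chunked)
    	//  - measured while converting the layer to zstd:chunked
    	//  - unavailable without decompressing the whole blob (estargz), so left at -1
    	uncompressedTarSize int64
    }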