25 MB/s read limit |
---|
- -| Compressor Name | Ratio | Compressed Size | Compression Time | -|:----------------|------:|----------------:|-----------------:| -| zstd -3 | 2.108 | 20.718 GB | 29m 48.530s | -| zstd-adaptive | 2.230 | 19.581 GB | 29m 48.798s | - - |
5 MB/s write limit |
---|
- -| Compressor Name | Ratio | Compressed Size | Compression Time | -|:----------------|------:|----------------:|-----------------:| -| zstd -3 | 2.108 | 20.718 GB | 1h 10m 43.076s | -| zstd-adaptive | 2.249 | 19.412 GB | 1h 06m 15.577s | - - |
When decompressing many times, * ZSTD_c_forceAttachDict * ZSTD_c_literalCompressionMode * ZSTD_c_targetCBlockSize + * ZSTD_c_srcSizeHint * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. * note : never ever use experimentalParam? names directly; * also, the enums values themselves are unstable and can still change. @@ -334,6 +335,7 @@Decompression context
When decompressing many times, ZSTD_c_experimentalParam4=1001, ZSTD_c_experimentalParam5=1002, ZSTD_c_experimentalParam6=1003, + ZSTD_c_experimentalParam7=1004, } ZSTD_cParameter;
typedef struct { @@ -1005,14 +1007,23 @@Streaming decompression functions
size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams); size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params); size_t ZSTD_estimateDCtxSize(void); -These functions make it possible to estimate memory usage - of a future {D,C}Ctx, before its creation. - ZSTD_estimateCCtxSize() will provide a budget large enough for any compression level up to selected one. - It will also consider src size to be arbitrarily "large", which is worst case. - If srcSize is known to always be small, ZSTD_estimateCCtxSize_usingCParams() can provide a tighter estimation. - ZSTD_estimateCCtxSize_usingCParams() can be used in tandem with ZSTD_getCParams() to create cParams from compressionLevel. - ZSTD_estimateCCtxSize_usingCCtxParams() can be used in tandem with ZSTD_CCtxParams_setParameter(). Only single-threaded compression is supported. This function will return an error code if ZSTD_c_nbWorkers is >= 1. - Note : CCtx size estimation is only correct for single-threaded compression. +
These functions make it possible to estimate memory usage of a future + {D,C}Ctx, before its creation. + + ZSTD_estimateCCtxSize() will provide a budget large enough for any + compression level up to selected one. Unlike ZSTD_estimateCStreamSize*(), + this estimate does not include space for a window buffer, so this estimate + is guaranteed to be enough for single-shot compressions, but not streaming + compressions. It will however assume the input may be arbitrarily large, + which is the worst case. If srcSize is known to always be small, + ZSTD_estimateCCtxSize_usingCParams() can provide a tighter estimation. + ZSTD_estimateCCtxSize_usingCParams() can be used in tandem with + ZSTD_getCParams() to create cParams from compressionLevel. + ZSTD_estimateCCtxSize_usingCCtxParams() can be used in tandem with + ZSTD_CCtxParams_setParameter(). + + Note: only single-threaded compression is supported. This function will + return an error code if ZSTD_c_nbWorkers is >= 1.
size_t ZSTD_estimateCStreamSize(int compressionLevel); @@ -1318,7 +1329,10 @@Advanced Streaming compression functions
/**! ZST /**! ZSTD_initCStream_advanced() : * This function is deprecated, and is approximately equivalent to: * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); - * ZSTD_CCtx_setZstdParams(zcs, params); // Set the zstd params and leave the rest as-is + * // Pseudocode: Set each zstd parameter and leave the rest as-is. + * for ((param, value) : params) { + * ZSTD_CCtx_setParameter(zcs, param, value); + * } * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); * ZSTD_CCtx_loadDictionary(zcs, dict, dictSize); * @@ -1338,7 +1352,10 @@Advanced Streaming compression functions
/**! ZST /**! ZSTD_initCStream_usingCDict_advanced() : * This function is deprecated, and is approximately equivalent to: * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); - * ZSTD_CCtx_setZstdFrameParams(zcs, fParams); // Set the zstd frame params and leave the rest as-is + * // Pseudocode: Set each zstd frame parameter and leave the rest as-is. + * for ((fParam, value) : fParams) { + * ZSTD_CCtx_setParameter(zcs, fParam, value); + * } * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); * ZSTD_CCtx_refCDict(zcs, cdict); * diff --git a/examples/streaming_compression.c b/examples/streaming_compression.c index d1353a684a..d0b04895f0 100644 --- a/examples/streaming_compression.c +++ b/examples/streaming_compression.c @@ -44,8 +44,8 @@ static void compressFile_orDie(const char* fname, const char* outName, int cLeve * and writes all output produced to the output file. */ size_t const toRead = buffInSize; - size_t read; - while ((read = fread_orDie(buffIn, toRead, fin))) { + for (;;) { + size_t read = fread_orDie(buffIn, toRead, fin); /* Select the flush mode. * If the read may not be finished (read == toRead) we use * ZSTD_e_continue. If this is the last chunk, we use ZSTD_e_end. @@ -76,6 +76,10 @@ static void compressFile_orDie(const char* fname, const char* outName, int cLeve } while (!finished); CHECK(input.pos == input.size, "Impossible: zstd only returns 0 when the input is completely consumed!"); + + if (lastChunk) { + break; + } } ZSTD_freeCCtx(cctx); diff --git a/examples/streaming_decompression.c b/examples/streaming_decompression.c index bcd861b756..d26b45b34c 100644 --- a/examples/streaming_decompression.c +++ b/examples/streaming_decompression.c @@ -34,7 +34,10 @@ static void decompressFile_orDie(const char* fname) */ size_t const toRead = buffInSize; size_t read; + size_t lastRet = 0; + int isEmpty = 1; while ( (read = fread_orDie(buffIn, toRead, fin)) ) { + isEmpty = 0; ZSTD_inBuffer input = { buffIn, read, 0 }; /* Given a valid frame, zstd won't consume the last byte of the frame * until it has flushed all of the decompressed data of the frame. @@ -53,9 +56,24 @@ static void decompressFile_orDie(const char* fname) size_t const ret = ZSTD_decompressStream(dctx, &output , &input); CHECK_ZSTD(ret); fwrite_orDie(buffOut, output.pos, fout); + lastRet = ret; } } + if (isEmpty) { + fprintf(stderr, "input is empty\n"); + exit(1); + } + + if (lastRet != 0) { + /* The last return value from ZSTD_decompressStream did not end on a + * frame, but we reached the end of the file! We assume this is an + * error, and the input was truncated. + */ + fprintf(stderr, "EOF before end of stream: %zu\n", lastRet); + exit(1); + } + ZSTD_freeDCtx(dctx); fclose_orDie(fin); fclose_orDie(fout); diff --git a/lib/common/bitstream.h b/lib/common/bitstream.h index 7bdb060460..1c294b80d1 100644 --- a/lib/common/bitstream.h +++ b/lib/common/bitstream.h @@ -164,7 +164,7 @@ MEM_STATIC unsigned BIT_highbit32 (U32 val) _BitScanReverse ( &r, val ); return (unsigned) r; # elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */ - return 31 - __builtin_clz (val); + return __builtin_clz (val) ^ 31; # elif defined(__ICCARM__) /* IAR Intrinsic */ return 31 - __CLZ(val); # else /* Software version */ @@ -244,9 +244,9 @@ MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC) { size_t const nbBytes = bitC->bitPos >> 3; assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8); + assert(bitC->ptr <= bitC->endPtr); MEM_writeLEST(bitC->ptr, bitC->bitContainer); bitC->ptr += nbBytes; - assert(bitC->ptr <= bitC->endPtr); bitC->bitPos &= 7; bitC->bitContainer >>= nbBytes*8; } @@ -260,6 +260,7 @@ MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC) { size_t const nbBytes = bitC->bitPos >> 3; assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8); + assert(bitC->ptr <= bitC->endPtr); MEM_writeLEST(bitC->ptr, bitC->bitContainer); bitC->ptr += nbBytes; if (bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr; diff --git a/lib/common/compiler.h b/lib/common/compiler.h index 6686b837d6..1877a0c1d9 100644 --- a/lib/common/compiler.h +++ b/lib/common/compiler.h @@ -61,6 +61,13 @@ # define HINT_INLINE static INLINE_KEYWORD FORCE_INLINE_ATTR #endif +/* UNUSED_ATTR tells the compiler it is okay if the function is unused. */ +#if defined(__GNUC__) +# define UNUSED_ATTR __attribute__((unused)) +#else +# define UNUSED_ATTR +#endif + /* force no inlining */ #ifdef _MSC_VER # define FORCE_NOINLINE static __declspec(noinline) @@ -127,9 +134,14 @@ } \ } -/* vectorization */ +/* vectorization + * older GCC (pre gcc-4.3 picked as the cutoff) uses a different syntax */ #if !defined(__clang__) && defined(__GNUC__) -# define DONT_VECTORIZE __attribute__((optimize("no-tree-vectorize"))) +# if (__GNUC__ == 4 && __GNUC_MINOR__ > 3) || (__GNUC__ >= 5) +# define DONT_VECTORIZE __attribute__((optimize("no-tree-vectorize"))) +# else +# define DONT_VECTORIZE _Pragma("GCC optimize(\"no-tree-vectorize\")") +# endif #else # define DONT_VECTORIZE #endif diff --git a/lib/common/fse.h b/lib/common/fse.h index 811c670bdd..a7553e3721 100644 --- a/lib/common/fse.h +++ b/lib/common/fse.h @@ -308,7 +308,7 @@ If there is an error, the function will return an error code, which can be teste *******************************************/ /* FSE buffer bounds */ #define FSE_NCOUNTBOUND 512 -#define FSE_BLOCKBOUND(size) (size + (size>>7)) +#define FSE_BLOCKBOUND(size) (size + (size>>7) + 4 /* fse states */ + sizeof(size_t) /* bitContainer */) #define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size)) /* Macro version, useful for static allocation */ /* It is possible to statically allocate FSE CTable/DTable as a table of FSE_CTable/FSE_DTable using below macros */ diff --git a/lib/common/fse_decompress.c b/lib/common/fse_decompress.c index 72bbead5be..4f07378982 100644 --- a/lib/common/fse_decompress.c +++ b/lib/common/fse_decompress.c @@ -52,7 +52,9 @@ #define FSE_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) /* use only *after* variable declarations */ /* check and forward error code */ +#ifndef CHECK_F #define CHECK_F(f) { size_t const e = f; if (FSE_isError(e)) return e; } +#endif /* ************************************************************** diff --git a/lib/common/mem.h b/lib/common/mem.h index c10d7f61e1..2b115ddb6a 100644 --- a/lib/common/mem.h +++ b/lib/common/mem.h @@ -47,6 +47,39 @@ extern "C" { #define MEM_STATIC_ASSERT(c) { enum { MEM_static_assert = 1/(int)(!!(c)) }; } MEM_STATIC void MEM_check(void) { MEM_STATIC_ASSERT((sizeof(size_t)==4) || (sizeof(size_t)==8)); } +/* detects whether we are being compiled under msan */ +#if defined (__has_feature) +# if __has_feature(memory_sanitizer) +# define MEMORY_SANITIZER 1 +# endif +#endif + +#if defined (MEMORY_SANITIZER) +/* Not all platforms that support msan provide sanitizers/msan_interface.h. + * We therefore declare the functions we need ourselves, rather than trying to + * include the header file... */ + +#include/* intptr_t */ + +/* Make memory region fully initialized (without changing its contents). */ +void __msan_unpoison(const volatile void *a, size_t size); + +/* Make memory region fully uninitialized (without changing its contents). + This is a legacy interface that does not update origin information. Use + __msan_allocated_memory() instead. */ +void __msan_poison(const volatile void *a, size_t size); + +/* Returns the offset of the first (at least partially) poisoned byte in the + memory range, or -1 if the whole range is good. */ +intptr_t __msan_test_shadow(const volatile void *x, size_t size); +#endif + +#if defined (MEMORY_SANITIZER) +# define MEM_SKIP_MSAN __attribute__((no_sanitize("memory"))) +#else +# define MEM_SKIP_MSAN +#endif + /*-************************************************************** * Basic Types diff --git a/lib/common/zstd_internal.h b/lib/common/zstd_internal.h index 585fd6b19e..f791c5b387 100644 --- a/lib/common/zstd_internal.h +++ b/lib/common/zstd_internal.h @@ -197,8 +197,8 @@ static void ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); } static void ZSTD_copy16(void* dst, const void* src) { memcpy(dst, src, 16); } #define COPY16(d,s) { ZSTD_copy16(d,s); d+=16; s+=16; } -#define WILDCOPY_OVERLENGTH 8 -#define VECLEN 16 +#define WILDCOPY_OVERLENGTH 32 +#define WILDCOPY_VECLEN 16 typedef enum { ZSTD_no_overlap, @@ -207,67 +207,58 @@ typedef enum { } ZSTD_overlap_e; /*! ZSTD_wildcopy() : - * custom version of memcpy(), can overwrite up to WILDCOPY_OVERLENGTH bytes (if length==0) */ + * Custom version of memcpy(), can over read/write up to WILDCOPY_OVERLENGTH bytes (if length==0) + * @param ovtype controls the overlap detection + * - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart. + * - ZSTD_overlap_src_before_dst: The src and dst may overlap, but they MUST be at least 8 bytes apart. + * The src buffer must be before the dst buffer. + */ MEM_STATIC FORCE_INLINE_ATTR DONT_VECTORIZE -void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e ovtype) +void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e const ovtype) { ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src; const BYTE* ip = (const BYTE*)src; BYTE* op = (BYTE*)dst; BYTE* const oend = op + length; - assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff < -8)); - if (length < VECLEN || (ovtype == ZSTD_overlap_src_before_dst && diff < VECLEN)) { - do - COPY8(op, ip) - while (op < oend); - } - else { - if ((length & 8) == 0) - COPY8(op, ip); - do { + assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff <= -WILDCOPY_VECLEN)); + + if (ovtype == ZSTD_overlap_src_before_dst && diff < WILDCOPY_VECLEN) { + /* Handle short offset copies. */ + do { + COPY8(op, ip) + } while (op < oend); + } else { + assert(diff >= WILDCOPY_VECLEN || diff <= -WILDCOPY_VECLEN); + /* Separate out the first two COPY16() calls because the copy length is + * almost certain to be short, so the branches have different + * probabilities. + * On gcc-9 unrolling once is +1.6%, twice is +2%, thrice is +1.8%. + * On clang-8 unrolling once is +1.4%, twice is +3.3%, thrice is +3%. + */ COPY16(op, ip); - } - while (op < oend); - } -} - -/*! ZSTD_wildcopy_16min() : - * same semantics as ZSTD_wilcopy() except guaranteed to be able to copy 16 bytes at the start */ -MEM_STATIC FORCE_INLINE_ATTR DONT_VECTORIZE -void ZSTD_wildcopy_16min(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e ovtype) -{ - ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src; - const BYTE* ip = (const BYTE*)src; - BYTE* op = (BYTE*)dst; - BYTE* const oend = op + length; - - assert(length >= 8); - assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff < -8)); - - if (ovtype == ZSTD_overlap_src_before_dst && diff < VECLEN) { - do - COPY8(op, ip) - while (op < oend); - } - else { - if ((length & 8) == 0) - COPY8(op, ip); - do { COPY16(op, ip); - } - while (op < oend); + if (op >= oend) return; + do { + COPY16(op, ip); + COPY16(op, ip); + } + while (op < oend); } } -MEM_STATIC void ZSTD_wildcopy_e(void* dst, const void* src, void* dstEnd) /* should be faster for decoding, but strangely, not verified on all platform */ +/*! ZSTD_wildcopy8() : + * The same as ZSTD_wildcopy(), but it can only overwrite 8 bytes, and works for + * overlapping buffers that are at least 8 bytes apart. + */ +MEM_STATIC void ZSTD_wildcopy8(void* dst, const void* src, ptrdiff_t length) { const BYTE* ip = (const BYTE*)src; BYTE* op = (BYTE*)dst; - BYTE* const oend = (BYTE*)dstEnd; - do - COPY8(op, ip) - while (op < oend); + BYTE* const oend = (BYTE*)op + length; + do { + COPY8(op, ip); + } while (op < oend); } @@ -323,7 +314,7 @@ MEM_STATIC U32 ZSTD_highbit32(U32 val) /* compress, dictBuilder, decodeCorpus _BitScanReverse(&r, val); return (unsigned)r; # elif defined(__GNUC__) && (__GNUC__ >= 3) /* GCC Intrinsic */ - return 31 - __builtin_clz(val); + return __builtin_clz (val) ^ 31; # elif defined(__ICCARM__) /* IAR Intrinsic */ return 31 - __CLZ(val); # else /* Software version */ diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 62cab4ed52..2f0736b241 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -42,11 +42,10 @@ size_t ZSTD_compressBound(size_t srcSize) { * Context memory management ***************************************/ struct ZSTD_CDict_s { - void* dictBuffer; const void* dictContent; size_t dictContentSize; - void* workspace; - size_t workspaceSize; + U32* entropyWorkspace; /* entropy workspace of HUF_WORKSPACE_SIZE bytes */ + ZSTD_cwksp workspace; ZSTD_matchState_t matchState; ZSTD_compressedBlockState_t cBlockState; ZSTD_customMem customMem; @@ -84,23 +83,26 @@ ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem) ZSTD_CCtx* ZSTD_initStaticCCtx(void *workspace, size_t workspaceSize) { - ZSTD_CCtx* const cctx = (ZSTD_CCtx*) workspace; + ZSTD_cwksp ws; + ZSTD_CCtx* cctx; if (workspaceSize <= sizeof(ZSTD_CCtx)) return NULL; /* minimum size */ if ((size_t)workspace & 7) return NULL; /* must be 8-aligned */ - memset(workspace, 0, workspaceSize); /* may be a bit generous, could memset be smaller ? */ + ZSTD_cwksp_init(&ws, workspace, workspaceSize); + + cctx = (ZSTD_CCtx*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CCtx)); + if (cctx == NULL) { + return NULL; + } + memset(cctx, 0, sizeof(ZSTD_CCtx)); + ZSTD_cwksp_move(&cctx->workspace, &ws); cctx->staticSize = workspaceSize; - cctx->workSpace = (void*)(cctx+1); - cctx->workSpaceSize = workspaceSize - sizeof(ZSTD_CCtx); /* statically sized space. entropyWorkspace never moves (but prev/next block swap places) */ - if (cctx->workSpaceSize < HUF_WORKSPACE_SIZE + 2 * sizeof(ZSTD_compressedBlockState_t)) return NULL; - assert(((size_t)cctx->workSpace & (sizeof(void*)-1)) == 0); /* ensure correct alignment */ - cctx->blockState.prevCBlock = (ZSTD_compressedBlockState_t*)cctx->workSpace; - cctx->blockState.nextCBlock = cctx->blockState.prevCBlock + 1; - { - void* const ptr = cctx->blockState.nextCBlock + 1; - cctx->entropyWorkspace = (U32*)ptr; - } + if (!ZSTD_cwksp_check_available(&cctx->workspace, HUF_WORKSPACE_SIZE + 2 * sizeof(ZSTD_compressedBlockState_t))) return NULL; + cctx->blockState.prevCBlock = (ZSTD_compressedBlockState_t*)ZSTD_cwksp_reserve_object(&cctx->workspace, sizeof(ZSTD_compressedBlockState_t)); + cctx->blockState.nextCBlock = (ZSTD_compressedBlockState_t*)ZSTD_cwksp_reserve_object(&cctx->workspace, sizeof(ZSTD_compressedBlockState_t)); + cctx->entropyWorkspace = (U32*)ZSTD_cwksp_reserve_object( + &cctx->workspace, HUF_WORKSPACE_SIZE); cctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid()); return cctx; } @@ -128,7 +130,11 @@ static void ZSTD_freeCCtxContent(ZSTD_CCtx* cctx) { assert(cctx != NULL); assert(cctx->staticSize == 0); - ZSTD_free(cctx->workSpace, cctx->customMem); cctx->workSpace = NULL; + /* Only free workspace if cctx not in workspace, otherwise the workspace + * will be freed when the cctx itself is freed. */ + if ((void*)cctx->workspace.workspace != (void*)cctx) { + ZSTD_cwksp_free(&cctx->workspace, cctx->customMem); + } ZSTD_clearAllDicts(cctx); #ifdef ZSTD_MULTITHREAD ZSTDMT_freeCCtx(cctx->mtctx); cctx->mtctx = NULL; @@ -160,7 +166,9 @@ static size_t ZSTD_sizeof_mtctx(const ZSTD_CCtx* cctx) size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx) { if (cctx==NULL) return 0; /* support sizeof on NULL */ - return sizeof(*cctx) + cctx->workSpaceSize + /* cctx may be in the workspace */ + return (cctx->workspace.workspace == cctx ? 0 : sizeof(*cctx)) + + ZSTD_cwksp_sizeof(&cctx->workspace) + ZSTD_sizeof_localDict(cctx->localDict) + ZSTD_sizeof_mtctx(cctx); } @@ -239,9 +247,9 @@ size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_paramete /* ZSTD_assignParamsToCCtxParams() : * params is presumed valid at this stage */ static ZSTD_CCtx_params ZSTD_assignParamsToCCtxParams( - ZSTD_CCtx_params cctxParams, ZSTD_parameters params) + const ZSTD_CCtx_params* cctxParams, ZSTD_parameters params) { - ZSTD_CCtx_params ret = cctxParams; + ZSTD_CCtx_params ret = *cctxParams; ret.cParams = params.cParams; ret.fParams = params.fParams; ret.compressionLevel = ZSTD_CLEVEL_DEFAULT; /* should not matter, as all cParams are presumed properly defined */ @@ -392,6 +400,11 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param) bounds.upperBound = ZSTD_TARGETCBLOCKSIZE_MAX; return bounds; + case ZSTD_c_srcSizeHint: + bounds.lowerBound = ZSTD_SRCSIZEHINT_MIN; + bounds.upperBound = ZSTD_SRCSIZEHINT_MAX; + return bounds; + default: { ZSTD_bounds const boundError = { ERROR(parameter_unsupported), 0, 0 }; return boundError; @@ -448,6 +461,7 @@ static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param) case ZSTD_c_forceAttachDict: case ZSTD_c_literalCompressionMode: case ZSTD_c_targetCBlockSize: + case ZSTD_c_srcSizeHint: default: return 0; } @@ -494,6 +508,7 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value) case ZSTD_c_ldmMinMatch: case ZSTD_c_ldmBucketSizeLog: case ZSTD_c_targetCBlockSize: + case ZSTD_c_srcSizeHint: break; default: RETURN_ERROR(parameter_unsupported); @@ -674,6 +689,12 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams, CCtxParams->targetCBlockSize = value; return CCtxParams->targetCBlockSize; + case ZSTD_c_srcSizeHint : + if (value!=0) /* 0 ==> default */ + BOUNDCHECK(ZSTD_c_srcSizeHint, value); + CCtxParams->srcSizeHint = value; + return CCtxParams->srcSizeHint; + default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); } } @@ -779,6 +800,9 @@ size_t ZSTD_CCtxParams_getParameter( case ZSTD_c_targetCBlockSize : *value = (int)CCtxParams->targetCBlockSize; break; + case ZSTD_c_srcSizeHint : + *value = (int)CCtxParams->srcSizeHint; + break; default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); } return 0; @@ -1029,7 +1053,11 @@ ZSTD_adjustCParams(ZSTD_compressionParameters cPar, ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams( const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize) { - ZSTD_compressionParameters cParams = ZSTD_getCParams(CCtxParams->compressionLevel, srcSizeHint, dictSize); + ZSTD_compressionParameters cParams; + if (srcSizeHint == ZSTD_CONTENTSIZE_UNKNOWN && CCtxParams->srcSizeHint > 0) { + srcSizeHint = CCtxParams->srcSizeHint; + } + cParams = ZSTD_getCParams(CCtxParams->compressionLevel, srcSizeHint, dictSize); if (CCtxParams->ldmParams.enableLdm) cParams.windowLog = ZSTD_LDM_DEFAULT_WINDOW_LOG; if (CCtxParams->cParams.windowLog) cParams.windowLog = CCtxParams->cParams.windowLog; if (CCtxParams->cParams.hashLog) cParams.hashLog = CCtxParams->cParams.hashLog; @@ -1081,7 +1109,7 @@ size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params) matchStateSize + ldmSpace + ldmSeqSpace; DEBUGLOG(5, "sizeof(ZSTD_CCtx) : %u", (U32)sizeof(ZSTD_CCtx)); - DEBUGLOG(5, "estimate workSpace : %u", (U32)neededSpace); + DEBUGLOG(5, "estimate workspace : %u", (U32)neededSpace); return sizeof(ZSTD_CCtx) + neededSpace; } } @@ -1186,17 +1214,6 @@ size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx) return 0; /* over-simplification; could also check if context is currently running in streaming mode, and in which case, report how many bytes are left to be flushed within output buffer */ } - - -static U32 ZSTD_equivalentCParams(ZSTD_compressionParameters cParams1, - ZSTD_compressionParameters cParams2) -{ - return (cParams1.hashLog == cParams2.hashLog) - & (cParams1.chainLog == cParams2.chainLog) - & (cParams1.strategy == cParams2.strategy) /* opt parser space */ - & ((cParams1.minMatch==3) == (cParams2.minMatch==3)); /* hashlog3 space */ -} - static void ZSTD_assertEqualCParams(ZSTD_compressionParameters cParams1, ZSTD_compressionParameters cParams2) { @@ -1211,71 +1228,6 @@ static void ZSTD_assertEqualCParams(ZSTD_compressionParameters cParams1, assert(cParams1.strategy == cParams2.strategy); } -/** The parameters are equivalent if ldm is not enabled in both sets or - * all the parameters are equivalent. */ -static U32 ZSTD_equivalentLdmParams(ldmParams_t ldmParams1, - ldmParams_t ldmParams2) -{ - return (!ldmParams1.enableLdm && !ldmParams2.enableLdm) || - (ldmParams1.enableLdm == ldmParams2.enableLdm && - ldmParams1.hashLog == ldmParams2.hashLog && - ldmParams1.bucketSizeLog == ldmParams2.bucketSizeLog && - ldmParams1.minMatchLength == ldmParams2.minMatchLength && - ldmParams1.hashRateLog == ldmParams2.hashRateLog); -} - -typedef enum { ZSTDb_not_buffered, ZSTDb_buffered } ZSTD_buffered_policy_e; - -/* ZSTD_sufficientBuff() : - * check internal buffers exist for streaming if buffPol == ZSTDb_buffered . - * Note : they are assumed to be correctly sized if ZSTD_equivalentCParams()==1 */ -static U32 ZSTD_sufficientBuff(size_t bufferSize1, size_t maxNbSeq1, - size_t maxNbLit1, - ZSTD_buffered_policy_e buffPol2, - ZSTD_compressionParameters cParams2, - U64 pledgedSrcSize) -{ - size_t const windowSize2 = MAX(1, (size_t)MIN(((U64)1 << cParams2.windowLog), pledgedSrcSize)); - size_t const blockSize2 = MIN(ZSTD_BLOCKSIZE_MAX, windowSize2); - size_t const maxNbSeq2 = blockSize2 / ((cParams2.minMatch == 3) ? 3 : 4); - size_t const maxNbLit2 = blockSize2; - size_t const neededBufferSize2 = (buffPol2==ZSTDb_buffered) ? windowSize2 + blockSize2 : 0; - DEBUGLOG(4, "ZSTD_sufficientBuff: is neededBufferSize2=%u <= bufferSize1=%u", - (U32)neededBufferSize2, (U32)bufferSize1); - DEBUGLOG(4, "ZSTD_sufficientBuff: is maxNbSeq2=%u <= maxNbSeq1=%u", - (U32)maxNbSeq2, (U32)maxNbSeq1); - DEBUGLOG(4, "ZSTD_sufficientBuff: is maxNbLit2=%u <= maxNbLit1=%u", - (U32)maxNbLit2, (U32)maxNbLit1); - return (maxNbLit2 <= maxNbLit1) - & (maxNbSeq2 <= maxNbSeq1) - & (neededBufferSize2 <= bufferSize1); -} - -/** Equivalence for resetCCtx purposes */ -static U32 ZSTD_equivalentParams(ZSTD_CCtx_params params1, - ZSTD_CCtx_params params2, - size_t buffSize1, - size_t maxNbSeq1, size_t maxNbLit1, - ZSTD_buffered_policy_e buffPol2, - U64 pledgedSrcSize) -{ - DEBUGLOG(4, "ZSTD_equivalentParams: pledgedSrcSize=%u", (U32)pledgedSrcSize); - if (!ZSTD_equivalentCParams(params1.cParams, params2.cParams)) { - DEBUGLOG(4, "ZSTD_equivalentCParams() == 0"); - return 0; - } - if (!ZSTD_equivalentLdmParams(params1.ldmParams, params2.ldmParams)) { - DEBUGLOG(4, "ZSTD_equivalentLdmParams() == 0"); - return 0; - } - if (!ZSTD_sufficientBuff(buffSize1, maxNbSeq1, maxNbLit1, buffPol2, - params2.cParams, pledgedSrcSize)) { - DEBUGLOG(4, "ZSTD_sufficientBuff() == 0"); - return 0; - } - return 1; -} - static void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs) { int i; @@ -1301,88 +1253,104 @@ static void ZSTD_invalidateMatchState(ZSTD_matchState_t* ms) ms->dictMatchState = NULL; } -/*! ZSTD_continueCCtx() : - * reuse CCtx without reset (note : requires no dictionary) */ -static size_t ZSTD_continueCCtx(ZSTD_CCtx* cctx, ZSTD_CCtx_params params, U64 pledgedSrcSize) -{ - size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params.cParams.windowLog), pledgedSrcSize)); - size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize); - DEBUGLOG(4, "ZSTD_continueCCtx: re-use context in place"); +/** + * Indicates whether this compression proceeds directly from user-provided + * source buffer to user-provided destination buffer (ZSTDb_not_buffered), or + * whether the context needs to buffer the input/output (ZSTDb_buffered). + */ +typedef enum { + ZSTDb_not_buffered, + ZSTDb_buffered +} ZSTD_buffered_policy_e; - cctx->blockSize = blockSize; /* previous block size could be different even for same windowLog, due to pledgedSrcSize */ - cctx->appliedParams = params; - cctx->blockState.matchState.cParams = params.cParams; - cctx->pledgedSrcSizePlusOne = pledgedSrcSize+1; - cctx->consumedSrcSize = 0; - cctx->isFirstBlock = 1; - cctx->producedCSize = 0; - if (pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN) - cctx->appliedParams.fParams.contentSizeFlag = 0; - DEBUGLOG(4, "pledged content size : %u ; flag : %u", - (U32)pledgedSrcSize, cctx->appliedParams.fParams.contentSizeFlag); - cctx->stage = ZSTDcs_init; - cctx->dictID = 0; - if (params.ldmParams.enableLdm) - ZSTD_window_clear(&cctx->ldmState.window); - ZSTD_referenceExternalSequences(cctx, NULL, 0); - ZSTD_invalidateMatchState(&cctx->blockState.matchState); - ZSTD_reset_compressedBlockState(cctx->blockState.prevCBlock); - XXH64_reset(&cctx->xxhState, 0); - return 0; -} +/** + * Controls, for this matchState reset, whether the tables need to be cleared / + * prepared for the coming compression (ZSTDcrp_makeClean), or whether the + * tables can be left unclean (ZSTDcrp_leaveDirty), because we know that a + * subsequent operation will overwrite the table space anyways (e.g., copying + * the matchState contents in from a CDict). + */ +typedef enum { + ZSTDcrp_makeClean, + ZSTDcrp_leaveDirty +} ZSTD_compResetPolicy_e; -typedef enum { ZSTDcrp_continue, ZSTDcrp_noMemset } ZSTD_compResetPolicy_e; +/** + * Controls, for this matchState reset, whether indexing can continue where it + * left off (ZSTDirp_continue), or whether it needs to be restarted from zero + * (ZSTDirp_reset). + */ +typedef enum { + ZSTDirp_continue, + ZSTDirp_reset +} ZSTD_indexResetPolicy_e; -typedef enum { ZSTD_resetTarget_CDict, ZSTD_resetTarget_CCtx } ZSTD_resetTarget_e; +typedef enum { + ZSTD_resetTarget_CDict, + ZSTD_resetTarget_CCtx +} ZSTD_resetTarget_e; -static void* +static size_t ZSTD_reset_matchState(ZSTD_matchState_t* ms, - void* ptr, + ZSTD_cwksp* ws, const ZSTD_compressionParameters* cParams, - ZSTD_compResetPolicy_e const crp, ZSTD_resetTarget_e const forWho) + const ZSTD_compResetPolicy_e crp, + const ZSTD_indexResetPolicy_e forceResetIndex, + const ZSTD_resetTarget_e forWho) { size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams->chainLog); size_t const hSize = ((size_t)1) << cParams->hashLog; U32 const hashLog3 = ((forWho == ZSTD_resetTarget_CCtx) && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0; - size_t const h3Size = ((size_t)1) << hashLog3; - size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32); + size_t const h3Size = hashLog3 ? ((size_t)1) << hashLog3 : 0; - assert(((size_t)ptr & 3) == 0); + DEBUGLOG(4, "reset indices : %u", forceResetIndex == ZSTDirp_reset); + if (forceResetIndex == ZSTDirp_reset) { + memset(&ms->window, 0, sizeof(ms->window)); + ms->window.dictLimit = 1; /* start from 1, so that 1st position is valid */ + ms->window.lowLimit = 1; /* it ensures first and later CCtx usages compress the same */ + ms->window.nextSrc = ms->window.base + 1; /* see issue #1241 */ + ZSTD_cwksp_mark_tables_dirty(ws); + } ms->hashLog3 = hashLog3; - memset(&ms->window, 0, sizeof(ms->window)); - ms->window.dictLimit = 1; /* start from 1, so that 1st position is valid */ - ms->window.lowLimit = 1; /* it ensures first and later CCtx usages compress the same */ - ms->window.nextSrc = ms->window.base + 1; /* see issue #1241 */ + ZSTD_invalidateMatchState(ms); + assert(!ZSTD_cwksp_reserve_failed(ws)); /* check that allocation hasn't already failed */ + + ZSTD_cwksp_clear_tables(ws); + + DEBUGLOG(5, "reserving table space"); + /* table Space */ + ms->hashTable = (U32*)ZSTD_cwksp_reserve_table(ws, hSize * sizeof(U32)); + ms->chainTable = (U32*)ZSTD_cwksp_reserve_table(ws, chainSize * sizeof(U32)); + ms->hashTable3 = (U32*)ZSTD_cwksp_reserve_table(ws, h3Size * sizeof(U32)); + RETURN_ERROR_IF(ZSTD_cwksp_reserve_failed(ws), memory_allocation, + "failed a workspace allocation in ZSTD_reset_matchState"); + + DEBUGLOG(4, "reset table : %u", crp!=ZSTDcrp_leaveDirty); + if (crp!=ZSTDcrp_leaveDirty) { + /* reset tables only */ + ZSTD_cwksp_clean_tables(ws); + } + /* opt parser space */ if ((forWho == ZSTD_resetTarget_CCtx) && (cParams->strategy >= ZSTD_btopt)) { DEBUGLOG(4, "reserving optimal parser space"); - ms->opt.litFreq = (unsigned*)ptr; - ms->opt.litLengthFreq = ms->opt.litFreq + (1< opt.matchLengthFreq = ms->opt.litLengthFreq + (MaxLL+1); - ms->opt.offCodeFreq = ms->opt.matchLengthFreq + (MaxML+1); - ptr = ms->opt.offCodeFreq + (MaxOff+1); - ms->opt.matchTable = (ZSTD_match_t*)ptr; - ptr = ms->opt.matchTable + ZSTD_OPT_NUM+1; - ms->opt.priceTable = (ZSTD_optimal_t*)ptr; - ptr = ms->opt.priceTable + ZSTD_OPT_NUM+1; + ms->opt.litFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (1< opt.litLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxLL+1) * sizeof(unsigned)); + ms->opt.matchLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxML+1) * sizeof(unsigned)); + ms->opt.offCodeFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxOff+1) * sizeof(unsigned)); + ms->opt.matchTable = (ZSTD_match_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t)); + ms->opt.priceTable = (ZSTD_optimal_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t)); } - /* table Space */ - DEBUGLOG(4, "reset table : %u", crp!=ZSTDcrp_noMemset); - assert(((size_t)ptr & 3) == 0); /* ensure ptr is properly aligned */ - if (crp!=ZSTDcrp_noMemset) memset(ptr, 0, tableSpace); /* reset tables only */ - ms->hashTable = (U32*)(ptr); - ms->chainTable = ms->hashTable + hSize; - ms->hashTable3 = ms->chainTable + chainSize; - ptr = ms->hashTable3 + h3Size; - ms->cParams = *cParams; - assert(((size_t)ptr & 3) == 0); - return ptr; + RETURN_ERROR_IF(ZSTD_cwksp_reserve_failed(ws), memory_allocation, + "failed a workspace allocation in ZSTD_reset_matchState"); + + return 0; } /* ZSTD_indexTooCloseToMax() : @@ -1398,13 +1366,6 @@ static int ZSTD_indexTooCloseToMax(ZSTD_window_t w) return (size_t)(w.nextSrc - w.base) > (ZSTD_CURRENT_MAX - ZSTD_INDEXOVERFLOW_MARGIN); } -#define ZSTD_WORKSPACETOOLARGE_FACTOR 3 /* define "workspace is too large" as this number of times larger than needed */ -#define ZSTD_WORKSPACETOOLARGE_MAXDURATION 128 /* when workspace is continuously too large - * during at least this number of times, - * context's memory usage is considered wasteful, - * because it's sized to handle a worst case scenario which rarely happens. - * In which case, resize it down to free some memory */ - /*! ZSTD_resetCCtx_internal() : note : `params` are assumed fully validated at this stage */ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, @@ -1413,31 +1374,12 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, ZSTD_compResetPolicy_e const crp, ZSTD_buffered_policy_e const zbuff) { + ZSTD_cwksp* const ws = &zc->workspace; DEBUGLOG(4, "ZSTD_resetCCtx_internal: pledgedSrcSize=%u, wlog=%u", (U32)pledgedSrcSize, params.cParams.windowLog); assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams))); zc->isFirstBlock = 1; - if (crp == ZSTDcrp_continue) { - if (ZSTD_equivalentParams(zc->appliedParams, params, - zc->inBuffSize, - zc->seqStore.maxNbSeq, zc->seqStore.maxNbLit, - zbuff, pledgedSrcSize) ) { - DEBUGLOG(4, "ZSTD_equivalentParams()==1 -> consider continue mode"); - zc->workSpaceOversizedDuration += (zc->workSpaceOversizedDuration > 0); /* if it was too large, it still is */ - if (zc->workSpaceOversizedDuration <= ZSTD_WORKSPACETOOLARGE_MAXDURATION) { - DEBUGLOG(4, "continue mode confirmed (wLog1=%u, blockSize1=%zu)", - zc->appliedParams.cParams.windowLog, zc->blockSize); - if (ZSTD_indexTooCloseToMax(zc->blockState.matchState.window)) { - /* prefer a reset, faster than a rescale */ - ZSTD_reset_matchState(&zc->blockState.matchState, - zc->entropyWorkspace + HUF_WORKSPACE_SIZE_U32, - ¶ms.cParams, - crp, ZSTD_resetTarget_CCtx); - } - return ZSTD_continueCCtx(zc, params, pledgedSrcSize); - } } } - DEBUGLOG(4, "ZSTD_equivalentParams()==0 -> reset CCtx"); if (params.ldmParams.enableLdm) { /* Adjust long distance matching parameters */ @@ -1456,53 +1398,67 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, size_t const buffInSize = (zbuff==ZSTDb_buffered) ? windowSize + blockSize : 0; size_t const matchStateSize = ZSTD_sizeof_matchState(¶ms.cParams, /* forCCtx */ 1); size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(params.ldmParams, blockSize); - void* ptr; /* used to partition workSpace */ - /* Check if workSpace is large enough, alloc a new one if needed */ - { size_t const entropySpace = HUF_WORKSPACE_SIZE; + ZSTD_indexResetPolicy_e needsIndexReset = ZSTDirp_continue; + + if (ZSTD_indexTooCloseToMax(zc->blockState.matchState.window)) { + needsIndexReset = ZSTDirp_reset; + } + + ZSTD_cwksp_bump_oversized_duration(ws, 0); + + /* Check if workspace is large enough, alloc a new one if needed */ + { size_t const cctxSpace = zc->staticSize ? sizeof(ZSTD_CCtx) : 0; + size_t const entropySpace = HUF_WORKSPACE_SIZE; size_t const blockStateSpace = 2 * sizeof(ZSTD_compressedBlockState_t); size_t const bufferSpace = buffInSize + buffOutSize; size_t const ldmSpace = ZSTD_ldm_getTableSize(params.ldmParams); size_t const ldmSeqSpace = maxNbLdmSeq * sizeof(rawSeq); - size_t const neededSpace = entropySpace + blockStateSpace + ldmSpace + - ldmSeqSpace + matchStateSize + tokenSpace + - bufferSpace; + size_t const neededSpace = + cctxSpace + + entropySpace + + blockStateSpace + + ldmSpace + + ldmSeqSpace + + matchStateSize + + tokenSpace + + bufferSpace; - int const workSpaceTooSmall = zc->workSpaceSize < neededSpace; - int const workSpaceTooLarge = zc->workSpaceSize > ZSTD_WORKSPACETOOLARGE_FACTOR * neededSpace; - int const workSpaceWasteful = workSpaceTooLarge && (zc->workSpaceOversizedDuration > ZSTD_WORKSPACETOOLARGE_MAXDURATION); - zc->workSpaceOversizedDuration = workSpaceTooLarge ? zc->workSpaceOversizedDuration+1 : 0; + int const workspaceTooSmall = ZSTD_cwksp_sizeof(ws) < neededSpace; + int const workspaceWasteful = ZSTD_cwksp_check_wasteful(ws, neededSpace); DEBUGLOG(4, "Need %zuKB workspace, including %zuKB for match state, and %zuKB for buffers", neededSpace>>10, matchStateSize>>10, bufferSpace>>10); DEBUGLOG(4, "windowSize: %zu - blockSize: %zu", windowSize, blockSize); - if (workSpaceTooSmall || workSpaceWasteful) { - DEBUGLOG(4, "Resize workSpaceSize from %zuKB to %zuKB", - zc->workSpaceSize >> 10, + if (workspaceTooSmall || workspaceWasteful) { + DEBUGLOG(4, "Resize workspaceSize from %zuKB to %zuKB", + ZSTD_cwksp_sizeof(ws) >> 10, neededSpace >> 10); RETURN_ERROR_IF(zc->staticSize, memory_allocation, "static cctx : no resize"); - zc->workSpaceSize = 0; - ZSTD_free(zc->workSpace, zc->customMem); - zc->workSpace = ZSTD_malloc(neededSpace, zc->customMem); - RETURN_ERROR_IF(zc->workSpace == NULL, memory_allocation); - zc->workSpaceSize = neededSpace; - zc->workSpaceOversizedDuration = 0; + needsIndexReset = ZSTDirp_reset; + ZSTD_cwksp_free(ws, zc->customMem); + FORWARD_IF_ERROR(ZSTD_cwksp_create(ws, neededSpace, zc->customMem)); + + DEBUGLOG(5, "reserving object space"); /* Statically sized space. * entropyWorkspace never moves, * though prev/next block swap places */ - assert(((size_t)zc->workSpace & 3) == 0); /* ensure correct alignment */ - assert(zc->workSpaceSize >= 2 * sizeof(ZSTD_compressedBlockState_t)); - zc->blockState.prevCBlock = (ZSTD_compressedBlockState_t*)zc->workSpace; - zc->blockState.nextCBlock = zc->blockState.prevCBlock + 1; - ptr = zc->blockState.nextCBlock + 1; - zc->entropyWorkspace = (U32*)ptr; + assert(ZSTD_cwksp_check_available(ws, 2 * sizeof(ZSTD_compressedBlockState_t))); + zc->blockState.prevCBlock = (ZSTD_compressedBlockState_t*) ZSTD_cwksp_reserve_object(ws, sizeof(ZSTD_compressedBlockState_t)); + RETURN_ERROR_IF(zc->blockState.prevCBlock == NULL, memory_allocation, "couldn't allocate prevCBlock"); + zc->blockState.nextCBlock = (ZSTD_compressedBlockState_t*) ZSTD_cwksp_reserve_object(ws, sizeof(ZSTD_compressedBlockState_t)); + RETURN_ERROR_IF(zc->blockState.nextCBlock == NULL, memory_allocation, "couldn't allocate nextCBlock"); + zc->entropyWorkspace = (U32*) ZSTD_cwksp_reserve_object(ws, HUF_WORKSPACE_SIZE); + RETURN_ERROR_IF(zc->blockState.nextCBlock == NULL, memory_allocation, "couldn't allocate entropyWorkspace"); } } + ZSTD_cwksp_clear(ws); + /* init params */ zc->appliedParams = params; zc->blockState.matchState.cParams = params.cParams; @@ -1521,58 +1477,58 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, ZSTD_reset_compressedBlockState(zc->blockState.prevCBlock); - ptr = ZSTD_reset_matchState(&zc->blockState.matchState, - zc->entropyWorkspace + HUF_WORKSPACE_SIZE_U32, - ¶ms.cParams, - crp, ZSTD_resetTarget_CCtx); - - /* ldm hash table */ - /* initialize bucketOffsets table later for pointer alignment */ - if (params.ldmParams.enableLdm) { - size_t const ldmHSize = ((size_t)1) << params.ldmParams.hashLog; - memset(ptr, 0, ldmHSize * sizeof(ldmEntry_t)); - assert(((size_t)ptr & 3) == 0); /* ensure ptr is properly aligned */ - zc->ldmState.hashTable = (ldmEntry_t*)ptr; - ptr = zc->ldmState.hashTable + ldmHSize; - zc->ldmSequences = (rawSeq*)ptr; - ptr = zc->ldmSequences + maxNbLdmSeq; - zc->maxNbLdmSequences = maxNbLdmSeq; - - memset(&zc->ldmState.window, 0, sizeof(zc->ldmState.window)); - } - assert(((size_t)ptr & 3) == 0); /* ensure ptr is properly aligned */ - - /* sequences storage */ - zc->seqStore.maxNbSeq = maxNbSeq; - zc->seqStore.sequencesStart = (seqDef*)ptr; - ptr = zc->seqStore.sequencesStart + maxNbSeq; - zc->seqStore.llCode = (BYTE*) ptr; - zc->seqStore.mlCode = zc->seqStore.llCode + maxNbSeq; - zc->seqStore.ofCode = zc->seqStore.mlCode + maxNbSeq; - zc->seqStore.litStart = zc->seqStore.ofCode + maxNbSeq; /* ZSTD_wildcopy() is used to copy into the literals buffer, * so we have to oversize the buffer by WILDCOPY_OVERLENGTH bytes. */ + zc->seqStore.litStart = ZSTD_cwksp_reserve_buffer(ws, blockSize + WILDCOPY_OVERLENGTH); zc->seqStore.maxNbLit = blockSize; - ptr = zc->seqStore.litStart + blockSize + WILDCOPY_OVERLENGTH; + + /* buffers */ + zc->inBuffSize = buffInSize; + zc->inBuff = (char*)ZSTD_cwksp_reserve_buffer(ws, buffInSize); + zc->outBuffSize = buffOutSize; + zc->outBuff = (char*)ZSTD_cwksp_reserve_buffer(ws, buffOutSize); /* ldm bucketOffsets table */ if (params.ldmParams.enableLdm) { + /* TODO: avoid memset? */ size_t const ldmBucketSize = ((size_t)1) << (params.ldmParams.hashLog - params.ldmParams.bucketSizeLog); - memset(ptr, 0, ldmBucketSize); - zc->ldmState.bucketOffsets = (BYTE*)ptr; - ptr = zc->ldmState.bucketOffsets + ldmBucketSize; - ZSTD_window_clear(&zc->ldmState.window); + zc->ldmState.bucketOffsets = ZSTD_cwksp_reserve_buffer(ws, ldmBucketSize); + memset(zc->ldmState.bucketOffsets, 0, ldmBucketSize); } + + /* sequences storage */ ZSTD_referenceExternalSequences(zc, NULL, 0); + zc->seqStore.maxNbSeq = maxNbSeq; + zc->seqStore.llCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE)); + zc->seqStore.mlCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE)); + zc->seqStore.ofCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE)); + zc->seqStore.sequencesStart = (seqDef*)ZSTD_cwksp_reserve_aligned(ws, maxNbSeq * sizeof(seqDef)); + + FORWARD_IF_ERROR(ZSTD_reset_matchState( + &zc->blockState.matchState, + ws, + ¶ms.cParams, + crp, + needsIndexReset, + ZSTD_resetTarget_CCtx)); - /* buffers */ - zc->inBuffSize = buffInSize; - zc->inBuff = (char*)ptr; - zc->outBuffSize = buffOutSize; - zc->outBuff = zc->inBuff + buffInSize; + /* ldm hash table */ + if (params.ldmParams.enableLdm) { + /* TODO: avoid memset? */ + size_t const ldmHSize = ((size_t)1) << params.ldmParams.hashLog; + zc->ldmState.hashTable = (ldmEntry_t*)ZSTD_cwksp_reserve_aligned(ws, ldmHSize * sizeof(ldmEntry_t)); + memset(zc->ldmState.hashTable, 0, ldmHSize * sizeof(ldmEntry_t)); + zc->ldmSequences = (rawSeq*)ZSTD_cwksp_reserve_aligned(ws, maxNbLdmSeq * sizeof(rawSeq)); + zc->maxNbLdmSequences = maxNbLdmSeq; + + memset(&zc->ldmState.window, 0, sizeof(zc->ldmState.window)); + ZSTD_window_clear(&zc->ldmState.window); + } + + DEBUGLOG(3, "wksp: finished allocating, %zd bytes remain available", ZSTD_cwksp_available_space(ws)); return 0; } @@ -1606,15 +1562,15 @@ static const size_t attachDictSizeCutoffs[ZSTD_STRATEGY_MAX+1] = { }; static int ZSTD_shouldAttachDict(const ZSTD_CDict* cdict, - ZSTD_CCtx_params params, + const ZSTD_CCtx_params* params, U64 pledgedSrcSize) { size_t cutoff = attachDictSizeCutoffs[cdict->matchState.cParams.strategy]; return ( pledgedSrcSize <= cutoff || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN - || params.attachDictPref == ZSTD_dictForceAttach ) - && params.attachDictPref != ZSTD_dictForceCopy - && !params.forceWindow; /* dictMatchState isn't correctly + || params->attachDictPref == ZSTD_dictForceAttach ) + && params->attachDictPref != ZSTD_dictForceCopy + && !params->forceWindow; /* dictMatchState isn't correctly * handled in _enforceMaxDist */ } @@ -1632,8 +1588,8 @@ ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCtx* cctx, * has its own tables. */ params.cParams = ZSTD_adjustCParams_internal(*cdict_cParams, pledgedSrcSize, 0); params.cParams.windowLog = windowLog; - ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize, - ZSTDcrp_continue, zbuff); + FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize, + ZSTDcrp_makeClean, zbuff)); assert(cctx->appliedParams.cParams.strategy == cdict_cParams->strategy); } @@ -1681,13 +1637,15 @@ static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx, /* Copy only compression parameters related to tables. */ params.cParams = *cdict_cParams; params.cParams.windowLog = windowLog; - ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize, - ZSTDcrp_noMemset, zbuff); + FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize, + ZSTDcrp_leaveDirty, zbuff)); assert(cctx->appliedParams.cParams.strategy == cdict_cParams->strategy); assert(cctx->appliedParams.cParams.hashLog == cdict_cParams->hashLog); assert(cctx->appliedParams.cParams.chainLog == cdict_cParams->chainLog); } + ZSTD_cwksp_mark_tables_dirty(&cctx->workspace); + /* copy tables */ { size_t const chainSize = (cdict_cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cdict_cParams->chainLog); size_t const hSize = (size_t)1 << cdict_cParams->hashLog; @@ -1700,11 +1658,14 @@ static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx, } /* Zero the hashTable3, since the cdict never fills it */ - { size_t const h3Size = (size_t)1 << cctx->blockState.matchState.hashLog3; + { int const h3log = cctx->blockState.matchState.hashLog3; + size_t const h3Size = h3log ? ((size_t)1 << h3log) : 0; assert(cdict->matchState.hashLog3 == 0); memset(cctx->blockState.matchState.hashTable3, 0, h3Size * sizeof(U32)); } + ZSTD_cwksp_mark_tables_clean(&cctx->workspace); + /* copy dictionary offsets */ { ZSTD_matchState_t const* srcMatchState = &cdict->matchState; ZSTD_matchState_t* dstMatchState = &cctx->blockState.matchState; @@ -1726,7 +1687,7 @@ static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx, * in-place. We decide here which strategy to use. */ static size_t ZSTD_resetCCtx_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict, - ZSTD_CCtx_params params, + const ZSTD_CCtx_params* params, U64 pledgedSrcSize, ZSTD_buffered_policy_e zbuff) { @@ -1736,10 +1697,10 @@ static size_t ZSTD_resetCCtx_usingCDict(ZSTD_CCtx* cctx, if (ZSTD_shouldAttachDict(cdict, params, pledgedSrcSize)) { return ZSTD_resetCCtx_byAttachingCDict( - cctx, cdict, params, pledgedSrcSize, zbuff); + cctx, cdict, *params, pledgedSrcSize, zbuff); } else { return ZSTD_resetCCtx_byCopyingCDict( - cctx, cdict, params, pledgedSrcSize, zbuff); + cctx, cdict, *params, pledgedSrcSize, zbuff); } } @@ -1765,7 +1726,7 @@ static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx, params.cParams = srcCCtx->appliedParams.cParams; params.fParams = fParams; ZSTD_resetCCtx_internal(dstCCtx, params, pledgedSrcSize, - ZSTDcrp_noMemset, zbuff); + ZSTDcrp_leaveDirty, zbuff); assert(dstCCtx->appliedParams.cParams.windowLog == srcCCtx->appliedParams.cParams.windowLog); assert(dstCCtx->appliedParams.cParams.strategy == srcCCtx->appliedParams.cParams.strategy); assert(dstCCtx->appliedParams.cParams.hashLog == srcCCtx->appliedParams.cParams.hashLog); @@ -1773,16 +1734,21 @@ static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx, assert(dstCCtx->blockState.matchState.hashLog3 == srcCCtx->blockState.matchState.hashLog3); } + ZSTD_cwksp_mark_tables_dirty(&dstCCtx->workspace); + /* copy tables */ { size_t const chainSize = (srcCCtx->appliedParams.cParams.strategy == ZSTD_fast) ? 0 : ((size_t)1 << srcCCtx->appliedParams.cParams.chainLog); size_t const hSize = (size_t)1 << srcCCtx->appliedParams.cParams.hashLog; - size_t const h3Size = (size_t)1 << srcCCtx->blockState.matchState.hashLog3; + int const h3log = srcCCtx->blockState.matchState.hashLog3; + size_t const h3Size = h3log ? ((size_t)1 << h3log) : 0; size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32); assert((U32*)dstCCtx->blockState.matchState.chainTable == (U32*)dstCCtx->blockState.matchState.hashTable + hSize); /* chainTable must follow hashTable */ assert((U32*)dstCCtx->blockState.matchState.hashTable3 == (U32*)dstCCtx->blockState.matchState.chainTable + chainSize); memcpy(dstCCtx->blockState.matchState.hashTable, srcCCtx->blockState.matchState.hashTable, tableSpace); /* presumes all tables follow each other */ } + ZSTD_cwksp_mark_tables_clean(&dstCCtx->workspace); + /* copy dictionary offsets */ { const ZSTD_matchState_t* srcMatchState = &srcCCtx->blockState.matchState; @@ -1833,6 +1799,20 @@ ZSTD_reduceTable_internal (U32* const table, U32 const size, U32 const reducerVa int rowNb; assert((size & (ZSTD_ROWSIZE-1)) == 0); /* multiple of ZSTD_ROWSIZE */ assert(size < (1U<<31)); /* can be casted to int */ + +#if defined (MEMORY_SANITIZER) && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE) + /* To validate that the table re-use logic is sound, and that we don't + * access table space that we haven't cleaned, we re-"poison" the table + * space every time we mark it dirty. + * + * This function however is intended to operate on those dirty tables and + * re-clean them. So when this function is used correctly, we can unpoison + * the memory it operated on. This introduces a blind spot though, since + * if we now try to operate on __actually__ poisoned memory, we will not + * detect that. */ + __msan_unpoison(table, size * sizeof(U32)); +#endif + for (rowNb=0 ; rowNb < nbRows ; rowNb++) { int column; for (column=0; column cParams.windowLog > STREAM_ACCUMULATOR_MIN; @@ -1973,7 +1953,7 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr, ZSTD_disableLiteralsCompression(cctxParams), op, dstCapacity, literals, litSize, - workspace, wkspSize, + entropyWorkspace, entropyWkspSize, bmi2); FORWARD_IF_ERROR(cSize); assert(cSize <= dstCapacity); @@ -1983,12 +1963,17 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr, /* Sequences Header */ RETURN_ERROR_IF((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead*/, dstSize_tooSmall); - if (nbSeq < 0x7F) + if (nbSeq < 128) { *op++ = (BYTE)nbSeq; - else if (nbSeq < LONGNBSEQ) - op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2; - else - op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3; + } else if (nbSeq < LONGNBSEQ) { + op[0] = (BYTE)((nbSeq>>8) + 0x80); + op[1] = (BYTE)nbSeq; + op+=2; + } else { + op[0]=0xFF; + MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)); + op+=3; + } assert(op <= oend); if (nbSeq==0) { /* Copy the old tables over as if we repeated them */ @@ -2004,7 +1989,7 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr, ZSTD_seqToCodes(seqStorePtr); /* build CTable for Literal Lengths */ { unsigned max = MaxLL; - size_t const mostFrequent = HIST_countFast_wksp(count, &max, llCodeTable, nbSeq, workspace, wkspSize); /* can't fail */ + size_t const mostFrequent = HIST_countFast_wksp(count, &max, llCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */ DEBUGLOG(5, "Building LL table"); nextEntropy->fse.litlength_repeatMode = prevEntropy->fse.litlength_repeatMode; LLtype = ZSTD_selectEncodingType(&nextEntropy->fse.litlength_repeatMode, @@ -2014,10 +1999,14 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr, ZSTD_defaultAllowed, strategy); assert(set_basic < set_compressed && set_rle < set_compressed); assert(!(LLtype < set_compressed && nextEntropy->fse.litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ - { size_t const countSize = ZSTD_buildCTable(op, (size_t)(oend - op), CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype, - count, max, llCodeTable, nbSeq, LL_defaultNorm, LL_defaultNormLog, MaxLL, - prevEntropy->fse.litlengthCTable, sizeof(prevEntropy->fse.litlengthCTable), - workspace, wkspSize); + { size_t const countSize = ZSTD_buildCTable( + op, (size_t)(oend - op), + CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype, + count, max, llCodeTable, nbSeq, + LL_defaultNorm, LL_defaultNormLog, MaxLL, + prevEntropy->fse.litlengthCTable, + sizeof(prevEntropy->fse.litlengthCTable), + entropyWorkspace, entropyWkspSize); FORWARD_IF_ERROR(countSize); if (LLtype == set_compressed) lastNCount = op; @@ -2026,7 +2015,8 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr, } } /* build CTable for Offsets */ { unsigned max = MaxOff; - size_t const mostFrequent = HIST_countFast_wksp(count, &max, ofCodeTable, nbSeq, workspace, wkspSize); /* can't fail */ + size_t const mostFrequent = HIST_countFast_wksp( + count, &max, ofCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */ /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */ ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed; DEBUGLOG(5, "Building OF table"); @@ -2037,10 +2027,14 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr, OF_defaultNorm, OF_defaultNormLog, defaultPolicy, strategy); assert(!(Offtype < set_compressed && nextEntropy->fse.offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */ - { size_t const countSize = ZSTD_buildCTable(op, (size_t)(oend - op), CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype, - count, max, ofCodeTable, nbSeq, OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff, - prevEntropy->fse.offcodeCTable, sizeof(prevEntropy->fse.offcodeCTable), - workspace, wkspSize); + { size_t const countSize = ZSTD_buildCTable( + op, (size_t)(oend - op), + CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype, + count, max, ofCodeTable, nbSeq, + OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff, + prevEntropy->fse.offcodeCTable, + sizeof(prevEntropy->fse.offcodeCTable), + entropyWorkspace, entropyWkspSize); FORWARD_IF_ERROR(countSize); if (Offtype == set_compressed) lastNCount = op; @@ -2049,7 +2043,8 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr, } } /* build CTable for MatchLengths */ { unsigned max = MaxML; - size_t const mostFrequent = HIST_countFast_wksp(count, &max, mlCodeTable, nbSeq, workspace, wkspSize); /* can't fail */ + size_t const mostFrequent = HIST_countFast_wksp( + count, &max, mlCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */ DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op)); nextEntropy->fse.matchlength_repeatMode = prevEntropy->fse.matchlength_repeatMode; MLtype = ZSTD_selectEncodingType(&nextEntropy->fse.matchlength_repeatMode, @@ -2058,10 +2053,14 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr, ML_defaultNorm, ML_defaultNormLog, ZSTD_defaultAllowed, strategy); assert(!(MLtype < set_compressed && nextEntropy->fse.matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ - { size_t const countSize = ZSTD_buildCTable(op, (size_t)(oend - op), CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype, - count, max, mlCodeTable, nbSeq, ML_defaultNorm, ML_defaultNormLog, MaxML, - prevEntropy->fse.matchlengthCTable, sizeof(prevEntropy->fse.matchlengthCTable), - workspace, wkspSize); + { size_t const countSize = ZSTD_buildCTable( + op, (size_t)(oend - op), + CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype, + count, max, mlCodeTable, nbSeq, + ML_defaultNorm, ML_defaultNormLog, MaxML, + prevEntropy->fse.matchlengthCTable, + sizeof(prevEntropy->fse.matchlengthCTable), + entropyWorkspace, entropyWkspSize); FORWARD_IF_ERROR(countSize); if (MLtype == set_compressed) lastNCount = op; @@ -2109,13 +2108,13 @@ ZSTD_compressSequences(seqStore_t* seqStorePtr, const ZSTD_CCtx_params* cctxParams, void* dst, size_t dstCapacity, size_t srcSize, - void* workspace, size_t wkspSize, + void* entropyWorkspace, size_t entropyWkspSize, int bmi2) { size_t const cSize = ZSTD_compressSequences_internal( seqStorePtr, prevEntropy, nextEntropy, cctxParams, dst, dstCapacity, - workspace, wkspSize, bmi2); + entropyWorkspace, entropyWkspSize, bmi2); if (cSize == 0) return 0; /* When srcSize <= dstCapacity, there is enough space to write a raw uncompressed block. * Since we ran out of space, block must be not compressible, so fall back to raw uncompressed block. @@ -2308,8 +2307,8 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, if (frame && /* We don't want to emit our first block as a RLE even if it qualifies because - * doing so will cause the decoder to throw a "should consume all input error." - * https://github.com/facebook/zstd/blob/dev/programs/fileio.c#L1723 + * doing so will cause the decoder (cli only) to throw a "should consume all input error." + * This is only an issue for zstd <= v1.4.3 */ !zc->isFirstBlock && cSize < rleMaxLength && @@ -2337,7 +2336,11 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, } -static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms, ZSTD_CCtx_params const* params, void const* ip, void const* iend) +static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms, + ZSTD_cwksp* ws, + ZSTD_CCtx_params const* params, + void const* ip, + void const* iend) { if (ZSTD_window_needOverflowCorrection(ms->window, iend)) { U32 const maxDist = (U32)1 << params->cParams.windowLog; @@ -2346,7 +2349,9 @@ static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms, ZSTD_CCtx_params ZSTD_STATIC_ASSERT(ZSTD_CHAINLOG_MAX <= 30); ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_32 <= 30); ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31); + ZSTD_cwksp_mark_tables_dirty(ws); ZSTD_reduceIndex(ms, params, correction); + ZSTD_cwksp_mark_tables_clean(ws); if (ms->nextToUpdate < correction) ms->nextToUpdate = 0; else ms->nextToUpdate -= correction; /* invalidate dictionaries on overflow correction */ @@ -2355,6 +2360,7 @@ static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms, ZSTD_CCtx_params } } + /*! ZSTD_compress_frameChunk() : * Compress a chunk of data into one or multiple blocks. * All blocks will be terminated, all input will be consumed. @@ -2388,7 +2394,8 @@ static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx, "not enough space to store compressed block"); if (remaining < blockSize) blockSize = remaining; - ZSTD_overflowCorrectIfNeeded(ms, &cctx->appliedParams, ip, ip + blockSize); + ZSTD_overflowCorrectIfNeeded( + ms, &cctx->workspace, &cctx->appliedParams, ip, ip + blockSize); ZSTD_checkDictValidity(&ms->window, ip + blockSize, maxDist, &ms->loadedDictEnd, &ms->dictMatchState); /* Ensure hash/chain table insertion resumes no sooner than lowlimit */ @@ -2427,25 +2434,25 @@ static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx, static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity, - ZSTD_CCtx_params params, U64 pledgedSrcSize, U32 dictID) + const ZSTD_CCtx_params* params, U64 pledgedSrcSize, U32 dictID) { BYTE* const op = (BYTE*)dst; U32 const dictIDSizeCodeLength = (dictID>0) + (dictID>=256) + (dictID>=65536); /* 0-3 */ - U32 const dictIDSizeCode = params.fParams.noDictIDFlag ? 0 : dictIDSizeCodeLength; /* 0-3 */ - U32 const checksumFlag = params.fParams.checksumFlag>0; - U32 const windowSize = (U32)1 << params.cParams.windowLog; - U32 const singleSegment = params.fParams.contentSizeFlag && (windowSize >= pledgedSrcSize); - BYTE const windowLogByte = (BYTE)((params.cParams.windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN) << 3); - U32 const fcsCode = params.fParams.contentSizeFlag ? + U32 const dictIDSizeCode = params->fParams.noDictIDFlag ? 0 : dictIDSizeCodeLength; /* 0-3 */ + U32 const checksumFlag = params->fParams.checksumFlag>0; + U32 const windowSize = (U32)1 << params->cParams.windowLog; + U32 const singleSegment = params->fParams.contentSizeFlag && (windowSize >= pledgedSrcSize); + BYTE const windowLogByte = (BYTE)((params->cParams.windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN) << 3); + U32 const fcsCode = params->fParams.contentSizeFlag ? (pledgedSrcSize>=256) + (pledgedSrcSize>=65536+256) + (pledgedSrcSize>=0xFFFFFFFFU) : 0; /* 0-3 */ BYTE const frameHeaderDescriptionByte = (BYTE)(dictIDSizeCode + (checksumFlag<<2) + (singleSegment<<5) + (fcsCode<<6) ); size_t pos=0; - assert(!(params.fParams.contentSizeFlag && pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN)); + assert(!(params->fParams.contentSizeFlag && pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN)); RETURN_ERROR_IF(dstCapacity < ZSTD_FRAMEHEADERSIZE_MAX, dstSize_tooSmall); DEBUGLOG(4, "ZSTD_writeFrameHeader : dictIDFlag : %u ; dictID : %u ; dictIDSizeCode : %u", - !params.fParams.noDictIDFlag, (unsigned)dictID, (unsigned)dictIDSizeCode); + !params->fParams.noDictIDFlag, (unsigned)dictID, (unsigned)dictIDSizeCode); - if (params.format == ZSTD_f_zstd1) { + if (params->format == ZSTD_f_zstd1) { MEM_writeLE32(dst, ZSTD_MAGICNUMBER); pos = 4; } @@ -2511,7 +2518,7 @@ static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx, "missing init (ZSTD_compressBegin)"); if (frame && (cctx->stage==ZSTDcs_init)) { - fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, cctx->appliedParams, + fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, &cctx->appliedParams, cctx->pledgedSrcSizePlusOne-1, cctx->dictID); FORWARD_IF_ERROR(fhSize); assert(fhSize <= dstCapacity); @@ -2531,7 +2538,9 @@ static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx, if (!frame) { /* overflow check and correction for block mode */ - ZSTD_overflowCorrectIfNeeded(ms, &cctx->appliedParams, src, (BYTE const*)src + srcSize); + ZSTD_overflowCorrectIfNeeded( + ms, &cctx->workspace, &cctx->appliedParams, + src, (BYTE const*)src + srcSize); } DEBUGLOG(5, "ZSTD_compressContinue_internal (blockSize=%u)", (unsigned)cctx->blockSize); @@ -2584,6 +2593,7 @@ size_t ZSTD_compressBlock(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const * @return : 0, or an error code */ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms, + ZSTD_cwksp* ws, ZSTD_CCtx_params const* params, const void* src, size_t srcSize, ZSTD_dictTableLoadMethod_e dtlm) @@ -2604,7 +2614,7 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms, size_t const chunk = MIN(remaining, ZSTD_CHUNKSIZE_MAX); const BYTE* const ichunk = ip + chunk; - ZSTD_overflowCorrectIfNeeded(ms, params, ip, ichunk); + ZSTD_overflowCorrectIfNeeded(ms, ws, params, ip, ichunk); switch(params->cParams.strategy) { @@ -2667,6 +2677,7 @@ static size_t ZSTD_checkDictNCount(short* normalizedCounter, unsigned dictMaxSym */ static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs, ZSTD_matchState_t* ms, + ZSTD_cwksp* ws, ZSTD_CCtx_params const* params, const void* dict, size_t dictSize, ZSTD_dictTableLoadMethod_e dtlm, @@ -2762,7 +2773,8 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs, bs->entropy.fse.offcode_repeatMode = FSE_repeat_valid; bs->entropy.fse.matchlength_repeatMode = FSE_repeat_valid; bs->entropy.fse.litlength_repeatMode = FSE_repeat_valid; - FORWARD_IF_ERROR(ZSTD_loadDictionaryContent(ms, params, dictPtr, dictContentSize, dtlm)); + FORWARD_IF_ERROR(ZSTD_loadDictionaryContent( + ms, ws, params, dictPtr, dictContentSize, dtlm)); return dictID; } } @@ -2772,6 +2784,7 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs, static size_t ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs, ZSTD_matchState_t* ms, + ZSTD_cwksp* ws, const ZSTD_CCtx_params* params, const void* dict, size_t dictSize, ZSTD_dictContentType_e dictContentType, @@ -2785,19 +2798,21 @@ ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs, /* dict restricted modes */ if (dictContentType == ZSTD_dct_rawContent) - return ZSTD_loadDictionaryContent(ms, params, dict, dictSize, dtlm); + return ZSTD_loadDictionaryContent(ms, ws, params, dict, dictSize, dtlm); if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) { if (dictContentType == ZSTD_dct_auto) { DEBUGLOG(4, "raw content dictionary detected"); - return ZSTD_loadDictionaryContent(ms, params, dict, dictSize, dtlm); + return ZSTD_loadDictionaryContent( + ms, ws, params, dict, dictSize, dtlm); } RETURN_ERROR_IF(dictContentType == ZSTD_dct_fullDict, dictionary_wrong); assert(0); /* impossible */ } /* dict as full zstd dictionary */ - return ZSTD_loadZstdDictionary(bs, ms, params, dict, dictSize, dtlm, workspace); + return ZSTD_loadZstdDictionary( + bs, ms, ws, params, dict, dictSize, dtlm, workspace); } /*! ZSTD_compressBegin_internal() : @@ -2807,23 +2822,24 @@ static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx, ZSTD_dictContentType_e dictContentType, ZSTD_dictTableLoadMethod_e dtlm, const ZSTD_CDict* cdict, - ZSTD_CCtx_params params, U64 pledgedSrcSize, + const ZSTD_CCtx_params* params, U64 pledgedSrcSize, ZSTD_buffered_policy_e zbuff) { - DEBUGLOG(4, "ZSTD_compressBegin_internal: wlog=%u", params.cParams.windowLog); + DEBUGLOG(4, "ZSTD_compressBegin_internal: wlog=%u", params->cParams.windowLog); /* params are supposed to be fully validated at this point */ - assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams))); + assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams))); assert(!((dict) && (cdict))); /* either dict or cdict, not both */ if (cdict && cdict->dictContentSize>0) { return ZSTD_resetCCtx_usingCDict(cctx, cdict, params, pledgedSrcSize, zbuff); } - FORWARD_IF_ERROR( ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize, - ZSTDcrp_continue, zbuff) ); + FORWARD_IF_ERROR( ZSTD_resetCCtx_internal(cctx, *params, pledgedSrcSize, + ZSTDcrp_makeClean, zbuff) ); { size_t const dictID = ZSTD_compress_insertDictionary( cctx->blockState.prevCBlock, &cctx->blockState.matchState, - ¶ms, dict, dictSize, dictContentType, dtlm, cctx->entropyWorkspace); + &cctx->workspace, params, dict, dictSize, dictContentType, dtlm, + cctx->entropyWorkspace); FORWARD_IF_ERROR(dictID); assert(dictID <= UINT_MAX); cctx->dictID = (U32)dictID; @@ -2836,12 +2852,12 @@ size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx, ZSTD_dictContentType_e dictContentType, ZSTD_dictTableLoadMethod_e dtlm, const ZSTD_CDict* cdict, - ZSTD_CCtx_params params, + const ZSTD_CCtx_params* params, unsigned long long pledgedSrcSize) { - DEBUGLOG(4, "ZSTD_compressBegin_advanced_internal: wlog=%u", params.cParams.windowLog); + DEBUGLOG(4, "ZSTD_compressBegin_advanced_internal: wlog=%u", params->cParams.windowLog); /* compression parameters verification and optimization */ - FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) ); + FORWARD_IF_ERROR( ZSTD_checkCParams(params->cParams) ); return ZSTD_compressBegin_internal(cctx, dict, dictSize, dictContentType, dtlm, cdict, @@ -2856,21 +2872,21 @@ size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, ZSTD_parameters params, unsigned long long pledgedSrcSize) { ZSTD_CCtx_params const cctxParams = - ZSTD_assignParamsToCCtxParams(cctx->requestedParams, params); + ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, params); return ZSTD_compressBegin_advanced_internal(cctx, dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL /*cdict*/, - cctxParams, pledgedSrcSize); + &cctxParams, pledgedSrcSize); } size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel) { ZSTD_parameters const params = ZSTD_getParams(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize); ZSTD_CCtx_params const cctxParams = - ZSTD_assignParamsToCCtxParams(cctx->requestedParams, params); + ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, params); DEBUGLOG(4, "ZSTD_compressBegin_usingDict (dictSize=%u)", (unsigned)dictSize); return ZSTD_compressBegin_internal(cctx, dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL, - cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, ZSTDb_not_buffered); + &cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, ZSTDb_not_buffered); } size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel) @@ -2893,7 +2909,7 @@ static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity) /* special case : empty frame */ if (cctx->stage == ZSTDcs_init) { - fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, cctx->appliedParams, 0, 0); + fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, &cctx->appliedParams, 0, 0); FORWARD_IF_ERROR(fhSize); dstCapacity -= fhSize; op += fhSize; @@ -2954,13 +2970,13 @@ static size_t ZSTD_compress_internal (ZSTD_CCtx* cctx, ZSTD_parameters params) { ZSTD_CCtx_params const cctxParams = - ZSTD_assignParamsToCCtxParams(cctx->requestedParams, params); + ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, params); DEBUGLOG(4, "ZSTD_compress_internal"); return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, dict, dictSize, - cctxParams); + &cctxParams); } size_t ZSTD_compress_advanced (ZSTD_CCtx* cctx, @@ -2984,7 +3000,7 @@ size_t ZSTD_compress_advanced_internal( void* dst, size_t dstCapacity, const void* src, size_t srcSize, const void* dict,size_t dictSize, - ZSTD_CCtx_params params) + const ZSTD_CCtx_params* params) { DEBUGLOG(4, "ZSTD_compress_advanced_internal (srcSize:%u)", (unsigned)srcSize); FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx, @@ -3000,9 +3016,9 @@ size_t ZSTD_compress_usingDict(ZSTD_CCtx* cctx, int compressionLevel) { ZSTD_parameters const params = ZSTD_getParams(compressionLevel, srcSize + (!srcSize), dict ? dictSize : 0); - ZSTD_CCtx_params cctxParams = ZSTD_assignParamsToCCtxParams(cctx->requestedParams, params); + ZSTD_CCtx_params cctxParams = ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, params); assert(params.fParams.contentSizeFlag == 1); - return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, dict, dictSize, cctxParams); + return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, dict, dictSize, &cctxParams); } size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx, @@ -3038,7 +3054,7 @@ size_t ZSTD_estimateCDictSize_advanced( { DEBUGLOG(5, "sizeof(ZSTD_CDict) : %u", (unsigned)sizeof(ZSTD_CDict)); return sizeof(ZSTD_CDict) + HUF_WORKSPACE_SIZE + ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0) - + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize); + + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : ZSTD_cwksp_align(dictSize, sizeof(void *))); } size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel) @@ -3051,7 +3067,9 @@ size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict) { if (cdict==NULL) return 0; /* support sizeof on NULL */ DEBUGLOG(5, "sizeof(*cdict) : %u", (unsigned)sizeof(*cdict)); - return cdict->workspaceSize + (cdict->dictBuffer ? cdict->dictContentSize : 0) + sizeof(*cdict); + /* cdict may be in the workspace */ + return (cdict->workspace.workspace == cdict ? 0 : sizeof(*cdict)) + + ZSTD_cwksp_sizeof(&cdict->workspace); } static size_t ZSTD_initCDict_internal( @@ -3065,26 +3083,27 @@ static size_t ZSTD_initCDict_internal( assert(!ZSTD_checkCParams(cParams)); cdict->matchState.cParams = cParams; if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dictBuffer) || (!dictSize)) { - cdict->dictBuffer = NULL; cdict->dictContent = dictBuffer; } else { - void* const internalBuffer = ZSTD_malloc(dictSize, cdict->customMem); - cdict->dictBuffer = internalBuffer; - cdict->dictContent = internalBuffer; + void *internalBuffer = ZSTD_cwksp_reserve_object(&cdict->workspace, ZSTD_cwksp_align(dictSize, sizeof(void*))); RETURN_ERROR_IF(!internalBuffer, memory_allocation); + cdict->dictContent = internalBuffer; memcpy(internalBuffer, dictBuffer, dictSize); } cdict->dictContentSize = dictSize; + cdict->entropyWorkspace = (U32*)ZSTD_cwksp_reserve_object(&cdict->workspace, HUF_WORKSPACE_SIZE); + + /* Reset the state to no dictionary */ ZSTD_reset_compressedBlockState(&cdict->cBlockState); - { void* const end = ZSTD_reset_matchState(&cdict->matchState, - (U32*)cdict->workspace + HUF_WORKSPACE_SIZE_U32, - &cParams, - ZSTDcrp_continue, ZSTD_resetTarget_CDict); - assert(end == (char*)cdict->workspace + cdict->workspaceSize); - (void)end; - } + FORWARD_IF_ERROR(ZSTD_reset_matchState( + &cdict->matchState, + &cdict->workspace, + &cParams, + ZSTDcrp_makeClean, + ZSTDirp_reset, + ZSTD_resetTarget_CDict)); /* (Maybe) load the dictionary * Skips loading the dictionary if it is <= 8 bytes. */ @@ -3094,9 +3113,9 @@ static size_t ZSTD_initCDict_internal( params.fParams.contentSizeFlag = 1; params.cParams = cParams; { size_t const dictID = ZSTD_compress_insertDictionary( - &cdict->cBlockState, &cdict->matchState, ¶ms, - cdict->dictContent, cdict->dictContentSize, - dictContentType, ZSTD_dtlm_full, cdict->workspace); + &cdict->cBlockState, &cdict->matchState, &cdict->workspace, + ¶ms, cdict->dictContent, cdict->dictContentSize, + dictContentType, ZSTD_dtlm_full, cdict->entropyWorkspace); FORWARD_IF_ERROR(dictID); assert(dictID <= (size_t)(U32)-1); cdict->dictID = (U32)dictID; @@ -3114,18 +3133,27 @@ ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize, DEBUGLOG(3, "ZSTD_createCDict_advanced, mode %u", (unsigned)dictContentType); if (!customMem.customAlloc ^ !customMem.customFree) return NULL; - { ZSTD_CDict* const cdict = (ZSTD_CDict*)ZSTD_malloc(sizeof(ZSTD_CDict), customMem); - size_t const workspaceSize = HUF_WORKSPACE_SIZE + ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0); + { size_t const workspaceSize = + sizeof(ZSTD_CDict) + + HUF_WORKSPACE_SIZE + + ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0) + + (dictLoadMethod == ZSTD_dlm_byRef ? 0 + : ZSTD_cwksp_align(dictSize, sizeof(void*))); void* const workspace = ZSTD_malloc(workspaceSize, customMem); + ZSTD_cwksp ws; + ZSTD_CDict* cdict; - if (!cdict || !workspace) { - ZSTD_free(cdict, customMem); + if (!workspace) { ZSTD_free(workspace, customMem); return NULL; } + + ZSTD_cwksp_init(&ws, workspace, workspaceSize); + + cdict = (ZSTD_CDict*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CDict)); + assert(cdict != NULL); + ZSTD_cwksp_move(&cdict->workspace, &ws); cdict->customMem = customMem; - cdict->workspace = workspace; - cdict->workspaceSize = workspaceSize; if (ZSTD_isError( ZSTD_initCDict_internal(cdict, dictBuffer, dictSize, dictLoadMethod, dictContentType, @@ -3158,8 +3186,11 @@ size_t ZSTD_freeCDict(ZSTD_CDict* cdict) { if (cdict==NULL) return 0; /* support free on NULL */ { ZSTD_customMem const cMem = cdict->customMem; - ZSTD_free(cdict->workspace, cMem); - ZSTD_free(cdict->dictBuffer, cMem); + /* Only free workspace if cdict not in workspace, otherwise the + * workspace will be freed when the cdict itself is freed. */ + if ((void*)cdict->workspace.workspace != (void*)cdict) { + ZSTD_cwksp_free(&cdict->workspace, cMem); + } ZSTD_free(cdict, cMem); return 0; } @@ -3186,28 +3217,27 @@ const ZSTD_CDict* ZSTD_initStaticCDict( ZSTD_compressionParameters cParams) { size_t const matchStateSize = ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0); - size_t const neededSize = sizeof(ZSTD_CDict) + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize) + size_t const neededSize = sizeof(ZSTD_CDict) + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : ZSTD_cwksp_align(dictSize, sizeof(void*))) + HUF_WORKSPACE_SIZE + matchStateSize; - ZSTD_CDict* const cdict = (ZSTD_CDict*) workspace; - void* ptr; + ZSTD_CDict* cdict; + if ((size_t)workspace & 7) return NULL; /* 8-aligned */ + + { + ZSTD_cwksp ws; + ZSTD_cwksp_init(&ws, workspace, workspaceSize); + cdict = (ZSTD_CDict*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CDict)); + if (cdict == NULL) return NULL; + ZSTD_cwksp_move(&cdict->workspace, &ws); + } + DEBUGLOG(4, "(workspaceSize < neededSize) : (%u < %u) => %u", (unsigned)workspaceSize, (unsigned)neededSize, (unsigned)(workspaceSize < neededSize)); if (workspaceSize < neededSize) return NULL; - if (dictLoadMethod == ZSTD_dlm_byCopy) { - memcpy(cdict+1, dict, dictSize); - dict = cdict+1; - ptr = (char*)workspace + sizeof(ZSTD_CDict) + dictSize; - } else { - ptr = cdict+1; - } - cdict->workspace = ptr; - cdict->workspaceSize = HUF_WORKSPACE_SIZE + matchStateSize; - if (ZSTD_isError( ZSTD_initCDict_internal(cdict, dict, dictSize, - ZSTD_dlm_byRef, dictContentType, + dictLoadMethod, dictContentType, cParams) )) return NULL; @@ -3243,7 +3273,7 @@ size_t ZSTD_compressBegin_usingCDict_advanced( return ZSTD_compressBegin_internal(cctx, NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast, cdict, - params, pledgedSrcSize, + ¶ms, pledgedSrcSize, ZSTDb_not_buffered); } } @@ -3334,7 +3364,7 @@ static size_t ZSTD_resetCStream_internal(ZSTD_CStream* cctx, FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx, dict, dictSize, dictContentType, ZSTD_dtlm_fast, cdict, - params, pledgedSrcSize, + ¶ms, pledgedSrcSize, ZSTDb_buffered) ); cctx->inToCompress = 0; @@ -3368,13 +3398,14 @@ size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pss) * Assumption 2 : either dict, or cdict, is defined, not both */ size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs, const void* dict, size_t dictSize, const ZSTD_CDict* cdict, - ZSTD_CCtx_params params, unsigned long long pledgedSrcSize) + const ZSTD_CCtx_params* params, + unsigned long long pledgedSrcSize) { DEBUGLOG(4, "ZSTD_initCStream_internal"); FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) ); FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) ); - assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams))); - zcs->requestedParams = params; + assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams))); + zcs->requestedParams = *params; assert(!((dict) && (cdict))); /* either dict or cdict, not both */ if (dict) { FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) ); @@ -3427,7 +3458,7 @@ size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) ); FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) ); FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) ); - zcs->requestedParams = ZSTD_assignParamsToCCtxParams(zcs->requestedParams, params); + zcs->requestedParams = ZSTD_assignParamsToCCtxParams(&zcs->requestedParams, params); FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) ); return 0; } diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h index 3a5c7f2d1b..1140945b1e 100644 --- a/lib/compress/zstd_compress_internal.h +++ b/lib/compress/zstd_compress_internal.h @@ -19,6 +19,7 @@ * Dependencies ***************************************/ #include "zstd_internal.h" +#include "zstd_cwksp.h" #ifdef ZSTD_MULTITHREAD # include "zstdmt_compress.h" #endif @@ -203,6 +204,9 @@ struct ZSTD_CCtx_params_s { size_t targetCBlockSize; /* Tries to fit compressed block size to be around targetCBlockSize. * No target when targetCBlockSize == 0. * There is no guarantee on compressed block size */ + int srcSizeHint; /* User's best guess of source size. + * Hint is not valid when srcSizeHint == 0. + * There is no guarantee that hint is close to actual source size */ ZSTD_dictAttachPref_e attachDictPref; ZSTD_literalCompressionMode_e literalCompressionMode; @@ -228,9 +232,7 @@ struct ZSTD_CCtx_s { ZSTD_CCtx_params appliedParams; U32 dictID; - int workSpaceOversizedDuration; - void* workSpace; - size_t workSpaceSize; + ZSTD_cwksp workspace; /* manages buffer for dynamic allocations */ size_t blockSize; unsigned long long pledgedSrcSizePlusOne; /* this way, 0 (default) == unknown */ unsigned long long consumedSrcSize; @@ -338,26 +340,57 @@ MEM_STATIC size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat) return (srcSize >> minlog) + 2; } +/*! ZSTD_safecopyLiterals() : + * memcpy() function that won't read beyond more than WILDCOPY_OVERLENGTH bytes past ilimit_w. + * Only called when the sequence ends past ilimit_w, so it only needs to be optimized for single + * large copies. + */ +static void ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const iend, BYTE const* ilimit_w) { + assert(iend > ilimit_w); + if (ip <= ilimit_w) { + ZSTD_wildcopy(op, ip, ilimit_w - ip, ZSTD_no_overlap); + op += ilimit_w - ip; + ip = ilimit_w; + } + while (ip < iend) *op++ = *ip++; +} + /*! ZSTD_storeSeq() : - * Store a sequence (literal length, literals, offset code and match length code) into seqStore_t. - * `offsetCode` : distance to match + 3 (values 1-3 are repCodes). + * Store a sequence (litlen, litPtr, offCode and mlBase) into seqStore_t. + * `offCode` : distance to match + ZSTD_REP_MOVE (values <= ZSTD_REP_MOVE are repCodes). * `mlBase` : matchLength - MINMATCH + * Allowed to overread literals up to litLimit. */ -MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const void* literals, U32 offsetCode, size_t mlBase) +HINT_INLINE UNUSED_ATTR +void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* literals, const BYTE* litLimit, U32 offCode, size_t mlBase) { + BYTE const* const litLimit_w = litLimit - WILDCOPY_OVERLENGTH; + BYTE const* const litEnd = literals + litLength; #if defined(DEBUGLEVEL) && (DEBUGLEVEL >= 6) static const BYTE* g_start = NULL; if (g_start==NULL) g_start = (const BYTE*)literals; /* note : index only works for compression within a single segment */ { U32 const pos = (U32)((const BYTE*)literals - g_start); DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offCode%7u", - pos, (U32)litLength, (U32)mlBase+MINMATCH, (U32)offsetCode); + pos, (U32)litLength, (U32)mlBase+MINMATCH, (U32)offCode); } #endif assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq); /* copy Literals */ assert(seqStorePtr->maxNbLit <= 128 KB); assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + seqStorePtr->maxNbLit); - ZSTD_wildcopy(seqStorePtr->lit, literals, (ptrdiff_t)litLength, ZSTD_no_overlap); + assert(literals + litLength <= litLimit); + if (litEnd <= litLimit_w) { + /* Common case we can use wildcopy. + * First copy 16 bytes, because literals are likely short. + */ + assert(WILDCOPY_OVERLENGTH >= 16); + ZSTD_copy16(seqStorePtr->lit, literals); + if (litLength > 16) { + ZSTD_wildcopy(seqStorePtr->lit+16, literals+16, (ptrdiff_t)litLength-16, ZSTD_no_overlap); + } + } else { + ZSTD_safecopyLiterals(seqStorePtr->lit, literals, litEnd, litLimit_w); + } seqStorePtr->lit += litLength; /* literal Length */ @@ -369,7 +402,7 @@ MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const v seqStorePtr->sequences[0].litLength = (U16)litLength; /* match offset */ - seqStorePtr->sequences[0].offset = offsetCode + 1; + seqStorePtr->sequences[0].offset = offCode + 1; /* match Length */ if (mlBase>0xFFFF) { @@ -911,7 +944,7 @@ ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams( size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs, const void* dict, size_t dictSize, const ZSTD_CDict* cdict, - ZSTD_CCtx_params params, unsigned long long pledgedSrcSize); + const ZSTD_CCtx_params* params, unsigned long long pledgedSrcSize); void ZSTD_resetSeqStore(seqStore_t* ssPtr); @@ -926,7 +959,7 @@ size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx, ZSTD_dictContentType_e dictContentType, ZSTD_dictTableLoadMethod_e dtlm, const ZSTD_CDict* cdict, - ZSTD_CCtx_params params, + const ZSTD_CCtx_params* params, unsigned long long pledgedSrcSize); /* ZSTD_compress_advanced_internal() : @@ -935,7 +968,7 @@ size_t ZSTD_compress_advanced_internal(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, const void* dict,size_t dictSize, - ZSTD_CCtx_params params); + const ZSTD_CCtx_params* params); /* ZSTD_writeLastEmptyBlock() : diff --git a/lib/compress/zstd_compress_literals.c b/lib/compress/zstd_compress_literals.c index eb3e5a44bc..6c13331182 100644 --- a/lib/compress/zstd_compress_literals.c +++ b/lib/compress/zstd_compress_literals.c @@ -70,7 +70,7 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf, ZSTD_strategy strategy, int disableLiteralCompression, void* dst, size_t dstCapacity, const void* src, size_t srcSize, - void* workspace, size_t wkspSize, + void* entropyWorkspace, size_t entropyWorkspaceSize, const int bmi2) { size_t const minGain = ZSTD_minGain(srcSize, strategy); @@ -99,10 +99,15 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf, { HUF_repeat repeat = prevHuf->repeatMode; int const preferRepeat = strategy < ZSTD_lazy ? srcSize <= 1024 : 0; if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1; - cLitSize = singleStream ? HUF_compress1X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11, - workspace, wkspSize, (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2) - : HUF_compress4X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11, - workspace, wkspSize, (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2); + cLitSize = singleStream ? + HUF_compress1X_repeat( + ostart+lhSize, dstCapacity-lhSize, src, srcSize, + 255, 11, entropyWorkspace, entropyWorkspaceSize, + (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2) : + HUF_compress4X_repeat( + ostart+lhSize, dstCapacity-lhSize, src, srcSize, + 255, 11, entropyWorkspace, entropyWorkspaceSize, + (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2); if (repeat != HUF_repeat_none) { /* reused the existing table */ hType = set_repeat; diff --git a/lib/compress/zstd_compress_literals.h b/lib/compress/zstd_compress_literals.h index 7adbecc0be..97273d7cfd 100644 --- a/lib/compress/zstd_compress_literals.h +++ b/lib/compress/zstd_compress_literals.h @@ -23,7 +23,7 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf, ZSTD_strategy strategy, int disableLiteralCompression, void* dst, size_t dstCapacity, const void* src, size_t srcSize, - void* workspace, size_t wkspSize, + void* entropyWorkspace, size_t entropyWorkspaceSize, const int bmi2); #endif /* ZSTD_COMPRESS_LITERALS_H */ diff --git a/lib/compress/zstd_compress_sequences.c b/lib/compress/zstd_compress_sequences.c index 3c3deae08c..0ff7a26823 100644 --- a/lib/compress/zstd_compress_sequences.c +++ b/lib/compress/zstd_compress_sequences.c @@ -222,7 +222,7 @@ ZSTD_buildCTable(void* dst, size_t dstCapacity, const BYTE* codeTable, size_t nbSeq, const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax, const FSE_CTable* prevCTable, size_t prevCTableSize, - void* workspace, size_t workspaceSize) + void* entropyWorkspace, size_t entropyWorkspaceSize) { BYTE* op = (BYTE*)dst; const BYTE* const oend = op + dstCapacity; @@ -238,7 +238,7 @@ ZSTD_buildCTable(void* dst, size_t dstCapacity, memcpy(nextCTable, prevCTable, prevCTableSize); return 0; case set_basic: - FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, defaultNorm, defaultMax, defaultNormLog, workspace, workspaceSize)); /* note : could be pre-calculated */ + FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, defaultNorm, defaultMax, defaultNormLog, entropyWorkspace, entropyWorkspaceSize)); /* note : could be pre-calculated */ return 0; case set_compressed: { S16 norm[MaxSeq + 1]; @@ -252,7 +252,7 @@ ZSTD_buildCTable(void* dst, size_t dstCapacity, FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max)); { size_t const NCountSize = FSE_writeNCount(op, oend - op, norm, max, tableLog); /* overflow protected */ FORWARD_IF_ERROR(NCountSize); - FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, norm, max, tableLog, workspace, workspaceSize)); + FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, norm, max, tableLog, entropyWorkspace, entropyWorkspaceSize)); return NCountSize; } } diff --git a/lib/compress/zstd_compress_sequences.h b/lib/compress/zstd_compress_sequences.h index f5234d94c8..57e8e367b0 100644 --- a/lib/compress/zstd_compress_sequences.h +++ b/lib/compress/zstd_compress_sequences.h @@ -35,7 +35,7 @@ ZSTD_buildCTable(void* dst, size_t dstCapacity, const BYTE* codeTable, size_t nbSeq, const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax, const FSE_CTable* prevCTable, size_t prevCTableSize, - void* workspace, size_t workspaceSize); + void* entropyWorkspace, size_t entropyWorkspaceSize); size_t ZSTD_encodeSequences( void* dst, size_t dstCapacity, diff --git a/lib/compress/zstd_cwksp.h b/lib/compress/zstd_cwksp.h new file mode 100644 index 0000000000..39d064c490 --- /dev/null +++ b/lib/compress/zstd_cwksp.h @@ -0,0 +1,449 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_CWKSP_H +#define ZSTD_CWKSP_H + +/*-************************************* +* Dependencies +***************************************/ +#include "zstd_internal.h" + +#if defined (__cplusplus) +extern "C" { +#endif + +/*-************************************* +* Constants +***************************************/ + +/* define "workspace is too large" as this number of times larger than needed */ +#define ZSTD_WORKSPACETOOLARGE_FACTOR 3 + +/* when workspace is continuously too large + * during at least this number of times, + * context's memory usage is considered wasteful, + * because it's sized to handle a worst case scenario which rarely happens. + * In which case, resize it down to free some memory */ +#define ZSTD_WORKSPACETOOLARGE_MAXDURATION 128 + +/*-************************************* +* Structures +***************************************/ +typedef enum { + ZSTD_cwksp_alloc_objects, + ZSTD_cwksp_alloc_buffers, + ZSTD_cwksp_alloc_aligned +} ZSTD_cwksp_alloc_phase_e; + +/** + * Zstd fits all its internal datastructures into a single continuous buffer, + * so that it only needs to perform a single OS allocation (or so that a buffer + * can be provided to it and it can perform no allocations at all). This buffer + * is called the workspace. + * + * Several optimizations complicate that process of allocating memory ranges + * from this workspace for each internal datastructure: + * + * - These different internal datastructures have different setup requirements: + * + * - The static objects need to be cleared once and can then be trivially + * reused for each compression. + * + * - Various buffers don't need to be initialized at all--they are always + * written into before they're read. + * + * - The matchstate tables have a unique requirement that they don't need + * their memory to be totally cleared, but they do need the memory to have + * some bound, i.e., a guarantee that all values in the memory they've been + * allocated is less than some maximum value (which is the starting value + * for the indices that they will then use for compression). When this + * guarantee is provided to them, they can use the memory without any setup + * work. When it can't, they have to clear the area. + * + * - These buffers also have different alignment requirements. + * + * - We would like to reuse the objects in the workspace for multiple + * compressions without having to perform any expensive reallocation or + * reinitialization work. + * + * - We would like to be able to efficiently reuse the workspace across + * multiple compressions **even when the compression parameters change** and + * we need to resize some of the objects (where possible). + * + * To attempt to manage this buffer, given these constraints, the ZSTD_cwksp + * abstraction was created. It works as follows: + * + * Workspace Layout: + * + * [ ... workspace ... ] + * [objects][tables ... ->] free space [<- ... aligned][<- ... buffers] + * + * The various objects that live in the workspace are divided into the + * following categories, and are allocated separately: + * + * - Static objects: this is optionally the enclosing ZSTD_CCtx or ZSTD_CDict, + * so that literally everything fits in a single buffer. Note: if present, + * this must be the first object in the workspace, since ZSTD_free{CCtx, + * CDict}() rely on a pointer comparison to see whether one or two frees are + * required. + * + * - Fixed size objects: these are fixed-size, fixed-count objects that are + * nonetheless "dynamically" allocated in the workspace so that we can + * control how they're initialized separately from the broader ZSTD_CCtx. + * Examples: + * - Entropy Workspace + * - 2 x ZSTD_compressedBlockState_t + * - CDict dictionary contents + * + * - Tables: these are any of several different datastructures (hash tables, + * chain tables, binary trees) that all respect a common format: they are + * uint32_t arrays, all of whose values are between 0 and (nextSrc - base). + * Their sizes depend on the cparams. + * + * - Aligned: these buffers are used for various purposes that require 4 byte + * alignment, but don't require any initialization before they're used. + * + * - Buffers: these buffers are used for various purposes that don't require + * any alignment or initialization before they're used. This means they can + * be moved around at no cost for a new compression. + * + * Allocating Memory: + * + * The various types of objects must be allocated in order, so they can be + * correctly packed into the workspace buffer. That order is: + * + * 1. Objects + * 2. Buffers + * 3. Aligned + * 4. Tables + * + * Attempts to reserve objects of different types out of order will fail. + */ +typedef struct { + void* workspace; + void* workspaceEnd; + + void* objectEnd; + void* tableEnd; + void* tableValidEnd; + void* allocStart; + + int allocFailed; + int workspaceOversizedDuration; + ZSTD_cwksp_alloc_phase_e phase; +} ZSTD_cwksp; + +/*-************************************* +* Functions +***************************************/ + +MEM_STATIC size_t ZSTD_cwksp_available_space(ZSTD_cwksp* ws); + +MEM_STATIC void ZSTD_cwksp_assert_internal_consistency(ZSTD_cwksp* ws) { + (void)ws; + assert(ws->workspace <= ws->objectEnd); + assert(ws->objectEnd <= ws->tableEnd); + assert(ws->objectEnd <= ws->tableValidEnd); + assert(ws->tableEnd <= ws->allocStart); + assert(ws->tableValidEnd <= ws->allocStart); + assert(ws->allocStart <= ws->workspaceEnd); +} + +/** + * Align must be a power of 2. + */ +MEM_STATIC size_t ZSTD_cwksp_align(size_t size, size_t const align) { + size_t const mask = align - 1; + assert((align & mask) == 0); + return (size + mask) & ~mask; +} + +MEM_STATIC void ZSTD_cwksp_internal_advance_phase( + ZSTD_cwksp* ws, ZSTD_cwksp_alloc_phase_e phase) { + assert(phase >= ws->phase); + if (phase > ws->phase) { + if (ws->phase < ZSTD_cwksp_alloc_buffers && + phase >= ZSTD_cwksp_alloc_buffers) { + ws->tableValidEnd = ws->objectEnd; + } + if (ws->phase < ZSTD_cwksp_alloc_aligned && + phase >= ZSTD_cwksp_alloc_aligned) { + /* If unaligned allocations down from a too-large top have left us + * unaligned, we need to realign our alloc ptr. Technically, this + * can consume space that is unaccounted for in the neededSpace + * calculation. However, I believe this can only happen when the + * workspace is too large, and specifically when it is too large + * by a larger margin than the space that will be consumed. */ + /* TODO: cleaner, compiler warning friendly way to do this??? */ + ws->allocStart = (BYTE*)ws->allocStart - ((size_t)ws->allocStart & (sizeof(U32)-1)); + if (ws->allocStart < ws->tableValidEnd) { + ws->tableValidEnd = ws->allocStart; + } + } + ws->phase = phase; + } +} + +/** + * Internal function. Do not use directly. + */ +MEM_STATIC void* ZSTD_cwksp_reserve_internal( + ZSTD_cwksp* ws, size_t bytes, ZSTD_cwksp_alloc_phase_e phase) { + void* alloc; + void* bottom = ws->tableEnd; + ZSTD_cwksp_internal_advance_phase(ws, phase); + alloc = (BYTE *)ws->allocStart - bytes; + DEBUGLOG(5, "cwksp: reserving %zd bytes, %zd bytes remaining", + bytes, ZSTD_cwksp_available_space(ws) - bytes); + ZSTD_cwksp_assert_internal_consistency(ws); + assert(alloc >= bottom); + if (alloc < bottom) { + DEBUGLOG(4, "cwksp: alloc failed!"); + ws->allocFailed = 1; + return NULL; + } + if (alloc < ws->tableValidEnd) { + ws->tableValidEnd = alloc; + } + ws->allocStart = alloc; + return alloc; +} + +/** + * Reserves and returns unaligned memory. + */ +MEM_STATIC BYTE* ZSTD_cwksp_reserve_buffer(ZSTD_cwksp* ws, size_t bytes) { + return (BYTE*)ZSTD_cwksp_reserve_internal(ws, bytes, ZSTD_cwksp_alloc_buffers); +} + +/** + * Reserves and returns memory sized on and aligned on sizeof(unsigned). + */ +MEM_STATIC void* ZSTD_cwksp_reserve_aligned(ZSTD_cwksp* ws, size_t bytes) { + assert((bytes & (sizeof(U32)-1)) == 0); + return ZSTD_cwksp_reserve_internal(ws, ZSTD_cwksp_align(bytes, sizeof(U32)), ZSTD_cwksp_alloc_aligned); +} + +/** + * Aligned on sizeof(unsigned). These buffers have the special property that + * their values remain constrained, allowing us to re-use them without + * memset()-ing them. + */ +MEM_STATIC void* ZSTD_cwksp_reserve_table(ZSTD_cwksp* ws, size_t bytes) { + const ZSTD_cwksp_alloc_phase_e phase = ZSTD_cwksp_alloc_aligned; + void* alloc = ws->tableEnd; + void* end = (BYTE *)alloc + bytes; + void* top = ws->allocStart; + DEBUGLOG(5, "cwksp: reserving table %zd bytes, %zd bytes remaining", + bytes, ZSTD_cwksp_available_space(ws) - bytes); + assert((bytes & (sizeof(U32)-1)) == 0); + ZSTD_cwksp_internal_advance_phase(ws, phase); + ZSTD_cwksp_assert_internal_consistency(ws); + assert(end <= top); + if (end > top) { + DEBUGLOG(4, "cwksp: table alloc failed!"); + ws->allocFailed = 1; + return NULL; + } + ws->tableEnd = end; + return alloc; +} + +/** + * Aligned on sizeof(void*). + */ +MEM_STATIC void* ZSTD_cwksp_reserve_object(ZSTD_cwksp* ws, size_t bytes) { + size_t roundedBytes = ZSTD_cwksp_align(bytes, sizeof(void*)); + void* start = ws->objectEnd; + void* end = (BYTE*)start + roundedBytes; + DEBUGLOG(5, + "cwksp: reserving object %zd bytes (rounded to %zd), %zd bytes remaining", + bytes, roundedBytes, ZSTD_cwksp_available_space(ws) - roundedBytes); + assert(((size_t)start & (sizeof(void*)-1)) == 0); + assert((bytes & (sizeof(void*)-1)) == 0); + ZSTD_cwksp_assert_internal_consistency(ws); + /* we must be in the first phase, no advance is possible */ + if (ws->phase != ZSTD_cwksp_alloc_objects || end > ws->workspaceEnd) { + DEBUGLOG(4, "cwksp: object alloc failed!"); + ws->allocFailed = 1; + return NULL; + } + ws->objectEnd = end; + ws->tableEnd = end; + ws->tableValidEnd = end; + return start; +} + +MEM_STATIC void ZSTD_cwksp_mark_tables_dirty(ZSTD_cwksp* ws) { + DEBUGLOG(4, "cwksp: ZSTD_cwksp_mark_tables_dirty"); + +#if defined (MEMORY_SANITIZER) && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE) + /* To validate that the table re-use logic is sound, and that we don't + * access table space that we haven't cleaned, we re-"poison" the table + * space every time we mark it dirty. */ + { + size_t size = (BYTE*)ws->tableValidEnd - (BYTE*)ws->objectEnd; + assert(__msan_test_shadow(ws->objectEnd, size) == -1); + __msan_poison(ws->objectEnd, size); + } +#endif + + assert(ws->tableValidEnd >= ws->objectEnd); + assert(ws->tableValidEnd <= ws->allocStart); + ws->tableValidEnd = ws->objectEnd; + ZSTD_cwksp_assert_internal_consistency(ws); +} + +MEM_STATIC void ZSTD_cwksp_mark_tables_clean(ZSTD_cwksp* ws) { + DEBUGLOG(4, "cwksp: ZSTD_cwksp_mark_tables_clean"); + assert(ws->tableValidEnd >= ws->objectEnd); + assert(ws->tableValidEnd <= ws->allocStart); + if (ws->tableValidEnd < ws->tableEnd) { + ws->tableValidEnd = ws->tableEnd; + } + ZSTD_cwksp_assert_internal_consistency(ws); +} + +/** + * Zero the part of the allocated tables not already marked clean. + */ +MEM_STATIC void ZSTD_cwksp_clean_tables(ZSTD_cwksp* ws) { + DEBUGLOG(4, "cwksp: ZSTD_cwksp_clean_tables"); + assert(ws->tableValidEnd >= ws->objectEnd); + assert(ws->tableValidEnd <= ws->allocStart); + if (ws->tableValidEnd < ws->tableEnd) { + memset(ws->tableValidEnd, 0, (BYTE*)ws->tableEnd - (BYTE*)ws->tableValidEnd); + } + ZSTD_cwksp_mark_tables_clean(ws); +} + +/** + * Invalidates table allocations. + * All other allocations remain valid. + */ +MEM_STATIC void ZSTD_cwksp_clear_tables(ZSTD_cwksp* ws) { + DEBUGLOG(4, "cwksp: clearing tables!"); + ws->tableEnd = ws->objectEnd; + ZSTD_cwksp_assert_internal_consistency(ws); +} + +/** + * Invalidates all buffer, aligned, and table allocations. + * Object allocations remain valid. + */ +MEM_STATIC void ZSTD_cwksp_clear(ZSTD_cwksp* ws) { + DEBUGLOG(4, "cwksp: clearing!"); + +#if defined (MEMORY_SANITIZER) && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE) + /* To validate that the context re-use logic is sound, and that we don't + * access stuff that this compression hasn't initialized, we re-"poison" + * the workspace (or at least the non-static, non-table parts of it) + * every time we start a new compression. */ + { + size_t size = (BYTE*)ws->workspaceEnd - (BYTE*)ws->tableValidEnd; + __msan_poison(ws->tableValidEnd, size); + } +#endif + + ws->tableEnd = ws->objectEnd; + ws->allocStart = ws->workspaceEnd; + ws->allocFailed = 0; + if (ws->phase > ZSTD_cwksp_alloc_buffers) { + ws->phase = ZSTD_cwksp_alloc_buffers; + } + ZSTD_cwksp_assert_internal_consistency(ws); +} + +/** + * The provided workspace takes ownership of the buffer [start, start+size). + * Any existing values in the workspace are ignored (the previously managed + * buffer, if present, must be separately freed). + */ +MEM_STATIC void ZSTD_cwksp_init(ZSTD_cwksp* ws, void* start, size_t size) { + DEBUGLOG(4, "cwksp: init'ing workspace with %zd bytes", size); + assert(((size_t)start & (sizeof(void*)-1)) == 0); /* ensure correct alignment */ + ws->workspace = start; + ws->workspaceEnd = (BYTE*)start + size; + ws->objectEnd = ws->workspace; + ws->tableValidEnd = ws->objectEnd; + ws->phase = ZSTD_cwksp_alloc_objects; + ZSTD_cwksp_clear(ws); + ws->workspaceOversizedDuration = 0; + ZSTD_cwksp_assert_internal_consistency(ws); +} + +MEM_STATIC size_t ZSTD_cwksp_create(ZSTD_cwksp* ws, size_t size, ZSTD_customMem customMem) { + void* workspace = ZSTD_malloc(size, customMem); + DEBUGLOG(4, "cwksp: creating new workspace with %zd bytes", size); + RETURN_ERROR_IF(workspace == NULL, memory_allocation); + ZSTD_cwksp_init(ws, workspace, size); + return 0; +} + +MEM_STATIC void ZSTD_cwksp_free(ZSTD_cwksp* ws, ZSTD_customMem customMem) { + DEBUGLOG(4, "cwksp: freeing workspace"); + ZSTD_free(ws->workspace, customMem); + memset(ws, 0, sizeof(ZSTD_cwksp)); +} + +/** + * Moves the management of a workspace from one cwksp to another. The src cwksp + * is left in an invalid state (src must be re-init()'ed before its used again). + */ +MEM_STATIC void ZSTD_cwksp_move(ZSTD_cwksp* dst, ZSTD_cwksp* src) { + *dst = *src; + memset(src, 0, sizeof(ZSTD_cwksp)); +} + +MEM_STATIC size_t ZSTD_cwksp_sizeof(const ZSTD_cwksp* ws) { + return (BYTE*)ws->workspaceEnd - (BYTE*)ws->workspace; +} + +MEM_STATIC int ZSTD_cwksp_reserve_failed(const ZSTD_cwksp* ws) { + return ws->allocFailed; +} + +/*-************************************* +* Functions Checking Free Space +***************************************/ + +MEM_STATIC size_t ZSTD_cwksp_available_space(ZSTD_cwksp* ws) { + return (size_t)((BYTE*)ws->allocStart - (BYTE*)ws->tableEnd); +} + +MEM_STATIC int ZSTD_cwksp_check_available(ZSTD_cwksp* ws, size_t additionalNeededSpace) { + return ZSTD_cwksp_available_space(ws) >= additionalNeededSpace; +} + +MEM_STATIC int ZSTD_cwksp_check_too_large(ZSTD_cwksp* ws, size_t additionalNeededSpace) { + return ZSTD_cwksp_check_available( + ws, additionalNeededSpace * ZSTD_WORKSPACETOOLARGE_FACTOR); +} + +MEM_STATIC int ZSTD_cwksp_check_wasteful(ZSTD_cwksp* ws, size_t additionalNeededSpace) { + return ZSTD_cwksp_check_too_large(ws, additionalNeededSpace) + && ws->workspaceOversizedDuration > ZSTD_WORKSPACETOOLARGE_MAXDURATION; +} + +MEM_STATIC void ZSTD_cwksp_bump_oversized_duration( + ZSTD_cwksp* ws, size_t additionalNeededSpace) { + if (ZSTD_cwksp_check_too_large(ws, additionalNeededSpace)) { + ws->workspaceOversizedDuration++; + } else { + ws->workspaceOversizedDuration = 0; + } +} + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_CWKSP_H */ diff --git a/lib/compress/zstd_double_fast.c b/lib/compress/zstd_double_fast.c index 54467cc31b..a661a48534 100644 --- a/lib/compress/zstd_double_fast.c +++ b/lib/compress/zstd_double_fast.c @@ -148,7 +148,7 @@ size_t ZSTD_compressBlock_doubleFast_generic( const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend; mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4; ip++; - ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH); + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH); goto _match_stored; } @@ -157,7 +157,7 @@ size_t ZSTD_compressBlock_doubleFast_generic( && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) { mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4; ip++; - ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH); + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH); goto _match_stored; } @@ -247,7 +247,7 @@ size_t ZSTD_compressBlock_doubleFast_generic( offset_2 = offset_1; offset_1 = offset; - ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); _match_stored: /* match found */ @@ -278,7 +278,7 @@ size_t ZSTD_compressBlock_doubleFast_generic( const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend; size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4; U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ - ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH); + ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH); hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2; hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2; ip += repLength2; @@ -297,7 +297,7 @@ size_t ZSTD_compressBlock_doubleFast_generic( U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */ hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base); hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base); - ZSTD_storeSeq(seqStore, 0, anchor, 0, rLength-MINMATCH); + ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, rLength-MINMATCH); ip += rLength; anchor = ip; continue; /* faster when present ... (?) */ @@ -411,7 +411,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic( const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend; mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4; ip++; - ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH); + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH); } else { if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) { const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend; @@ -422,7 +422,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic( while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */ offset_2 = offset_1; offset_1 = offset; - ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); } else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) { size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8); @@ -447,7 +447,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic( } offset_2 = offset_1; offset_1 = offset; - ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); } else { ip += ((ip-anchor) >> kSearchStrength) + 1; @@ -479,7 +479,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic( const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend; size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ - ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH); + ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH); hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2; hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2; ip += repLength2; diff --git a/lib/compress/zstd_fast.c b/lib/compress/zstd_fast.c index 59267ffbbc..6dbefee6b7 100644 --- a/lib/compress/zstd_fast.c +++ b/lib/compress/zstd_fast.c @@ -8,7 +8,7 @@ * You may select, at your option, one of the above-listed licenses. */ -#include "zstd_compress_internal.h" +#include "zstd_compress_internal.h" /* ZSTD_hashPtr, ZSTD_count, ZSTD_storeSeq */ #include "zstd_fast.h" @@ -43,8 +43,8 @@ void ZSTD_fillHashTable(ZSTD_matchState_t* ms, } -FORCE_INLINE_TEMPLATE -size_t ZSTD_compressBlock_fast_generic( +FORCE_INLINE_TEMPLATE size_t +ZSTD_compressBlock_fast_generic( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize, U32 const mls) @@ -74,8 +74,7 @@ size_t ZSTD_compressBlock_fast_generic( DEBUGLOG(5, "ZSTD_compressBlock_fast_generic"); ip0 += (ip0 == prefixStart); ip1 = ip0 + 1; - { - U32 const maxRep = (U32)(ip0 - prefixStart); + { U32 const maxRep = (U32)(ip0 - prefixStart); if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0; if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0; } @@ -118,8 +117,7 @@ size_t ZSTD_compressBlock_fast_generic( match0 = match1; goto _offset; } - { - size_t const step = ((ip0-anchor) >> (kSearchStrength - 1)) + stepSize; + { size_t const step = ((size_t)(ip0-anchor) >> (kSearchStrength - 1)) + stepSize; assert(step >= 2); ip0 += step; ip1 += step; @@ -138,7 +136,7 @@ size_t ZSTD_compressBlock_fast_generic( _match: /* Requires: ip0, match0, offcode */ /* Count the forward length */ mLength += ZSTD_count(ip0+mLength+4, match0+mLength+4, iend) + 4; - ZSTD_storeSeq(seqStore, ip0-anchor, anchor, offcode, mLength-MINMATCH); + ZSTD_storeSeq(seqStore, (size_t)(ip0-anchor), anchor, iend, offcode, mLength-MINMATCH); /* match found */ ip0 += mLength; anchor = ip0; @@ -150,16 +148,15 @@ size_t ZSTD_compressBlock_fast_generic( hashTable[ZSTD_hashPtr(base+current0+2, hlog, mls)] = current0+2; /* here because current+2 could be > iend-8 */ hashTable[ZSTD_hashPtr(ip0-2, hlog, mls)] = (U32)(ip0-2-base); - while ( (ip0 <= ilimit) - && ( (offset_2>0) - & (MEM_read32(ip0) == MEM_read32(ip0 - offset_2)) )) { + while ( ((ip0 <= ilimit) & (offset_2>0)) /* offset_2==0 means offset_2 is invalidated */ + && (MEM_read32(ip0) == MEM_read32(ip0 - offset_2)) ) { /* store sequence */ size_t const rLength = ZSTD_count(ip0+4, ip0+4-offset_2, iend) + 4; - U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */ + { U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */ hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base); ip0 += rLength; ip1 = ip0 + 1; - ZSTD_storeSeq(seqStore, 0, anchor, 0, rLength-MINMATCH); + ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, iend, 0 /*offCode*/, rLength-MINMATCH); anchor = ip0; continue; /* faster when present (confirmed on gcc-8) ... (?) */ } @@ -179,8 +176,7 @@ size_t ZSTD_compressBlock_fast( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize) { - ZSTD_compressionParameters const* cParams = &ms->cParams; - U32 const mls = cParams->minMatch; + U32 const mls = ms->cParams.minMatch; assert(ms->dictMatchState == NULL); switch(mls) { @@ -265,7 +261,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic( const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend; mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4; ip++; - ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH); + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH); } else if ( (matchIndex <= prefixStartIndex) ) { size_t const dictHash = ZSTD_hashPtr(ip, dictHLog, mls); U32 const dictMatchIndex = dictHashTable[dictHash]; @@ -285,7 +281,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic( } /* catch up */ offset_2 = offset_1; offset_1 = offset; - ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); } } else if (MEM_read32(match) != MEM_read32(ip)) { /* it's not a match, and we're not going to check the dictionary */ @@ -300,7 +296,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic( && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ offset_2 = offset_1; offset_1 = offset; - ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); } /* match found */ @@ -325,7 +321,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic( const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend; size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ - ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH); + ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH); hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2; ip += repLength2; anchor = ip; @@ -348,8 +344,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize) { - ZSTD_compressionParameters const* cParams = &ms->cParams; - U32 const mls = cParams->minMatch; + U32 const mls = ms->cParams.minMatch; assert(ms->dictMatchState != NULL); switch(mls) { @@ -408,16 +403,17 @@ static size_t ZSTD_compressBlock_fast_extDict_generic( const U32 repIndex = current + 1 - offset_1; const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base; const BYTE* const repMatch = repBase + repIndex; - size_t mLength; hashTable[h] = current; /* update hash table */ assert(offset_1 <= current +1); /* check repIndex */ if ( (((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > dictStartIndex)) && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend; - mLength = ZSTD_count_2segments(ip+1 +4, repMatch +4, iend, repMatchEnd, prefixStart) + 4; + size_t const rLength = ZSTD_count_2segments(ip+1 +4, repMatch +4, iend, repMatchEnd, prefixStart) + 4; ip++; - ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH); + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, rLength-MINMATCH); + ip += rLength; + anchor = ip; } else { if ( (matchIndex < dictStartIndex) || (MEM_read32(match) != MEM_read32(ip)) ) { @@ -427,19 +423,15 @@ static size_t ZSTD_compressBlock_fast_extDict_generic( } { const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend; const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart; - U32 offset; - mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4; + U32 const offset = current - matchIndex; + size_t mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4; while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ - offset = current - matchIndex; - offset_2 = offset_1; - offset_1 = offset; - ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + offset_2 = offset_1; offset_1 = offset; /* update offset history */ + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + ip += mLength; + anchor = ip; } } - /* found a match : store it */ - ip += mLength; - anchor = ip; - if (ip <= ilimit) { /* Fill Table */ hashTable[ZSTD_hashPtr(base+current+2, hlog, mls)] = current+2; @@ -448,13 +440,13 @@ static size_t ZSTD_compressBlock_fast_extDict_generic( while (ip <= ilimit) { U32 const current2 = (U32)(ip-base); U32 const repIndex2 = current2 - offset_2; - const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2; + const BYTE* const repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2; if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (repIndex2 > dictStartIndex)) /* intentional overflow */ && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend; size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; - U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ - ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH); + { U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; } /* swap offset_2 <=> offset_1 */ + ZSTD_storeSeq(seqStore, 0 /*litlen*/, anchor, iend, 0 /*offcode*/, repLength2-MINMATCH); hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2; ip += repLength2; anchor = ip; @@ -476,8 +468,7 @@ size_t ZSTD_compressBlock_fast_extDict( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize) { - ZSTD_compressionParameters const* cParams = &ms->cParams; - U32 const mls = cParams->minMatch; + U32 const mls = ms->cParams.minMatch; switch(mls) { default: /* includes case 3 */ diff --git a/lib/compress/zstd_lazy.c b/lib/compress/zstd_lazy.c index 0af41724c7..9ad7e03b54 100644 --- a/lib/compress/zstd_lazy.c +++ b/lib/compress/zstd_lazy.c @@ -810,7 +810,7 @@ ZSTD_compressBlock_lazy_generic( /* store sequence */ _storeSequence: { size_t const litLength = start - anchor; - ZSTD_storeSeq(seqStore, litLength, anchor, (U32)offset, matchLength-MINMATCH); + ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH); anchor = ip = start + matchLength; } @@ -828,7 +828,7 @@ ZSTD_compressBlock_lazy_generic( const BYTE* const repEnd2 = repIndex < prefixLowestIndex ? dictEnd : iend; matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd2, prefixLowest) + 4; offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset_2 <=> offset_1 */ - ZSTD_storeSeq(seqStore, 0, anchor, 0, matchLength-MINMATCH); + ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH); ip += matchLength; anchor = ip; continue; @@ -843,7 +843,7 @@ ZSTD_compressBlock_lazy_generic( /* store sequence */ matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4; offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap repcodes */ - ZSTD_storeSeq(seqStore, 0, anchor, 0, matchLength-MINMATCH); + ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH); ip += matchLength; anchor = ip; continue; /* faster when present ... (?) */ @@ -1051,7 +1051,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic( /* store sequence */ _storeSequence: { size_t const litLength = start - anchor; - ZSTD_storeSeq(seqStore, litLength, anchor, (U32)offset, matchLength-MINMATCH); + ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH); anchor = ip = start + matchLength; } @@ -1066,7 +1066,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic( const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4; offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset history */ - ZSTD_storeSeq(seqStore, 0, anchor, 0, matchLength-MINMATCH); + ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH); ip += matchLength; anchor = ip; continue; /* faster when present ... (?) */ diff --git a/lib/compress/zstd_ldm.c b/lib/compress/zstd_ldm.c index 3dcf86e6e8..fc3f469430 100644 --- a/lib/compress/zstd_ldm.c +++ b/lib/compress/zstd_ldm.c @@ -583,7 +583,7 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore, rep[i] = rep[i-1]; rep[0] = sequence.offset; /* Store the sequence */ - ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength, + ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength, iend, sequence.offset + ZSTD_REP_MOVE, sequence.matchLength - MINMATCH); ip += sequence.matchLength; diff --git a/lib/compress/zstd_opt.c b/lib/compress/zstd_opt.c index 2da363f93e..2e50fca6ff 100644 --- a/lib/compress/zstd_opt.c +++ b/lib/compress/zstd_opt.c @@ -1098,7 +1098,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, assert(anchor + llen <= iend); ZSTD_updateStats(optStatePtr, llen, anchor, offCode, mlen); - ZSTD_storeSeq(seqStore, llen, anchor, offCode, mlen-MINMATCH); + ZSTD_storeSeq(seqStore, llen, anchor, iend, offCode, mlen-MINMATCH); anchor += advance; ip = anchor; } } diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c index 9e537b8848..44cbd94be2 100644 --- a/lib/compress/zstdmt_compress.c +++ b/lib/compress/zstdmt_compress.c @@ -668,7 +668,7 @@ static void ZSTDMT_compressionJob(void* jobDescription) /* init */ if (job->cdict) { - size_t const initError = ZSTD_compressBegin_advanced_internal(cctx, NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast, job->cdict, jobParams, job->fullFrameSize); + size_t const initError = ZSTD_compressBegin_advanced_internal(cctx, NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast, job->cdict, &jobParams, job->fullFrameSize); assert(job->firstJob); /* only allowed for first job */ if (ZSTD_isError(initError)) JOB_ERROR(initError); } else { /* srcStart points at reloaded section */ @@ -680,7 +680,7 @@ static void ZSTDMT_compressionJob(void* jobDescription) job->prefix.start, job->prefix.size, ZSTD_dct_rawContent, /* load dictionary in "content-only" mode (no header analysis) */ ZSTD_dtlm_fast, NULL, /*cdict*/ - jobParams, pledgedSrcSize); + &jobParams, pledgedSrcSize); if (ZSTD_isError(initError)) JOB_ERROR(initError); } } @@ -1028,9 +1028,9 @@ size_t ZSTDMT_getMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, /* Sets parameters relevant to the compression job, * initializing others to default values. */ -static ZSTD_CCtx_params ZSTDMT_initJobCCtxParams(ZSTD_CCtx_params const params) +static ZSTD_CCtx_params ZSTDMT_initJobCCtxParams(const ZSTD_CCtx_params* params) { - ZSTD_CCtx_params jobParams = params; + ZSTD_CCtx_params jobParams = *params; /* Clear parameters related to multithreading */ jobParams.forceWindow = 0; jobParams.nbWorkers = 0; @@ -1151,16 +1151,16 @@ size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx) /* ===== Multi-threaded compression ===== */ /* ------------------------------------------ */ -static unsigned ZSTDMT_computeTargetJobLog(ZSTD_CCtx_params const params) +static unsigned ZSTDMT_computeTargetJobLog(const ZSTD_CCtx_params* params) { unsigned jobLog; - if (params.ldmParams.enableLdm) { + if (params->ldmParams.enableLdm) { /* In Long Range Mode, the windowLog is typically oversized. * In which case, it's preferable to determine the jobSize * based on chainLog instead. */ - jobLog = MAX(21, params.cParams.chainLog + 4); + jobLog = MAX(21, params->cParams.chainLog + 4); } else { - jobLog = MAX(20, params.cParams.windowLog + 2); + jobLog = MAX(20, params->cParams.windowLog + 2); } return MIN(jobLog, (unsigned)ZSTDMT_JOBLOG_MAX); } @@ -1193,27 +1193,27 @@ static int ZSTDMT_overlapLog(int ovlog, ZSTD_strategy strat) return ovlog; } -static size_t ZSTDMT_computeOverlapSize(ZSTD_CCtx_params const params) +static size_t ZSTDMT_computeOverlapSize(const ZSTD_CCtx_params* params) { - int const overlapRLog = 9 - ZSTDMT_overlapLog(params.overlapLog, params.cParams.strategy); - int ovLog = (overlapRLog >= 8) ? 0 : (params.cParams.windowLog - overlapRLog); + int const overlapRLog = 9 - ZSTDMT_overlapLog(params->overlapLog, params->cParams.strategy); + int ovLog = (overlapRLog >= 8) ? 0 : (params->cParams.windowLog - overlapRLog); assert(0 <= overlapRLog && overlapRLog <= 8); - if (params.ldmParams.enableLdm) { + if (params->ldmParams.enableLdm) { /* In Long Range Mode, the windowLog is typically oversized. * In which case, it's preferable to determine the jobSize * based on chainLog instead. * Then, ovLog becomes a fraction of the jobSize, rather than windowSize */ - ovLog = MIN(params.cParams.windowLog, ZSTDMT_computeTargetJobLog(params) - 2) + ovLog = MIN(params->cParams.windowLog, ZSTDMT_computeTargetJobLog(params) - 2) - overlapRLog; } assert(0 <= ovLog && ovLog <= ZSTD_WINDOWLOG_MAX); - DEBUGLOG(4, "overlapLog : %i", params.overlapLog); + DEBUGLOG(4, "overlapLog : %i", params->overlapLog); DEBUGLOG(4, "overlap size : %i", 1 << ovLog); return (ovLog==0) ? 0 : (size_t)1 << ovLog; } static unsigned -ZSTDMT_computeNbJobs(ZSTD_CCtx_params params, size_t srcSize, unsigned nbWorkers) +ZSTDMT_computeNbJobs(const ZSTD_CCtx_params* params, size_t srcSize, unsigned nbWorkers) { assert(nbWorkers>0); { size_t const jobSizeTarget = (size_t)1 << ZSTDMT_computeTargetJobLog(params); @@ -1236,9 +1236,9 @@ static size_t ZSTDMT_compress_advanced_internal( const ZSTD_CDict* cdict, ZSTD_CCtx_params params) { - ZSTD_CCtx_params const jobParams = ZSTDMT_initJobCCtxParams(params); - size_t const overlapSize = ZSTDMT_computeOverlapSize(params); - unsigned const nbJobs = ZSTDMT_computeNbJobs(params, srcSize, params.nbWorkers); + ZSTD_CCtx_params const jobParams = ZSTDMT_initJobCCtxParams(¶ms); + size_t const overlapSize = ZSTDMT_computeOverlapSize(¶ms); + unsigned const nbJobs = ZSTDMT_computeNbJobs(¶ms, srcSize, params.nbWorkers); size_t const proposedJobSize = (srcSize + (nbJobs-1)) / nbJobs; size_t const avgJobSize = (((proposedJobSize-1) & 0x1FFFF) < 0x7FFF) ? proposedJobSize + 0xFFFF : proposedJobSize; /* avoid too small last block */ const char* const srcStart = (const char*)src; @@ -1256,7 +1256,7 @@ static size_t ZSTDMT_compress_advanced_internal( ZSTD_CCtx* const cctx = mtctx->cctxPool->cctx[0]; DEBUGLOG(4, "ZSTDMT_compress_advanced_internal: fallback to single-thread mode"); if (cdict) return ZSTD_compress_usingCDict_advanced(cctx, dst, dstCapacity, src, srcSize, cdict, jobParams.fParams); - return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, NULL, 0, jobParams); + return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, NULL, 0, &jobParams); } assert(avgJobSize >= 256 KB); /* condition for ZSTD_compressBound(A) + ZSTD_compressBound(B) <= ZSTD_compressBound(A+B), required to compress directly into Dst (no additional buffer) */ @@ -1404,12 +1404,12 @@ size_t ZSTDMT_initCStream_internal( mtctx->singleBlockingThread = (pledgedSrcSize <= ZSTDMT_JOBSIZE_MIN); /* do not trigger multi-threading when srcSize is too small */ if (mtctx->singleBlockingThread) { - ZSTD_CCtx_params const singleThreadParams = ZSTDMT_initJobCCtxParams(params); + ZSTD_CCtx_params const singleThreadParams = ZSTDMT_initJobCCtxParams(¶ms); DEBUGLOG(5, "ZSTDMT_initCStream_internal: switch to single blocking thread mode"); assert(singleThreadParams.nbWorkers == 0); return ZSTD_initCStream_internal(mtctx->cctxPool->cctx[0], dict, dictSize, cdict, - singleThreadParams, pledgedSrcSize); + &singleThreadParams, pledgedSrcSize); } DEBUGLOG(4, "ZSTDMT_initCStream_internal: %u workers", params.nbWorkers); @@ -1435,11 +1435,11 @@ size_t ZSTDMT_initCStream_internal( mtctx->cdict = cdict; } - mtctx->targetPrefixSize = ZSTDMT_computeOverlapSize(params); + mtctx->targetPrefixSize = ZSTDMT_computeOverlapSize(¶ms); DEBUGLOG(4, "overlapLog=%i => %u KB", params.overlapLog, (U32)(mtctx->targetPrefixSize>>10)); mtctx->targetSectionSize = params.jobSize; if (mtctx->targetSectionSize == 0) { - mtctx->targetSectionSize = 1ULL << ZSTDMT_computeTargetJobLog(params); + mtctx->targetSectionSize = 1ULL << ZSTDMT_computeTargetJobLog(¶ms); } assert(mtctx->targetSectionSize <= (size_t)ZSTDMT_JOBSIZE_MAX); diff --git a/lib/decompress/huf_decompress.c b/lib/decompress/huf_decompress.c index 3f8bd29732..bb2d0a96bc 100644 --- a/lib/decompress/huf_decompress.c +++ b/lib/decompress/huf_decompress.c @@ -61,7 +61,9 @@ * Error Management ****************************************************************/ #define HUF_isError ERR_isError +#ifndef CHECK_F #define CHECK_F(f) { size_t const err_ = (f); if (HUF_isError(err_)) return err_; } +#endif /* ************************************************************** diff --git a/lib/decompress/zstd_decompress_block.c b/lib/decompress/zstd_decompress_block.c index cbcfc08406..cbb66c8dba 100644 --- a/lib/decompress/zstd_decompress_block.c +++ b/lib/decompress/zstd_decompress_block.c @@ -573,38 +573,118 @@ typedef struct { size_t pos; } seqState_t; +/*! ZSTD_overlapCopy8() : + * Copies 8 bytes from ip to op and updates op and ip where ip <= op. + * If the offset is < 8 then the offset is spread to at least 8 bytes. + * + * Precondition: *ip <= *op + * Postcondition: *op - *op >= 8 + */ +static void ZSTD_overlapCopy8(BYTE** op, BYTE const** ip, size_t offset) { + assert(*ip <= *op); + if (offset < 8) { + /* close range match, overlap */ + static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */ + static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */ + int const sub2 = dec64table[offset]; + (*op)[0] = (*ip)[0]; + (*op)[1] = (*ip)[1]; + (*op)[2] = (*ip)[2]; + (*op)[3] = (*ip)[3]; + *ip += dec32table[offset]; + ZSTD_copy4(*op+4, *ip); + *ip -= sub2; + } else { + ZSTD_copy8(*op, *ip); + } + *ip += 8; + *op += 8; + assert(*op - *ip >= 8); +} + +/*! ZSTD_safecopy() : + * Specialized version of memcpy() that is allowed to READ up to WILDCOPY_OVERLENGTH past the input buffer + * and write up to 16 bytes past oend_w (op >= oend_w is allowed). + * This function is only called in the uncommon case where the sequence is near the end of the block. It + * should be fast for a single long sequence, but can be slow for several short sequences. + * + * @param ovtype controls the overlap detection + * - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart. + * - ZSTD_overlap_src_before_dst: The src and dst may overlap and may be any distance apart. + * The src buffer must be before the dst buffer. + */ +static void ZSTD_safecopy(BYTE* op, BYTE* const oend_w, BYTE const* ip, ptrdiff_t length, ZSTD_overlap_e ovtype) { + ptrdiff_t const diff = op - ip; + BYTE* const oend = op + length; -/* ZSTD_execSequenceLast7(): - * exceptional case : decompress a match starting within last 7 bytes of output buffer. - * requires more careful checks, to ensure there is no overflow. - * performance does not matter though. - * note : this case is supposed to be never generated "naturally" by reference encoder, - * since in most cases it needs at least 8 bytes to look for a match. - * but it's allowed by the specification. */ + assert((ovtype == ZSTD_no_overlap && (diff <= -8 || diff >= 8)) || + (ovtype == ZSTD_overlap_src_before_dst && diff >= 0)); + + if (length < 8) { + /* Handle short lengths. */ + while (op < oend) *op++ = *ip++; + return; + } + if (ovtype == ZSTD_overlap_src_before_dst) { + /* Copy 8 bytes and ensure the offset >= 8 when there can be overlap. */ + assert(length >= 8); + ZSTD_overlapCopy8(&op, &ip, diff); + assert(op - ip >= 8); + assert(op <= oend); + } + + if (oend <= oend_w) { + /* No risk of overwrite. */ + ZSTD_wildcopy(op, ip, length, ovtype); + return; + } + if (op <= oend_w) { + /* Wildcopy until we get close to the end. */ + assert(oend > oend_w); + ZSTD_wildcopy(op, ip, oend_w - op, ovtype); + ip += oend_w - op; + op = oend_w; + } + /* Handle the leftovers. */ + while (op < oend) *op++ = *ip++; +} + +/* ZSTD_execSequenceEnd(): + * This version handles cases that are near the end of the output buffer. It requires + * more careful checks to make sure there is no overflow. By separating out these hard + * and unlikely cases, we can speed up the common cases. + * + * NOTE: This function needs to be fast for a single long sequence, but doesn't need + * to be optimized for many small sequences, since those fall into ZSTD_execSequence(). + */ FORCE_NOINLINE -size_t ZSTD_execSequenceLast7(BYTE* op, - BYTE* const oend, seq_t sequence, - const BYTE** litPtr, const BYTE* const litLimit, - const BYTE* const base, const BYTE* const vBase, const BYTE* const dictEnd) +size_t ZSTD_execSequenceEnd(BYTE* op, + BYTE* const oend, seq_t sequence, + const BYTE** litPtr, const BYTE* const litLimit, + const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd) { BYTE* const oLitEnd = op + sequence.litLength; size_t const sequenceLength = sequence.litLength + sequence.matchLength; BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */ const BYTE* const iLitEnd = *litPtr + sequence.litLength; const BYTE* match = oLitEnd - sequence.offset; + BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH; - /* check */ - RETURN_ERROR_IF(oMatchEnd>oend, dstSize_tooSmall, "last match must fit within dstBuffer"); + /* bounds checks */ + assert(oLitEnd < oMatchEnd); + RETURN_ERROR_IF(oMatchEnd > oend, dstSize_tooSmall, "last match must fit within dstBuffer"); RETURN_ERROR_IF(iLitEnd > litLimit, corruption_detected, "try to read beyond literal buffer"); /* copy literals */ - while (op < oLitEnd) *op++ = *(*litPtr)++; + ZSTD_safecopy(op, oend_w, *litPtr, sequence.litLength, ZSTD_no_overlap); + op = oLitEnd; + *litPtr = iLitEnd; /* copy Match */ - if (sequence.offset > (size_t)(oLitEnd - base)) { + if (sequence.offset > (size_t)(oLitEnd - prefixStart)) { /* offset beyond prefix */ - RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - vBase),corruption_detected); - match = dictEnd - (base-match); + RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected); + match = dictEnd - (prefixStart-match); if (match + sequence.matchLength <= dictEnd) { memmove(oLitEnd, match, sequence.matchLength); return sequenceLength; @@ -614,13 +694,12 @@ size_t ZSTD_execSequenceLast7(BYTE* op, memmove(oLitEnd, match, length1); op = oLitEnd + length1; sequence.matchLength -= length1; - match = base; + match = prefixStart; } } - while (op < oMatchEnd) *op++ = *match++; + ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst); return sequenceLength; } - HINT_INLINE size_t ZSTD_execSequence(BYTE* op, BYTE* const oend, seq_t sequence, @@ -634,20 +713,29 @@ size_t ZSTD_execSequence(BYTE* op, const BYTE* const iLitEnd = *litPtr + sequence.litLength; const BYTE* match = oLitEnd - sequence.offset; - /* check */ - RETURN_ERROR_IF(oMatchEnd>oend, dstSize_tooSmall, "last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend"); - RETURN_ERROR_IF(iLitEnd > litLimit, corruption_detected, "over-read beyond lit buffer"); - if (oLitEnd>oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd); - - /* copy Literals */ - if (sequence.litLength > 8) - ZSTD_wildcopy_16min(op, (*litPtr), sequence.litLength, ZSTD_no_overlap); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */ - else - ZSTD_copy8(op, *litPtr); + /* Errors and uncommon cases handled here. */ + assert(oLitEnd < oMatchEnd); + if (iLitEnd > litLimit || oMatchEnd > oend_w) + return ZSTD_execSequenceEnd(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd); + + /* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */ + assert(iLitEnd <= litLimit /* Literal length is in bounds */); + assert(oLitEnd <= oend_w /* Can wildcopy literals */); + assert(oMatchEnd <= oend_w /* Can wildcopy matches */); + + /* Copy Literals: + * Split out litLength <= 16 since it is nearly always true. +1.6% on gcc-9. + * We likely don't need the full 32-byte wildcopy. + */ + assert(WILDCOPY_OVERLENGTH >= 16); + ZSTD_copy16(op, (*litPtr)); + if (sequence.litLength > 16) { + ZSTD_wildcopy(op+16, (*litPtr)+16, sequence.litLength-16, ZSTD_no_overlap); + } op = oLitEnd; *litPtr = iLitEnd; /* update for next sequence */ - /* copy Match */ + /* Copy Match */ if (sequence.offset > (size_t)(oLitEnd - prefixStart)) { /* offset beyond prefix -> go into extDict */ RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected); @@ -662,123 +750,33 @@ size_t ZSTD_execSequence(BYTE* op, op = oLitEnd + length1; sequence.matchLength -= length1; match = prefixStart; - if (op > oend_w || sequence.matchLength < MINMATCH) { - U32 i; - for (i = 0; i < sequence.matchLength; ++i) op[i] = match[i]; - return sequenceLength; - } } } - /* Requirement: op <= oend_w && sequence.matchLength >= MINMATCH */ - - /* match within prefix */ - if (sequence.offset < 8) { - /* close range match, overlap */ - static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */ - static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */ - int const sub2 = dec64table[sequence.offset]; - op[0] = match[0]; - op[1] = match[1]; - op[2] = match[2]; - op[3] = match[3]; - match += dec32table[sequence.offset]; - ZSTD_copy4(op+4, match); - match -= sub2; - } else { - ZSTD_copy8(op, match); - } - op += 8; match += 8; - - if (oMatchEnd > oend-(16-MINMATCH)) { - if (op < oend_w) { - ZSTD_wildcopy(op, match, oend_w - op, ZSTD_overlap_src_before_dst); - match += oend_w - op; - op = oend_w; - } - while (op < oMatchEnd) *op++ = *match++; - } else { - ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst); /* works even if matchLength < 8 */ + /* Match within prefix of 1 or more bytes */ + assert(op <= oMatchEnd); + assert(oMatchEnd <= oend_w); + assert(match >= prefixStart); + assert(sequence.matchLength >= 1); + + /* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy + * without overlap checking. + */ + if (sequence.offset >= WILDCOPY_VECLEN) { + /* We bet on a full wildcopy for matches, since we expect matches to be + * longer than literals (in general). In silesia, ~10% of matches are longer + * than 16 bytes. + */ + ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength, ZSTD_no_overlap); + return sequenceLength; } - return sequenceLength; -} - - -HINT_INLINE -size_t ZSTD_execSequenceLong(BYTE* op, - BYTE* const oend, seq_t sequence, - const BYTE** litPtr, const BYTE* const litLimit, - const BYTE* const prefixStart, const BYTE* const dictStart, const BYTE* const dictEnd) -{ - BYTE* const oLitEnd = op + sequence.litLength; - size_t const sequenceLength = sequence.litLength + sequence.matchLength; - BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */ - BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH; - const BYTE* const iLitEnd = *litPtr + sequence.litLength; - const BYTE* match = sequence.match; + assert(sequence.offset < WILDCOPY_VECLEN); - /* check */ - RETURN_ERROR_IF(oMatchEnd > oend, dstSize_tooSmall, "last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend"); - RETURN_ERROR_IF(iLitEnd > litLimit, corruption_detected, "over-read beyond lit buffer"); - if (oLitEnd > oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, dictStart, dictEnd); - - /* copy Literals */ - if (sequence.litLength > 8) - ZSTD_wildcopy_16min(op, *litPtr, sequence.litLength, ZSTD_no_overlap); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */ - else - ZSTD_copy8(op, *litPtr); /* note : op <= oLitEnd <= oend_w == oend - 8 */ + /* Copy 8 bytes and spread the offset to be >= 8. */ + ZSTD_overlapCopy8(&op, &match, sequence.offset); - op = oLitEnd; - *litPtr = iLitEnd; /* update for next sequence */ - - /* copy Match */ - if (sequence.offset > (size_t)(oLitEnd - prefixStart)) { - /* offset beyond prefix */ - RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - dictStart), corruption_detected); - if (match + sequence.matchLength <= dictEnd) { - memmove(oLitEnd, match, sequence.matchLength); - return sequenceLength; - } - /* span extDict & currentPrefixSegment */ - { size_t const length1 = dictEnd - match; - memmove(oLitEnd, match, length1); - op = oLitEnd + length1; - sequence.matchLength -= length1; - match = prefixStart; - if (op > oend_w || sequence.matchLength < MINMATCH) { - U32 i; - for (i = 0; i < sequence.matchLength; ++i) op[i] = match[i]; - return sequenceLength; - } - } } - assert(op <= oend_w); - assert(sequence.matchLength >= MINMATCH); - - /* match within prefix */ - if (sequence.offset < 8) { - /* close range match, overlap */ - static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */ - static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */ - int const sub2 = dec64table[sequence.offset]; - op[0] = match[0]; - op[1] = match[1]; - op[2] = match[2]; - op[3] = match[3]; - match += dec32table[sequence.offset]; - ZSTD_copy4(op+4, match); - match -= sub2; - } else { - ZSTD_copy8(op, match); - } - op += 8; match += 8; - - if (oMatchEnd > oend-(16-MINMATCH)) { - if (op < oend_w) { - ZSTD_wildcopy(op, match, oend_w - op, ZSTD_overlap_src_before_dst); - match += oend_w - op; - op = oend_w; - } - while (op < oMatchEnd) *op++ = *match++; - } else { - ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst); /* works even if matchLength < 8 */ + /* If the match length is > 8 bytes, then continue with the wildcopy. */ + if (sequence.matchLength > 8) { + assert(op < oMatchEnd); + ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst); } return sequenceLength; } @@ -1098,7 +1096,7 @@ ZSTD_decompressSequencesLong_body( /* decode and decompress */ for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNb = " "10! This may lead to a subpar dictionary! We recommend " - "training on sources at least 10x, and up to 100x the " - "size of the dictionary!\n", (U32)maxDictSize, + "training on sources at least 10x, and preferably 100x " + "the size of the dictionary! \n", (U32)maxDictSize, (U32)nbDmers, ratio); } @@ -919,13 +919,12 @@ void COVER_best_finish(COVER_best_t *best, ZDICT_cover_params_t parameters, } } /* Save the dictionary, parameters, and size */ - if (!dict) { - return; + if (dict) { + memcpy(best->dict, dict, dictSize); + best->dictSize = dictSize; + best->parameters = parameters; + best->compressedSize = compressedSize; } - memcpy(best->dict, dict, dictSize); - best->dictSize = dictSize; - best->parameters = parameters; - best->compressedSize = compressedSize; } if (liveJobs == 0) { ZSTD_pthread_cond_broadcast(&best->cond); diff --git a/lib/dictBuilder/zdict.c b/lib/dictBuilder/zdict.c index ee21ee1a9d..4a263d822d 100644 --- a/lib/dictBuilder/zdict.c +++ b/lib/dictBuilder/zdict.c @@ -571,7 +571,7 @@ static void ZDICT_fillNoise(void* buffer, size_t length) unsigned const prime1 = 2654435761U; unsigned const prime2 = 2246822519U; unsigned acc = prime1; - size_t p=0;; + size_t p=0; for (p=0; p > 21); diff --git a/lib/legacy/zstd_v01.c b/lib/legacy/zstd_v01.c index ae8cba2a3f..8112527f05 100644 --- a/lib/legacy/zstd_v01.c +++ b/lib/legacy/zstd_v01.c @@ -346,7 +346,7 @@ FORCE_INLINE unsigned FSE_highbit32 (U32 val) _BitScanReverse ( &r, val ); return (unsigned) r; # elif defined(__GNUC__) && (GCC_VERSION >= 304) /* GCC Intrinsic */ - return 31 - __builtin_clz (val); + return __builtin_clz (val) ^ 31; # else /* Software version */ static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 }; U32 v = val; diff --git a/lib/legacy/zstd_v02.c b/lib/legacy/zstd_v02.c index 793df6024b..c8783799b9 100644 --- a/lib/legacy/zstd_v02.c +++ b/lib/legacy/zstd_v02.c @@ -353,7 +353,7 @@ MEM_STATIC unsigned BIT_highbit32 (U32 val) _BitScanReverse ( &r, val ); return (unsigned) r; # elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */ - return 31 - __builtin_clz (val); + return __builtin_clz (val) ^ 31; # else /* Software version */ static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 }; U32 v = val; @@ -2889,6 +2889,7 @@ static size_t ZSTD_decodeLiteralsBlock(void* ctx, const size_t litSize = (MEM_readLE32(istart) & 0xFFFFFF) >> 2; /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */ if (litSize > srcSize-11) /* risk of reading too far with wildcopy */ { + if (litSize > BLOCKSIZE) return ERROR(corruption_detected); if (litSize > srcSize-3) return ERROR(corruption_detected); memcpy(dctx->litBuffer, istart, litSize); dctx->litPtr = dctx->litBuffer; diff --git a/lib/legacy/zstd_v03.c b/lib/legacy/zstd_v03.c index dbc83f1ee7..162bd63024 100644 --- a/lib/legacy/zstd_v03.c +++ b/lib/legacy/zstd_v03.c @@ -356,7 +356,7 @@ MEM_STATIC unsigned BIT_highbit32 (U32 val) _BitScanReverse ( &r, val ); return (unsigned) r; # elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */ - return 31 - __builtin_clz (val); + return __builtin_clz (val) ^ 31; # else /* Software version */ static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 }; U32 v = val; diff --git a/lib/legacy/zstd_v04.c b/lib/legacy/zstd_v04.c index 645a6e313c..4dec308168 100644 --- a/lib/legacy/zstd_v04.c +++ b/lib/legacy/zstd_v04.c @@ -627,7 +627,7 @@ MEM_STATIC unsigned BIT_highbit32 (U32 val) _BitScanReverse ( &r, val ); return (unsigned) r; # elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */ - return 31 - __builtin_clz (val); + return __builtin_clz (val) ^ 31; # else /* Software version */ static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 }; U32 v = val; @@ -2655,6 +2655,7 @@ static size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, const size_t litSize = (MEM_readLE32(istart) & 0xFFFFFF) >> 2; /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */ if (litSize > srcSize-11) /* risk of reading too far with wildcopy */ { + if (litSize > BLOCKSIZE) return ERROR(corruption_detected); if (litSize > srcSize-3) return ERROR(corruption_detected); memcpy(dctx->litBuffer, istart, litSize); dctx->litPtr = dctx->litBuffer; @@ -3034,9 +3035,12 @@ static size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, { /* blockType == blockCompressed */ const BYTE* ip = (const BYTE*)src; + size_t litCSize; + + if (srcSize > BLOCKSIZE) return ERROR(corruption_detected); /* Decode literals sub-block */ - size_t litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize); + litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize); if (ZSTD_isError(litCSize)) return litCSize; ip += litCSize; srcSize -= litCSize; diff --git a/lib/legacy/zstd_v05.c b/lib/legacy/zstd_v05.c index e347b00dcf..570e0ff86e 100644 --- a/lib/legacy/zstd_v05.c +++ b/lib/legacy/zstd_v05.c @@ -756,7 +756,7 @@ MEM_STATIC unsigned BITv05_highbit32 (U32 val) _BitScanReverse ( &r, val ); return (unsigned) r; # elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */ - return 31 - __builtin_clz (val); + return __builtin_clz (val) ^ 31; # else /* Software version */ static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 }; U32 v = val; diff --git a/lib/legacy/zstd_v06.c b/lib/legacy/zstd_v06.c index f907a3a712..2a08e8dee8 100644 --- a/lib/legacy/zstd_v06.c +++ b/lib/legacy/zstd_v06.c @@ -860,7 +860,7 @@ MEM_STATIC unsigned BITv06_highbit32 ( U32 val) _BitScanReverse ( &r, val ); return (unsigned) r; # elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */ - return 31 - __builtin_clz (val); + return __builtin_clz (val) ^ 31; # else /* Software version */ static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 }; U32 v = val; diff --git a/lib/legacy/zstd_v07.c b/lib/legacy/zstd_v07.c index a83ddc9a68..a2eeff808e 100644 --- a/lib/legacy/zstd_v07.c +++ b/lib/legacy/zstd_v07.c @@ -530,7 +530,7 @@ MEM_STATIC unsigned BITv07_highbit32 (U32 val) _BitScanReverse ( &r, val ); return (unsigned) r; # elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */ - return 31 - __builtin_clz (val); + return __builtin_clz (val) ^ 31; # else /* Software version */ static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 }; U32 v = val; diff --git a/lib/zstd.h b/lib/zstd.h index f8e95f2283..42d4188c8d 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -15,6 +15,7 @@ extern "C" { #define ZSTD_H_235446 /* ====== Dependency ======*/ +#include /* INT_MAX */ #include /* size_t */ @@ -71,7 +72,7 @@ extern "C" { /*------ Version ------*/ #define ZSTD_VERSION_MAJOR 1 #define ZSTD_VERSION_MINOR 4 -#define ZSTD_VERSION_RELEASE 3 +#define ZSTD_VERSION_RELEASE 4 #define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE) ZSTDLIB_API unsigned ZSTD_versionNumber(void); /**< to check runtime library version */ @@ -386,6 +387,7 @@ typedef enum { * ZSTD_c_forceAttachDict * ZSTD_c_literalCompressionMode * ZSTD_c_targetCBlockSize + * ZSTD_c_srcSizeHint * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. * note : never ever use experimentalParam? names directly; * also, the enums values themselves are unstable and can still change. @@ -396,6 +398,7 @@ typedef enum { ZSTD_c_experimentalParam4=1001, ZSTD_c_experimentalParam5=1002, ZSTD_c_experimentalParam6=1003, + ZSTD_c_experimentalParam7=1004, } ZSTD_cParameter; typedef struct { @@ -1063,6 +1066,8 @@ ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict); /* Advanced parameter bounds */ #define ZSTD_TARGETCBLOCKSIZE_MIN 64 #define ZSTD_TARGETCBLOCKSIZE_MAX ZSTD_BLOCKSIZE_MAX +#define ZSTD_SRCSIZEHINT_MIN 0 +#define ZSTD_SRCSIZEHINT_MAX INT_MAX /* internal */ #define ZSTD_HASHLOG3_MAX 17 @@ -1216,14 +1221,23 @@ ZSTDLIB_API size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize); ***************************************/ /*! ZSTD_estimate*() : - * These functions make it possible to estimate memory usage - * of a future {D,C}Ctx, before its creation. - * ZSTD_estimateCCtxSize() will provide a budget large enough for any compression level up to selected one. - * It will also consider src size to be arbitrarily "large", which is worst case. - * If srcSize is known to always be small, ZSTD_estimateCCtxSize_usingCParams() can provide a tighter estimation. - * ZSTD_estimateCCtxSize_usingCParams() can be used in tandem with ZSTD_getCParams() to create cParams from compressionLevel. - * ZSTD_estimateCCtxSize_usingCCtxParams() can be used in tandem with ZSTD_CCtxParams_setParameter(). Only single-threaded compression is supported. This function will return an error code if ZSTD_c_nbWorkers is >= 1. - * Note : CCtx size estimation is only correct for single-threaded compression. */ + * These functions make it possible to estimate memory usage of a future + * {D,C}Ctx, before its creation. + * + * ZSTD_estimateCCtxSize() will provide a budget large enough for any + * compression level up to selected one. Unlike ZSTD_estimateCStreamSize*(), + * this estimate does not include space for a window buffer, so this estimate + * is guaranteed to be enough for single-shot compressions, but not streaming + * compressions. It will however assume the input may be arbitrarily large, + * which is the worst case. If srcSize is known to always be small, + * ZSTD_estimateCCtxSize_usingCParams() can provide a tighter estimation. + * ZSTD_estimateCCtxSize_usingCParams() can be used in tandem with + * ZSTD_getCParams() to create cParams from compressionLevel. + * ZSTD_estimateCCtxSize_usingCCtxParams() can be used in tandem with + * ZSTD_CCtxParams_setParameter(). + * + * Note: only single-threaded compression is supported. This function will + * return an error code if ZSTD_c_nbWorkers is >= 1. */ ZSTDLIB_API size_t ZSTD_estimateCCtxSize(int compressionLevel); ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams); ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params); @@ -1441,6 +1455,12 @@ ZSTDLIB_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* pre * There is no guarantee on compressed block size (default:0) */ #define ZSTD_c_targetCBlockSize ZSTD_c_experimentalParam6 +/* User's best guess of source size. + * Hint is not valid when srcSizeHint == 0. + * There is no guarantee that hint is close to actual source size, + * but compression ratio may regress significantly if guess considerably underestimates */ +#define ZSTD_c_srcSizeHint ZSTD_c_experimentalParam7 + /*! ZSTD_CCtx_getParameter() : * Get the requested compression parameter value, selected by enum ZSTD_cParameter, * and store it into int* value. @@ -1630,7 +1650,10 @@ ZSTDLIB_API size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dic /**! ZSTD_initCStream_advanced() : * This function is deprecated, and is approximately equivalent to: * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); - * ZSTD_CCtx_setZstdParams(zcs, params); // Set the zstd params and leave the rest as-is + * // Pseudocode: Set each zstd parameter and leave the rest as-is. + * for ((param, value) : params) { + * ZSTD_CCtx_setParameter(zcs, param, value); + * } * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); * ZSTD_CCtx_loadDictionary(zcs, dict, dictSize); * @@ -1650,7 +1673,10 @@ ZSTDLIB_API size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDi /**! ZSTD_initCStream_usingCDict_advanced() : * This function is deprecated, and is approximately equivalent to: * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); - * ZSTD_CCtx_setZstdFrameParams(zcs, fParams); // Set the zstd frame params and leave the rest as-is + * // Pseudocode: Set each zstd frame parameter and leave the rest as-is. + * for ((fParam, value) : fParams) { + * ZSTD_CCtx_setParameter(zcs, fParam, value); + * } * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); * ZSTD_CCtx_refCDict(zcs, cdict); * diff --git a/programs/dibio.c b/programs/dibio.c index 12eb326808..ea4bb4bf1f 100644 --- a/programs/dibio.c +++ b/programs/dibio.c @@ -201,7 +201,7 @@ static void DiB_fillNoise(void* buffer, size_t length) unsigned const prime1 = 2654435761U; unsigned const prime2 = 2246822519U; unsigned acc = prime1; - size_t p=0;; + size_t p=0; for (p=0; p /* strcmp, strlen */ #include #include /* errno */ +#include /* INT_MAX */ #include #include "timefn.h" /* UTIL_getTime, UTIL_clockSpanMicro */ @@ -304,7 +305,9 @@ struct FIO_prefs_s { int ldmMinMatch; int ldmBucketSizeLog; int ldmHashRateLog; + size_t streamSrcSize; size_t targetCBlockSize; + int srcSizeHint; ZSTD_literalCompressionMode_e literalCompressionMode; /* IO preferences */ @@ -349,7 +352,9 @@ FIO_prefs_t* FIO_createPreferences(void) ret->ldmMinMatch = 0; ret->ldmBucketSizeLog = FIO_LDM_PARAM_NOTSET; ret->ldmHashRateLog = FIO_LDM_PARAM_NOTSET; + ret->streamSrcSize = 0; ret->targetCBlockSize = 0; + ret->srcSizeHint = 0; ret->literalCompressionMode = ZSTD_lcm_auto; return ret; } @@ -418,10 +423,18 @@ void FIO_setRsyncable(FIO_prefs_t* const prefs, int rsyncable) { prefs->rsyncable = rsyncable; } +void FIO_setStreamSrcSize(FIO_prefs_t* const prefs, size_t streamSrcSize) { + prefs->streamSrcSize = streamSrcSize; +} + void FIO_setTargetCBlockSize(FIO_prefs_t* const prefs, size_t targetCBlockSize) { prefs->targetCBlockSize = targetCBlockSize; } +void FIO_setSrcSizeHint(FIO_prefs_t* const prefs, size_t srcSizeHint) { + prefs->srcSizeHint = (int)MIN((size_t)INT_MAX, srcSizeHint); +} + void FIO_setLiteralCompressionMode( FIO_prefs_t* const prefs, ZSTD_literalCompressionMode_e mode) { @@ -633,7 +646,6 @@ typedef struct { static cRess_t FIO_createCResources(FIO_prefs_t* const prefs, const char* dictFileName, int cLevel, - U64 srcSize, ZSTD_compressionParameters comprParams) { cRess_t ress; memset(&ress, 0, sizeof(ress)); @@ -667,6 +679,8 @@ static cRess_t FIO_createCResources(FIO_prefs_t* const prefs, CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_compressionLevel, cLevel) ); /* max compressed block size */ CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_targetCBlockSize, (int)prefs->targetCBlockSize) ); + /* source size hint */ + CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_srcSizeHint, (int)prefs->srcSizeHint) ); /* long distance matching */ CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_enableLongDistanceMatching, prefs->ldmFlag) ); CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_ldmHashLog, prefs->ldmHashLog) ); @@ -698,10 +712,7 @@ static cRess_t FIO_createCResources(FIO_prefs_t* const prefs, CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_rsyncable, prefs->rsyncable) ); #endif /* dictionary */ - CHECK( ZSTD_CCtx_setPledgedSrcSize(ress.cctx, srcSize) ); /* set the value temporarily for dictionary loading, to adapt compression parameters */ CHECK( ZSTD_CCtx_loadDictionary(ress.cctx, dictBuffer, dictBuffSize) ); - CHECK( ZSTD_CCtx_setPledgedSrcSize(ress.cctx, ZSTD_CONTENTSIZE_UNKNOWN) ); /* reset */ - free(dictBuffer); } @@ -1003,6 +1014,9 @@ FIO_compressZstdFrame(FIO_prefs_t* const prefs, /* init */ if (fileSize != UTIL_FILESIZE_UNKNOWN) { CHECK(ZSTD_CCtx_setPledgedSrcSize(ress.cctx, fileSize)); + } else if (prefs->streamSrcSize > 0) { + /* unknown source size; use the declared stream size */ + CHECK( ZSTD_CCtx_setPledgedSrcSize(ress.cctx, prefs->streamSrcSize) ); } (void)srcFileName; @@ -1361,10 +1375,7 @@ int FIO_compressFilename(FIO_prefs_t* const prefs, const char* dictFileName, int compressionLevel, ZSTD_compressionParameters comprParams) { - U64 const fileSize = UTIL_getFileSize(srcFileName); - U64 const srcSize = (fileSize == UTIL_FILESIZE_UNKNOWN) ? ZSTD_CONTENTSIZE_UNKNOWN : fileSize; - - cRess_t const ress = FIO_createCResources(prefs, dictFileName, compressionLevel, srcSize, comprParams); + cRess_t const ress = FIO_createCResources(prefs, dictFileName, compressionLevel, comprParams); int const result = FIO_compressFilename_srcFile(prefs, ress, dstFileName, srcFileName, compressionLevel); @@ -1415,10 +1426,7 @@ int FIO_compressMultipleFilenames(FIO_prefs_t* const prefs, ZSTD_compressionParameters comprParams) { int error = 0; - U64 const firstFileSize = UTIL_getFileSize(inFileNamesTable[0]); - U64 const firstSrcSize = (firstFileSize == UTIL_FILESIZE_UNKNOWN) ? ZSTD_CONTENTSIZE_UNKNOWN : firstFileSize; - U64 const srcSize = (nbFiles != 1) ? ZSTD_CONTENTSIZE_UNKNOWN : firstSrcSize ; - cRess_t ress = FIO_createCResources(prefs, dictFileName, compressionLevel, srcSize, comprParams); + cRess_t ress = FIO_createCResources(prefs, dictFileName, compressionLevel, comprParams); /* init */ assert(outFileName != NULL || suffix != NULL); @@ -1711,11 +1719,6 @@ static unsigned long long FIO_decompressZstdFrame( } if (readSizeHint == 0) break; /* end of frame */ - if (inBuff.size != inBuff.pos) { - DISPLAYLEVEL(1, "%s : Decoding error (37) : should consume entire input \n", - srcFileName); - return FIO_ERROR_FRAME_DECODING; - } /* Fill input buffer */ { size_t const toDecode = MIN(readSizeHint, ress->srcBufferSize); /* support large skippable frames */ diff --git a/programs/fileio.h b/programs/fileio.h index 311f8c0e1f..096d90b5ca 100644 --- a/programs/fileio.h +++ b/programs/fileio.h @@ -71,7 +71,9 @@ void FIO_setOverlapLog(FIO_prefs_t* const prefs, int overlapLog); void FIO_setRemoveSrcFile(FIO_prefs_t* const prefs, unsigned flag); void FIO_setSparseWrite(FIO_prefs_t* const prefs, unsigned sparse); /**< 0: no sparse; 1: disable on stdout; 2: always enabled */ void FIO_setRsyncable(FIO_prefs_t* const prefs, int rsyncable); +void FIO_setStreamSrcSize(FIO_prefs_t* const prefs, size_t streamSrcSize); void FIO_setTargetCBlockSize(FIO_prefs_t* const prefs, size_t targetCBlockSize); +void FIO_setSrcSizeHint(FIO_prefs_t* const prefs, size_t srcSizeHint); void FIO_setLiteralCompressionMode( FIO_prefs_t* const prefs, ZSTD_literalCompressionMode_e mode); diff --git a/programs/platform.h b/programs/platform.h index 38ded87274..5934e59cf1 100644 --- a/programs/platform.h +++ b/programs/platform.h @@ -92,7 +92,7 @@ extern "C" { # if defined(__linux__) || defined(__linux) # ifndef _POSIX_C_SOURCE -# define _POSIX_C_SOURCE 200112L /* feature test macro : https://www.gnu.org/software/libc/manual/html_node/Feature-Test-Macros.html */ +# define _POSIX_C_SOURCE 200809L /* feature test macro : https://www.gnu.org/software/libc/manual/html_node/Feature-Test-Macros.html */ # endif # endif # include /* declares _POSIX_VERSION */ diff --git a/programs/timefn.h b/programs/timefn.h index d1ddd31b1c..2db3765b93 100644 --- a/programs/timefn.h +++ b/programs/timefn.h @@ -19,12 +19,6 @@ extern "C" { /*-**************************************** * Dependencies ******************************************/ -#include /* utime */ -#if defined(_MSC_VER) -# include /* utime */ -#else -# include /* utime */ -#endif #include /* clock_t, clock, CLOCKS_PER_SEC */ diff --git a/programs/util.c b/programs/util.c index 347e769866..3988295d45 100644 --- a/programs/util.c +++ b/programs/util.c @@ -54,14 +54,25 @@ int UTIL_getFileStat(const char* infilename, stat_t *statbuf) int UTIL_setFileStat(const char *filename, stat_t *statbuf) { int res = 0; - struct utimbuf timebuf; if (!UTIL_isRegularFile(filename)) return -1; - timebuf.actime = time(NULL); - timebuf.modtime = statbuf->st_mtime; - res += utime(filename, &timebuf); /* set access and modification times */ + /* set access and modification times */ +#if defined(_WIN32) || (PLATFORM_POSIX_VERSION < 200809L) + { + struct utimbuf timebuf; + timebuf.actime = time(NULL); + timebuf.modtime = statbuf->st_mtime; + res += utime(filename, &timebuf); + } +#else + { + /* (atime, mtime) */ + struct timespec timebuf[2] = { {0, UTIME_NOW}, statbuf->st_mtim }; + res += utimensat(AT_FDCWD, filename, timebuf, 0); + } +#endif #if !defined(_WIN32) res += chown(filename, statbuf->st_uid, statbuf->st_gid); /* Copy ownership */ diff --git a/programs/util.h b/programs/util.h index d6e5bb550e..0080b63c7a 100644 --- a/programs/util.h +++ b/programs/util.h @@ -25,12 +25,17 @@ extern "C" { #include /* fprintf */ #include /* stat, utime */ #include /* stat, chmod */ -#if defined(_MSC_VER) +#if defined(_WIN32) # include /* utime */ # include /* _chmod */ #else # include /* chown, stat */ +#if PLATFORM_POSIX_VERSION < 200809L # include /* utime */ +#else +# include /* AT_FDCWD */ +# include /* utimensat */ +#endif #endif #include /* clock_t, clock, CLOCKS_PER_SEC, nanosleep */ #include "mem.h" /* U32, U64 */ diff --git a/programs/zstd.1 b/programs/zstd.1 index 4b7273ff43..bb5103c612 100644 --- a/programs/zstd.1 +++ b/programs/zstd.1 @@ -1,5 +1,5 @@ . -.TH "ZSTD" "1" "August 2019" "zstd 1.4.3" "User Commands" +.TH "ZSTD" "1" "September 2019" "zstd 1.4.4" "User Commands" . .SH "NAME" \fBzstd\fR \- zstd, zstdmt, unzstd, zstdcat \- Compress or decompress \.zst files @@ -127,6 +127,14 @@ Does not spawn a thread for compression, use a single thread for both I/O and co \fBzstd\fR will dynamically adapt compression level to perceived I/O conditions\. Compression level adaptation can be observed live by using command \fB\-v\fR\. Adaptation can be constrained between supplied \fBmin\fR and \fBmax\fR levels\. The feature works when combined with multi\-threading and \fB\-\-long\fR mode\. It does not work with \fB\-\-single\-thread\fR\. It sets window size to 8 MB by default (can be changed manually, see \fBwlog\fR)\. Due to the chaotic nature of dynamic adaptation, compressed result is not reproducible\. \fInote\fR : at the time of this writing, \fB\-\-adapt\fR can remain stuck at low speed when combined with multiple worker threads (>=2)\. . .TP +\fB\-\-stream\-size=#\fR +Sets the pledged source size of input coming from a stream\. This value must be exact, as it will be included in the produced frame header\. Incorrect stream sizes will cause an error\. This information will be used to better optimize compression parameters, resulting in better and potentially faster compression, especially for smaller source sizes\. +. +.TP +\fB\-\-size\-hint=#\fR +When handling input from a stream, \fBzstd\fR must guess how large the source size will be when optimizing compression parameters\. If the stream size is relatively small, this guess may be a poor one, resulting in a higher compression ratio than expected\. This feature allows for controlling the guess when needed\. Exact guesses result in better compression ratios\. Overestimates result in slightly degraded compression ratios, while underestimates may result in significant degradation\. +. +.TP \fB\-\-rsyncable\fR \fBzstd\fR will periodically synchronize the compression state to make the compressed file more rsync\-friendly\. There is a negligible impact to compression ratio, and the faster compression levels will see a small compression speed hit\. This feature does not work with \fB\-\-single\-thread\fR\. You probably don\'t want to use it with long range mode, since it will decrease the effectiveness of the synchronization points, but your milage may vary\. . diff --git a/programs/zstd.1.md b/programs/zstd.1.md index 3ab2667a04..dff4d9eac5 100644 --- a/programs/zstd.1.md +++ b/programs/zstd.1.md @@ -144,6 +144,18 @@ the last one takes effect. Due to the chaotic nature of dynamic adaptation, compressed result is not reproducible. _note_ : at the time of this writing, `--adapt` can remain stuck at low speed when combined with multiple worker threads (>=2). +* `--stream-size=#` : + Sets the pledged source size of input coming from a stream. This value must be exact, as it + will be included in the produced frame header. Incorrect stream sizes will cause an error. + This information will be used to better optimize compression parameters, resulting in + better and potentially faster compression, especially for smaller source sizes. +* `--size-hint=#`: + When handling input from a stream, `zstd` must guess how large the source size + will be when optimizing compression parameters. If the stream size is relatively + small, this guess may be a poor one, resulting in a higher compression ratio than + expected. This feature allows for controlling the guess when needed. + Exact guesses result in better compression ratios. Overestimates result in slightly + degraded compression ratios, while underestimates may result in significant degradation. * `--rsyncable` : `zstd` will periodically synchronize the compression state to make the compressed file more rsync-friendly. There is a negligible impact to diff --git a/programs/zstdcli.c b/programs/zstdcli.c index de286cdf28..98df728a98 100644 --- a/programs/zstdcli.c +++ b/programs/zstdcli.c @@ -141,6 +141,8 @@ static int usage_advanced(const char* programName) DISPLAY( "--long[=#]: enable long distance matching with given window log (default: %u)\n", g_defaultMaxWindowLog); DISPLAY( "--fast[=#]: switch to ultra fast compression level (default: %u)\n", 1); DISPLAY( "--adapt : dynamically adapt compression level to I/O conditions \n"); + DISPLAY( "--stream-size=# : optimize compression parameters for streaming input of given number of bytes \n"); + DISPLAY( "--size-hint=# optimize compression parameters for streaming input of approximately this size\n"); DISPLAY( "--target-compressed-block-size=# : make compressed block near targeted size \n"); #ifdef ZSTD_MULTITHREAD DISPLAY( " -T# : spawns # compression threads (default: 1, 0==# cores) \n"); @@ -588,7 +590,9 @@ int main(int argCount, const char* argv[]) const char* suffix = ZSTD_EXTENSION; unsigned maxDictSize = g_defaultMaxDictSize; unsigned dictID = 0; + size_t streamSrcSize = 0; size_t targetCBlockSize = 0; + size_t srcSizeHint = 0; int dictCLevel = g_defaultDictCLevel; unsigned dictSelect = g_defaultSelectivityLevel; #ifdef UTIL_HAS_CREATEFILELIST @@ -745,7 +749,9 @@ int main(int argCount, const char* argv[]) if (longCommandWArg(&argument, "--maxdict=")) { maxDictSize = readU32FromChar(&argument); continue; } if (longCommandWArg(&argument, "--dictID=")) { dictID = readU32FromChar(&argument); continue; } if (longCommandWArg(&argument, "--zstd=")) { if (!parseCompressionParameters(argument, &compressionParams)) CLEAN_RETURN(badusage(programName)); continue; } + if (longCommandWArg(&argument, "--stream-size=")) { streamSrcSize = readU32FromChar(&argument); continue; } if (longCommandWArg(&argument, "--target-compressed-block-size=")) { targetCBlockSize = readU32FromChar(&argument); continue; } + if (longCommandWArg(&argument, "--size-hint=")) { srcSizeHint = readU32FromChar(&argument); continue; } if (longCommandWArg(&argument, "--long")) { unsigned ldmWindowLog = 0; ldmFlag = 1; @@ -1150,7 +1156,9 @@ int main(int argCount, const char* argv[]) FIO_setAdaptMin(prefs, adaptMin); FIO_setAdaptMax(prefs, adaptMax); FIO_setRsyncable(prefs, rsyncable); + FIO_setStreamSrcSize(prefs, streamSrcSize); FIO_setTargetCBlockSize(prefs, targetCBlockSize); + FIO_setSrcSizeHint(prefs, srcSizeHint); FIO_setLiteralCompressionMode(prefs, literalCompressionMode); if (adaptMin > cLevel) cLevel = adaptMin; if (adaptMax < cLevel) cLevel = adaptMax; @@ -1160,7 +1168,7 @@ int main(int argCount, const char* argv[]) else operationResult = FIO_compressMultipleFilenames(prefs, filenameTable, filenameIdx, outFileName, suffix, dictFileName, cLevel, compressionParams); #else - (void)suffix; (void)adapt; (void)rsyncable; (void)ultra; (void)cLevel; (void)ldmFlag; (void)literalCompressionMode; (void)targetCBlockSize; /* not used when ZSTD_NOCOMPRESS set */ + (void)suffix; (void)adapt; (void)rsyncable; (void)ultra; (void)cLevel; (void)ldmFlag; (void)literalCompressionMode; (void)targetCBlockSize; (void)streamSrcSize; (void)srcSizeHint; /* not used when ZSTD_NOCOMPRESS set */ DISPLAY("Compression not supported \n"); #endif } else { /* decompression or test */ diff --git a/programs/zstdgrep.1 b/programs/zstdgrep.1 index 456298f861..06927ab785 100644 --- a/programs/zstdgrep.1 +++ b/programs/zstdgrep.1 @@ -1,5 +1,5 @@ . -.TH "ZSTDGREP" "1" "August 2019" "zstd 1.4.3" "User Commands" +.TH "ZSTDGREP" "1" "September 2019" "zstd 1.4.4" "User Commands" . .SH "NAME" \fBzstdgrep\fR \- print lines matching a pattern in zstandard\-compressed files diff --git a/programs/zstdless.1 b/programs/zstdless.1 index 42156fd2f1..d49042276b 100644 --- a/programs/zstdless.1 +++ b/programs/zstdless.1 @@ -1,5 +1,5 @@ . -.TH "ZSTDLESS" "1" "August 2019" "zstd 1.4.3" "User Commands" +.TH "ZSTDLESS" "1" "September 2019" "zstd 1.4.4" "User Commands" . .SH "NAME" \fBzstdless\fR \- view zstandard\-compressed files diff --git a/tests/decodecorpus.c b/tests/decodecorpus.c index dbc27bc900..91873ba460 100644 --- a/tests/decodecorpus.c +++ b/tests/decodecorpus.c @@ -758,8 +758,8 @@ static U32 generateSequences(U32* seed, frame_t* frame, seqStore_t* seqStore, DISPLAYLEVEL(7, " repeat offset: %d\n", (int)repIndex); } /* use libzstd sequence handling */ - ZSTD_storeSeq(seqStore, literalLen, literals, offsetCode, - matchLen - MINMATCH); + ZSTD_storeSeq(seqStore, literalLen, literals, literals + literalLen, + offsetCode, matchLen - MINMATCH); literalsSize -= literalLen; excessMatch -= (matchLen - MIN_SEQ_LEN); diff --git a/tests/fuzz/Makefile b/tests/fuzz/Makefile index 8bf16b1fb0..83837e620f 100644 --- a/tests/fuzz/Makefile +++ b/tests/fuzz/Makefile @@ -40,8 +40,8 @@ FUZZ_LDFLAGS := -pthread $(LDFLAGS) FUZZ_ARFLAGS := $(ARFLAGS) FUZZ_TARGET_FLAGS = $(FUZZ_CPPFLAGS) $(FUZZ_CXXFLAGS) $(FUZZ_LDFLAGS) -FUZZ_HEADERS := fuzz_helpers.h fuzz.h zstd_helpers.h -FUZZ_SRC := $(PRGDIR)/util.c zstd_helpers.c +FUZZ_HEADERS := fuzz_helpers.h fuzz.h zstd_helpers.h fuzz_data_producer.h +FUZZ_SRC := $(PRGDIR)/util.c zstd_helpers.c fuzz_data_producer.c ZSTDCOMMON_SRC := $(ZSTDDIR)/common/*.c ZSTDCOMP_SRC := $(ZSTDDIR)/compress/*.c @@ -113,15 +113,6 @@ zstd_frame_info: $(FUZZ_HEADERS) $(FUZZ_OBJ) zstd_frame_info.o libregression.a: $(FUZZ_HEADERS) $(PRGDIR)/util.h $(PRGDIR)/util.c regression_driver.o $(AR) $(FUZZ_ARFLAGS) $@ regression_driver.o -# Install libfuzzer (not usable for MSAN testing) -# Provided for convenience. To use this library run make libFuzzer and -# set LDFLAGS=-L. -.PHONY: libFuzzer -libFuzzer: - @$(RM) -rf Fuzzer - @git clone https://chromium.googlesource.com/chromium/llvm-project/compiler-rt/lib/fuzzer Fuzzer - @cd Fuzzer && ./build.sh - corpora/%_seed_corpus.zip: @mkdir -p corpora $(DOWNLOAD) $@ $(CORPORA_URL_PREFIX)$*_seed_corpus.zip diff --git a/tests/fuzz/README.md b/tests/fuzz/README.md index 9e0bb259a9..71afa40631 100644 --- a/tests/fuzz/README.md +++ b/tests/fuzz/README.md @@ -35,6 +35,8 @@ The environment variables can be overridden with the corresponding flags `--cc`, `--cflags`, etc. The specific fuzzing engine is selected with `LIB_FUZZING_ENGINE` or `--lib-fuzzing-engine`, the default is `libregression.a`. +Alternatively, you can use Clang's built in fuzzing engine with +`--enable-fuzzer`. It has flags that can easily set up sanitizers `--enable-{a,ub,m}san`, and coverage instrumentation `--enable-coverage`. It sets sane defaults which can be overridden with flags `--debug`, @@ -51,22 +53,25 @@ The command used to run the fuzzer is printed for debugging. ## LibFuzzer ``` -# Build libfuzzer if necessary -make libFuzzer # Build the fuzz targets -./fuzz.py build all --enable-coverage --enable-asan --enable-ubsan --lib-fuzzing-engine Fuzzer/libFuzzer.a --cc clang --cxx clang++ +./fuzz.py build all --enable-fuzzer --enable-asan --enable-ubsan --cc clang --cxx clang++ # OR equivalently -CC=clang CXX=clang++ LIB_FUZZING_ENGINE=Fuzzer/libFuzzer.a ./fuzz.py build all --enable-coverage --enable-asan --enable-ubsan +CC=clang CXX=clang++ ./fuzz.py build all --enable-fuzzer --enable-asan --enable-ubsan # Run the fuzzer -./fuzz.py libfuzzer TARGET -max_len=8192 -jobs=4 +./fuzz.py libfuzzer TARGET ``` where `TARGET` could be `simple_decompress`, `stream_round_trip`, etc. ### MSAN -Fuzzing with `libFuzzer` and `MSAN` will require building a C++ standard library -and libFuzzer with MSAN. +Fuzzing with `libFuzzer` and `MSAN` is as easy as: + +``` +CC=clang CXX=clang++ ./fuzz.py build all --enable-fuzzer --enable-msan +./fuzz.py libfuzzer TARGET +``` + `fuzz.py` respects the environment variables / flags `MSAN_EXTRA_CPPFLAGS`, `MSAN_EXTRA_CFLAGS`, `MSAN_EXTRA_CXXFLAGS`, `MSAN_EXTRA_LDFLAGS` to easily pass the extra parameters only for MSAN. @@ -85,7 +90,7 @@ CC=afl-clang CXX=afl-clang++ ./fuzz.py build all --enable-asan --enable-ubsan ## Regression Testing -The regression rest supports the `all` target to run all the fuzzers in one +The regression test supports the `all` target to run all the fuzzers in one command. ``` diff --git a/tests/fuzz/block_decompress.c b/tests/fuzz/block_decompress.c index 3cccc32f4e..a904b44624 100644 --- a/tests/fuzz/block_decompress.c +++ b/tests/fuzz/block_decompress.c @@ -28,8 +28,6 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) { size_t const neededBufSize = ZSTD_BLOCKSIZE_MAX; - FUZZ_seed(&src, &size); - /* Allocate all buffers and contexts if not already allocated */ if (neededBufSize > bufSize) { free(rBuf); diff --git a/tests/fuzz/block_round_trip.c b/tests/fuzz/block_round_trip.c index 64ca5fc40f..89f060a6eb 100644 --- a/tests/fuzz/block_round_trip.c +++ b/tests/fuzz/block_round_trip.c @@ -20,21 +20,20 @@ #include #include "fuzz_helpers.h" #include "zstd.h" - -static const int kMaxClevel = 19; +#include "zstd_helpers.h" +#include "fuzz_data_producer.h" static ZSTD_CCtx *cctx = NULL; static ZSTD_DCtx *dctx = NULL; static void* cBuf = NULL; static void* rBuf = NULL; static size_t bufSize = 0; -static uint32_t seed; static size_t roundTripTest(void *result, size_t resultCapacity, void *compressed, size_t compressedCapacity, - const void *src, size_t srcSize) + const void *src, size_t srcSize, + int cLevel) { - int const cLevel = FUZZ_rand(&seed) % kMaxClevel; ZSTD_parameters const params = ZSTD_getParams(cLevel, srcSize, 0); size_t ret = ZSTD_compressBegin_advanced(cctx, NULL, 0, params, srcSize); FUZZ_ZASSERT(ret); @@ -52,12 +51,16 @@ static size_t roundTripTest(void *result, size_t resultCapacity, int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) { - size_t neededBufSize; + /* Give a random portion of src data to the producer, to use for + parameter generation. The rest will be used for (de)compression */ + FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(src, size); + size = FUZZ_dataProducer_reserveDataPrefix(producer); + + int const cLevel = FUZZ_dataProducer_int32Range(producer, kMinClevel, kMaxClevel); - seed = FUZZ_seed(&src, &size); - neededBufSize = size; + size_t neededBufSize = size; if (size > ZSTD_BLOCKSIZE_MAX) - return 0; + size = ZSTD_BLOCKSIZE_MAX; /* Allocate all buffers and contexts if not already allocated */ if (neededBufSize > bufSize || !cBuf || !rBuf) { @@ -79,11 +82,13 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) { size_t const result = - roundTripTest(rBuf, neededBufSize, cBuf, neededBufSize, src, size); + roundTripTest(rBuf, neededBufSize, cBuf, neededBufSize, src, size, + cLevel); FUZZ_ZASSERT(result); FUZZ_ASSERT_MSG(result == size, "Incorrect regenerated size"); FUZZ_ASSERT_MSG(!memcmp(src, rBuf, size), "Corruption!"); } + FUZZ_dataProducer_free(producer); #ifndef STATEFUL_FUZZING ZSTD_freeCCtx(cctx); cctx = NULL; ZSTD_freeDCtx(dctx); dctx = NULL; diff --git a/tests/fuzz/dictionary_decompress.c b/tests/fuzz/dictionary_decompress.c index e900054f5a..9cc69fa378 100644 --- a/tests/fuzz/dictionary_decompress.c +++ b/tests/fuzz/dictionary_decompress.c @@ -18,33 +18,37 @@ #include #include "fuzz_helpers.h" #include "zstd_helpers.h" +#include "fuzz_data_producer.h" static ZSTD_DCtx *dctx = NULL; int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) { - uint32_t seed = FUZZ_seed(&src, &size); + /* Give a random portion of src data to the producer, to use for + parameter generation. The rest will be used for (de)compression */ + FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(src, size); + size = FUZZ_dataProducer_reserveDataPrefix(producer); + FUZZ_dict_t dict; ZSTD_DDict* ddict = NULL; - int i; if (!dctx) { dctx = ZSTD_createDCtx(); FUZZ_ASSERT(dctx); } - dict = FUZZ_train(src, size, &seed); - if (FUZZ_rand32(&seed, 0, 1) == 0) { + dict = FUZZ_train(src, size, producer); + if (FUZZ_dataProducer_uint32Range(producer, 0, 1) == 0) { ddict = ZSTD_createDDict(dict.buff, dict.size); FUZZ_ASSERT(ddict); } else { FUZZ_ZASSERT(ZSTD_DCtx_loadDictionary_advanced( dctx, dict.buff, dict.size, - (ZSTD_dictLoadMethod_e)FUZZ_rand32(&seed, 0, 1), - (ZSTD_dictContentType_e)FUZZ_rand32(&seed, 0, 2))); + (ZSTD_dictLoadMethod_e)FUZZ_dataProducer_uint32Range(producer, 0, 1), + (ZSTD_dictContentType_e)FUZZ_dataProducer_uint32Range(producer, 0, 2))); } - /* Run it 10 times over 10 output sizes. Reuse the context and dict. */ - for (i = 0; i < 10; ++i) { - size_t const bufSize = FUZZ_rand32(&seed, 0, 2 * size); + + { + size_t const bufSize = FUZZ_dataProducer_uint32Range(producer, 0, 10 * size); void* rBuf = malloc(bufSize); FUZZ_ASSERT(rBuf); if (ddict) { @@ -55,6 +59,7 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) free(rBuf); } free(dict.buff); + FUZZ_dataProducer_free(producer); ZSTD_freeDDict(ddict); #ifndef STATEFUL_FUZZING ZSTD_freeDCtx(dctx); dctx = NULL; diff --git a/tests/fuzz/dictionary_round_trip.c b/tests/fuzz/dictionary_round_trip.c index e28c65c98f..9411b50a74 100644 --- a/tests/fuzz/dictionary_round_trip.c +++ b/tests/fuzz/dictionary_round_trip.c @@ -19,22 +19,21 @@ #include #include "fuzz_helpers.h" #include "zstd_helpers.h" - -static const int kMaxClevel = 19; +#include "fuzz_data_producer.h" static ZSTD_CCtx *cctx = NULL; static ZSTD_DCtx *dctx = NULL; -static uint32_t seed; static size_t roundTripTest(void *result, size_t resultCapacity, void *compressed, size_t compressedCapacity, - const void *src, size_t srcSize) + const void *src, size_t srcSize, + FUZZ_dataProducer_t *producer) { ZSTD_dictContentType_e dictContentType = ZSTD_dct_auto; - FUZZ_dict_t dict = FUZZ_train(src, srcSize, &seed); + FUZZ_dict_t dict = FUZZ_train(src, srcSize, producer); size_t cSize; - if ((FUZZ_rand(&seed) & 15) == 0) { - int const cLevel = FUZZ_rand(&seed) % kMaxClevel; + if (FUZZ_dataProducer_uint32Range(producer, 0, 15) == 0) { + int const cLevel = FUZZ_dataProducer_int32Range(producer, kMinClevel, kMaxClevel); cSize = ZSTD_compress_usingDict(cctx, compressed, compressedCapacity, @@ -42,20 +41,20 @@ static size_t roundTripTest(void *result, size_t resultCapacity, dict.buff, dict.size, cLevel); } else { - dictContentType = FUZZ_rand32(&seed, 0, 2); - FUZZ_setRandomParameters(cctx, srcSize, &seed); + dictContentType = FUZZ_dataProducer_uint32Range(producer, 0, 2); + FUZZ_setRandomParameters(cctx, srcSize, producer); /* Disable checksum so we can use sizes smaller than compress bound. */ FUZZ_ZASSERT(ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 0)); FUZZ_ZASSERT(ZSTD_CCtx_loadDictionary_advanced( cctx, dict.buff, dict.size, - (ZSTD_dictLoadMethod_e)FUZZ_rand32(&seed, 0, 1), + (ZSTD_dictLoadMethod_e)FUZZ_dataProducer_uint32Range(producer, 0, 1), dictContentType)); cSize = ZSTD_compress2(cctx, compressed, compressedCapacity, src, srcSize); } FUZZ_ZASSERT(cSize); FUZZ_ZASSERT(ZSTD_DCtx_loadDictionary_advanced( dctx, dict.buff, dict.size, - (ZSTD_dictLoadMethod_e)FUZZ_rand32(&seed, 0, 1), + (ZSTD_dictLoadMethod_e)FUZZ_dataProducer_uint32Range(producer, 0, 1), dictContentType)); { size_t const ret = ZSTD_decompressDCtx( @@ -67,17 +66,20 @@ static size_t roundTripTest(void *result, size_t resultCapacity, int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) { + /* Give a random portion of src data to the producer, to use for + parameter generation. The rest will be used for (de)compression */ + FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(src, size); + size = FUZZ_dataProducer_reserveDataPrefix(producer); + size_t const rBufSize = size; void* rBuf = malloc(rBufSize); size_t cBufSize = ZSTD_compressBound(size); - void* cBuf; - - seed = FUZZ_seed(&src, &size); + void *cBuf; /* Half of the time fuzz with a 1 byte smaller output size. * This will still succeed because we force the checksum to be disabled, * giving us 4 bytes of overhead. */ - cBufSize -= FUZZ_rand32(&seed, 0, 1); + cBufSize -= FUZZ_dataProducer_uint32Range(producer, 0, 1); cBuf = malloc(cBufSize); if (!cctx) { @@ -91,13 +93,14 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) { size_t const result = - roundTripTest(rBuf, rBufSize, cBuf, cBufSize, src, size); + roundTripTest(rBuf, rBufSize, cBuf, cBufSize, src, size, producer); FUZZ_ZASSERT(result); FUZZ_ASSERT_MSG(result == size, "Incorrect regenerated size"); FUZZ_ASSERT_MSG(!memcmp(src, rBuf, size), "Corruption!"); } free(rBuf); free(cBuf); + FUZZ_dataProducer_free(producer); #ifndef STATEFUL_FUZZING ZSTD_freeCCtx(cctx); cctx = NULL; ZSTD_freeDCtx(dctx); dctx = NULL; diff --git a/tests/fuzz/fuzz.h b/tests/fuzz/fuzz.h index 8850025b0f..6d53aa6d5c 100644 --- a/tests/fuzz/fuzz.h +++ b/tests/fuzz/fuzz.h @@ -17,12 +17,6 @@ * test code paths which are only executed when contexts are reused. * WARNING: Makes reproducing crashes much harder. * Default: Not defined. - * @param FUZZ_RNG_SEED_SIZE: - * The number of bytes of the source to look at when constructing a seed - * for the deterministic RNG. These bytes are discarded before passing - * the data to zstd functions. Every fuzzer initializes the RNG exactly - * once before doing anything else, even if it is unused. - * Default: 4. * @param DEBUGLEVEL: * This is a parameter for the zstd library. Defining `DEBUGLEVEL=1` * enables assert() statements in the zstd library. Higher levels enable @@ -42,10 +36,6 @@ #ifndef FUZZ_H #define FUZZ_H -#ifndef FUZZ_RNG_SEED_SIZE -# define FUZZ_RNG_SEED_SIZE 4 -#endif - #include #include diff --git a/tests/fuzz/fuzz.py b/tests/fuzz/fuzz.py index d993209a07..9df68df0ce 100755 --- a/tests/fuzz/fuzz.py +++ b/tests/fuzz/fuzz.py @@ -24,21 +24,38 @@ def abs_join(a, *p): return os.path.abspath(os.path.join(a, *p)) +class InputType(object): + RAW_DATA = 1 + COMPRESSED_DATA = 2 + + +class FrameType(object): + ZSTD = 1 + BLOCK = 2 + + +class TargetInfo(object): + def __init__(self, input_type, frame_type=FrameType.ZSTD): + self.input_type = input_type + self.frame_type = frame_type + + # Constants FUZZ_DIR = os.path.abspath(os.path.dirname(__file__)) CORPORA_DIR = abs_join(FUZZ_DIR, 'corpora') -TARGETS = [ - 'simple_round_trip', - 'stream_round_trip', - 'block_round_trip', - 'simple_decompress', - 'stream_decompress', - 'block_decompress', - 'dictionary_round_trip', - 'dictionary_decompress', - 'zstd_frame_info', - 'simple_compress', -] +TARGET_INFO = { + 'simple_round_trip': TargetInfo(InputType.RAW_DATA), + 'stream_round_trip': TargetInfo(InputType.RAW_DATA), + 'block_round_trip': TargetInfo(InputType.RAW_DATA, FrameType.BLOCK), + 'simple_decompress': TargetInfo(InputType.COMPRESSED_DATA), + 'stream_decompress': TargetInfo(InputType.COMPRESSED_DATA), + 'block_decompress': TargetInfo(InputType.COMPRESSED_DATA, FrameType.BLOCK), + 'dictionary_round_trip': TargetInfo(InputType.RAW_DATA), + 'dictionary_decompress': TargetInfo(InputType.COMPRESSED_DATA), + 'zstd_frame_info': TargetInfo(InputType.COMPRESSED_DATA), + 'simple_compress': TargetInfo(InputType.RAW_DATA), +} +TARGETS = list(TARGET_INFO.keys()) ALL_TARGETS = TARGETS + ['all'] FUZZ_RNG_SEED_SIZE = 4 @@ -67,7 +84,7 @@ def abs_join(a, *p): def create(r): d = os.path.abspath(r) if not os.path.isdir(d): - os.mkdir(d) + os.makedirs(d) return d @@ -158,7 +175,7 @@ def compiler_version(cc, cxx): assert(b'clang' in cxx_version_bytes) compiler = 'clang' elif b'gcc' in cc_version_bytes: - assert(b'gcc' in cxx_version_bytes) + assert(b'gcc' in cxx_version_bytes or b'g++' in cxx_version_bytes) compiler = 'gcc' if compiler is not None: version_regex = b'([0-9])+\.([0-9])+\.([0-9])+' @@ -643,7 +660,7 @@ def gen_parser(args): parser.add_argument( '--max-size-log', type=int, - default=13, + default=18, help='Maximum sample size to generate') parser.add_argument( '--seed', @@ -699,10 +716,11 @@ def gen(args): '-o{}'.format(decompressed), ] - if 'block_' in args.TARGET: + info = TARGET_INFO[args.TARGET] + if info.frame_type == FrameType.BLOCK: cmd += [ '--gen-blocks', - '--max-block-size-log={}'.format(args.max_size_log) + '--max-block-size-log={}'.format(min(args.max_size_log, 17)) ] else: cmd += ['--max-content-size-log={}'.format(args.max_size_log)] @@ -710,10 +728,11 @@ def gen(args): print(' '.join(cmd)) subprocess.check_call(cmd) - if '_round_trip' in args.TARGET: + if info.input_type == InputType.RAW_DATA: print('using decompressed data in {}'.format(decompressed)) samples = decompressed - elif '_decompress' in args.TARGET: + else: + assert info.input_type == InputType.COMPRESSED_DATA print('using compressed data in {}'.format(compressed)) samples = compressed @@ -721,10 +740,8 @@ def gen(args): for name in os.listdir(samples): samplename = abs_join(samples, name) outname = abs_join(seed, name) - rng_seed = os.urandom(args.fuzz_rng_seed_size) with open(samplename, 'rb') as sample: with open(outname, 'wb') as out: - out.write(rng_seed) CHUNK_SIZE = 131072 chunk = sample.read(CHUNK_SIZE) while len(chunk) > 0: diff --git a/tests/fuzz/fuzz_data_producer.c b/tests/fuzz/fuzz_data_producer.c new file mode 100644 index 0000000000..b465337eac --- /dev/null +++ b/tests/fuzz/fuzz_data_producer.c @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2016-present, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + */ + +#include "fuzz_data_producer.h" + +struct FUZZ_dataProducer_s{ + const uint8_t *data; + size_t size; +}; + +FUZZ_dataProducer_t *FUZZ_dataProducer_create(const uint8_t *data, size_t size) { + FUZZ_dataProducer_t *producer = malloc(sizeof(FUZZ_dataProducer_t)); + + FUZZ_ASSERT(producer != NULL); + + producer->data = data; + producer->size = size; + return producer; +} + +void FUZZ_dataProducer_free(FUZZ_dataProducer_t *producer) { free(producer); } + +uint32_t FUZZ_dataProducer_uint32Range(FUZZ_dataProducer_t *producer, uint32_t min, + uint32_t max) { + FUZZ_ASSERT(min <= max); + + uint32_t range = max - min; + uint32_t rolling = range; + uint32_t result = 0; + + while (rolling > 0 && producer->size > 0) { + uint8_t next = *(producer->data + producer->size - 1); + producer->size -= 1; + result = (result << 8) | next; + rolling >>= 8; + } + + if (range == 0xffffffff) { + return result; + } + + return min + result % (range + 1); +} + +uint32_t FUZZ_dataProducer_uint32(FUZZ_dataProducer_t *producer) { + return FUZZ_dataProducer_uint32Range(producer, 0, 0xffffffff); +} + +int32_t FUZZ_dataProducer_int32Range(FUZZ_dataProducer_t *producer, + int32_t min, int32_t max) +{ + FUZZ_ASSERT(min <= max); + + if (min < 0) + return (int)FUZZ_dataProducer_uint32Range(producer, 0, max - min) + min; + + return FUZZ_dataProducer_uint32Range(producer, min, max); +} + +size_t FUZZ_dataProducer_remainingBytes(FUZZ_dataProducer_t *producer){ + return producer->size; +} + +size_t FUZZ_dataProducer_contract(FUZZ_dataProducer_t *producer, size_t newSize) +{ + newSize = newSize > producer->size ? producer->size : newSize; + + size_t remaining = producer->size - newSize; + producer->data = producer->data + remaining; + producer->size = newSize; + return remaining; +} + +size_t FUZZ_dataProducer_reserveDataPrefix(FUZZ_dataProducer_t *producer) +{ + size_t producerSliceSize = FUZZ_dataProducer_uint32Range( + producer, 0, producer->size); + return FUZZ_dataProducer_contract(producer, producerSliceSize); +} diff --git a/tests/fuzz/fuzz_data_producer.h b/tests/fuzz/fuzz_data_producer.h new file mode 100644 index 0000000000..f2b609677d --- /dev/null +++ b/tests/fuzz/fuzz_data_producer.h @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2016-present, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + */ + +/** + * Helper APIs for generating random data from input data stream. + The producer reads bytes from the end of the input and appends them together + to generate a random number in the requested range. If it runs out of input + data, it will keep returning the same value (min) over and over again. + + */ + +#ifndef FUZZ_DATA_PRODUCER_H +#define FUZZ_DATA_PRODUCER_H + +#include +#include +#include +#include + +#include "fuzz_helpers.h" + +/* Struct used for maintaining the state of the data */ +typedef struct FUZZ_dataProducer_s FUZZ_dataProducer_t; + +/* Returns a data producer state struct. Use for producer initialization. */ +FUZZ_dataProducer_t *FUZZ_dataProducer_create(const uint8_t *data, size_t size); + +/* Frees the data producer */ +void FUZZ_dataProducer_free(FUZZ_dataProducer_t *producer); + +/* Returns value between [min, max] */ +uint32_t FUZZ_dataProducer_uint32Range(FUZZ_dataProducer_t *producer, uint32_t min, + uint32_t max); + +/* Returns a uint32 value */ +uint32_t FUZZ_dataProducer_uint32(FUZZ_dataProducer_t *producer); + +/* Returns a signed value between [min, max] */ +int32_t FUZZ_dataProducer_int32Range(FUZZ_dataProducer_t *producer, + int32_t min, int32_t max); + +/* Returns the size of the remaining bytes of data in the producer */ +size_t FUZZ_dataProducer_remainingBytes(FUZZ_dataProducer_t *producer); + +/* Restricts the producer to only the last newSize bytes of data. +If newSize > current data size, nothing happens. Returns the number of bytes +the producer won't use anymore, after contracting. */ +size_t FUZZ_dataProducer_contract(FUZZ_dataProducer_t *producer, size_t newSize); + +/* Restricts the producer to use only the last X bytes of data, where X is + a random number in the interval [0, data_size]. Returns the size of the + remaining data the producer won't use anymore (the prefix). */ +size_t FUZZ_dataProducer_reserveDataPrefix(FUZZ_dataProducer_t *producer); +#endif // FUZZ_DATA_PRODUCER_H diff --git a/tests/fuzz/fuzz_helpers.h b/tests/fuzz/fuzz_helpers.h index 0cf79d0d7c..3de917fd12 100644 --- a/tests/fuzz/fuzz_helpers.h +++ b/tests/fuzz/fuzz_helpers.h @@ -14,6 +14,7 @@ #ifndef FUZZ_HELPERS_H #define FUZZ_HELPERS_H +#include "debug.h" #include "fuzz.h" #include "xxhash.h" #include "zstd.h" @@ -54,37 +55,6 @@ extern "C" { #define FUZZ_STATIC static #endif -/** - * Deterministically constructs a seed based on the fuzz input. - * Consumes up to the first FUZZ_RNG_SEED_SIZE bytes of the input. - */ -FUZZ_STATIC uint32_t FUZZ_seed(uint8_t const **src, size_t* size) { - uint8_t const *data = *src; - size_t const toHash = MIN(FUZZ_RNG_SEED_SIZE, *size); - *size -= toHash; - *src += toHash; - return XXH32(data, toHash, 0); -} - -#define FUZZ_rotl32(x, r) (((x) << (r)) | ((x) >> (32 - (r)))) - -FUZZ_STATIC uint32_t FUZZ_rand(uint32_t *state) { - static const uint32_t prime1 = 2654435761U; - static const uint32_t prime2 = 2246822519U; - uint32_t rand32 = *state; - rand32 *= prime1; - rand32 += prime2; - rand32 = FUZZ_rotl32(rand32, 13); - *state = rand32; - return rand32 >> 5; -} - -/* Returns a random numer in the range [min, max]. */ -FUZZ_STATIC uint32_t FUZZ_rand32(uint32_t *state, uint32_t min, uint32_t max) { - uint32_t random = FUZZ_rand(state); - return min + (random % (max - min + 1)); -} - #ifdef __cplusplus } #endif diff --git a/tests/fuzz/regression_driver.c b/tests/fuzz/regression_driver.c index 658c685f4f..e3ebcd5ced 100644 --- a/tests/fuzz/regression_driver.c +++ b/tests/fuzz/regression_driver.c @@ -36,6 +36,7 @@ int main(int argc, char const **argv) { fprintf(stderr, "WARNING: No files passed to %s\n", argv[0]); for (i = 0; i < numFiles; ++i) { char const *fileName = files[i]; + DEBUGLOG(3, "Running %s", fileName); size_t const fileSize = UTIL_getFileSize(fileName); size_t readSize; FILE *file; diff --git a/tests/fuzz/simple_compress.c b/tests/fuzz/simple_compress.c index aaed403575..487be3a354 100644 --- a/tests/fuzz/simple_compress.c +++ b/tests/fuzz/simple_compress.c @@ -18,28 +18,33 @@ #include #include "fuzz_helpers.h" #include "zstd.h" +#include "zstd_helpers.h" +#include "fuzz_data_producer.h" static ZSTD_CCtx *cctx = NULL; int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) { - uint32_t seed = FUZZ_seed(&src, &size); + /* Give a random portion of src data to the producer, to use for + parameter generation. The rest will be used for (de)compression */ + FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(src, size); + size = FUZZ_dataProducer_reserveDataPrefix(producer); + size_t const maxSize = ZSTD_compressBound(size); - int i; + size_t const bufSize = FUZZ_dataProducer_uint32Range(producer, 0, maxSize); + + int const cLevel = FUZZ_dataProducer_int32Range(producer, kMinClevel, kMaxClevel); + if (!cctx) { cctx = ZSTD_createCCtx(); FUZZ_ASSERT(cctx); } - /* Run it 10 times over 10 output sizes. Reuse the context. */ - for (i = 0; i < 10; ++i) { - int const level = (int)FUZZ_rand32(&seed, 0, 19 + 3) - 3; /* [-3, 19] */ - size_t const bufSize = FUZZ_rand32(&seed, 0, maxSize); - void* rBuf = malloc(bufSize); - FUZZ_ASSERT(rBuf); - ZSTD_compressCCtx(cctx, rBuf, bufSize, src, size, level); - free(rBuf); - } + void *rBuf = malloc(bufSize); + FUZZ_ASSERT(rBuf); + ZSTD_compressCCtx(cctx, rBuf, bufSize, src, size, cLevel); + free(rBuf); + FUZZ_dataProducer_free(producer); #ifndef STATEFUL_FUZZING ZSTD_freeCCtx(cctx); cctx = NULL; #endif diff --git a/tests/fuzz/simple_decompress.c b/tests/fuzz/simple_decompress.c index af3f302bb0..6182746a1d 100644 --- a/tests/fuzz/simple_decompress.c +++ b/tests/fuzz/simple_decompress.c @@ -17,26 +17,30 @@ #include #include "fuzz_helpers.h" #include "zstd.h" +#include "fuzz_data_producer.h" static ZSTD_DCtx *dctx = NULL; int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) { + /* Give a random portion of src data to the producer, to use for + parameter generation. The rest will be used for (de)compression */ + FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(src, size); + size = FUZZ_dataProducer_reserveDataPrefix(producer); - uint32_t seed = FUZZ_seed(&src, &size); - int i; if (!dctx) { dctx = ZSTD_createDCtx(); FUZZ_ASSERT(dctx); } - /* Run it 10 times over 10 output sizes. Reuse the context. */ - for (i = 0; i < 10; ++i) { - size_t const bufSize = FUZZ_rand32(&seed, 0, 2 * size); - void* rBuf = malloc(bufSize); - FUZZ_ASSERT(rBuf); - ZSTD_decompressDCtx(dctx, rBuf, bufSize, src, size); - free(rBuf); - } + + size_t const bufSize = FUZZ_dataProducer_uint32Range(producer, 0, 10 * size); + void *rBuf = malloc(bufSize); + FUZZ_ASSERT(rBuf); + + ZSTD_decompressDCtx(dctx, rBuf, bufSize, src, size); + free(rBuf); + + FUZZ_dataProducer_free(producer); #ifndef STATEFUL_FUZZING ZSTD_freeDCtx(dctx); dctx = NULL; diff --git a/tests/fuzz/simple_round_trip.c b/tests/fuzz/simple_round_trip.c index 7e3b660982..2d1d0598a1 100644 --- a/tests/fuzz/simple_round_trip.c +++ b/tests/fuzz/simple_round_trip.c @@ -20,23 +20,23 @@ #include #include "fuzz_helpers.h" #include "zstd_helpers.h" - -static const int kMaxClevel = 19; +#include "fuzz_data_producer.h" static ZSTD_CCtx *cctx = NULL; static ZSTD_DCtx *dctx = NULL; -static uint32_t seed; static size_t roundTripTest(void *result, size_t resultCapacity, void *compressed, size_t compressedCapacity, - const void *src, size_t srcSize) + const void *src, size_t srcSize, + FUZZ_dataProducer_t *producer) { size_t cSize; - if (FUZZ_rand(&seed) & 1) { - FUZZ_setRandomParameters(cctx, srcSize, &seed); + if (FUZZ_dataProducer_uint32Range(producer, 0, 1)) { + FUZZ_setRandomParameters(cctx, srcSize, producer); cSize = ZSTD_compress2(cctx, compressed, compressedCapacity, src, srcSize); } else { - int const cLevel = FUZZ_rand(&seed) % kMaxClevel; + int const cLevel = FUZZ_dataProducer_int32Range(producer, kMinClevel, kMaxClevel); + cSize = ZSTD_compressCCtx( cctx, compressed, compressedCapacity, src, srcSize, cLevel); } @@ -51,12 +51,17 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) size_t cBufSize = ZSTD_compressBound(size); void* cBuf; - seed = FUZZ_seed(&src, &size); + /* Give a random portion of src data to the producer, to use for + parameter generation. The rest will be used for (de)compression */ + FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(src, size); + size = FUZZ_dataProducer_reserveDataPrefix(producer); + /* Half of the time fuzz with a 1 byte smaller output size. * This will still succeed because we don't use a dictionary, so the dictID * field is empty, giving us 4 bytes of overhead. */ - cBufSize -= FUZZ_rand32(&seed, 0, 1); + cBufSize -= FUZZ_dataProducer_uint32Range(producer, 0, 1); + cBuf = malloc(cBufSize); FUZZ_ASSERT(cBuf && rBuf); @@ -72,13 +77,14 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) { size_t const result = - roundTripTest(rBuf, rBufSize, cBuf, cBufSize, src, size); + roundTripTest(rBuf, rBufSize, cBuf, cBufSize, src, size, producer); FUZZ_ZASSERT(result); FUZZ_ASSERT_MSG(result == size, "Incorrect regenerated size"); FUZZ_ASSERT_MSG(!memcmp(src, rBuf, size), "Corruption!"); } free(rBuf); free(cBuf); + FUZZ_dataProducer_free(producer); #ifndef STATEFUL_FUZZING ZSTD_freeCCtx(cctx); cctx = NULL; ZSTD_freeDCtx(dctx); dctx = NULL; diff --git a/tests/fuzz/stream_decompress.c b/tests/fuzz/stream_decompress.c index 68e120d7ef..c71cc9d3ec 100644 --- a/tests/fuzz/stream_decompress.c +++ b/tests/fuzz/stream_decompress.c @@ -19,6 +19,7 @@ #include #include "fuzz_helpers.h" #include "zstd.h" +#include "fuzz_data_producer.h" static size_t const kBufSize = ZSTD_BLOCKSIZE_MAX; @@ -26,22 +27,23 @@ static ZSTD_DStream *dstream = NULL; static void* buf = NULL; uint32_t seed; -static ZSTD_outBuffer makeOutBuffer(void) +static ZSTD_outBuffer makeOutBuffer(FUZZ_dataProducer_t *producer) { ZSTD_outBuffer buffer = { buf, 0, 0 }; - buffer.size = (FUZZ_rand(&seed) % kBufSize) + 1; + buffer.size = (FUZZ_dataProducer_uint32Range(producer, 1, kBufSize)); FUZZ_ASSERT(buffer.size <= kBufSize); return buffer; } -static ZSTD_inBuffer makeInBuffer(const uint8_t **src, size_t *size) +static ZSTD_inBuffer makeInBuffer(const uint8_t **src, size_t *size, + FUZZ_dataProducer_t *producer) { ZSTD_inBuffer buffer = { *src, 0, 0 }; FUZZ_ASSERT(*size > 0); - buffer.size = (FUZZ_rand(&seed) % *size) + 1; + buffer.size = (FUZZ_dataProducer_uint32Range(producer, 1, *size)); FUZZ_ASSERT(buffer.size <= *size); *src += buffer.size; *size -= buffer.size; @@ -51,13 +53,16 @@ static ZSTD_inBuffer makeInBuffer(const uint8_t **src, size_t *size) int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) { - seed = FUZZ_seed(&src, &size); + /* Give a random portion of src data to the producer, to use for + parameter generation. The rest will be used for (de)compression */ + FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(src, size); + size = FUZZ_dataProducer_reserveDataPrefix(producer); /* Allocate all buffers and contexts if not already allocated */ if (!buf) { buf = malloc(kBufSize); - FUZZ_ASSERT(buf); - } + FUZZ_ASSERT(buf); + } if (!dstream) { dstream = ZSTD_createDStream(); @@ -67,9 +72,9 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) } while (size > 0) { - ZSTD_inBuffer in = makeInBuffer(&src, &size); + ZSTD_inBuffer in = makeInBuffer(&src, &size, producer); while (in.pos != in.size) { - ZSTD_outBuffer out = makeOutBuffer(); + ZSTD_outBuffer out = makeOutBuffer(producer); size_t const rc = ZSTD_decompressStream(dstream, &out, &in); if (ZSTD_isError(rc)) goto error; } @@ -79,5 +84,6 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) #ifndef STATEFUL_FUZZING ZSTD_freeDStream(dstream); dstream = NULL; #endif + FUZZ_dataProducer_free(producer); return 0; } diff --git a/tests/fuzz/stream_round_trip.c b/tests/fuzz/stream_round_trip.c index d13c2dbe7e..c534a904a1 100644 --- a/tests/fuzz/stream_round_trip.c +++ b/tests/fuzz/stream_round_trip.c @@ -20,31 +20,33 @@ #include #include "fuzz_helpers.h" #include "zstd_helpers.h" +#include "fuzz_data_producer.h" ZSTD_CCtx *cctx = NULL; static ZSTD_DCtx *dctx = NULL; static uint8_t* cBuf = NULL; static uint8_t* rBuf = NULL; static size_t bufSize = 0; -static uint32_t seed; -static ZSTD_outBuffer makeOutBuffer(uint8_t *dst, size_t capacity) +static ZSTD_outBuffer makeOutBuffer(uint8_t *dst, size_t capacity, + FUZZ_dataProducer_t *producer) { ZSTD_outBuffer buffer = { dst, 0, 0 }; FUZZ_ASSERT(capacity > 0); - buffer.size = (FUZZ_rand(&seed) % capacity) + 1; + buffer.size = (FUZZ_dataProducer_uint32Range(producer, 1, capacity)); FUZZ_ASSERT(buffer.size <= capacity); return buffer; } -static ZSTD_inBuffer makeInBuffer(const uint8_t **src, size_t *size) +static ZSTD_inBuffer makeInBuffer(const uint8_t **src, size_t *size, + FUZZ_dataProducer_t *producer) { ZSTD_inBuffer buffer = { *src, 0, 0 }; FUZZ_ASSERT(*size > 0); - buffer.size = (FUZZ_rand(&seed) % *size) + 1; + buffer.size = (FUZZ_dataProducer_uint32Range(producer, 1, *size)); FUZZ_ASSERT(buffer.size <= *size); *src += buffer.size; *size -= buffer.size; @@ -53,23 +55,24 @@ static ZSTD_inBuffer makeInBuffer(const uint8_t **src, size_t *size) } static size_t compress(uint8_t *dst, size_t capacity, - const uint8_t *src, size_t srcSize) + const uint8_t *src, size_t srcSize, + FUZZ_dataProducer_t *producer) { size_t dstSize = 0; ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only); - FUZZ_setRandomParameters(cctx, srcSize, &seed); + FUZZ_setRandomParameters(cctx, srcSize, producer); while (srcSize > 0) { - ZSTD_inBuffer in = makeInBuffer(&src, &srcSize); + ZSTD_inBuffer in = makeInBuffer(&src, &srcSize, producer); /* Mode controls the action. If mode == -1 we pick a new mode */ int mode = -1; while (in.pos < in.size || mode != -1) { - ZSTD_outBuffer out = makeOutBuffer(dst, capacity); + ZSTD_outBuffer out = makeOutBuffer(dst, capacity, producer); /* Previous action finished, pick a new mode. */ - if (mode == -1) mode = FUZZ_rand(&seed) % 10; + if (mode == -1) mode = FUZZ_dataProducer_uint32Range(producer, 0, 9); switch (mode) { - case 0: /* fall-though */ - case 1: /* fall-though */ + case 0: /* fall-through */ + case 1: /* fall-through */ case 2: { size_t const ret = ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_flush); @@ -85,9 +88,9 @@ static size_t compress(uint8_t *dst, size_t capacity, /* Reset the compressor when the frame is finished */ if (ret == 0) { ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only); - if ((FUZZ_rand(&seed) & 7) == 0) { + if (FUZZ_dataProducer_uint32Range(producer, 0, 7) == 0) { size_t const remaining = in.size - in.pos; - FUZZ_setRandomParameters(cctx, remaining, &seed); + FUZZ_setRandomParameters(cctx, remaining, producer); } mode = -1; } @@ -107,7 +110,7 @@ static size_t compress(uint8_t *dst, size_t capacity, } for (;;) { ZSTD_inBuffer in = {NULL, 0, 0}; - ZSTD_outBuffer out = makeOutBuffer(dst, capacity); + ZSTD_outBuffer out = makeOutBuffer(dst, capacity, producer); size_t const ret = ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_end); FUZZ_ZASSERT(ret); @@ -122,10 +125,13 @@ static size_t compress(uint8_t *dst, size_t capacity, int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) { - size_t neededBufSize; + /* Give a random portion of src data to the producer, to use for + parameter generation. The rest will be used for (de)compression */ + FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(src, size); + size = FUZZ_dataProducer_reserveDataPrefix(producer); - seed = FUZZ_seed(&src, &size); - neededBufSize = ZSTD_compressBound(size) * 2; + size_t neededBufSize; + neededBufSize = ZSTD_compressBound(size) * 5; /* Allocate all buffers and contexts if not already allocated */ if (neededBufSize > bufSize) { @@ -146,7 +152,7 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) } { - size_t const cSize = compress(cBuf, neededBufSize, src, size); + size_t const cSize = compress(cBuf, neededBufSize, src, size, producer); size_t const rSize = ZSTD_decompressDCtx(dctx, rBuf, neededBufSize, cBuf, cSize); FUZZ_ZASSERT(rSize); @@ -154,6 +160,7 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) FUZZ_ASSERT_MSG(!memcmp(src, rBuf, size), "Corruption!"); } + FUZZ_dataProducer_free(producer); #ifndef STATEFUL_FUZZING ZSTD_freeCCtx(cctx); cctx = NULL; ZSTD_freeDCtx(dctx); dctx = NULL; diff --git a/tests/fuzz/zstd_frame_info.c b/tests/fuzz/zstd_frame_info.c index 7512d5f493..359cf128f5 100644 --- a/tests/fuzz/zstd_frame_info.c +++ b/tests/fuzz/zstd_frame_info.c @@ -21,10 +21,6 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) { ZSTD_frameHeader zfh; - /* Consume the seed to be compatible with the corpora of other decompression - * fuzzers. - */ - FUZZ_seed(&src, &size); /* You can fuzz any helper functions here that are fast, and take zstd * compressed data as input. E.g. don't expect the input to be a dictionary, * so don't fuzz ZSTD_getDictID_fromDict(). diff --git a/tests/fuzz/zstd_helpers.c b/tests/fuzz/zstd_helpers.c index 9dff2895a9..90bf1a1567 100644 --- a/tests/fuzz/zstd_helpers.c +++ b/tests/fuzz/zstd_helpers.c @@ -17,53 +17,56 @@ #include "zstd.h" #include "zdict.h" +const int kMinClevel = -3; +const int kMaxClevel = 19; + static void set(ZSTD_CCtx *cctx, ZSTD_cParameter param, int value) { FUZZ_ZASSERT(ZSTD_CCtx_setParameter(cctx, param, value)); } static void setRand(ZSTD_CCtx *cctx, ZSTD_cParameter param, unsigned min, - unsigned max, uint32_t *state) { - unsigned const value = FUZZ_rand32(state, min, max); + unsigned max, FUZZ_dataProducer_t *producer) { + unsigned const value = FUZZ_dataProducer_uint32Range(producer, min, max); set(cctx, param, value); } -ZSTD_compressionParameters FUZZ_randomCParams(size_t srcSize, uint32_t *state) +ZSTD_compressionParameters FUZZ_randomCParams(size_t srcSize, FUZZ_dataProducer_t *producer) { /* Select compression parameters */ ZSTD_compressionParameters cParams; - cParams.windowLog = FUZZ_rand32(state, ZSTD_WINDOWLOG_MIN, 15); - cParams.hashLog = FUZZ_rand32(state, ZSTD_HASHLOG_MIN, 15); - cParams.chainLog = FUZZ_rand32(state, ZSTD_CHAINLOG_MIN, 16); - cParams.searchLog = FUZZ_rand32(state, ZSTD_SEARCHLOG_MIN, 9); - cParams.minMatch = FUZZ_rand32(state, ZSTD_MINMATCH_MIN, + cParams.windowLog = FUZZ_dataProducer_uint32Range(producer, ZSTD_WINDOWLOG_MIN, 15); + cParams.hashLog = FUZZ_dataProducer_uint32Range(producer, ZSTD_HASHLOG_MIN, 15); + cParams.chainLog = FUZZ_dataProducer_uint32Range(producer, ZSTD_CHAINLOG_MIN, 16); + cParams.searchLog = FUZZ_dataProducer_uint32Range(producer, ZSTD_SEARCHLOG_MIN, 9); + cParams.minMatch = FUZZ_dataProducer_uint32Range(producer, ZSTD_MINMATCH_MIN, ZSTD_MINMATCH_MAX); - cParams.targetLength = FUZZ_rand32(state, 0, 512); - cParams.strategy = FUZZ_rand32(state, ZSTD_STRATEGY_MIN, ZSTD_STRATEGY_MAX); + cParams.targetLength = FUZZ_dataProducer_uint32Range(producer, 0, 512); + cParams.strategy = FUZZ_dataProducer_uint32Range(producer, ZSTD_STRATEGY_MIN, ZSTD_STRATEGY_MAX); return ZSTD_adjustCParams(cParams, srcSize, 0); } -ZSTD_frameParameters FUZZ_randomFParams(uint32_t *state) +ZSTD_frameParameters FUZZ_randomFParams(FUZZ_dataProducer_t *producer) { /* Select frame parameters */ ZSTD_frameParameters fParams; - fParams.contentSizeFlag = FUZZ_rand32(state, 0, 1); - fParams.checksumFlag = FUZZ_rand32(state, 0, 1); - fParams.noDictIDFlag = FUZZ_rand32(state, 0, 1); + fParams.contentSizeFlag = FUZZ_dataProducer_uint32Range(producer, 0, 1); + fParams.checksumFlag = FUZZ_dataProducer_uint32Range(producer, 0, 1); + fParams.noDictIDFlag = FUZZ_dataProducer_uint32Range(producer, 0, 1); return fParams; } -ZSTD_parameters FUZZ_randomParams(size_t srcSize, uint32_t *state) +ZSTD_parameters FUZZ_randomParams(size_t srcSize, FUZZ_dataProducer_t *producer) { ZSTD_parameters params; - params.cParams = FUZZ_randomCParams(srcSize, state); - params.fParams = FUZZ_randomFParams(state); + params.cParams = FUZZ_randomCParams(srcSize, producer); + params.fParams = FUZZ_randomFParams(producer); return params; } -void FUZZ_setRandomParameters(ZSTD_CCtx *cctx, size_t srcSize, uint32_t *state) +void FUZZ_setRandomParameters(ZSTD_CCtx *cctx, size_t srcSize, FUZZ_dataProducer_t *producer) { - ZSTD_compressionParameters cParams = FUZZ_randomCParams(srcSize, state); + ZSTD_compressionParameters cParams = FUZZ_randomCParams(srcSize, producer); set(cctx, ZSTD_c_windowLog, cParams.windowLog); set(cctx, ZSTD_c_hashLog, cParams.hashLog); set(cctx, ZSTD_c_chainLog, cParams.chainLog); @@ -72,27 +75,30 @@ void FUZZ_setRandomParameters(ZSTD_CCtx *cctx, size_t srcSize, uint32_t *state) set(cctx, ZSTD_c_targetLength, cParams.targetLength); set(cctx, ZSTD_c_strategy, cParams.strategy); /* Select frame parameters */ - setRand(cctx, ZSTD_c_contentSizeFlag, 0, 1, state); - setRand(cctx, ZSTD_c_checksumFlag, 0, 1, state); - setRand(cctx, ZSTD_c_dictIDFlag, 0, 1, state); + setRand(cctx, ZSTD_c_contentSizeFlag, 0, 1, producer); + setRand(cctx, ZSTD_c_checksumFlag, 0, 1, producer); + setRand(cctx, ZSTD_c_dictIDFlag, 0, 1, producer); /* Select long distance matching parameters */ - setRand(cctx, ZSTD_c_enableLongDistanceMatching, 0, 1, state); - setRand(cctx, ZSTD_c_ldmHashLog, ZSTD_HASHLOG_MIN, 16, state); + setRand(cctx, ZSTD_c_enableLongDistanceMatching, 0, 1, producer); + setRand(cctx, ZSTD_c_ldmHashLog, ZSTD_HASHLOG_MIN, 16, producer); setRand(cctx, ZSTD_c_ldmMinMatch, ZSTD_LDM_MINMATCH_MIN, - ZSTD_LDM_MINMATCH_MAX, state); + ZSTD_LDM_MINMATCH_MAX, producer); setRand(cctx, ZSTD_c_ldmBucketSizeLog, 0, ZSTD_LDM_BUCKETSIZELOG_MAX, - state); + producer); setRand(cctx, ZSTD_c_ldmHashRateLog, ZSTD_LDM_HASHRATELOG_MIN, - ZSTD_LDM_HASHRATELOG_MAX, state); + ZSTD_LDM_HASHRATELOG_MAX, producer); /* Set misc parameters */ - setRand(cctx, ZSTD_c_nbWorkers, 0, 2, state); - setRand(cctx, ZSTD_c_rsyncable, 0, 1, state); - setRand(cctx, ZSTD_c_forceMaxWindow, 0, 1, state); - setRand(cctx, ZSTD_c_literalCompressionMode, 0, 2, state); - setRand(cctx, ZSTD_c_forceAttachDict, 0, 2, state); + setRand(cctx, ZSTD_c_nbWorkers, 0, 2, producer); + setRand(cctx, ZSTD_c_rsyncable, 0, 1, producer); + setRand(cctx, ZSTD_c_forceMaxWindow, 0, 1, producer); + setRand(cctx, ZSTD_c_literalCompressionMode, 0, 2, producer); + setRand(cctx, ZSTD_c_forceAttachDict, 0, 2, producer); + if (FUZZ_dataProducer_uint32Range(producer, 0, 1) == 0) { + setRand(cctx, ZSTD_c_srcSizeHint, ZSTD_SRCSIZEHINT_MIN, 2 * srcSize, producer); + } } -FUZZ_dict_t FUZZ_train(void const* src, size_t srcSize, uint32_t *state) +FUZZ_dict_t FUZZ_train(void const* src, size_t srcSize, FUZZ_dataProducer_t *producer) { size_t const dictSize = MAX(srcSize / 8, 1024); size_t const totalSampleSize = dictSize * 11; @@ -107,7 +113,7 @@ FUZZ_dict_t FUZZ_train(void const* src, size_t srcSize, uint32_t *state) for (sample = 0; sample < nbSamples; ++sample) { size_t const remaining = totalSampleSize - pos; - size_t const offset = FUZZ_rand32(state, 0, MAX(srcSize, 1) - 1); + size_t const offset = FUZZ_dataProducer_uint32Range(producer, 0, MAX(srcSize, 1) - 1); size_t const limit = MIN(srcSize - offset, remaining); size_t const toCopy = MIN(limit, remaining / (nbSamples - sample)); memcpy(samples + pos, src + offset, toCopy); diff --git a/tests/fuzz/zstd_helpers.h b/tests/fuzz/zstd_helpers.h index 457e6e995f..2210bcaf8f 100644 --- a/tests/fuzz/zstd_helpers.h +++ b/tests/fuzz/zstd_helpers.h @@ -17,17 +17,21 @@ #define ZSTD_STATIC_LINKING_ONLY #include "zstd.h" +#include "fuzz_data_producer.h" #include #ifdef __cplusplus extern "C" { #endif -void FUZZ_setRandomParameters(ZSTD_CCtx *cctx, size_t srcSize, uint32_t *state); +extern const int kMinClevel; +extern const int kMaxClevel; -ZSTD_compressionParameters FUZZ_randomCParams(size_t srcSize, uint32_t *state); -ZSTD_frameParameters FUZZ_randomFParams(uint32_t *state); -ZSTD_parameters FUZZ_randomParams(size_t srcSize, uint32_t *state); +void FUZZ_setRandomParameters(ZSTD_CCtx *cctx, size_t srcSize, FUZZ_dataProducer_t *producer); + +ZSTD_compressionParameters FUZZ_randomCParams(size_t srcSize, FUZZ_dataProducer_t *producer); +ZSTD_frameParameters FUZZ_randomFParams(FUZZ_dataProducer_t *producer); +ZSTD_parameters FUZZ_randomParams(size_t srcSize, FUZZ_dataProducer_t *producer); typedef struct { void* buff; @@ -38,8 +42,7 @@ typedef struct { * NOTE: Don't use this to train production dictionaries, it is only optimized * for speed, and doesn't care about dictionary quality. */ -FUZZ_dict_t FUZZ_train(void const* src, size_t srcSize, uint32_t *state); - +FUZZ_dict_t FUZZ_train(void const* src, size_t srcSize, FUZZ_dataProducer_t *producer); #ifdef __cplusplus } diff --git a/tests/fuzzer.c b/tests/fuzzer.c index c256389a59..3baa9b0793 100644 --- a/tests/fuzzer.c +++ b/tests/fuzzer.c @@ -401,7 +401,7 @@ static int basicUnitTests(U32 const seed, double compressibility) DISPLAYLEVEL(3, "test%3i : check decompressed result : ", testNb++); { size_t u; for (u=0; u simple tests " ./datagen > tmp @@ -242,6 +241,11 @@ $ZSTD -f tmp && die "attempt to compress a non existing file" test -f tmp.zst # destination file should still be present rm tmp* +println "\n===> decompression only tests " +head -c 1048576 /dev/zero > tmp +$ZSTD -d -o tmp1 "$TESTDIR/golden-decompression/rle-first-block.zst" +$DIFF -s tmp1 tmp +rm tmp* println "test : compress multiple files" println hello > tmp1 @@ -409,6 +413,53 @@ println "compress multiple files including a missing one (notHere) : " $ZSTD -f tmp1 notHere tmp2 && die "missing file not detected!" +println "\n===> stream-size mode" + +./datagen -g11000 > tmp +println "test : basic file compression vs sized streaming compression" +file_size=$($ZSTD -14 -f tmp -o tmp.zst && wc -c < tmp.zst) +stream_size=$(cat tmp | $ZSTD -14 --stream-size=11000 | wc -c) +if [ "$stream_size" -gt "$file_size" ]; then + die "hinted compression larger than expected" +fi +println "test : sized streaming compression and decompression" +cat tmp | $ZSTD -14 -f tmp -o --stream-size=11000 tmp.zst +$ZSTD -df tmp.zst -o tmp_decompress +cmp tmp tmp_decompress || die "difference between original and decompressed file" +println "test : incorrect stream size" +cat tmp | $ZSTD -14 -f -o tmp.zst --stream-size=11001 && die "should fail with incorrect stream size" + + +println "\n===> size-hint mode" + +./datagen -g11000 > tmp +./datagen -g11000 > tmp2 +./datagen > tmpDict +println "test : basic file compression vs hinted streaming compression" +file_size=$($ZSTD -14 -f tmp -o tmp.zst && wc -c < tmp.zst) +stream_size=$(cat tmp | $ZSTD -14 --size-hint=11000 | wc -c) +if [ "$stream_size" -ge "$file_size" ]; then + die "hinted compression larger than expected" +fi +println "test : hinted streaming compression and decompression" +cat tmp | $ZSTD -14 -f -o tmp.zst --size-hint=11000 +$ZSTD -df tmp.zst -o tmp_decompress +cmp tmp tmp_decompress || die "difference between original and decompressed file" +println "test : hinted streaming compression with dictionary" +cat tmp | $ZSTD -14 -f -D tmpDict --size-hint=11000 | $ZSTD -t -D tmpDict +println "test : multiple file compression with hints and dictionary" +$ZSTD -14 -f -D tmpDict --size-hint=11000 tmp tmp2 +$ZSTD -14 -f -o tmp1_.zst -D tmpDict --size-hint=11000 tmp +$ZSTD -14 -f -o tmp2_.zst -D tmpDict --size-hint=11000 tmp2 +cmp tmp.zst tmp1_.zst || die "first file's output differs" +cmp tmp2.zst tmp2_.zst || die "second file's output differs" +println "test : incorrect hinted stream sizes" +cat tmp | $ZSTD -14 -f --size-hint=11050 | $ZSTD -t # slightly too high +cat tmp | $ZSTD -14 -f --size-hint=10950 | $ZSTD -t # slightly too low +cat tmp | $ZSTD -14 -f --size-hint=22000 | $ZSTD -t # considerably too high +cat tmp | $ZSTD -14 -f --size-hint=5500 | $ZSTD -t # considerably too low + + println "\n===> dictionary tests " println "- test with raw dict (content only) " @@ -480,6 +531,15 @@ $ZSTD -o tmpDict --train "$TESTDIR"/*.c "$PRGDIR"/*.c test -f tmpDict $ZSTD --train "$TESTDIR"/*.c "$PRGDIR"/*.c test -f dictionary +println "- Test dictionary training fails" +echo "000000000000000000000000000000000" > tmpz +$ZSTD --train tmpz tmpz tmpz tmpz tmpz tmpz tmpz tmpz tmpz && die "Dictionary training should fail : source is all zeros" +if [ -n "$hasMT" ] +then + $ZSTD --train -T0 tmpz tmpz tmpz tmpz tmpz tmpz tmpz tmpz tmpz && die "Dictionary training should fail : source is all zeros" + println "- Create dictionary with multithreading enabled" + $ZSTD --train -T0 "$TESTDIR"/*.c "$PRGDIR"/*.c -o tmpDict +fi rm tmp* dictionary @@ -583,8 +643,8 @@ $ZSTD -t tmpSplit.* && die "bad file not detected !" println "\n===> golden files tests " -$ZSTD -t -r "$TESTDIR/files" -$ZSTD -c -r "$TESTDIR/files" | $ZSTD -t +$ZSTD -t -r "$TESTDIR/golden-compression" +$ZSTD -c -r "$TESTDIR/golden-compression" | $ZSTD -t println "\n===> benchmark mode tests " diff --git a/tests/regression/results.csv b/tests/regression/results.csv index e66787fc02..ba1295c5c4 100644 --- a/tests/regression/results.csv +++ b/tests/regression/results.csv @@ -33,7 +33,7 @@ silesia, level 19, compress silesia, long distance mode, compress cctx, 4849491 silesia, multithreaded, compress cctx, 4849491 silesia, multithreaded long distance mode, compress cctx, 4849491 -silesia, small window log, compress cctx, 7112784 +silesia, small window log, compress cctx, 7112472 silesia, small hash log, compress cctx, 6554898 silesia, small chain log, compress cctx, 4931093 silesia, explicit params, compress cctx, 4794609 @@ -97,7 +97,7 @@ silesia, level 19, zstdcli, silesia, long distance mode, zstdcli, 4839698 silesia, multithreaded, zstdcli, 4849539 silesia, multithreaded long distance mode, zstdcli, 4839698 -silesia, small window log, zstdcli, 7123892 +silesia, small window log, zstdcli, 7123580 silesia, small hash log, zstdcli, 6554946 silesia, small chain log, zstdcli, 4931141 silesia, explicit params, zstdcli, 4797048 @@ -123,7 +123,7 @@ silesia.tar, no source size, zstdcli, silesia.tar, long distance mode, zstdcli, 4853140 silesia.tar, multithreaded, zstdcli, 4861462 silesia.tar, multithreaded long distance mode, zstdcli, 4853140 -silesia.tar, small window log, zstdcli, 7127964 +silesia.tar, small window log, zstdcli, 7127589 silesia.tar, small hash log, zstdcli, 6587841 silesia.tar, small chain log, zstdcli, 4943269 silesia.tar, explicit params, zstdcli, 4822318 @@ -188,7 +188,7 @@ silesia, no source size, advanced silesia, long distance mode, advanced one pass, 4839650 silesia, multithreaded, advanced one pass, 4849491 silesia, multithreaded long distance mode, advanced one pass, 4839650 -silesia, small window log, advanced one pass, 7123844 +silesia, small window log, advanced one pass, 7123532 silesia, small hash log, advanced one pass, 6554898 silesia, small chain log, advanced one pass, 4931093 silesia, explicit params, advanced one pass, 4797035 @@ -214,7 +214,7 @@ silesia.tar, no source size, advanced silesia.tar, long distance mode, advanced one pass, 4848046 silesia.tar, multithreaded, advanced one pass, 4860726 silesia.tar, multithreaded long distance mode, advanced one pass, 4847343 -silesia.tar, small window log, advanced one pass, 7127924 +silesia.tar, small window log, advanced one pass, 7127549 silesia.tar, small hash log, advanced one pass, 6587833 silesia.tar, small chain log, advanced one pass, 4943266 silesia.tar, explicit params, advanced one pass, 4808543 @@ -280,7 +280,7 @@ silesia, no source size, advanced silesia, long distance mode, advanced one pass small out, 4839650 silesia, multithreaded, advanced one pass small out, 4849491 silesia, multithreaded long distance mode, advanced one pass small out, 4839650 -silesia, small window log, advanced one pass small out, 7123844 +silesia, small window log, advanced one pass small out, 7123532 silesia, small hash log, advanced one pass small out, 6554898 silesia, small chain log, advanced one pass small out, 4931093 silesia, explicit params, advanced one pass small out, 4797035 @@ -306,7 +306,7 @@ silesia.tar, no source size, advanced silesia.tar, long distance mode, advanced one pass small out, 4848046 silesia.tar, multithreaded, advanced one pass small out, 4860726 silesia.tar, multithreaded long distance mode, advanced one pass small out, 4847343 -silesia.tar, small window log, advanced one pass small out, 7127924 +silesia.tar, small window log, advanced one pass small out, 7127549 silesia.tar, small hash log, advanced one pass small out, 6587833 silesia.tar, small chain log, advanced one pass small out, 4943266 silesia.tar, explicit params, advanced one pass small out, 4808543 @@ -372,7 +372,7 @@ silesia, no source size, advanced silesia, long distance mode, advanced streaming, 4839650 silesia, multithreaded, advanced streaming, 4849491 silesia, multithreaded long distance mode, advanced streaming, 4839650 -silesia, small window log, advanced streaming, 7123846 +silesia, small window log, advanced streaming, 7123534 silesia, small hash log, advanced streaming, 6554898 silesia, small chain log, advanced streaming, 4931093 silesia, explicit params, advanced streaming, 4797048 @@ -398,7 +398,7 @@ silesia.tar, no source size, advanced silesia.tar, long distance mode, advanced streaming, 4848046 silesia.tar, multithreaded, advanced streaming, 4861458 silesia.tar, multithreaded long distance mode, advanced streaming, 4853136 -silesia.tar, small window log, advanced streaming, 7127924 +silesia.tar, small window log, advanced streaming, 7127549 silesia.tar, small hash log, advanced streaming, 6587834 silesia.tar, small chain log, advanced streaming, 4943271 silesia.tar, explicit params, advanced streaming, 4808570 diff --git a/tests/zbufftest.c b/tests/zbufftest.c index 8cbde3f4f3..9441482626 100644 --- a/tests/zbufftest.c +++ b/tests/zbufftest.c @@ -184,7 +184,7 @@ static int basicUnitTests(U32 seed, double compressibility, ZSTD_customMem custo DISPLAYLEVEL(4, "test%3i : check decompressed result : ", testNb++); { size_t i; for (i=0; i